package astra.learn.library;

import astra.core.Rule;
import astra.explanation.ExplanationEngine;
import astra.formula.Formula;
import astra.learn.library.RLModel.BeliefUpdateScoreTuple;
import astra.learn.library.RLModel.RLDataModelSARSA;
import java.util.ArrayList;

/* loaded from: input_file:astra/learn/library/SARSA.class */
/**
 * {@link QLearning} variant whose data model is an {@link RLDataModelSARSA}.
 *
 * <p>Each call to {@link #applyLearningFunction()} feeds the buffered actions, state inputs
 * and rewards to the model, picks the next belief update (greedy or random), lets the model
 * update itself, and installs a {@link Rule} for the chosen belief update on the agent.
 */
public class SARSA extends QLearning {

    /**
     * Installs the SARSA data model and registers the total-reward and
     * total-discounted-reward accumulators in the metrics map.
     */
    public SARSA() {
        this.model = new RLDataModelSARSA();
        this.metrics.put(QLearning.TOTAL_REWARD, this.totalUndiscountedReward);
        this.metrics.put(QLearning.TOTAL_DISCOUNTED_REWARD, this.totalDiscountedReward);
    }

    /**
     * Runs one learning step.
     *
     * <p>Does nothing when learning is paused or complete. When no actions are buffered, or the
     * model yields no next action, the step buffers are cleared and no rule is added. Otherwise
     * a rule built from the chosen belief update is added to (or replaces one on) the agent and,
     * if explanations are enabled, an explanation unit carrying the chosen score is recorded.
     *
     * @throws Exception declared by the overridden method; propagated from the model / agent calls
     */
    @Override // astra.learn.library.QLearning, astra.learn.library.Algorithm
    public void applyLearningFunction() throws Exception {
        if (this.pause || this.learningComplete) {
            if (this.debug) {
                System.out.println("Learning is paused / complete");
            }
            return;
        }

        if (this.actions.isEmpty()) {
            if (this.debug) {
                System.out.println("No actions.");
            }
            resetSarsaStepBuffers();
            return;
        }

        // The random draw is taken before any model call, exactly once per step.
        double draw = Math.random();
        if (this.debug) {
            System.out.println("Getting next action for current state");
            System.out.println("epsilon: " + this.epsilon);
            System.out.println("evaluate: " + this.evaluate);
            System.out.println("model: " + this.model);
        }

        this.model.parseActions(this.actions);
        this.model.parseState(this.inputs);
        this.model.parseReward(this.reward);

        // Greedy choice when the draw exceeds epsilon or when evaluating; random choice otherwise.
        boolean greedy = draw > this.epsilon || this.evaluate;
        BeliefUpdateScoreTuple chosen;
        if (greedy) {
            chosen = this.model.nextActionHighestValue();
        } else {
            chosen = this.model.nextActionRandom();
        }

        // The model is updated after the next action has been selected and before the null check.
        this.model.update(this.inputs, this.actions, this.reward, this.gamma, this.alpha, this.metrics, this.evaluate);

        if (chosen == null) {
            if (this.debug) {
                System.out.println("No rule to add ");
            }
            resetSarsaStepBuffers();
            return;
        }

        // NOTE(review): the context is generated before the buffers are cleared; generateContext
        // is defined elsewhere and may read them, so this ordering is kept deliberately.
        Formula context = generateContext(chosen.getBeliefUpdate());
        if (this.debug) {
            System.out.println("BU Context: " + context.toString());
        }
        resetSarsaStepBuffers();

        Rule learnedRule = new Rule(this.event, context, chosen.getBeliefUpdate());
        if (this.debug) {
            System.out.println("Adding rule: " + learnedRule.toString());
        }
        this.agent.addOrReplaceRule(learnedRule);

        if (this.explain) {
            ExplanationEngine engine = this.agent.explanations();
            engine.addExplanations(engine.unitBuilder().build(learnedRule, this.learningProcessNamespace, chosen.getScore()));
        }
    }

    /** Replaces the inputs, actions and reward buffers with fresh empty lists. */
    private void resetSarsaStepBuffers() {
        this.inputs = new ArrayList<>();
        this.actions = new ArrayList<>();
        this.reward = new ArrayList<>();
    }
}
