package org.encog.ml.world.learning.mdp;

import java.util.Iterator;
import org.encog.ml.world.Action;
import org.encog.ml.world.State;
import org.encog.ml.world.SuccessorState;
import org.encog.ml.world.World;

/* loaded from: classes.dex */
/**
 * Value-iteration style sweeps over the states of a {@link World}.
 *
 * <p>Each sweep rewrites slot 0 of every state's policy-value array from the
 * state's own reward plus the best discounted, probability-weighted value of
 * its successor states. Values are written in place, so a state processed
 * later in a sweep reads whatever its successors currently hold, including
 * values refreshed earlier in the same sweep.
 */
public class ValueIteration extends MarkovDecisionProcess {
    /** Multiplier applied to the expected successor value of every action. */
    private final double discountFactor;

    /**
     * Creates a value-iteration learner.
     *
     * @param world          the world passed on to the superclass
     * @param discountFactor factor by which the expected value of successor
     *                       states is scaled before it is added to the reward
     */
    public ValueIteration(World world, double discountFactor) {
        super(world);
        this.discountFactor = discountFactor;
    }

    /**
     * Recomputes slot 0 of {@code state}'s policy-value array.
     *
     * <ul>
     *   <li>Goal state: the value is the state's reward.</li>
     *   <li>Otherwise: the value is the reward plus the maximum, over all
     *       actions of the world, of
     *       {@code discountFactor * sum(probability * successorValue)}.</li>
     *   <li>If the world offers no actions, the future term is 0, so the
     *       value is the reward. (Without this guard the running maximum
     *       would stay at negative infinity and leak into neighbouring
     *       states as {@code -Infinity} or {@code NaN}.)</li>
     * </ul>
     *
     * @param state the state whose value is updated in place
     */
    public void calculateValue(State state) {
        if (getWorld().isGoalState(state)) {
            state.getPolicyValue()[0] = state.getReward();
            return;
        }

        boolean sawAction = false;
        double bestDiscounted = Double.NEGATIVE_INFINITY;
        // Iterate as Object and cast: works for raw and parameterised collections alike.
        for (Object candidate : getWorld().getActions()) {
            sawAction = true;
            double expected = 0.0d;
            for (SuccessorState successor
                    : getWorld().getProbability().determineSuccessorStates(state, (Action) candidate)) {
                expected += successor.getProbability() * successor.getState().getPolicyValue()[0];
            }
            bestDiscounted = Math.max(bestDiscounted, expected * this.discountFactor);
        }

        // An action without successors contributes 0; "no actions at all" is treated the same way.
        double future = sawAction ? bestDiscounted : 0.0d;
        state.getPolicyValue()[0] = state.getReward() + future;
    }

    /**
     * Performs one sweep: calls {@link #calculateValue(State)} once for every
     * state of the world, in the iteration order of {@code getStates()}.
     */
    public void iteration() {
        for (Object candidate : getWorld().getStates()) {
            calculateValue((State) candidate);
        }
    }
}
