-- Q-learning example. The states and rewards are taken from this article:
-- https://medium.freecodecamp.org/diving-deeper-into-reinforcement-learning-with-q-learning-c18d0db58efe
-- Transition rules, variant 1: diagonal moves are allowed.
-- Each rule maps a state to the superposition of states reachable from it in one step.
-- The adjacency is consistent with a 2x3 grid laid out as:
--   A B C
--   D E F
-- E only steps to itself; F lists itself alongside its neighbours.
-- NOTE(review): `step` is defined a second time for every state in the "can not take diagonals"
-- section below. Presumably those later rules replace the ones here when the file is loaded,
-- leaving this variant inactive -- confirm which variant is meant to be in effect.
step |A> => |B> + |E> + |D>
step |B> => |A> + |D> + |E> + |F> + |C>
step |C> => |B> + |E> + |F>
step |D> => |A> + |B> + |E>
step |E> => |E>
step |F> => |E> + |B> + |C> + |F>
-- Transition rules, variant 2: diagonal moves are not allowed.
-- Each rule maps a state to the superposition of states reachable from it in one step.
-- The adjacency is consistent with a 2x3 grid laid out as:
--   A B C
--   D E F
-- E only steps to itself; F lists itself alongside its neighbours.
-- NOTE(review): these rules redefine `step` for the same states as the "can take diagonals"
-- section above. Presumably, being later in the file, these are the rules actually used by
-- q-learn -- confirm against the interpreter's learn-rule semantics.
-- NOTE(review): F is passed to q-learn as a terminal state but, unlike E, still has outgoing
-- transitions here -- confirm this asymmetry is intended.
step |A> => |B> + |D>
step |B> => |A> + |E> + |C>
step |C> => |B> + |F>
step |D> => |A> + |E>
step |E> => |E>
step |F> => |E> + |C> + |F>
-- Reward associated with each state, stored as a ket whose label is the numeric value.
-- E carries the only negative reward (-10) and F the largest (10); A and C are 0.
-- NOTE(review): presumably q-learn reads these via the `reward` operator when a state
-- is entered -- confirm against the q-learn implementation.
reward |A> => |0>
reward |B> => |1>
reward |C> => |0>
reward |D> => |2>
reward |E> => |-10>
reward |F> => |10>
-- Run the learner. Signature:
--   q-learn[iterations, alpha, gamma, op] set-of-terminal-states
-- Here: 1000 iterations, alpha = 1, gamma = 0.8, transitions taken from the `step` operator,
-- and E and F as the terminal states.
-- (alpha and gamma are presumably the learning rate and discount factor of standard Q-learning.)
-- The result is assigned to |null>, i.e. the statement is run for its effects only.
|null> => q-learn[1000, 1, 0.8, step] (|E> + |F>)
-- Display the results as a table with columns `transition` and `norm-Q`:
-- one row per ket that has a `norm-Q` rule (rel-kets[norm-Q]), in ket-sort order.
-- NOTE(review): `norm-Q` is not defined in this file; presumably it is learned by the
-- q-learn call above -- confirm.
|null> => table[transition, norm-Q] ket-sort rel-kets[norm-Q] |>
-- Show the walk sequences.
-- `walk` is defined for every ket (|*>) as a stored rule (#=>), so `q-walk |_self>` is
-- evaluated when `walk` is invoked on a ket rather than when this line is loaded.
-- NOTE(review): `q-walk` is not defined in this file; presumably it follows the learned
-- Q values from the given start state -- confirm.
walk |*> #=> q-walk |_self>
-- Tabulate the walk for every ket that has a `step` rule, i.e. states A to F.
|null> => table[start, walk] rel-kets[step] |>