# vacuum

Support code for defining the discrete states and discrete actions of a vacuum cleaning robot.

## State

```python
from fastcore.test import test_eq, test_close  # test helpers used throughout

# rooms is exported by this module
test_eq(rooms, ['Living Room', 'Kitchen', 'Office', 'Hallway', 'Dining Room'])
```

## Actions

```python
# action_space is exported by this module
test_eq(action_space, ['L', 'R', 'U', 'D'])
```
```python
import gtsam
import numpy as np
from gtbook.discrete import Variables  # assumed import path within the gtbook package
from gtbook.display import pretty      # assumed import path within the gtbook package

VARIABLES = Variables()
X = VARIABLES.discrete_series("X", [1, 2, 3], rooms)  # states for times 1, 2 and 3
A = VARIABLES.discrete_series("A", [1, 2], action_space)  # actions for times 1 and 2
motion_model = gtsam.DiscreteConditional(X[2], [X[1], A[1]], action_spec)
pretty(motion_model)
```
P(X2|X1,A1):

| X1 | A1 | 0 | 1 | 2 | 3 | 4 |
|----|----|-----|-----|-----|-----|-----|
| 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| 0 | 1 | 0.2 | 0.8 | 0 | 0 | 0 |
| 0 | 2 | 1 | 0 | 0 | 0 | 0 |
| 0 | 3 | 0.2 | 0 | 0 | 0.8 | 0 |
| 1 | 0 | 0.8 | 0.2 | 0 | 0 | 0 |
| 1 | 1 | 0 | 1 | 0 | 0 | 0 |
| 1 | 2 | 0 | 1 | 0 | 0 | 0 |
| 1 | 3 | 0 | 0.2 | 0 | 0 | 0.8 |
| 2 | 0 | 0 | 0 | 1 | 0 | 0 |
| 2 | 1 | 0 | 0 | 0.2 | 0.8 | 0 |
| 2 | 2 | 0 | 0 | 1 | 0 | 0 |
| 2 | 3 | 0 | 0 | 1 | 0 | 0 |
| 3 | 0 | 0 | 0 | 0.8 | 0.2 | 0 |
| 3 | 1 | 0 | 0 | 0 | 0.2 | 0.8 |
| 3 | 2 | 0.8 | 0 | 0 | 0.2 | 0 |
| 3 | 3 | 0 | 0 | 0 | 1 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0.8 | 0.2 |
| 4 | 1 | 0 | 0 | 0 | 0 | 1 |
| 4 | 2 | 0 | 0.8 | 0 | 0 | 0.2 |
| 4 | 3 | 0 | 0 | 0 | 0 | 1 |
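To read one row of this table: starting in the Living Room (state 0) and applying action 'R' (action 1), the robot ends up in the Kitchen (state 1) with probability 0.8 and stays put with probability 0.2:

$$P(X_2 = 1 \mid X_1 = 0, A_1 = \text{R}) = 0.8.$$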
## Sensing

```python
test_eq(sensor_spec, '1/1/8 1/1/8 2/7/1 8/1/1 1/8/1')
```
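The spec string holds five groups of three relative weights, one group per room, so it can define a three-outcome sensor model P(Z1|X1) the same way `action_spec` defined the motion model; GTSAM normalizes each group such as `1/1/8` into a probability row. A minimal sketch with placeholder outcome names (the actual sensor labels are introduced in the book chapter):

```python
sensor_values = ["level0", "level1", "level2"]  # placeholder names, not from the module
Z = VARIABLES.discrete_series("Z", [1], sensor_values)  # sensor reading at time 1
sensor_model = gtsam.DiscreteConditional(Z[1], [X[1]], sensor_spec)
pretty(sensor_model)
```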
## RL

### calculate_value_function

```python
calculate_value_function(R: np.array, T: np.array, pi: np.array, gamma=0.9)
```

Calculate the value function for a given policy.

|  | Type | Default | Details |
|---|---|---|---|
| R | array |  | reward function, as a tensor |
| T | array |  | transition probabilities, as a tensor |
| pi | array |  | policy, as a vector |
| gamma | float | 0.9 | discount factor |
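For a fixed policy $\pi$, the returned value function satisfies the Bellman equation for policy evaluation, with rewards indexed as $R(x, a, x')$ to match the tensors used below:

$$V^\pi(x) = \sum_{x'} T\bigl(x, \pi(x), x'\bigr)\,\bigl[R\bigl(x, \pi(x), x'\bigr) + \gamma\, V^\pi(x')\bigr].$$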
### calculate_value_system

```python
calculate_value_system(R: np.array, T: np.array, pi: np.array, gamma=0.9)
```

Calculate the A, b matrices of the linear system for value computation.

|  | Type | Default | Details |
|---|---|---|---|
| R | array |  | reward function, as a tensor |
| T | array |  | transition probabilities, as a tensor |
| pi | array |  | policy, as a vector |
| gamma | float | 0.9 | discount factor |
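Written out, the linear system is $(I - \gamma T^\pi)\,V^\pi = b$, where $T^\pi_{x,x'} = T(x, \pi(x), x')$ and $b_x = \sum_{x'} T(x, \pi(x), x')\,R(x, \pi(x), x')$. The returned pair is $A = I - \gamma T^\pi$ and $b$, which the tests below confirm numerically.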
```python
# From section 3.5: build the reward tensor R[x, a, y] and transition
# tensor T[x, a, y] by enumerating the motion model's conditional.
conditional = gtsam.DiscreteConditional((2, 5), [(0, 5), (1, 4)], action_spec)
R = np.empty((5, 4, 5), float)
T = np.empty((5, 4, 5), float)
for assignment, value in conditional.enumerate():
    x, a, y = assignment[0], assignment[1], assignment[2]
    R[x, a, y] = 10.0 if y == rooms.index("Living Room") else 0.0
    T[x, a, y] = value

test_eq(R[2, 1], [10, 0, 0, 0, 0])
```
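As a quick extra sanity check (not one of the module's own tests), every (state, action) slice of `T` should be a probability distribution over next states:

```python
# Each row of the transition tensor must sum to one over next states y.
test_close(T.sum(axis=-1), np.ones((5, 4)))
```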
Calculating the value function of a given policy pi:

```python
reasonable_policy = [2, 1, 0, 2, 1]
AA, b = calculate_value_system(R, T, reasonable_policy)
test_close(
    AA,
    np.array(
        [
            [0.1, 0, 0, 0, 0],
            [0, 0.1, 0, 0, 0],
            [0, 0, 0.1, 0, 0],
            [-0.72, 0, 0, 0.82, 0],
            [0, 0, 0, 0, 0.1],
        ]
    ),
)
test_close(b, np.array([10, 0, 0, 8, 0]))
```
```python
value_for_pi = calculate_value_function(R, T, reasonable_policy)
test_close(value_for_pi, np.array([100, 0, 0, 97.56097561, 0]))
```
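The same value function can be cross-checked without solving a linear system, by iterating the Bellman backup until it reaches its fixed point. This is a hypothetical verification sketch, not part of the module:

```python
# Iterative policy evaluation: V(x) <- sum_y T(x,a,y) * (R(x,a,y) + 0.9 * V(y)).
V = np.zeros(5)
for _ in range(200):  # with gamma = 0.9, 200 sweeps shrink the error well below 1e-5
    V = np.array([T[x, a] @ (R[x, a] + 0.9 * V) for x, a in enumerate(reasonable_policy)])
test_close(V, value_for_pi)
```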
```python
optimal_policy = [0, 0, 1, 2, 2]
value_for_pi = calculate_value_function(R, T, optimal_policy)
test_close(
    value_for_pi,
    np.array([100, 97.56097561, 85.66329566, 97.56097561, 85.66329566]),
)
```
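Since an optimal policy is greedy with respect to its own value function, the action values derived from `value_for_pi` should attain that same value function when maximized over actions. Another hypothetical check, not from the module:

```python
# Q(x, a) = sum_y T(x, a, y) * (R(x, a, y) + 0.9 * V(y)); at an optimum, max_a Q(x, a) = V(x).
Q = np.einsum('xay,xay->xa', T, R + 0.9 * value_for_pi)
test_close(Q.max(axis=1), value_for_pi)
```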