I have created a custom environment for reinforcement learning with tf-agents
(not needed to answer this question), which works fine if I instantiate one thread by setting num_parallel_environments
to 1
, but throws infrequent and seemingly random errors like an IndexError
inside random.shuffle()
, when I increase num_parallel_environments
to 50
. Here's the code:
inside train.py
tf_env = tf_py_environment.TFPyEnvironment(
batched_py_environment.BatchedPyEnvironment(
[environment.CardGameEnv()] * num_parallel_environments))
inside my environment, this is run in threads
self.cardStack = getFullDeck()
random.shuffle(self.cardStack)
this is a normal function, imported in every thread class
def getFullDeck():
deck = []
for rank in Ranks:
for suit in Suits:
deck.append(Card(rank, suit))
return deck
And here's one of the possible errors:
Traceback (most recent call last):
File "e:\Users\tmp\.vscode\extensions\ms-python.python-2019.1.0\pythonFiles\ptvsd_launcher.py", line 45, in <module>
main(ptvsdArgs)
File "e:\Users\tmp\.vscode\extensions\ms-python.python-2019.1.0\pythonFiles\lib\python\ptvsd\__main__.py", line 348, in main
run()
File "e:\Users\tmp\.vscode\extensions\ms-python.python-2019.1.0\pythonFiles\lib\python\ptvsd\__main__.py", line 253, in run_file
runpy.run_path(target, run_name='__main__')
File "C:\Python37\lib\runpy.py", line 263, in run_path
pkg_name=pkg_name, script_name=fname)
File "C:\Python37\lib\runpy.py", line 96, in _run_module_code
mod_name, mod_spec, pkg_name, script_name)
File "C:\Python37\lib\runpy.py", line 85, in _run_code
exec(code, run_globals)
File "e:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\train_v2.py", line 320, in <module>
app.run(main)
File "C:\Python37\lib\site-packages\absl\app.py", line 300, in run
_run_main(main, args)
File "C:\Python37\lib\site-packages\absl\app.py", line 251, in _run_main
sys.exit(main(argv))
File "e:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\train_v2.py", line 315, in main
num_eval_episodes=FLAGS.num_eval_episodes)
File "E:\Users\tmp\AppData\Roaming\Python\Python37\site-packages\gin\config.py", line 1032, in wrapper
utils.augment_exception_message_and_reraise(e, err_str)
File "E:\Users\tmp\AppData\Roaming\Python\Python37\site-packages\gin\utils.py", line 49, in augment_exception_message_and_reraise
six.raise_from(proxy.with_traceback(exception.__traceback__), None)
File "<string>", line 3, in raise_from
File "E:\Users\tmp\AppData\Roaming\Python\Python37\site-packages\gin\config.py", line 1009, in wrapper
return fn(*new_args, **new_kwargs)
File "e:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\train_v2.py", line 251, in train_eval
collect_driver.run()
File "C:\Python37\lib\site-packages\tf_agents\drivers\dynamic_episode_driver.py", line 149, in run
maximum_iterations=maximum_iterations)
File "C:\Python37\lib\site-packages\tf_agents\utils\common.py", line 111, in with_check_resource_vars
return fn(*fn_args, **fn_kwargs)
File "C:\Python37\lib\site-packages\tf_agents\drivers\dynamic_episode_driver.py", line 180, in _run
name='driver_loop'
File "C:\Python37\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2457, in while_loop_v2
return_same_structure=True)
File "C:\Python37\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2689, in while_loop
loop_vars = body(*loop_vars)
File "C:\Python37\lib\site-packages\tf_agents\drivers\dynamic_episode_driver.py", line 103, in loop_body
next_time_step = self.env.step(action_step.action)
File "C:\Python37\lib\site-packages\tf_agents\environments\tf_environment.py", line 232, in step
return self._step(action)
File "C:\Python37\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 232, in graph_wrapper
return func(*args, **kwargs)
File "C:\Python37\lib\site-packages\tf_agents\environments\tf_py_environment.py", line 218, in _step
_step_py, flat_actions, self._time_step_dtypes, name='step_py_func')
File "C:\Python37\lib\site-packages\tensorflow\python\ops\script_ops.py", line 488, in numpy_function
return py_func_common(func, inp, Tout, stateful=True, name=name)
File "C:\Python37\lib\site-packages\tensorflow\python\ops\script_ops.py", line 452, in py_func_common
result = func(*[x.numpy() for x in inp])
File "C:\Python37\lib\site-packages\tf_agents\environments\tf_py_environment.py", line 203, in _step_py
self._time_step = self._env.step(packed)
File "C:\Python37\lib\site-packages\tf_agents\environments\py_environment.py", line 174, in step
self._current_time_step = self._step(action)
File "C:\Python37\lib\site-packages\tf_agents\environments\batched_py_environment.py", line 140, in _step
zip(self._envs, unstacked_actions))
File "C:\Python37\lib\multiprocessing\pool.py", line 268, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Python37\lib\multiprocessing\pool.py", line 657, in get
raise self._value
File "C:\Python37\lib\multiprocessing\pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "C:\Python37\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "C:\Python37\lib\site-packages\tf_agents\environments\batched_py_environment.py", line 139, in <lambda>
lambda env_action: env_action[0].step(env_action[1]),
File "C:\Python37\lib\site-packages\tf_agents\environments\py_environment.py", line 174, in step
self._current_time_step = self._step(action)
File "e:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\environment.py", line 116, in _step
canRoundContinue = self._table.runUntilChoice(action)
File "e:\Users\tmp\Documents\Programming\Neural Nets\Poker_AI\table.py", line 326, in runUntilChoice
random.shuffle(self.cardStack)
File "C:\Python37\lib\random.py", line 278, in shuffle
x[i], x[j] = x[j], x[i]
IndexError: list index out of range
In call to configurable 'train_eval' (<function train_eval at 0x000002722713A158>)
I suspect this error occurs because the threads are changing the array simultaneously, but I do not see why this would be the case:
Everything happens inside a class instance and the array getFullDeck()
is returning is recreated every time the function is called, so there should be no way multiple threads have access to the same reference, right?