我在使用 StyleGAN2 代码库训练我的第一个网络时遇到了很多问题。由于我只有 11GB 的显存(VRAM)可用,我把每个 GPU 的批量大小(minibatch)调小为 2;之后训练可以正常运行 1–4 个 tick,然后就会报出下面这些"超出范围"(OutOfRangeError)错误。我的系统配置和完整的报错信息如下:
锐龙 3950x
RTX 2080ti
32GB DDR4 RAM
Windows 10
Tensorflow-gpu 1.4
Building TensorFlow graph...
Initializing logs...
Training for 25000 kimg...
tick 0 kimg 10065.1 lod 0.00 minibatch 32 time 1m 17s sec/tick 77.4 sec/kimg 605.07 maintenance 0.0 gpumem 8.6
Traceback (most recent call last):
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1356, in _do_call
return fn(*args)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1429, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.OutOfRangeError: 2 root error(s) found.
(0) Out of range: End of sequence
[[{{node GPU0/DataFetch/IteratorGetNext}}]]
[[GPU0/DataFetch/UpscaleLOD/Cast/_5109]]
(1) Out of range: End of sequence
[[{{node GPU0/DataFetch/IteratorGetNext}}]]
0 successful operations.
0 derived errors ignored.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "run_training.py", line 202, in <module>
main()
File "run_training.py", line 197, in main
run(**vars(args))
File "run_training.py", line 128, in run
dnnlib.submit_run(**kwargs)
File "C:\ML\stylegan2dv\dnnlib\submission\submit.py", line 343, in submit_run
return farm.submit(submit_config, host_run_dir)
File "C:\ML\stylegan2dv\dnnlib\submission\internal\local.py", line 22, in submit
return run_wrapper(submit_config)
File "C:\ML\stylegan2dv\dnnlib\submission\submit.py", line 280, in run_wrapper
run_func_obj(**submit_config.run_func_kwargs)
File "C:\ML\stylegan2dv\training\training_loop.py", line 308, in training_loop
tflib.run(data_fetch_op, feed_dict)
File "C:\ML\stylegan2dv\dnnlib\tflib\tfutil.py", line 31, in run
return tf.get_default_session().run(*args, **kwargs)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 950, in run
run_metadata_ptr)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1173, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1350, in _do_run
run_metadata)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\client\session.py", line 1370, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.OutOfRangeError: 2 root error(s) found.
(0) Out of range: End of sequence
[[node GPU0/DataFetch/IteratorGetNext (defined at C:\ML\stylegan2dv\training\dataset.py:136) ]]
[[GPU0/DataFetch/UpscaleLOD/Cast/_5109]]
(1) Out of range: End of sequence
[[node GPU0/DataFetch/IteratorGetNext (defined at C:\ML\stylegan2dv\training\dataset.py:136) ]]
0 successful operations.
0 derived errors ignored.
Errors may have originated from an input operation.
Input Source operations connected to node GPU0/DataFetch/IteratorGetNext:
Dataset/IteratorV2 (defined at C:\ML\stylegan2dv\training\dataset.py:119)
Input Source operations connected to node GPU0/DataFetch/IteratorGetNext:
Dataset/IteratorV2 (defined at C:\ML\stylegan2dv\training\dataset.py:119)
Original stack trace for 'GPU0/DataFetch/IteratorGetNext':
File "run_training.py", line 202, in <module>
main()
File "run_training.py", line 197, in main
run(**vars(args))
File "run_training.py", line 128, in run
dnnlib.submit_run(**kwargs)
File "C:\ML\stylegan2dv\dnnlib\submission\submit.py", line 343, in submit_run
return farm.submit(submit_config, host_run_dir)
File "C:\ML\stylegan2dv\dnnlib\submission\internal\local.py", line 22, in submit
return run_wrapper(submit_config)
File "C:\ML\stylegan2dv\dnnlib\submission\submit.py", line 280, in run_wrapper
run_func_obj(**submit_config.run_func_kwargs)
File "C:\ML\stylegan2dv\training\training_loop.py", line 208, in training_loop
reals_write, labels_write = training_set.get_minibatch_tf()
File "C:\ML\stylegan2dv\training\dataset.py", line 136, in get_minibatch_tf
return self._tf_iterator.get_next()
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py", line 426, in get_next
output_shapes=self._structure._flat_shapes, name=name)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\gen_dataset_ops.py", line 1974, in iterator_get_next
output_shapes=output_shapes, name=name)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
op_def=op_def)
File "C:\Users\TE 1\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
self._traceback = tf_stack.extract_stack()