目录
pbar PermissionError: [WinError 5] 拒绝访问
pbar 用法
from tqdm import tqdm
import time
# Build the sequence to iterate over.
my_list = range(1000)
# Wrap the sequence in a tqdm progress bar.
pbar = tqdm(my_list)
for i in pbar:
    # Simulate some work.
    time.sleep(0.1)
    # Update the description text of the progress bar.
    pbar.set_description(f"Processing {i}")
    # Or attach extra information after the progress bar.
    pbar.set_postfix({"Current item": i})
pbar用法2
from tqdm import tqdm
import time
my_list = range(100)
pbar = tqdm(my_list)
# Placeholder counters that are rendered into the description text.
ok_count = 0
err_count = 10
score_small = -1
for i in pbar:
    # Build the status line from the counters and the current item.
    show_txt = f'ok:{ok_count} err:{err_count} {i} small:{score_small} '
    time.sleep(0.1)
    pbar.set_description(show_txt)
pbar多次使用,前一次用完需要close一下
from tqdm import tqdm
import time
# Build the sequence to iterate over.
my_list = range(10663)
# Create a progress bar that is advanced manually; only the total is given.
pbar = tqdm(total=len(my_list))
# Initialise the counters displayed next to the bar.
ok = err = small = 0
for i in my_list:
    # Simulate some work.
    time.sleep(0.01)
    # Update the counters.
    ok += 1
    if ok % 50 == 0:
        err += 1
    if ok % 100 == 0:
        small += 1
    # Show the counters after the bar, then advance the bar.
    pbar.set_postfix({'ok': ok, 'err': err, 'small': small})
    pbar.update()
# Close the progress bar once the loop is done, before using another one.
pbar.close()
pbar PermissionError: [WinError 5] 拒绝访问
Error in atexit._run_exitfuncs:
File "D:/yolov5/jacke121-yolov5-v3.0_lbg/train_new.py", line 237, in train
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
File "C:\ProgramData\Anaconda3\lib\site-packages\tqdm\std.py", line 1081, in __iter__
for obj in iterable:
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 363, in __next__
data = self._next_data()
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 963, in _next_data
self._shutdown_workers()
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 1081, in _shutdown_workers
w.terminate()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\process.py", line 124, in terminate
self._popen.terminate()
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 119, in terminate
_winapi.TerminateProcess(int(self._handle), TERMINATE)
PermissionError: [WinError 5] 拒绝访问。
Error in atexit._run_exitfuncs:
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\multiprocessing\popen_spawn_win32.py", line 119, in terminate
_winapi.TerminateProcess(int(self._handle), TERMINATE)
原因还没找到
有一个类似的错误:
http://www.voidcn.com/article/p-nyltbcos-bvc.html
# Ported to Python 3 syntax (queue.Empty, print function); the report this
# snippet comes from was made on Python 2.7.
import multiprocessing
from queue import Empty


def wrapper(queue, lst):
    """Child-process target: append 1 to *lst* and send the list back on *queue*."""
    lst.append(1)
    queue.put(lst)
    queue.close()


def timeout(timeout, lst):
    """Run ``wrapper`` in a child process and wait for its result.

    Waits at most *timeout* seconds for the child to put the list on the
    queue. Returns the received list, or ``None`` if nothing arrived in time.
    The child process is terminated in every case; this unguarded
    ``proc.terminate()`` is the call that fails with "Access is denied".
    """
    q = multiprocessing.Queue(1)
    proc = multiprocessing.Process(target=wrapper, args=(q, lst))
    proc.start()
    try:
        result = q.get(True, timeout)
    except Empty:
        return None
    finally:
        proc.terminate()
    return result


if __name__ == "__main__":
    # lst = [0]*417912040 # this works fine
    # lst = [0]*467912040 # this works fine
    lst = [0] * 517912040  # this does not
    print("List length:", len(lst))
    timeout(60*30, lst)
输出(包括错误):
List length: 517912040
Traceback (most recent call last):
File ".\multiprocessing_error.py", line 29, in <module>
print "List length:",len(lst)
File ".\multiprocessing_error.py", line 21, in timeout
proc.terminate()
File "C:\Python27\lib\multiprocessing\process.py", line 137, in terminate
self._popen.terminate()
File "C:\Python27\lib\multiprocessing\forking.py", line 306, in terminate
_subprocess.TerminateProcess(int(self._handle), TERMINATE)
WindowsError: [Error 5] Access is denied
解决方法.
在finally子句中添加try-except块.
finally:
try:
proc.terminate()
except WindowsError:
pass
参考我的另一篇博客:pytorch dataloader num_workers参数设置导致训练阻塞_num_workers反而-CSDN博客
问题描述:
最近在用RFBnet (源码是pytorch的)训练RSNA的比赛数据,除了要修改一点代码支持RSNA的数据集外(打算后续再写个博客),发现在使用dataloader读取数据时,如果设置num_workers为0,也就是用主进程读取数据,模型训练程序运行正常。如果设置num_workers为其他任何一个大于0的整数,也就是使用子进程读取数据时,训练程序会卡住,卡在训练之前,GPU使用率和显存占用率都为0。
解决过程:
由于是多进程,就用print调试大法,定位到是一行opencv代码的问题,在dataloader子类的__getitem__方法里面调用了
# Convert the grayscale image to a BGR image with OpenCV.
image_array = cv2.cvtColor(image_array, cv2.COLOR_GRAY2BGR)
所有子进程的代码都卡在这里了。之前也有遇到过类似的问题,python多进程调用opencv的代码会出现问题。于是就用numpy的concatenate替代了这个方法。
# Join three copies of the array along axis 2
# (assumes image_array already has a third axis — TODO confirm).
image_array = np.concatenate([image_array,image_array,image_array],axis=2)
绕过这个问题就可以正常训练了。
问题探索:
其实这个问题,我在另外一台机器上是没有遇到的,该机器是python3.6+opencv3.4.2
遇到问题的环境是docker环境,python3.5+opencv3.2
我感觉跟opencv的版本可能有关系,等后续训练完成之后把docker里面opencv的版本升级到3.4.2再试一下