我正在做一个从图像中读取文本的小项目。目前在 Ubuntu 14.0 上处理 2 万张(20k)图像需要将近 10 个小时。下面是耗时最多的那部分代码。请问如何提高它的速度,或者如何对下面的代码使用多进程(multiprocessing)?
问候, 施里
代码:
# OCR every image listed in files3 (located in files_dir3) and record one
# worksheet row per image:
#   column 0 = path of the image that was OCR'd
#   column 1 = '0' when text was found, '1' when it was not
#   column 2 = the recognised text, or 'No Text On Image'
# When the first OCR pass finds nothing, the background is removed with
# GrabCut, the result is saved into files_dir1, and OCR is retried on that
# copy; the retry overwrites the same row.
#
# NOTE(review): the original indentation was lost; `row` is assumed to
# advance once per image file (non-image files do not consume a row).
row = 1
for f1 in files3:
    # Guard clause: ignore anything that is not a supported image type.
    if not f1.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    try:
        image_path1 = files_dir3 + '/' + f1
        txt = pytesseract.image_to_string(Image.open(image_path1))
        print(txt)

        if txt != '':
            print('0')
            worksheet4.write(row, 1, '0')
            worksheet4.write(row, 2, txt)
            worksheet4.write(row, 0, image_path1)
        else:
            # Record the "no text" result first so the row is still filled
            # in if the GrabCut retry below fails.
            worksheet4.write(row, 1, '1')
            worksheet4.write(row, 2, 'No Text On Image')
            worksheet4.write(row, 0, image_path1)

            img = cv2.imread(image_path1)
            if img is None:
                # cv2.imread signals failure by returning None, not raising.
                raise IOError('cv2.imread could not read ' + image_path1)

            # GrabCut foreground extraction seeded with a fixed rectangle.
            mask = np.zeros(img.shape[:2], np.uint8)
            bgdModel = np.zeros((1, 65), np.float64)
            fgdModel = np.zeros((1, 65), np.float64)
            rect = (50, 50, 450, 290)
            cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5,
                        cv2.GC_INIT_WITH_RECT)
            # Mask values 0 and 2 are zeroed out; everything else is kept.
            mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
            img = img * mask2[:, :, np.newaxis]

            # Save the background-stripped copy and OCR that same file.
            # The original retried on an unrelated name (`f2`) that is never
            # assigned in this loop, so the retry never saw this image.
            image_path2 = files_dir1 + '/' + f1
            cv2.imwrite(image_path2, img)
            print("Image copied: " + f1)

            print(f1)
            txt = pytesseract.image_to_string(Image.open(image_path2))
            print(txt)
            if txt != '':
                print('0')
                worksheet4.write(row, 1, '0')
                worksheet4.write(row, 2, txt)
                worksheet4.write(row, 0, image_path2)
            else:
                print('1')
                worksheet4.write(row, 1, '1')
                worksheet4.write(row, 2, 'No Text On Image')
                worksheet4.write(row, 0, image_path2)
    except Exception as exc:
        # Best-effort: one bad image must not abort the batch, but report
        # why it was skipped. `Exception` (rather than a bare except) lets
        # Ctrl-C / SystemExit still stop the run.
        print("Moving On")
        print("Skipped %s: %s" % (f1, exc))

    row += 1