我正在从事一个小型个人项目,以自动从不同的测量设备(没有数字输出)采集数据,例如万用表或发光计。下面是输入图像的示例。我已经阅读了有关该主题的一些教程和论坛帖子,即:来自 stackoverflow 的这篇文章和本教程。
主要有2个问题:
查找显示 ROI
光学字符识别
我暂时跳过了第一点,因为简单的 Canny >> findContours >> approxPolyDP 流程并非在所有情况下都有效。在某些情况下,最大的轮廓不是显示器,而是发光计本身或图像中的其他东西。
我主要处理来自显示器的数字分割及其识别。分割是通过使用自适应阈值对图像本身进行预处理并从图像中删除一些小的连通分量来解决的。对数字进行分段后,我尝试应用上述教程中使用的相同方法。我添加了对单个数字的一些额外预处理,以防第一次无法识别它们(旋转 +-5 度)。
在这一点上,为预处理和分割方法调整单个参数变得相当复杂,这将同时在多个设备上工作。
我正在征求意见,是否只使用计算机视觉技术将这个项目推到最后,或者我应该制作某种混合机器学习/CV 解决方案(例如训练一个 CNN 来识别单个数字或一个 YOLO找到图像中的显示等)?
PS:我已经尝试过 tesseract,但它无法识别单个数字;阅读其文档后——文档解释说,该网络更适合识别文本或单词块,而不是图像中某处的单个数字——我便放弃了它。请注意,相机与测量设备(或各个设备显示器)的相对位置是无法保证的,我会再补充一些额外的照片。
我的代码:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
## MAKE THE PROCESS MORE 'VERBOSE'
# When True, intermediate results are displayed with matplotlib after each stage.
show_images=True
#checking active 7-segment display segments in digit_roi
def search4segments(digit_roi):
    """Return a 7-element 0/1 list of lit segments in a thresholded digit image.

    Segment order: top, top-left, top-right, center, bottom-left,
    bottom-right, bottom (matches the keys of DIGITS_LOOKUP).
    A segment counts as "on" when more than 50 % of its ROI is non-zero.

    Parameters
    ----------
    digit_roi : 2-D uint8 array, non-zero pixels are digit strokes.
    """
    dig_H, dig_W = digit_roi.shape
    # assumed stroke thickness of a segment, relative to the digit width
    seg_short_side = dig_W // 4
    # ((row, col), (row, col)) corner pairs delimiting each segment ROI
    segments = [
        ((0, 0), (seg_short_side, dig_W)),  # top
        ((0, 0), (dig_H // 2, seg_short_side)),  # top-left
        ((0, dig_W - seg_short_side), (dig_H // 2, dig_W)),  # top-right
        ((dig_H // 2 - seg_short_side // 2, 0),
         (dig_H // 2 + seg_short_side // 2, dig_W)),  # center
        ((dig_H // 2, 0), (dig_H, seg_short_side)),  # bottom-left
        ((dig_H // 2, dig_W - seg_short_side), (dig_H, dig_W)),  # bottom-right
        ((dig_H - seg_short_side, 0), (dig_H, dig_W)),  # bottom
    ]
    # activation flags, one per segment
    on = [0] * len(segments)
    for i, ((rA, cA), (rB, cB)) in enumerate(segments):
        # BUG FIX: the original indexed the *global* `digit` instead of the
        # `digit_roi` argument, so calls with a rotated copy silently
        # analysed the unrotated image.
        # np.count_nonzero is equivalent to cv2.countNonZero on a 2-D array.
        seg_roi = digit_roi[rA:rB, cA:cB]
        total = np.count_nonzero(seg_roi)
        area = (rB - rA) * (cB - cA)
        # mark the segment "on" when more than 50 % of its area is lit;
        # the +1e-1 guards against a zero area on very small digits
        if total / float(area + 1e-1) > 0.5:
            on[i] = 1
    return on
#function for rotating an image by arbitrary angles (not just 90/180/270)
def rotate_image(image, angle):
    """Rotate ``image`` around its center by ``angle`` degrees (CCW positive).

    The output keeps the original width/height; corners that rotate out of
    frame are clipped and revealed areas are filled with black.
    """
    width_height = image.shape[1::-1]
    center = tuple(np.array(width_height) / 2)
    matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(image, matrix, width_height, flags=cv2.INTER_LINEAR)
#trimmean function definition
def trimmean(arr, percent):
    """Trimmed mean: discard ``percent/2`` % of values from each end.

    The data is sorted first (the original relied on the caller pre-sorting),
    and the slice is symmetric: the original ``arr[k+1:n-k]`` dropped one
    extra element from the low end (the file's own "TODO: FIX THE STATISTIC"
    flagged this). At least one element is always kept.

    Parameters
    ----------
    arr : 1-D array-like of numbers.
    percent : total percentage of data to trim (half from each tail).
    """
    data = np.sort(np.asarray(arr, dtype=float))
    n = data.size
    if n == 0:
        return float('nan')
    k = int(round(n * (float(percent) / 100) / 2))
    # never trim everything away
    k = min(k, (n - 1) // 2)
    return float(np.mean(data[k:n - k]))
#read in the image paths to a list (portable path join instead of a
#hard-coded Windows backslash)
imdir = os.path.join(os.getcwd(), 'RealWorldImages')
img_paths = [os.path.join(imdir, name) for name in os.listdir(imdir)]
#second image of the folder, loaded as greyscale
img = cv2.imread(img_paths[1], 0)
if img is None:
    raise FileNotFoundError('could not read image: ' + img_paths[1])
#median blur (in place) to remove high-frequency noise
cv2.medianBlur(img, 5, img)
#OPTIONAL: downscale so large images fit on screen for the ROI selection below
img = cv2.resize(img, (int(img.shape[1] / 1.2), int(img.shape[0] / 1.2)))
#Manual selection of the display ROI
roi = cv2.selectROI(img)
cv2.destroyAllWindows()
#crop the selected region; roi is (x, y, w, h)
img_roi = img[int(roi[1]):int(roi[1] + roi[3]), int(roi[0]):int(roi[0] + roi[2])]
#adaptive-threshold block size derived from the ROI width (4-6 digits
#expected); must be odd and at least 3 for cv2.adaptiveThreshold
ksize = max(3, 2 * (roi[2] // 8) + 1)
img_bw = cv2.adaptiveThreshold(img_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY_INV, ksize, 3)
#connected components; stats columns are (x, y, w, h, area)
n_labels, labels, cc_stats, _ = cv2.connectedComponentsWithStats(img_bw)
#drop every component whose bounding box touches the ROI border -
#those are most likely glare, bezel or background, not digits
touches_border = np.logical_or.reduce((
    cc_stats[:, 0] == 0,
    cc_stats[:, 1] == 0,
    cc_stats[:, 0] + cc_stats[:, 2] == img_roi.shape[1],
    cc_stats[:, 1] + cc_stats[:, 3] == img_roi.shape[0],
))
keep_labels = np.flatnonzero(~touches_border)
keep_areas = cc_stats[keep_labels, -1]
#robust size estimate: trimmed mean over the middle 40 % of the component
#areas (passed largest-first, as the original did)
mean = trimmean(np.sort(keep_areas)[::-1], 60)
#discard components smaller than a quarter of that mean (NOT QUITE ROBUST)
keep_labels = keep_labels[keep_areas > mean / 4]
#binary image containing only the surviving components
#(vectorised equivalent of the original per-component bitwise_or loop)
masks = np.isin(labels, keep_labels).astype(np.uint8)
#NOTE(review): dropped the unused thinning/HoughLines experiment and the
#unused stddev computation from the original.
if show_images:
    #show the preprocessing result: raw threshold vs. filtered components
    _, axs = plt.subplots(1, 2)
    axs[0].imshow(img_bw, cmap='binary')
    axs[1].imshow(masks, cmap='binary')
    plt.show()
#project the binary mask onto the horizontal axis (column sums)
ver_proj = np.sum(masks, axis=0)
#Threshold at one fifth of the mean: True where the column contains ink.
#This one-liner is the boolean equivalent of the original three-step
#zero-out / set-to-one / cast sequence, and it replaces the deprecated
#np.bool alias (removed in NumPy 1.24) with a plain bool array.
ver_proj = ver_proj >= np.mean(ver_proj) / 5
########################################################################
##################### DIGIT SEGMENTATION ###############################
########################################################################
#indices where the thresholded projection flips - candidate digit/gap borders
borders = list(np.flatnonzero(ver_proj[1:] != ver_proj[:-1]) + 1)
#need more than two transitions to have at least one inner region
#(the original also computed a `lengths` list here, but never used it)
if len(borders) <= 2:
    raise RuntimeError('digit segmentation failed: too few transitions '
                       'in the vertical projection')
#cut the mask into vertical strips between consecutive borders
img_segments = [masks[:, a:b] for a, b in zip(borders[:-1], borders[1:])]
if show_images:
    _, axs = plt.subplots(1, len(img_segments))
    for i, seg in enumerate(img_segments):
        axs[i].imshow(seg, cmap='binary')
    plt.show()
#keep only strips with more than 20 % ink - the rest are the supposedly
#empty in-between-digit regions
digits = [seg for seg in img_segments
          if cv2.countNonZero(seg) > seg.size / 5]
if show_images:
    _, axs = plt.subplots(1, len(digits))
    for i, dig in enumerate(digits):
        axs[i].imshow(dig, cmap='binary')
    plt.show()
#row projection of every digit strip
digits_hor_proj = [np.sum(dig, axis=1) for dig in digits]
#tighten each digit vertically to the rows holding enough ink
#(threshold: half of the mean row sum, as in the original)
for i in range(len(digits)):
    rows = np.where(digits_hor_proj[i] > np.mean(digits_hor_proj[i]) / 2)
    digits[i] = digits[i][int(rows[0][0]):int(rows[0][-1]) + 1, :]
if show_images:
    _, axs = plt.subplots(1, len(digits))
    for i, dig in enumerate(digits):
        axs[i].imshow(dig, cmap='binary')
    plt.show()
#dilate each digit with an elliptical kernel ~1/5 of its width
#NOTE(review): digits_closed is never read downstream - kept for parity
digits_closed = []
for dig in digits:
    # guard against a zero-sized kernel on digits narrower than 5 px,
    # which would make getStructuringElement assert
    k = max(1, dig.shape[1] // 5)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
    digits_closed.append(cv2.morphologyEx(dig, cv2.MORPH_DILATE, kernel))
##########################################################################
##########################################################################
####################### DIGIT RECOGNITION ################################
##########################################################################
##########################################################################
# Lookup table mapping the tuple of active segments
# (top, top-left, top-right, center, bottom-left, bottom-right, bottom)
# to the decimal digit it represents.
DIGITS_LOOKUP = {
(1, 1, 1, 0, 1, 1, 1): 0,
# (0, 0, 1, 0, 0, 1, 0): 1,  # "1" is handled separately via its aspect ratio
(1, 0, 1, 1, 1, 0, 1): 2,
(1, 0, 1, 1, 0, 1, 1): 3,
(0, 1, 1, 1, 0, 1, 0): 4,
(1, 1, 0, 1, 0, 1, 1): 5,
(1, 1, 0, 1, 1, 1, 1): 6,
(1, 0, 1, 0, 0, 1, 0): 7,
(1, 1, 1, 1, 1, 1, 1): 8,
(1, 1, 1, 1, 0, 1, 1): 9
}
#creating an output list for recognised digits (None = not recognised)
digit_rec = []
for digit in digits:
    dig_H, dig_W = digit.shape
    #a "1" is recognised by shape alone: much taller than wide and mostly
    #filled (only the two right-hand segments remain after cropping)
    #TODO: expand the image with the one, as it sits on the left side of
    #its original digit space
    if dig_H > 3 * dig_W and cv2.countNonZero(digit) > 0.4 * dig_W * dig_H:
        digit_rec.append(1)
        continue
    #if the digit fills less than 30 % of its box, dilate it so the segment
    #masks get a chance to overlap the thin strokes.
    #BUG FIX: the original dilated digits[i] with the stale index `i` left
    #over from the previous plotting loop, and never touched the loop
    #variable `digit`, so the dilation had no effect on recognition.
    if cv2.countNonZero(digit) <= 0.3 * dig_W * dig_H:
        ksize = max(1, dig_W // 5)
        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (ksize, ksize))
        digit = cv2.morphologyEx(digit, cv2.MORPH_DILATE, kernel)
    #try the digit as-is first, then slightly rotated copies.
    #BUG FIX: the original while-loop computed the segment vector for the
    #last rotation (+5 deg) but exited before ever looking it up.
    recognised = None
    for angle in (0, -5, -3, 3, 5):
        candidate = digit if angle == 0 else rotate_image(digit, angle)
        recognised = DIGITS_LOOKUP.get(tuple(search4segments(candidate)))
        if recognised is not None:
            break
    #None marks "something was there but was not recognised"
    digit_rec.append(recognised)
if show_images:
    _, axs = plt.subplots(1, len(digits))
    for i in range(len(digits)):
        axs[i].imshow(digits[i], cmap='binary')
        axs[i].title.set_text(digit_rec[i])
    plt.show()
#Print the result vector
print(digit_rec)