实现内容:OCR(从图像中提取文字);API:Google Cloud Vision API;语言:Python 3.x
问:响应中的所有层级(块/段落/单词/符号)都应该带有置信度(confidence 参数)。块、段落和单词确实都有置信度,但有极少数符号没有置信度,比例为 67 个字符 / 48036 个字符 ≈ 0.14%。我无法理解这种现象。
代码(用于统计上述比例):
import json
import glob
def scan_word_symbols(word_j):
    """Split one OCR word into its text and per-symbol confidence data.

    Args:
        word_j: A word dict holding a "symbols" list. Every symbol dict is
            read for its "text" key; its "confidence" key is optional.

    Returns:
        A ``(text, confidences, missing)`` tuple where ``text`` is the
        concatenation of all symbol texts, ``confidences`` lists the
        confidence of every symbol that has one, and ``missing`` lists the
        symbol dicts that have no "confidence" key (in word order).

    Raises:
        KeyError: If the word has no "symbols" key or a symbol has no "text".
    """
    text_parts = []
    confidences = []
    missing = []
    for symbol_j in word_j["symbols"]:
        text_parts.append(symbol_j["text"])
        # Test for the key explicitly rather than catching every exception,
        # so unrelated errors are not mistaken for a missing confidence.
        if "confidence" in symbol_j:
            confidences.append(symbol_j["confidence"])
        else:
            missing.append(symbol_j)
    return "".join(text_parts), confidences, missing


# Every saved response file to inspect.
json_path_from_list = glob.glob('./RawJSON/*')
# Number of characters (symbols) that have no confidence.
no_confidence_number = 0
# Number of all characters (symbols).
char_number = 0

for json_path_from in json_path_from_list:
    with open(json_path_from, encoding="utf-8") as fr:
        json_data = json.load(fr)
    for response_j in json_data["responses"]:
        for page_j in response_j["fullTextAnnotation"]["pages"]:
            for block_j in page_j["blocks"]:
                # Read eagerly: a block without a confidence is unexpected and
                # should fail loudly with KeyError.
                block_confidence = block_j["confidence"]
                for paragraph_j in block_j["paragraphs"]:
                    paragraph_confidence = paragraph_j["confidence"]
                    for word_j in paragraph_j["words"]:
                        word_confidence = word_j["confidence"]
                        word, symbol_confidence_list, missing_symbols = (
                            scan_word_symbols(word_j)
                        )
                        char_number += len(word_j["symbols"])
                        if not missing_symbols:
                            continue
                        # Count every symbol without a confidence, not just
                        # one per word, so the total is a character count
                        # comparable with char_number.
                        no_confidence_number += len(missing_symbols)
                        print(json_path_from)
                        print(word)
                        for no_confidence_text in missing_symbols:
                            print(no_confidence_text)
                        # Confidence of the enclosing word/paragraph/block.
                        print(word_confidence)
                        print(paragraph_confidence)
                        print(block_confidence)
                        print(symbol_confidence_list)
                        print("________________________________________________")

print(no_confidence_number)
print(char_number)
以防万一,下面附上用于从 Vision API 获取响应并保存为 .json 的代码,请一并检查:
import json
import base64
import requests
def convert_img_to_base64(img_path):
    """Return the contents of the file at *img_path* as a base64 string.

    Args:
        img_path: Path of the file to read; it is opened in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() fails.
    with open(img_path, 'rb') as img_file:
        img_data = img_file.read()
    return base64.b64encode(img_data).decode()
class OCR:
    """Minimal client that posts an annotate request to the Vision REST API.

    Attributes are plain values set by ``__init__``:
        api_key: Key sent as the ``key`` query parameter.
        timeout: Seconds passed to ``requests.post`` as its timeout.
    """

    # REST endpoint that receives the annotate request.
    ENDPOINT = "https://vision.googleapis.com/v1/images:annotate"

    def __init__(self, api_key, timeout=60):
        """Store the API key and the HTTP timeout.

        Args:
            api_key: API key appended to the request as ``key``.
            timeout: Timeout in seconds for the HTTP call. Without one,
                ``requests.post`` may block indefinitely.
        """
        self.api_key = api_key
        self.timeout = timeout

    def get_json_responce_from_img(self, img_base64, json_request):
        """Send *json_request* with the image embedded and return the raw body.

        Args:
            img_base64: Base64 string stored at
                ``requests[0].image.content`` of the outgoing request.
            json_request: Request template dict. It is copied first, so the
                caller's dict is left untouched.

        Returns:
            The response body as text. The HTTP status is not checked, so an
            error payload is returned as-is.

        Raises:
            KeyError / IndexError: If the template lacks
                ``['requests'][0]['image']``.
        """
        import copy  # local import: only this method needs it

        # Work on a copy so the caller's template is not modified.
        payload = copy.deepcopy(json_request)
        payload['requests'][0]['image']['content'] = img_base64
        result = requests.post(
            self.ENDPOINT,
            params={'key': self.api_key},
            data=json.dumps(payload),
            headers={'Content-Type': 'application/json'},
            timeout=self.timeout,
        )
        return result.text
# API key for the Vision API (placeholder value; replace before running).
API_KEY="XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
# Image to send for OCR.
img_path = "./test.jpg"
# Request template. "features" and "languageHints" are written as lists,
# matching the array form the REST schema defines for both fields; the image
# content is filled in when the request is sent.
json_request = {
    'requests': [
        {
            'image': {'content': ""},
            'features': [{'type': "DOCUMENT_TEXT_DETECTION"}],
            'imageContext': {'languageHints': ["ja"]},
        }
    ]
}
img_base64 = convert_img_to_base64(img_path)
# Create the OCR client.
ocr = OCR(API_KEY)
# Parse the JSON response body.
data = json.loads(ocr.get_json_responce_from_img(img_base64, json_request))
# Save the JSON response as a .json file.
with open("./test.json", 'w') as fw:
    json.dump(data, fw, indent=4)