我们正在研究将文档 OCR 包含在我们的产品中的可能性,并且更愿意使用 Azure 表单识别器。但是,当使用自定义或组合模型进行文档 OCR 时,我们会遇到性能非常缓慢的问题 - 通常超过 10 秒。这是正常的吗?如果不是,我们该如何提高性能?这是在本地区域的 S0 层上,我们使用的是 Azure.AI.FormRecognizer v3.1.1 .NET 客户端:
// Recognizes a custom form in the image bytes `imageToDetect` using the model `modelID`,
// maps the service result into a FormRecognizeResponse, and logs timing plus outcome to the console.
// Failures are recorded on response.ErrorResponse rather than rethrown.
string endpoint = @"https://123.cognitiveservices.azure.com/";
string licenseKey = "123";
var credential = new AzureKeyCredential(licenseKey);
FormRecognizerClient client = new FormRecognizerClient(new Uri(endpoint), credential);

// Initial response only carries the image identity; it is replaced by the mapped
// result on success, or gets ErrorResponse populated in the catch handlers below.
FormRecognizeResponse response = new FormRecognizeResponse()
{
    ImageID = imageID,
    ImageTypeID = imageTypeID,
    ImageTypeName = imageTypeName
};

//https://docs.microsoft.com/en-us/dotnet/api/overview/azure/ai.formrecognizer-readme
//ID Documents sample: https://github.com/Azure/azure-sdk-for-net/blob/Azure.AI.FormRecognizer_3.1.1/sdk/formrecognizer/Azure.AI.FormRecognizer/samples/Sample11_RecognizeIdentityDocuments.md
Stopwatch sw = Stopwatch.StartNew();
if (imageToDetect != null && imageToDetect.Length > 0)
{
    //Custom forms:
    var options = new RecognizeCustomFormsOptions()
    {
        IncludeFieldElements = false, //TODO: OK? We not using this in mapping,
        //Pages = {"1-3","5-6"}
        //ContentType = FormContentType.Jpeg
    };

    using (var stream = new MemoryStream(imageToDetect))
    {
        try
        {
            RecognizeCustomFormsOperation operation = client.StartRecognizeCustomForms(modelID, stream, options);

            // WaitForCompletionAsync returns a ValueTask; reading .Result on a ValueTask that has not
            // completed is unsupported. Convert to a Task first, then block with GetAwaiter().GetResult(),
            // which rethrows the original exception (e.g. RequestFailedException) unwrapped.
            // NOTE(review): if the enclosing method can be made async, prefer `await` here instead of blocking.
            Response<RecognizedFormCollection> operationResponse =
                operation.WaitForCompletionAsync().AsTask().GetAwaiter().GetResult();

            response = MapToModel(imageID, imageTypeID, imageTypeName, operationResponse, false); //Pass fields and reset in caller
        }
        catch (RequestFailedException rfEx)
        {
            // Service-side failure: surface the service error code and HTTP status.
            response.ErrorResponse = new FormRecognizeError()
            {
                code = rfEx.ErrorCode,
                statusCode = rfEx.Status,
                message = rfEx.Message
            };
            Console.WriteLine($"ERROR: {rfEx}");
        }
        catch (Exception ex)
        {
            // Any other failure is reported with a fixed 400 status.
            response.ErrorResponse = new FormRecognizeError()
            {
                statusCode = 400, //(int)HttpStatusCode.BadRequest,
                message = ex.Message
            };
            Console.WriteLine($"ERROR: {ex}");
        }
    }

    sw.Stop();

    // Count once so the check and the log line agree and the collection is not enumerated twice.
    int formCount = response.RecognizedForms?.Count() ?? 0;

    Console.WriteLine("---------------------------------------------------------------------------------");
    if (formCount > 0)
    {
        Console.WriteLine($"{sw.ElapsedMilliseconds} Milliseconds --> DetectForm {imageTypeName} response: RecognizedForms:{formCount}. Confidence: {response.RecognizedForms[0].TypeConfidence} Error:{response.ErrorResponse?.message}");
    }
    else
    {
        Console.WriteLine($"{sw.ElapsedMilliseconds} Milliseconds --> DetectForm {imageTypeName} response: No forms detected. Error:{response.ErrorResponse?.message}");
    }
    Console.WriteLine("---------------------------------------------------------------------------------");

    if (printDetail)
    {
        Console.WriteLine(JsonConvert.SerializeObject(response, Formatting.Indented));
        Console.WriteLine("---------------------------------------------------------------------------------");
    }
    Console.WriteLine("");
    //Error Logged in caller?
}
else
{
    Console.WriteLine("ERROR: Empty image byte array");
}