我正在评估 customvision.ai 以训练图像分类模型,然后将该模型下载为 onnx 文件,该文件将在 .Net Windows 窗体应用程序中使用。
我创建了一个新项目,上传了几张图片,标记了它们,并能够从 Customvision.ai 中的模型中获取预测。模型的准确性是可以接受的。CustomVision 允许您将模型下载为可以部署在跨平台应用程序中的 ONNX 文件。就我而言,我计划在 Windows 窗体应用程序中部署和使用模型。
当我将模型下载为 onnx 时,我收到一个包含 .onnx 文件和其他一些文件的 zip 文件。
其中一个文件是 Metadata_properties.json,它具有以下内容:
{
"CustomVision.Metadata.AdditionalModelInfo": "",
"CustomVision.Metadata.Version": "1.2",
"CustomVision.Postprocess.Method": "ClassificationMultiClass",
"CustomVision.Postprocess.Yolo.Biases": "[]",
"CustomVision.Postprocess.Yolo.NmsThreshold": "0.0",
"CustomVision.Preprocess.CropHeight": "0",
"CustomVision.Preprocess.CropMethod": "FullImageShorterSide",
"CustomVision.Preprocess.CropWidth": "0",
"CustomVision.Preprocess.MaxDimension": "0",
"CustomVision.Preprocess.MaxScale": "0.0",
"CustomVision.Preprocess.MinDimension": "0",
"CustomVision.Preprocess.MinScale": "0.0",
"CustomVision.Preprocess.NormalizeMean": "[0.0, 0.0, 0.0]",
"CustomVision.Preprocess.NormalizeStd": "[1.0, 1.0, 1.0]",
"CustomVision.Preprocess.ResizeMethod": "Stretch",
"CustomVision.Preprocess.TargetHeight": "300",
"CustomVision.Preprocess.TargetWidth": "300",
"Image.BitmapPixelFormat": "Rgb8",
"Image.ColorSpaceGamma": "SRGB",
"Image.NominalPixelRange": "Normalized_0_1"
}
我从这个文件中了解到:提供给模型进行推理的图像需要先被拉伸缩放到 300x300,像素值归一化到 0 到 1 之间,均值设为 0,标准差设为 1。为了在我的代码中使用这个模型,我参考各种在线资源整理出了以下代码:
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
//using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.ML.OnnxRuntime;
using System.IO;
namespace TestONNXRunner
{
    /// <summary>
    /// Main form of the test application. On construction it asks the user for an
    /// image, runs it through an exported ONNX classification model and prints the
    /// highest-scoring labels to the console.
    /// </summary>
    public partial class Form1 : Form
    {
        // Network input size, taken from CustomVision.Preprocess.TargetWidth/TargetHeight
        // in the exported Metadata_properties.json.
        private const int TargetWidth = 300;
        private const int TargetHeight = 300;

        // Number of best predictions that are printed.
        private const int TopPredictionCount = 10;

        /// <summary>
        /// Builds the form and immediately runs one inference pass.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            RunModel();
        }

        /// <summary>
        /// Prompts for an image file, preprocesses it (stretch-resize, scale to 0..1,
        /// mean/std normalization), runs the ONNX model and writes the top predictions
        /// to the console. Returns without doing anything if the dialog is cancelled.
        /// </summary>
        public void RunModel()
        {
            // Read paths
            string modelFilePath = @"C:\ImageMLProjects\MarbleImagesDataset\OnnxModel\onnxdataset\model.onnx";
            Dictionary<int, string> labelsDict = GetLabelMap(@"C:\ImageMLProjects\MarbleImagesDataset\OnnxModel\onnxdataset\labels.txt");

            // The dialog wraps native resources, so dispose it when the method exits.
            using var openFileDialog1 = new OpenFileDialog
            {
                InitialDirectory = @"C:\",
                Title = "Browse Image Files",
                CheckFileExists = true,
                CheckPathExists = true,
                FilterIndex = 2,
                RestoreDirectory = true,
                ReadOnlyChecked = true,
                ShowReadOnly = true
            };

            if (openFileDialog1.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            string imageFilePath = openFileDialog1.FileName;

            // Read image
            using Image<Rgb24> image = Image.Load<Rgb24>(imageFilePath);

            // Resize image to the size the network expects (aspect ratio is not preserved).
            image.Mutate(x =>
            {
                x.Resize(new ResizeOptions
                {
                    Size = new SixLabors.ImageSharp.Size(TargetWidth, TargetHeight),
                    Mode = ResizeMode.Stretch
                });
            });

            // Preprocess image
            Tensor<float> input = BuildInputTensor(image);

            // Run inference
            //int gpuDeviceId = 0; // The GPU device ID to execute on
            //var session = new InferenceSession("model.onnx", SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId));
            using var session = new InferenceSession(modelFilePath);

            // Ask the model for its input name instead of hard-coding it, so a model
            // exported with a different input name still runs.
            string inputName = session.InputMetadata.Keys.First();
            var inputs = new List<NamedOnnxValue>
            {
                NamedOnnxValue.CreateFromTensor(inputName, input)
            };

            using IDisposableReadOnlyCollection<DisposableNamedOnnxValue> results = session.Run(inputs);

            // Materialize once: the enumerable is consumed several times below.
            // NOTE(review): this assumes the first model output is a float tensor of raw
            // scores. If the exported model already emits probabilities (or its first
            // output is a label tensor), the softmax below must be dropped or a different
            // output selected - confirm against session.OutputMetadata.
            float[] output = results.First().AsEnumerable<float>().ToArray();
            float[] softmax = Softmax(output);

            // Extract top predicted classes
            List<Prediction> top10 = softmax
                .Select((confidence, index) => new Prediction
                {
                    // Fall back to a synthetic name when labels.txt has fewer entries
                    // than the model has outputs, rather than throwing.
                    Label = labelsDict.TryGetValue(index, out string label) ? label : $"class {index}",
                    Confidence = confidence
                })
                .OrderByDescending(p => p.Confidence)
                .Take(TopPredictionCount)
                .ToList();

            // Print results to console
            Console.WriteLine("Top 10 predictions for ResNet50 v2...");
            Console.WriteLine("--------------------------------------------------------------");
            foreach (Prediction t in top10)
            {
                Console.WriteLine($"Label: {t.Label}, Confidence: {t.Confidence}");
            }
        }

        /// <summary>
        /// Reads a label file with one label per line and maps the zero-based line
        /// index to the label text.
        /// </summary>
        /// <param name="LabelMapFile">Path of the label text file.</param>
        /// <returns>
        /// Index-to-label map; empty when the file does not exist. Labels are trimmed
        /// and trailing blank lines are ignored.
        /// </returns>
        public Dictionary<int, string> GetLabelMap(string LabelMapFile)
        {
            var labelsDict = new Dictionary<int, string>();
            if (!File.Exists(LabelMapFile))
            {
                return labelsDict;
            }

            // ReadAllLines handles both "\n" and "\r\n", so no stray '\r' ends up in a label.
            string[] lines = File.ReadAllLines(LabelMapFile);

            // Drop only the blank lines at the end of the file; interior lines are kept
            // so that label indices stay aligned with the model's output indices.
            int count = lines.Length;
            while (count > 0 && string.IsNullOrWhiteSpace(lines[count - 1]))
            {
                count--;
            }

            for (int i = 0; i < count; i++)
            {
                labelsDict.Add(i, lines[i].Trim());
            }

            return labelsDict;
        }

        /// <summary>
        /// Converts an RGB image into a 1 x 3 x Height x Width float tensor with each
        /// channel scaled to 0..1 and then normalized with the configured mean/std.
        /// </summary>
        private static Tensor<float> BuildInputTensor(Image<Rgb24> image)
        {
            Tensor<float> input = new DenseTensor<float>(new[] { 1, 3, image.Height, image.Width });

            // NormalizeMean / NormalizeStd from Metadata_properties.json.
            var mean = new[] { 0f, 0f, 0f };
            var stddev = new[] { 1f, 1f, 1f };

            for (int y = 0; y < image.Height; y++)
            {
                Span<Rgb24> pixelSpan = image.GetPixelRowSpan(y);
                for (int x = 0; x < image.Width; x++)
                {
                    // The tensor is declared as [batch, channel, height, width], so the
                    // row index (y) comes before the column index (x). Writing [x, y]
                    // would feed the model a transposed image.
                    input[0, 0, y, x] = ((pixelSpan[x].R / 255f) - mean[0]) / stddev[0];
                    input[0, 1, y, x] = ((pixelSpan[x].G / 255f) - mean[1]) / stddev[1];
                    input[0, 2, y, x] = ((pixelSpan[x].B / 255f) - mean[2]) / stddev[2];
                }
            }

            return input;
        }

        /// <summary>
        /// Computes the softmax of <paramref name="scores"/>. The maximum score is
        /// subtracted before exponentiating so large scores cannot overflow to infinity.
        /// Returns an empty array for empty input.
        /// </summary>
        private static float[] Softmax(float[] scores)
        {
            if (scores.Length == 0)
            {
                return new float[0];
            }

            float max = scores.Max();
            float[] exps = scores.Select(s => (float)Math.Exp(s - max)).ToArray();
            float sum = exps.Sum();
            return exps.Select(e => e / sum).ToArray();
        }

        /// <summary>
        /// A single classification result: a label and its confidence.
        /// </summary>
        internal class Prediction
        {
            /// <summary>Label text of the predicted class.</summary>
            public string Label { get; set; }

            /// <summary>Softmax confidence assigned to the class.</summary>
            public float Confidence { get; set; }
        }
    }
}
现在有什么问题?
程序没有报任何错误,但无论我使用哪张图像进行推理,得到的预测结果都完全相同。
问题
- 我应该以不同的方式构造张量吗?我不确定这是否与张量的结构方式有关。
- Github 上 CustomVision 页面的最后一次更新是几年前,CustomVision 是否推荐用于 2021 年的生产用途?我应该寻找其他东西吗?这个想法是能够使用低/零代码方法构建/训练高质量的图像分类模型,然后将模型部署到本地计算机上以用于低延迟应用程序。
在这方面的任何帮助,我都将不胜感激。