tensorflow - 使用使用 make_image_classifier 创建的模型时在 TensorFlow Lite C API 中调整输入尺寸

Question

抱歉，如果这个问题看起来很熟悉，我之前已经发布了更广泛的问题描述，但我已经删除了它，因为我在调查中取得了一些进展并且可以缩小到更具体的问题。

语境：

我正在使用make_image_classifier.
我想使用 C API 来加载生成的模型和标签图像。我在这里遇到数据输入问题。
我可以使用label_image.py 示例标记图像，因此模型很好，问题在于我对 C API 的使用。
如果我理解make_image_classifier正确，它会生成一个需要 4 维输入的模型。我们正在处理超出宽度、高度和通道的图像，我不知道第 4 维是什么。这种缺乏理解可能是我问题的根源。
我在代码中包含了一些错误处理，并且在调整大小后尝试从输入缓冲区复制时发生了我遇到的错误。

问题：

Q1：为什么生成的模型make_image_classifier需要 4 维输入？有高度、宽度和通道，但第四个是什么？

当我使用 C API 执行以下操作以使用我的图像输入运行模型时：

int inputDims[3] = {224, 224, 3};
tflStatus = TfLiteInterpreterResizeInputTensor(interpreter, 0, inputDims, 3);

我得到：

ERROR: tensorflow/lite/kernels/conv.cc:329 input->dims->size != 4 (3 != 4)
ERROR: Node number 2 (CONV_2D) failed to prepare.

所以我最终做了：

int inputDims[4] = {1, 224, 224, 3};
tflStatus = TfLiteInterpreterResizeInputTensor(interpreter, 0, inputDims, 4);

据我所知，第一个维度大小用于批量大小，以防我要处理多个图像。它是否正确？

Q2：我应该在调用时使用相同的维度结构来构建我的数据输入TfLiteInterpreterResizeInputTensor吗？我得到这个图像RGB输入缓冲区的错误：

// RGB range is 0-255. Scale it to 0-1.
for(int i = 0; i < imageSize; i++){
    imageDataBuffer[i] = (float)pImage[i] / 255.0;
}

在构建一个模仿给定的输入维度的输入时，我也会遇到错误TfLiteInterpreterResizeInputTensor，但这似乎很愚蠢：

float imageData[1][224][224][3];
int j = 0;
for(int h = 0; h < 224; h++){
  for(int w = 0; w < 224; w++){
    imageData[0][h][w][0] = (float)pImage[j] * (1.0 / 255.0);
    imageData[0][h][w][1] = (float)pImage[j+1] * (1.0 / 255.0);
    imageData[0][h][w][2] = (float)pImage[j+2] * (1.0 / 255.0);

    j = j + 3;
  }
}

最后一个输入结构类似于 Pythonlabel_image.py在执行此操作时使用的输入结构：

input_data = np.expand_dims(img, axis=0)

Q3：我的输入缓冲区有什么问题导致TfLiteTensorCopyFromBuffer返回错误代码？

谢谢！

完整代码：

#include "tensorflow/lite/c/c_api.h"
#include "tensorflow/lite/c/c_api_experimental.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/ujpeg.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Dispose of the model and interpreter objects.
int disposeTfLiteObjects(TfLiteModel* pModel, TfLiteInterpreter* pInterpreter)
{
    if(pModel != NULL)
    {
      TfLiteModelDelete(pModel);
    }

    if(pInterpreter)
    {
      TfLiteInterpreterDelete(pInterpreter);
    }
}

// The main function.
int main(void) 
{
    TfLiteStatus tflStatus;

    // Create JPEG image object.
    ujImage img = ujCreate();

    // Decode the JPEG file.
    ujDecodeFile(img, "image_224x224.jpeg");

    // Check if decoding was successful.
    if(ujIsValid(img) == 0){
        return 1;
    }
    
    // There will always be 3 channels.
    int channel = 3;

    // Height will always be 224, no need for resizing.
    int height = ujGetHeight(img);

    // Width will always be 224, no need for resizing.
    int width = ujGetWidth(img);

    // The image size is channel * height * width.
    int imageSize = ujGetImageSize(img);

    // Fetch RGB data from the decoded JPEG image input file.
    uint8_t* pImage = (uint8_t*)ujGetImage(img, NULL);

    // The array that will collect the JPEG RGB values.
    float imageDataBuffer[imageSize];

    // RGB range is 0-255. Scale it to 0-1.
    int j=0;
    for(int i = 0; i < imageSize; i++){
        imageDataBuffer[i] = (float)pImage[i] / 255.0;
    }

    // Load model.
    TfLiteModel* model = TfLiteModelCreateFromFile("model.tflite");

    // Create the interpreter.
    TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, NULL);

    // Allocate tensors.
    tflStatus = TfLiteInterpreterAllocateTensors(interpreter);

    // Log and exit in case of error.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error allocating tensors.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }
    
    int inputDims[4] = {1, 224, 224, 3};
    tflStatus = TfLiteInterpreterResizeInputTensor(interpreter, 0, inputDims, 4);

    // Log and exit in case of error.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error resizing tensor.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }

    tflStatus = TfLiteInterpreterAllocateTensors(interpreter);

    // Log and exit in case of error.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error allocating tensors after resize.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }

    // The input tensor.
    TfLiteTensor* inputTensor = TfLiteInterpreterGetInputTensor(interpreter, 0);

    // Copy the JPEG image data into into the input tensor.
    tflStatus = TfLiteTensorCopyFromBuffer(inputTensor, imageDataBuffer, imageSize);
    
    // Log and exit in case of error.
    // FIXME: Error occurs here.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error copying input from buffer.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }

    // Invoke interpreter.
    tflStatus = TfLiteInterpreterInvoke(interpreter);

    // Log and exit in case of error.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error invoking interpreter.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }

    // Extract the output tensor data.
    const TfLiteTensor* outputTensor = TfLiteInterpreterGetOutputTensor(interpreter, 0);

    // There are three possible labels. Size the output accordingly.
    float output[3];

    tflStatus = TfLiteTensorCopyToBuffer(outputTensor, output, 3 * sizeof(float));

    // Log and exit in case of error.
    if(tflStatus != kTfLiteOk)
    {
      printf("Error copying output to buffer.\n");
      disposeTfLiteObjects(model, interpreter);
      return 1;
    }

    // Print out classification result.
    printf("Confidences: %f, %f, %f.\n", output[0], output[1], output[2]); 

    // Dispose of the TensorFlow objects.
    disposeTfLiteObjects(model, interpreter);
    
    // Dispoice of the image object.
    ujFree(img);
    
    return 0;
}

编辑＃1：好的，所以在里面TfLiteTensorCopyFromBuffer：

TfLiteStatus TfLiteTensorCopyFromBuffer(TfLiteTensor* tensor,
                                    const void* input_data,
                                    size_t input_data_size) {
    if (tensor->bytes != input_data_size) {
        return kTfLiteError;
    }

    memcpy(tensor->data.raw, input_data, input_data_size);
    return kTfLiteOk;
}

我的input_data_size值为 150,528（3 通道 x 224 像素高 x 224 像素宽），但tensor->bytes为 602,112（3 通道 x 448 像素高 x 224 像素 448，我假设？）。我不明白这种差异，尤其是因为我调用TfLiteInterpreterResizeInputTensor了{1, 224, 224, 3}.

编辑#2：我相信我在这里找到了答案。确认后将解决此帖子。

score 0 · Accepted Answer

我在 EDIT #2 上链接的解决方案就是答案。最后，我只需要更换：

TfLiteTensorCopyFromBuffer(inputTensor, imageDataBuffer, imageSize);

和：

TfLiteTensorCopyFromBuffer(inputTensor, imageDataBuffer, imageSize * sizeof(float));

干杯!

tensorflow - 使用使用 make_image_classifier 创建的模型时在 TensorFlow Lite C API 中调整输入尺寸

1 回答 1

Related

Reference