0

我正在基于“ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8”为 tensorflow.js 构建对象检测模型。我已经训练了模型,并使用 tensorflowjs_converter 命令行(CLI)工具对其进行了转换。当我在 jupyter 中运行模型时,我得到以下输出:

jupyter 输出

但是当我将模型加载到 tensorflow.js 中时,它没有找到任何边界框。它确实在边界框张量中吐出了一些东西,如下所示:

tfjs 模型输出

这些输出大多是随机的,而且这些边界框都没有与之关联的类别。起初,我以为是 tfjs 模型的准确率较低,因为转换器看起来运行了一些优化。但是,在 Python notebook 中提高了模型准确率之后,tfjs 中的准确率并没有随之提高。

看起来没有办法关闭 tensorflowjs_converter 的优化。是这样吗?

还有什么我可以尝试让我的模型在 tfjs 中运行的吗?

这是我的pipeline_file.config

# SSD with Mobilenet v2 FPN-lite (go/fpn-lite) feature extractor, shared box
# predictor and focal loss (a mobile version of Retinanet).
# Retinanet: see Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint
# Train on TPU-8
#
# Achieves 28.2 mAP on COCO17 Val

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    num_classes: 6
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 640
        width: 640
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 128
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.00004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 4
        share_prediction_tower: true
        use_depthwise: true
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_mobilenet_v2_fpn_keras'
      use_depthwise: true
      fpn {
        min_level: 3
        max_level: 7
        additional_layer_depth: 128
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.00004
          }
        }
        initializer {
          random_normal_initializer {
            stddev: 0.01
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

train_config: {
  fine_tune_checkpoint_version: V2
  fine_tune_checkpoint: "/content/drive/MyDrive/nespresso_detection/models/research/deploy/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0"
  fine_tune_checkpoint_type: "detection"
  batch_size: 16
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  num_steps: 8000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        cosine_decay_learning_rate {
          learning_rate_base: .08
          total_steps: 50000
          warmup_learning_rate: .026666
          warmup_steps: 1000
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader: {
  label_map_path: "/content/drive/MyDrive/nespresso_detection/train/VertuoPlus_label_map.pbtxt"
  tf_record_input_reader {
    input_path: "/content/drive/MyDrive/nespresso_detection/train/VertuoPlus.tfrecord"
  }
}

eval_config: {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
}

eval_input_reader: {
  label_map_path: "/content/drive/MyDrive/nespresso_detection/train/VertuoPlus_label_map.pbtxt"
  shuffle: false
  num_epochs: 1
  tf_record_input_reader {
    input_path: "/content/drive/MyDrive/nespresso_detection/valid/VertuoPlus.tfrecord"
  }
}

这是model.json转换器创建的文件:

{
    "format": "graph-model",
    "generatedBy": "2.4.0",
    "convertedBy": "TensorFlow.js Converter v2.8.3",
    "signature": {
        "inputs": {
            "input_tensor:0": {
                "name": "input_tensor:0",
                "dtype": "DT_UINT8",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "-1"
                        },
                        {
                            "size": "-1"
                        },
                        {
                            "size": "3"
                        }
                    ]
                }
            }
        },
        "outputs": {
            "Identity_1:0": {
                "name": "Identity_1:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "100"
                        },
                        {
                            "size": "4"
                        }
                    ]
                }
            },
            "Identity_3:0": {
                "name": "Identity_3:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "100"
                        },
                        {
                            "size": "7"
                        }
                    ]
                }
            },
            "Identity_5:0": {
                "name": "Identity_5:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        }
                    ]
                }
            },
            "Identity:0": {
                "name": "Identity:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "100"
                        }
                    ]
                }
            },
            "Identity_7:0": {
                "name": "Identity_7:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "51150"
                        },
                        {
                            "size": "7"
                        }
                    ]
                }
            },
            "Identity_2:0": {
                "name": "Identity_2:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "100"
                        }
                    ]
                }
            },
            "Identity_4:0": {
                "name": "Identity_4:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "100"
                        }
                    ]
                }
            },
            "Identity_6:0": {
                "name": "Identity_6:0",
                "dtype": "DT_FLOAT",
                "tensorShape": {
                    "dim": [
                        {
                            "size": "1"
                        },
                        {
                            "size": "51150"
                        },
                        {
                            "size": "4"
                        }
                    ]
                }
            }
        }
    },
    "modelTopology": {
        "node": [
            {
                "name": "StatefulPartitionedCall/Postprocessor/BatchMultiClassNonMaxSuppression/PadOrClipBoxList/zeros_7",
                "op": "Const",
                "attr": {
                    "dtype": {
                        "type": "DT_INT32"
                    },
                    "value": {
                        "tensor": {
                            "dtype": "DT_INT32",
                            "tensorShape": {
                                "dim": [
                                    {
                                        "size": "1"
                                    }
                                ]
                            }
                        }
                    }
                }
            },
... too many nodes to list here ...
            {
                    "name": "ConstantFolding/StatefulPartitionedCall/Postprocessor/BatchMultiClassNonMaxSuppression/stack_7_const_axis",
                    "shape": [],
                    "dtype": "int32"
                }
            ]
        }
    ]
}

另外,这是我的转换脚本(仅供参考):

!tensorflowjs_converter \
    --input_format=tf_saved_model \
    --output_format=tfjs_graph_model \
    --signature_name=serving_default \
    --saved_model_tags=serve \
    ./saved_model \
    ./tfjs
4

1 回答 1

1

事实证明,发生了几件事。

  1. WASM 后端返回不同的数字,并且可能不正确。我无法完全解析 WASM 后端的输出。

  2. 使用 CPU 或 WebGL 后端时,输出张量没有标签,因此需要一些猜测才能解释数据。该模型始终输出 100 个边界框。其中一个张量包含置信度分数,一个包含对象的类别(整数值),一个包含坐标(浮点数,以相对于左上角的比例表示),还有一个包含原始分类数据(浮点数,即每个类别的分数;就我而言,其形状为 (1,100,7))。

第一步是弄清每个输出张量的含义,然后过滤掉得分较低(< 0.8)的对象。接着,我必须把类别与输出的整数值对应起来,而这种对应关系可能在标注之后发生了变化。这样处理之后,数据就说得通了。

如果您也像我一样迷失在这些数据中,希望这个回答能为您指明方向。

于 2021-02-10T18:04:04.940 回答