这是我用来把 COCO 格式的图像目标检测标注数据转换为 AutoML CSV 格式的函数:
def convert_coco_json_to_csv(filename, bucket):
    """Convert a COCO annotation file into an AutoML object-detection CSV.

    The CSV is written next to the input file, using the same name with the
    final extension replaced by ``.csv``. Each annotation becomes one row of
    the form ``path,label,x_min,y_min,,,x_max,y_max,,`` where the coordinates
    are normalised by the width and height of the image they belong to.

    Args:
        filename: Path to the COCO JSON annotation file.
        bucket: Prefix prepended to every image file name, e.g.
            ``"gs://my-bucket"``.

    Raises:
        KeyError: If an annotation references an image id or category id
            that is not declared in the file.
    """
    import csv
    import json
    import os

    with open(filename, 'r', encoding='utf-8') as src:
        coco = json.load(src)

    # COCO ids are arbitrary integers (often 1-based or sparse), so they must
    # be resolved through their 'id' field, never used as list positions.
    images_by_id = {image['id']: image for image in coco['images']}
    category_names = {cat['id']: cat['name'] for cat in coco['categories']}

    out_file = os.path.splitext(filename)[0] + '.csv'
    with open(out_file, 'w', encoding='utf-8') as out:
        # csv.writer quotes labels / file names that contain commas; the
        # '\n' terminator keeps the line endings the plain writes produced.
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ['GCS_FILE_PATH', 'label', 'X_MIN', 'Y_MIN', '', '',
             'X_MAX', 'Y_MAX', '', '']
        )
        for annotation in coco['annotations']:
            image = images_by_id[annotation['image_id']]
            width = image['width']
            height = image['height']
            # COCO bbox is [top-left x, top-left y, width, height] in pixels.
            # AutoML wants normalised corners; the two-vertex short form
            # leaves the other corner columns empty, e.g. a box covering the
            # whole image is (0.0,0.0,,,1.0,1.0,,).
            x, y, box_width, box_height = annotation['bbox'][:4]
            writer.writerow([
                f"{bucket}/{image['file_name']}",
                category_names[annotation['category_id']],
                x / width,
                y / height,
                '',
                '',
                (x + box_width) / width,
                (y + box_height) / height,
                '',
                '',
            ])
使用时,只需传入标注文件名和存放已上传图像的 GCS 存储桶路径(以 gs:// 开头)来调用该函数:
# Example: convert the training annotations; image paths in the CSV are
# prefixed with the given bucket string.
convert_coco_json_to_csv(
    filename="/content/train_annotations.coco.json",
    bucket="gs://[bucket name]",
)