我正在尝试为我的项目实施 Mask R-CNN。我一直在尝试从图像中检测摩托车。我已经使用 supervise.ly https://supervise.ly对图像进行了注释。在代码进行各种更改后,错误文件“skimage/draw/_draw.pyx”,第 217 行,在 skimage.draw._draw._polygon OverflowError: Python int too large to convert to C ssize_t
显示
注释如下:
{"description": "", "tags": [], "size": {"height": 720, "width": 1280}, "objects": [{"description": "", "bitmap": null, "tags": [], "classTitle": "bike", "points": {"exterior": [[854, 431], [868, 426], [888, 428], [891, 421], [885, 412], [888, 399], [896, 396], [901, 395], [906, 387], [913, 377], [919, 373], [925, 370], [936, 367], [954, 365], [966, 362], [975, 359], [982, 352], [980, 342], [984, 328], [991, 320], [996, 317], [1002, 316], [1009, 318], [1014, 327], [1016, 337], [1016, 343], [1021, 352], [1028, 358], [1033, 364], [1037, 374], [1037, 379], [1035, 396], [1039, 401], [1051, 402], [1061, 408], [1066, 422], [1069, 429], [1071, 449], [1071, 455], [1071, 465], [1069, 471], [1058, 471], [1054, 479], [1046, 485], [1036, 491], [1024, 494], [1010, 495], [998, 490], [993, 482], [980, 482], [974, 482], [968, 482], [959, 483], [943, 486], [936, 483], [933, 474], [931, 469], [919, 465], [905, 473], [893, 481], [884, 484], [876, 485], [871, 486], [859, 486], [850, 486], [844, 484], [839, 481], [833, 465], [837, 454], [844, 444], [849, 438]], "interior": []}}]}
我被这些图像困住了。要么我必须重新开始并手动注释许多图像。或者我可以监督使用,这在注释中非常有用。我猜罪魁祸首是数据集类。
############################################################
# Dataset
############################################################
def sort_filenames(alist):
import re
def atoi(text):
return int(text) if text.isdigit() else text
def natural_keys(text):
return [ atoi(c) for c in re.split(r'(\d+)', text) ]
#alist=["something1","something12","something17","something2","something25","so mething29"]
alist.sort(key=natural_keys)
#print(alist)
class bikeDataset(utils.Dataset):
def load_bike(self, dataset_dir, subset,start_size,end_size):
"""Load a subset of the bike dataset.
dataset_dir: Root directory of the dataset.
subset: Subset to load: train or val
"""
# Add classes. We have only one class to add.
self.add_class("bike", 1, "bike")
# Train or validation dataset?
assert subset in ["train", "val"]
dataset_dir = os.path.join("/home/hunain/Mask_RCNN/samples/bike/dataset", subset)
# Load annotations
# VGG Image Annotator (up to version 1.6) saves each image in the form:
# { 'filename': '28503151_5b5b7ec140_b.jpg',
# 'regions': {
# '0': {
# 'region_attributes': {},
# 'shape_attributes': {
# 'all_points_x': [...],
# 'all_points_y': [...],
# 'name': 'polygon'}},
# ... more regions ...
# },
# 'size': 100202
# }
# We mostly care about the x and y coordinates of each region
# Note: In VIA 2.0, regions was changed from a dict to a list.
#annotations = json.load(open(os.path.join(dataset_dir, "via_region_data.json")))
print(dataset_dir+'..........................................................................')
filenames= os.listdir(dataset_dir)
list_file =[]
name_count=0
for f in filenames:
temp1 , temp2 = (f.split('.',1))
temp3 = temp1 + ".png"
list_file.append(temp3)
name_count=name_count+1
sort_filenames(list_file)
#print(str(name_count)+'....................................................................................')
#comment hata kay dekh lo, naam print kardeta hai image wali file kay
annotations=[]
for i in range(start_size,end_size):
annotations.append(json.load(open(os.path.join(dataset_dir, "out"+str(i)+".png.json"))))
#print(annotations)
#annotations = [a for a in annotations if a['objects']]
#print(annotations)
count=0
image_dir=""
for annot in annotations:
count=count+1
if annot['objects']:
for i in annot['objects']:
points = i['points']
exterior = points['exterior']
#print (exterior)
# load_mask() needs the image size to convert polygons to masks.
# Unfortunately, VIA doesn't include it in JSON, so we must read
# the image. This is only managable since the dataset is tiny.
if (subset=="train"):
image_dir="/home/hunain/Mask_RCNN/samples/bike/dataset/images"
else:
image_dir="/home/hunain/Mask_RCNN/samples/bike/dataset/images2"
image_path = os.path.join(image_dir, list_file[count])
image = skimage.io.imread(image_path)
height, width = image.shape[:2]
all_x=[]
all_y=[]
polygon=[]
for i in exterior:
all_x.append(i[0])
all_y.append(i[1])
#print(all_x)
#print(all_y)
polygon.append(all_x)
polygon.append(all_y)
#print(polygon)
self.add_image(
"bike",
image_id=list_file[count], # use file name as a unique image id
path=image_path,
width=1280, height=720,
polygons=copy.deepcopy(polygon))
all_x.clear()
all_y.clear()
polygon.clear()
def load_mask(self, image_id):
print('We are in Load mask function.................')
"""Generate instance masks for an image.
Returns:
masks: A bool array of shape [height, width, instance count] with
one mask per instance.
class_ids: a 1D array of class IDs of the instance masks.
"""
# If not a bike dataset image, delegate to parent class.
image_info = self.image_info[image_id]
#print(image_info['polygons'])
if image_info["source"] != "bike":
return super(self.__class__, self).load_mask(image_id)
# Convert polygons to a bitmap mask of shape
# [height, width, instance_count]
info = self.image_info[image_id]
mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
dtype=np.uint8)
for i, p in enumerate(info["polygons"]):
#print(p)
# Get indexes of pixels inside the polygon and set them to 1
rr, cc = skimage.draw.polygon(p[0], p[1])
mask[rr, cc, i] = 1
# Return mask, and array of class IDs of each instance. Since we have
# one class ID only, we return an array of 1s
return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32)
def image_reference(self, image_id):
print('We are in image_reference function..............')
"""Return the path of the image."""
info = self.image_info[image_id]
if info["source"] == "bike":
return info["path"]
else:
super(self.__class__, self).image_reference(image_id)
def train(model):
print('We are in train function...............')
"""Train the model."""
# Training dataset.
dataset_train = bikeDataset()
dataset_train.load_bike(args.dataset, "train",1,325)
dataset_train.prepare()
# needs a revision. isko revise karna hai file_numers ko.
# Validation dataset
dataset_val = bikeDataset()
dataset_val.load_bike(args.dataset, "val",326,400)
dataset_val.prepare()
# *** This training schedule is an example. Update to your needs ***
# Since we're using a very small dataset, and starting from
# COCO trained weights, we don't need to train too long. Also,
# no need to train all layers, just the heads should do it.
print("Training network heads")
model.train(dataset_train, dataset_val,
learning_rate=config.LEARNING_RATE,
epochs=2,
layers='heads')
def color_splash(image, mask):
print('We are in color_splash function.......................')
"""Apply color splash effect.
image: RGB image [height, width, 3]
mask: instance segmentation mask [height, width, instance count]
Returns result image.
"""
# Make a grayscale copy of the image. The grayscale copy still
# has 3 RGB channels, though.
gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255
# Copy color pixels from the original color image where mask is set
if mask.shape[-1] > 0:
# We're treating all instances as one, so collapse the mask into one layer
mask = (np.sum(mask, -1, keepdims=True) >= 1)
splash = np.where(mask, image, gray).astype(np.uint8)
else:
splash = gray.astype(np.uint8)
return splash
def detect_and_color_splash(model, image_path=None, video_path=None):
print('We are in detect_and_color_splash function................')
assert image_path or video_path
# Image or video?
if image_path:
# Run model detection and generate the color splash effect
print("Running on {}".format(args.image))
# Read image
image = skimage.io.imread(args.image)
# Detect objects
r = model.detect([image], verbose=1)[0]
# Color splash
splash = color_splash(image, r['masks'])
# Save output
file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(datetime.datetime.now())
skimage.io.imsave(file_name, splash)
elif video_path:
import cv2
# Video capture
vcapture = cv2.VideoCapture(video_path)
width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = vcapture.get(cv2.CAP_PROP_FPS)
# Define codec and create video writer
file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(datetime.datetime.now())
vwriter = cv2.VideoWriter(file_name,
cv2.VideoWriter_fourcc(*'MJPG'),
fps, (width, height))
count = 0
success = True
while success:
print("frame: ", count)
# Read next image
success, image = vcapture.read()
if success:
# OpenCV returns images as BGR, convert to RGB
image = image[..., ::-1]
# Detect objects
r = model.detect([image], verbose=0)[0]
# Color splash
splash = color_splash(image, r['masks'])
# RGB -> BGR to save image to video
splash = splash[..., ::-1]
# Add image to video writer
vwriter.write(splash)
count += 1
vwriter.release()
print("Saved to ", file_name)
我希望模型能够获取图像并开始训练这些图像。谢谢您的帮助。