python - 使用 dlib 进行多目标跟踪

Question

我正在尝试从 pyimagesearch web 实现以下代码，以使用 dlib 进行多对象跟踪。我尝试自定义它以使用detectron2 而不是Mobilenet + SSD Caffe 模型运行它。这是代码

#!/usr/bin/python
# -*- coding: utf-8 -*-

import cv2
import numpy as np
import multiprocessing
import torch
import imutils
import dlib
import detectron2
import argparse
from detectron2.utils.logger import setup_logger
setup_logger()
from imutils.video import FPS
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
from detectron2.structures import Boxes, BoxMode, pairwise_iou


# dlib tracker

def start_tracker(
    boxes,
    label,
    rgb,
    inputQueue,
    outputQueue,
    ):

    # construct a dlib rectangle object from the bounding box
    # coordinates and then start the correlation tracker

    t = dlib.correlation_tracker()
    left = boxes[0]
    top = boxes[1]
    right = boxes[2]
    bottom = boxes[3]
    rect = dlib.rectangle(int(left), int(top), int(right), int(bottom))
    t.start_track(rgb, rect)

    # loop indefinitely -- this function will be called as a daemon
    # process so we don't need to worry about joining it

    while True:

        # attempt to grab the next frame from the input queue

        rgb = inputQueue.get()

        # if there was an entry in our queue, process it

        if rgb is not None:

            # update the tracker and grab the position of the tracked
            # object

            t.update(rgb)
            pos = t.get_position()

            # unpack the position object

            startX = int(pos.left())
            startY = int(pos.top())
            endX = int(pos.right())
            endY = int(pos.bottom())

            # add the label + bounding box coordinates to the output
            # queue

            outputQueue.put((label, (startX, startY, endX, endY)))


ap = argparse.ArgumentParser()
ap.add_argument('-v', '--video', required=True,
                help='path to input video file')
ap.add_argument('-o', '--output', type=str,
                help='path to optional output video file')
ap.add_argument('-c', '--confidence', type=float, default=0.990,
                help='minimum probability to filter weak detections')
args = vars(ap.parse_args())

# initialize our list of queues -- both input queue and output queue
# for *every* object that we will be tracking

inputQueues = []
outputQueues = []

# initialize the video stream and output video writer

print '[INFO] starting video stream...'
vs = cv2.VideoCapture(args['video'])
writer = None
label = ''

# start the frames per second throughput estimator

fps = FPS().start()

# Detectron

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file('COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml'
                    ))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set threshold for this model
cfg.MODEL.WEIGHTS = \
    model_zoo.get_checkpoint_url('COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml'
                                 )
predictor = DefaultPredictor(cfg)

# loop over frames from the video file stream

while True:

    # grab the next frame from the video file

    (grabbed, frame) = vs.read()

    # check to see if we have reached the end of the video file

    if frame is None:
        break

    # resize the frame for faster processing and then convert the
    # frame from BGR to RGB ordering (dlib needs RGB ordering)

    frame = imutils.resize(frame, width=600)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # if we are supposed to be writing a video to disk, initialize
    # the writer

    if args['output'] is not None and writer is None:
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        writer = cv2.VideoWriter(args['output'], fourcc, 30,
                                 (frame.shape[1], frame.shape[0]), True)

    # if our list of queues is empty then we know we have yet to
    # create our first object tracker

    if len(inputQueues) == 0:
        outputs = predictor(rgb)
        instances = outputs['instances']
        scores = instances.scores.cpu().numpy()

            # loop over the detections

        for i in np.arange(start=0, stop=len(instances), step=1):

            # extract the confidence (i.e., probability) associated
            # with the prediction

            confidence = scores[i]

            # filter out weak detections by requiring a minimum
            # confidence

            if confidence > args['confidence']:

                # compute the (x, y)-coordinates of the bounding box
                # for the object

                boxes = instances.pred_boxes.tensor.cpu().numpy()
                boxes = BoxMode.convert(boxes, BoxMode.XYXY_ABS,
                        BoxMode.XYWH_ABS)
                boxes = boxes.tolist()
                boxes = boxes[i]
                (startX, startY, endX, endY) = boxes
                bb = (startX, startY, endX, endY)
                label = 'Id:' + str(i)

                # create two brand new input and output queues,
                # respectively

                iq = multiprocessing.Queue()
                oq = multiprocessing.Queue()
                inputQueues.append(iq)
                outputQueues.append(oq)

                # spawn a daemon process for a new object tracker

                p = multiprocessing.Process(target=start_tracker,
                        args=(bb, label, rgb, iq, oq))
                p.daemon = True
                p.start()

                # grab the corresponding class label for the detection
                # and draw the bounding box

                cv2.rectangle(frame, (int(startX), int(startY)),
                              (int(endX), int(endY)), (int(0),
                              int(0xFF), int(0)), int(2))
                cv2.putText(
                    frame,
                    label,
                    (int(startX), int(startY - 15)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    int(0.45),
                    (int(0), int(0xFF), int(0)),
                    int(2),
                    )
    else:

        # otherwise, we've already performed detection so let's track
        # multiple objects
        # loop over each of our input ques and add the input RGB
        # frame to it, enabling us to update each of the respective
        # object trackers running in separate processes

        for iq in inputQueues:
            iq.put(rgb)

        # loop over each of the output queues

        for oq in outputQueues:

            # grab the updated bounding box coordinates for the
            # object -- the .get method is a blocking operation so
            # this will pause our execution until the respective
            # process finishes the tracking update

            (label, startX, startY, endX, endY) = oq.get()

            # draw the bounding box from the correlation object
            # tracker

            cv2.rectangle(frame, (int(startX), int(startY)),
                          (int(endX), int(endY)), (int(0), int(0xFF),
                          int(0)), int(2))
            cv2.putText(
                frame,
                label,
                (int(startX), int(startY - 15)),
                cv2.FONT_HERSHEY_SIMPLEX,
                int(0.45),
                (int(0), int(0xFF), int(0)),
                int(2),
                )

    # check to see if we should write the frame to disk

    if writer is not None:
        writer.write(frame)

    # show the output frame
    # cv2.imshow("Frame", frame)

    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop

    if key == ord('q'):
        break

    # update the FPS counter

    fps.update()

# stop the timer and display FPS information

fps.stop()
print '[INFO] elapsed time: {:.2f}'.format(fps.elapsed())
print '[INFO] approx. FPS: {:.2f}'.format(fps.fps())

# check to see if we need to release the video writer pointer

if writer is not None:
    writer.release()

# do a bit of cleanup
# cv2.destroyAllWindows()

vs.release()

无论如何，我一遍又一遍地遇到这个错误，此时我不知道我做错了什么

Process Process-1:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "Detection-Tracking.py", line 42, in start_tracker
    t.start_track(rgb, rect)
RuntimeError: 
Error detected at line 61.
Error detected in file /tmp/pip-wheel-66glv9rf/dlib/dlib/../dlib/image_processing/correlation_tracker.h.
Error detected in function void dlib::correlation_tracker::start_track(const image_type&, const dlib::drectangle&) [with image_type = dlib::numpy_image<dlib::rgb_pixel>].

Failing expression was p.is_empty() == false.
     You can't give an empty rectangle.  
     void correlation_tracker::start_track()

我已经检查了 dlib.rectangle 的输入顺序是否正确，所以我愿意接受你能给我的任何想法。

score 0 · Accepted Answer

现在可能为时已晚，但试试这个：

pip install cmake
pip install dlib

并查看代码是否运行。

python - 使用 dlib 进行多目标跟踪

1 回答 1

Related

Reference