2

我有一个 python 脚本,用于通过我的家庭网络从 ip 摄像头获取图像并添加日期时间信息。在 12 小时内,它抓取了大约 200,000 张图片。但是当使用zoneminder(摄像头监控软件)时,摄像头在 7 小时内管理 250,000 个。

我想知道是否有人可以帮助我提高脚本效率我曾尝试使用线程模块创建 2 个线程,但它没有帮助我不确定我是否实现了错误。以下是我目前正在使用的代码:

#!/usr/bin/env python

# My First python script to grab images from an ip camera

import requests
import time
import urllib2
import sys
import os
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
import datetime
from datetime import datetime
import threading

timecount = 43200
lock = threading.Lock()

wdir = "/workdir/"

y = len([f for f in os.listdir(wdir) 
     if f.startswith('Cam1') and os.path.isfile(os.path.join(wdir, f))])

def looper(timeCount):
   global y
   start = time.time()
   keepLooping = True
   while keepLooping:
    with lock:
        y += 1
    now = datetime.now()
    dte = str(now.day) + ":" +  str(now.month) + ":" + str(now.year)
    dte1 = str(now.hour) + ":" + str(now.minute) + ":" + str(now.second) + "." + str(now.microsecond)
    cname = "Cam1:"
    dnow = """Date: %s """ % (dte)
    dnow1 = """Time: %s""" % (dte1)
    buffer = urllib2.urlopen('http://(ip address)/snapshot.cgi?user=uname&pwd=password').read()
    img = str(wdir) + "Cam1-" + str('%010d' % y) + ".jpg"
    f = open(img, 'wb')
    f.write(buffer) 
    f.close()
    if time.time()-start > timeCount:
           keepLooping = False
    font = ImageFont.truetype("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf",10)
    img=Image.open(img)
    draw = ImageDraw.Draw(img)
    draw.text((0, 0),cname,fill="white",font=font)
    draw.text((0, 10),dnow,fill="white",font=font)
    draw.text((0, 20),dnow1,fill="white",font=font)
    draw = ImageDraw.Draw(img)
    draw = ImageDraw.Draw(img)
    img.save(str(wdir) + "Cam1-" + str('%010d' % y) + ".jpg")

for i in range(2):
        thread = threading.Thread(target=looper,args=(timecount,))
        thread.start()
        thread.join()

我该如何改进这个脚本,或者如何从相机打开一个流然后从流中抓取图像?这甚至会提高效率/捕获率吗?

编辑:

感谢 kobejohn 的帮助,我提出了以下实现。运行 12 小时后,它从 2 个单独的相机(在同一时间)获得了超过 420,000 张图片,每个相机同时在自己的线程上运行,而我上面的原始实现大约有 200,000 张。以下代码将并行运行 2 个摄像头(或足够接近)并向它们添加文本:

import base64
from datetime import datetime
import httplib
import io
import os
import time

from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw

import multiprocessing

wdir = "/workdir/"
stream_urlA = '192.168.3.21'
stream_urlB = '192.168.3.23'
usernameA = ''
usernameB = ''
password = ''

y = sum(1 for f in os.listdir(wdir) if f.startswith('CamA') and os.path.isfile(os.path.join(wdir, f)))
x = sum(1 for f in os.listdir(wdir) if f.startswith('CamB') and os.path.isfile(os.path.join(wdir, f)))

def main():
    time_count = 43200
#    time_count = 1
    procs = list()
    for i in range(1):
        p = multiprocessing.Process(target=CameraA, args=(time_count, y,))
        q = multiprocessing.Process(target=CameraB, args=(time_count, x,))
        procs.append(p)
        procs.append(q)
        p.start()
        q.start()
    for p in procs:
        p.join()

def CameraA(time_count, y):
    y = y
    h = httplib.HTTP(stream_urlA)
    h.putrequest('GET', '/videostream.cgi')
    h.putheader('Authorization', 'Basic %s' % base64.encodestring('%s:%s' % (usernameA, password))[:-1])
    h.endheaders()
    errcode, errmsg, headers = h.getreply()
    stream_file = h.getfile()
    start = time.time()
    end = start + time_count
    while time.time() <= end:
    y += 1
        now = datetime.now()
        dte = str(now.day) + "-" + str(now.month) + "-" + str(now.year)
        dte1 = str(now.hour) + ":" + str(now.minute) + ":" + str(now.second) + "." + str(now.microsecond)
        cname = "Cam#: CamA"
        dnow = """Date: %s """ % dte
        dnow1 = """Time: %s""" % dte1
        # your camera may have a different streaming format
        # but I think you can figure it out from the debug style below
        source_name = stream_file.readline()    # '--ipcamera'
        content_type = stream_file.readline()    # 'Content-Type: image/jpeg'
        content_length = stream_file.readline()   # 'Content-Length: 19565'
        #print 'confirm/adjust content (source?): ' + source_name
        #print 'confirm/adjust content (type?): ' + content_type
        #print 'confirm/adjust content (length?): ' + content_length
        # find the beginning of the jpeg data BEFORE pulling the jpeg framesize
        # there must be a more efficient way, but hopefully this is not too bad
        b1 = b2 = b''
        while True:
            b1 = stream_file.read(1)
            while b1 != chr(0xff):
                b1 = stream_file.read(1)
            b2 = stream_file.read(1)
            if b2 == chr(0xd8):
                break
        # pull the jpeg data
        framesize = int(content_length[16:])
        jpeg_stripped = b''.join((b1, b2, stream_file.read(framesize - 2)))
        # throw away the remaining stream data. Sorry I have no idea what it is
        junk_for_now = stream_file.readline()
        # convert directly to an Image instead of saving / reopening
        # thanks to SO: http://stackoverflow.com/a/12020860/377366
        image_as_file = io.BytesIO(jpeg_stripped)
        image_as_pil = Image.open(image_as_file)
        draw = ImageDraw.Draw(image_as_pil)
        draw.text((0, 0), cname, fill="white")
        draw.text((0, 10), dnow, fill="white")
        draw.text((0, 20), dnow1, fill="white")
        img_name = "CamA-" + str('%010d' % y) + ".jpg"
        img_path = os.path.join(wdir, img_name)
        image_as_pil.save(img_path)

def CameraB(time_count, x):
    x = x
    h = httplib.HTTP(stream_urlB)
    h.putrequest('GET', '/videostream.cgi')
    h.putheader('Authorization', 'Basic %s' % base64.encodestring('%s:%s' % (usernameB, password))[:-1])
    h.endheaders()
    errcode, errmsg, headers = h.getreply()
    stream_file = h.getfile()
    start = time.time()
    end = start + time_count
    while time.time() <= end:
    x += 1
        now = datetime.now()
        dte = str(now.day) + "-" + str(now.month) + "-" + str(now.year)
        dte1 = str(now.hour) + ":" + str(now.minute) + ":" + str(now.second) + "." + str(now.microsecond)
        cname = "Cam#: CamB"
        dnow = """Date: %s """ % dte
        dnow1 = """Time: %s""" % dte1
        # your camera may have a different streaming format
        # but I think you can figure it out from the debug style below
        source_name = stream_file.readline()    # '--ipcamera'
        content_type = stream_file.readline()    # 'Content-Type: image/jpeg'
        content_length = stream_file.readline()   # 'Content-Length: 19565'
        #print 'confirm/adjust content (source?): ' + source_name
        #print 'confirm/adjust content (type?): ' + content_type
        #print 'confirm/adjust content (length?): ' + content_length
        # find the beginning of the jpeg data BEFORE pulling the jpeg framesize
        # there must be a more efficient way, but hopefully this is not too bad
        b1 = b2 = b''
        while True:
            b1 = stream_file.read(1)
            while b1 != chr(0xff):
                b1 = stream_file.read(1)
            b2 = stream_file.read(1)
            if b2 == chr(0xd8):
                break
        # pull the jpeg data
        framesize = int(content_length[16:])
        jpeg_stripped = b''.join((b1, b2, stream_file.read(framesize - 2)))
        # throw away the remaining stream data. Sorry I have no idea what it is
        junk_for_now = stream_file.readline()
        # convert directly to an Image instead of saving / reopening
        # thanks to SO: http://stackoverflow.com/a/12020860/377366
        image_as_file = io.BytesIO(jpeg_stripped)
        image_as_pil = Image.open(image_as_file)
        draw = ImageDraw.Draw(image_as_pil)
        draw.text((0, 0), cname, fill="white")
        draw.text((0, 10), dnow, fill="white")
        draw.text((0, 20), dnow1, fill="white")
        img_name = "CamB-" + str('%010d' % x) + ".jpg"
        img_path = os.path.join(wdir, img_name)
        image_as_pil.save(img_path)

if __name__ == '__main__':
    main()

编辑(2014 年 5 月 26 日):

我花了 2 个月的大部分时间试图更新这个脚本/程序以使用 python 3,但完全无法让它做任何事情。有人能指出我正确的方向吗?

我已经尝试过 2to3 脚本,但它只是更改了几个条目,我仍然无法让它运行。

4

3 回答 3

3

*编辑我之前指责 GIL 的行为很愚蠢。这是一个 I/O 绑定进程,而不是 CPU 绑定进程。所以多处理不是一个有意义的解决方案。


*更新我终于找到了一个与你的流接口相同的演示网络摄像机(我想)。使用流接口,它只建立一次连接,然后从数据流中读取,就好像它是一个文件以提取 jpg 图像帧。使用下面的代码,我抓取了 2 秒 ==> 27 帧,我相信这可以在 7 小时内推断出大约 300k 图像。

如果您想获得更多,您可以将图像修改和文件写入移动到单独的线程并让工作人员执行此操作,而主线程只是从流中抓取并将 jpeg 数据发送给工作人员。

import base64
from datetime import datetime
import httplib
import io
import os
import time

from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw


wdir = "workdir"
stream_url = ''
username = ''
password = ''


def main():
    time_count = 2
    looper_stream(time_count)


def looper_stream(time_count):
    h = httplib.HTTP(stream_url)
    h.putrequest('GET', '/videostream.cgi')
    h.putheader('Authorization', 'Basic %s' % base64.encodestring('%s:%s' % (username, password))[:-1])
    h.endheaders()
    errcode, errmsg, headers = h.getreply()
    stream_file = h.getfile()
    start = time.time()
    end = start + time_count
    while time.time() <= end:
        now = datetime.now()
        dte = str(now.day) + "-" + str(now.month) + "-" + str(now.year)
        dte1 = str(now.hour) + "-" + str(now.minute) + "-" + str(now.second) + "." + str(now.microsecond)
        cname = "Cam1-"
        dnow = """Date: %s """ % dte
        dnow1 = """Time: %s""" % dte1
        # your camera may have a different streaming format
        # but I think you can figure it out from the debug style below
        source_name = stream_file.readline()    # '--ipcamera'
        content_type = stream_file.readline()    # 'Content-Type: image/jpeg'
        content_length = stream_file.readline()   # 'Content-Length: 19565'
        print 'confirm/adjust content (source?): ' + source_name
        print 'confirm/adjust content (type?): ' + content_type
        print 'confirm/adjust content (length?): ' + content_length
        # find the beginning of the jpeg data BEFORE pulling the jpeg framesize
        # there must be a more efficient way, but hopefully this is not too bad
        b1 = b2 = b''
        while True:
            b1 = stream_file.read(1)
            while b1 != chr(0xff):
                b1 = stream_file.read(1)
            b2 = stream_file.read(1)
            if b2 == chr(0xd8):
                break
        # pull the jpeg data
        framesize = int(content_length[16:])
        jpeg_stripped = b''.join((b1, b2, stream_file.read(framesize - 2)))
        # throw away the remaining stream data. Sorry I have no idea what it is
        junk_for_now = stream_file.readline()
        # convert directly to an Image instead of saving / reopening
        # thanks to SO: http://stackoverflow.com/a/12020860/377366
        image_as_file = io.BytesIO(jpeg_stripped)
        image_as_pil = Image.open(image_as_file)
        draw = ImageDraw.Draw(image_as_pil)
        draw.text((0, 0), cname, fill="white")
        draw.text((0, 10), dnow, fill="white")
        draw.text((0, 20), dnow1, fill="white")
        img_name = "Cam1-" + dte + dte1 + ".jpg"
        img_path = os.path.join(wdir, img_name)
        image_as_pil.save(img_path)


if __name__ == '__main__':
    main()

下面的 *jpg 捕获似乎不够快,这是合乎逻辑的。发出如此多的http请求对任何事情都会很慢。

from datetime import datetime
import io
import threading
import os
import time

import urllib2

from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw


wdir = "workdir"


def looper(time_count, loop_name):
    start = time.time()
    end = start + time_count
    font = ImageFont.truetype("/usr/share/fonts/truetype/ttf-dejavu/DejaVuSans.ttf", 10)
    while time.time() <= end:
        now = datetime.now()
        dte = str(now.day) + "-" + str(now.month) + "-" + str(now.year)
        dte1 = str(now.hour) + "-" + str(now.minute) + "-" + str(now.second) + "." + str(now.microsecond)
        cname = "Cam1-"
        dnow = """Date: %s """ % dte
        dnow1 = """Time: %s""" % dte1
        image = urllib2.urlopen('http://(ip address)/snapshot.cgi?user=uname&pwd=password').read()
        # convert directly to an Image instead of saving / reopening
        # thanks to SO: http://stackoverflow.com/a/12020860/377366
        image_as_file = io.BytesIO(image)
        image_as_pil = Image.open(image_as_file)
        draw = ImageDraw.Draw(image_as_pil)
        draw_text = "\n".join((cname, dnow, dnow1))
        draw.text((0, 0), draw_text, fill="white", font=font)
        #draw.text((0, 0), cname, fill="white", font=font)
        #draw.text((0, 10), dnow, fill="white", font=font)
        #draw.text((0, 20), dnow1, fill="white", font=font)
        img_name = "Cam1-" + dte + dte1 + "(" + loop_name + ").jpg"
        img_path = os.path.join(wdir, img_name)
        image_as_pil.save(img_path)


if __name__ == '__main__':
    time_count = 5
    threads = list()
    for i in range(2):
        name = str(i)
        t = threading.Thread(target=looper, args=(time_count, name))
        threads.append(p)
        t.start()
    for t in threads:
        t.join()
于 2013-10-11T13:50:09.910 回答
2

您为您提供的实施所获得的速度还不错。

您正在写入大约每秒 4.5 帧 (fps),而 zoneminder 正在写入近 10 fps。以下是您的流程图,其中包含一些注释以加快速度

  1. 您正在读取 url 缓冲区(网络延迟),
  2. 然后写入图像(磁盘延迟 1)(您可能不需要在此处将图像写入磁盘 - 考虑将其直接传递给您的 img 类)
  3. 读取图像(磁盘延迟 2)
  4. 然后使用字体、文本框等操作图像......(3 次图像绘制) - 你可以用换行符构建一个字符串,以便只调用一次 draw.text 函数吗?
  5. 写入输出图像(磁盘延迟 3)
于 2013-10-11T13:57:39.240 回答
0

有几件事可能会有所帮助。

  • 将 font-opening 从函数提升到主代码,然后传入字体对象(打开字体可能在时间上很重要,只需执行一次,就不会花费时间来处理每个图像;您永远不会动态修改字体,因此共享相同的字体对象应该没问题)。

  • 您可能可以废弃包含以下内容的三行中的两行:

    绘制 = ImageDraw.Draw(img)

于 2013-10-11T17:02:05.197 回答