python - 如何从 YUV2 流中检索原始数据

Question

我正在 Windows 上通过主机应用程序（USB）连接一个 QVGA 传感器，它以 yuv2 格式流式传输数据。如何使用 opencv-python 示例程序从该 yuv2 格式的流中读取或捕获原始数据？

我怎样才能做到这一点？是否有任何测试示例可以这样做？

//opencv-python (host appl)
import cv2
import numpy as np
    
# Open camera 0 through the Media Foundation (MSMF) backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)
# Request the frame width and height.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 340)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
# Request the frame rate.
cap.set(cv2.CAP_PROP_FPS, 30)
while True:
    # Capture frame-by-frame.
    ret, frame = cap.read()
    # Stop as soon as no frame is delivered: read() then hands back an empty
    # frame, and passing that to imshow() raises instead of displaying anything.
    if not ret:
        break
    # Display the resulting frame.
    cv2.imshow('frame', frame)
    # Quit when the user presses 'q' (only the low byte of the key code is compared).
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything is done, release the capture and close the preview window.
cap.release()
cv2.destroyAllWindows()

无需解码即可抓取视频帧的代码示例：

import cv2
import numpy as np

# Open camera 0.
# -------> Try replacing cv2.CAP_MSMF with cv2.CAP_FFMPEG:
cap = cv2.VideoCapture(0, cv2.CAP_FFMPEG)

# Capture properties, applied in the listed order:
# width, height, fps, then the Mat format (-1 fetches undecoded RAW video streams as Mat 8UC1).
capture_settings = (
    (cv2.CAP_PROP_FRAME_WIDTH, 340),
    (cv2.CAP_PROP_FRAME_HEIGHT, 240),
    (cv2.CAP_PROP_FPS, 30),
    (cv2.CAP_PROP_FORMAT, -1),
)
for prop_id, prop_value in capture_settings:
    cap.set(prop_id, prop_value)

# Grab at most 10 frames and report the shape and dtype of each one.
for _ in range(10):
    ok, raw = cap.read()
    if not ok:
        break
    print(f'frame.shape = {raw.shape}    frame.dtype = {raw.dtype}')

cap.release()

如果cv2.CAP_FFMPEG不起作用，请尝试以下代码示例：

import cv2
import numpy as np

# Open camera 0 through the Media Foundation (MSMF) backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

y16_fourcc = cv2.VideoWriter.fourcc('Y', '1', '6', ' ')

# Capture properties, applied in the listed order.
for prop_id, prop_value in (
    (cv2.CAP_PROP_FRAME_WIDTH, 340),   # width
    (cv2.CAP_PROP_FRAME_HEIGHT, 240),  # height
    (cv2.CAP_PROP_FPS, 30),            # fps
    # -----> Try setting FOURCC and disable RGB conversion:
    (cv2.CAP_PROP_FOURCC, y16_fourcc),
    (cv2.CAP_PROP_CONVERT_RGB, 0),
    # Format of the Mat objects: -1 fetches undecoded RAW video streams (as Mat 8UC1).
    (cv2.CAP_PROP_FORMAT, -1),
):
    cap.set(prop_id, prop_value)

# Grab at most 10 frames and report the shape and dtype of each one.
for _ in range(10):
    ok, raw = cap.read()
    if not ok:
        break
    print(f'frame.shape = {raw.shape}    frame.dtype = {raw.dtype}')

cap.release()

将 uint8 帧重塑为 680x240 并另存为 img.png：

import cv2
import numpy as np

# Open camera 0 through the Media Foundation (MSMF) backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

# Requested geometry and frame rate.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 340)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)
cap.set(cv2.CAP_PROP_FPS, 30)

# Disable the conversion to BGR by setting FOURCC to Y16 and `CAP_PROP_CONVERT_RGB` to 0.
y16_fourcc = cv2.VideoWriter.fourcc('Y', '1', '6', ' ')
cap.set(cv2.CAP_PROP_FOURCC, y16_fourcc)
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)

# Format of the Mat objects: -1 fetches undecoded RAW video streams (as Mat 8UC1).
cap.set(cv2.CAP_PROP_FORMAT, -1)

# Target layout of the saved image: 240 rows of 680 uint8 values.
cols = 340 * 2
rows = 240

# Grab at most 10 frames; every frame overwrites 'img.png', so the last one grabbed remains.
grabbed = 0
while grabbed < 10:
    ret, frame = cap.read()
    if not ret:
        break

    cv2.imwrite('img.png', frame.reshape(rows, cols))
    grabbed += 1

cap.release()

//680x240 img.png

//存在热对象（img1.png）

//处理后的图像（热对象）

//使用小端（测试）

//使用 CAP_DSHOW 测试图像（捕获）

//使用 CAP_DSHOW 测试图像（已保存）

//680x240 (hand.png)

//680x240 (hand1.png)

//fing 预览

//fing.png

score 0 · Accepted Answer

视频中像素的真正格式是int16灰度像素，但它被标记为YUV2格式（可能是为了与不支持 16 位的采集器兼容）。

我看到RAVI 格式使用了相同的技术。

OpenCV 的默认行为是将帧从 YUV2 转换为 BGR 格式。
由于格式没有颜色（并且只是标记为YUV2），因此转换会弄乱您的数据。

我在这里可能是错的……但看起来格式是“大端”的 16 位有符号整数。

这是用于抓取和显示视频的完整代码示例：

import cv2
import numpy as np

# Open camera 0 through the Media Foundation (MSMF) backend.
cap = cv2.VideoCapture(0, cv2.CAP_MSMF)

# Requested frame geometry (in pixels) and frame rate.
cols, rows = 340, 240
cap.set(cv2.CAP_PROP_FRAME_WIDTH, cols)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, rows)
cap.set(cv2.CAP_PROP_FPS, 30)

# Disable the conversion to BGR by setting FOURCC to Y16 and `CAP_PROP_CONVERT_RGB` to 0.
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter.fourcc('Y','1','6',' '))
cap.set(cv2.CAP_PROP_CONVERT_RGB, 0)

# Fetch undecoded RAW video streams.
cap.set(cv2.CAP_PROP_FORMAT, -1)  # Format of the Mat objects. Set value -1 to fetch undecoded RAW video streams (as Mat 8UC1)

try:
    while True:
        # Capture frame-by-frame.
        ret, frame = cap.read()

        if not ret:
            break

        # Decode the uint8 byte stream as big-endian signed 16-bit pixels.
        frame = frame.reshape(rows, cols*2)  # 240 rows of 680 bytes (two bytes per pixel)
        frame = frame.astype(np.uint16)  # Widen so that the shift below does not overflow 8 bits
        # The first byte of every pair is the most significant one (big-endian); the result is 340x240.
        frame = (frame[:, 0::2] << 8) + frame[:, 1::2]
        frame = frame.view(np.int16)  # Reinterpret the same 16 bits as signed values

        # Processing for display only (this part is just "cosmetics"):
        frame_roi = frame[:, 10:-10]  # Crop to 320x240 (the 10 leftmost and rightmost columns are not meant to be displayed).
        # Optional: frame_roi = cv2.medianBlur(frame_roi, 3)  # Clean the dead pixels (just for better viewing the image).
        # Shift out the 3 most significant bits. This was found empirically to improve
        # the picture; the meaning of those bits is unknown.
        frame_roi = frame_roi << 3
        normed = cv2.normalize(frame_roi, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)  # Stretch to 0..255 uint8 (just for viewing the image).

        cv2.imshow('normed', normed)  # Show the normalized video frame

        # Quit when the user presses 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Optional: cv2.imwrite('normed.png', normed)
finally:
    # Release the camera and close the window even when processing raises
    # (e.g. reshape fails because the delivered frame has an unexpected size).
    cap.release()
    cv2.destroyAllWindows()

将每个像素左移 3 位（frame_roi = frame_roi << 3）可以解决大部分问题。

可能是高 3 位没有到位，或者有一些不同的含义？

ROI 裁剪和归一化只是“装饰性”处理，目的是让您能够看清图像。

这是您发布的经过处理的图像（带有热对象）：

对于小端，替换以下行：

frame = frame.reshape(rows, cols*2) # Reshape to 680x240 (width in bytes x height): two bytes per pixel
frame = frame.astype(np.uint16) # Convert uint8 elements to uint16 elements so the shift below has room
frame = (frame[:, 0::2] << 8) + frame[:, 1::2]  # Combine each byte pair with the first byte as the most significant one (big-endian); the result is 340x240.
frame = frame.view(np.int16)  # The data is actually signed 16 bits - view it as int16 (16 bits signed).

替换为：

# Reinterpret every consecutive byte pair as one signed 16-bit pixel, then shape to rows x cols.
# NOTE(review): np.int16 uses the host's native byte order, so this reads the data as
# little-endian only on a little-endian host — confirm for the target machine.
frame = frame.view(np.int16).reshape(rows, cols)

如果值都是正数（uint16类型），请尝试：

# Reinterpret every consecutive byte pair as one unsigned 16-bit pixel, then shape to rows x cols.
# NOTE(review): np.uint16 uses the host's native byte order, so this reads the data as
# little-endian only on a little-endian host — confirm for the target machine.
frame = frame.view(np.uint16).reshape(rows, cols)

处理图像以供显示的草图代码：

import cv2
import numpy as np

# Geometry of the sensor frame in pixels; the saved image holds two bytes per pixel.
cols, rows = 340, 240

frame = cv2.imread('hand1.png', cv2.IMREAD_UNCHANGED)  # Read the saved raw frame (grayscale uint8)
if frame is None:
    # imread reports a missing or unreadable file by returning None instead of raising.
    raise FileNotFoundError("could not read 'hand1.png'")


# Create a CLAHE object (arguments are optional).
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))


# Decode the uint8 bytes as big-endian signed 16-bit pixels.
frame = frame.reshape(rows, cols * 2)  # 240 rows of 680 bytes
frame = frame.astype(np.uint16)  # Widen so that the shift below does not overflow 8 bits
# The first byte of every pair is the most significant one (big-endian); the result is 340x240.
frame = (frame[:, 0::2] << 8) + frame[:, 1::2]
frame = frame.view(np.int16)  # Reinterpret the same 16 bits as signed values

# Processing for display only (this part is just "cosmetics"):
frame_roi = frame[:, 10:-10]  # Crop to 320x240 (the 10 leftmost and rightmost columns are not meant to be displayed).
# Optional: frame_roi = cv2.medianBlur(frame_roi, 3)  # Clean the dead pixels (just for better viewing the image).

# Alternative that was tried: frame_roi = frame_roi << 3  # Shift out the 3 most significant bits
frame_roi = frame_roi << 1  # Shift out the most significant bit (empirical; its meaning is unknown)

# Fix the offset difference between alternating columns (note: this is not a good solution).
# Alternative that was tried: frame_as_uint16 = (frame_roi.astype(np.int32) + 32768).astype(np.uint16)
frame_as_uint16 = frame_roi.view(np.uint16)  # Try to interpret the data as unsigned
frame_as_float = frame_as_uint16.astype(np.float32) / 2  # Halve so the correction below has headroom before the clip
med_odd = np.median(frame_as_float[:, 0::2])  # Median of the columns at indices 0, 2, 4, ...
med_evn = np.median(frame_as_float[:, 1::2])  # Median of the columns at indices 1, 3, 5, ...
med_dif = med_odd - med_evn
# Move both column sets half of the difference towards each other so their medians match.
frame_as_float[:, 0::2] -= med_dif/2
frame_as_float[:, 1::2] += med_dif/2
frame_as_uint16 = np.round(frame_as_float).clip(0, 2**16-1).astype(np.uint16)

cl1 = clahe.apply(frame_as_uint16)  # Apply contrast enhancement.
normed = cv2.normalize(cl1, None, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)  # Stretch to 0..255 uint8 (just for viewing the image).

cv2.imwrite('normed.png', normed)

cv2.imshow('normed', normed)
cv2.waitKey()  # Block until any key is pressed
cv2.destroyAllWindows()

python - 如何从 YUV2 流中检索原始数据

1 回答 1

Related

Reference