我正在尝试以视频格式无损编码 10 位图像,最好使用 HEVC 编码。图像存储为 16 位 png 文件(但仅使用 10 位),我一直在使用 ffmpeg 创建和读回视频文件。
到目前为止,我最好的尝试是基于 https://stackoverflow.com/a/66180140/17261462 中的方法,但正如那里提到的,我得到了一些像素强度差异,这可能是在 10 位和 16 位表示之间转换时的舍入造成的。我尝试了几种不同的方法(位移、左位复制、基于浮点的缩放),但还没有弄清楚如何获得真正的无损重建。
下面是一小段用于复现该问题的代码。我可能在某处做错了什么,如能提供任何帮助,我将不胜感激。
import subprocess
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import imageio
# Round-trip experiment: build a 10-bit gradient, store it as a 16-bit PNG,
# encode it with libx265 (lossless=1) through ffmpeg, decode it back to a PNG
# and measure the per-pixel difference against the original image.


def _run_ffmpeg(args):
    """Run one ffmpeg command, echo its output and fail loudly on error.

    Args:
        args: Full argument vector, starting with the ``ffmpeg`` executable.
            Passing a list (instead of splitting a command string on
            whitespace) keeps paths that contain spaces intact.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    print(' '.join(args))
    p = subprocess.run(args, capture_output=True)
    print( 'stdout:', p.stdout.decode() )
    print( 'stderr:', p.stderr.decode() )
    # Stop here instead of failing later on an empty/missing output file.
    p.check_returncode()
    return p


# Create simple image: a square holding every value 0 .. 2**bitdepth - 1 once
bitdepth = 10
hbd = bitdepth // 2
im0 = np.zeros((1<<hbd,1<<hbd),dtype=np.uint16)
im0[:] = np.arange(0,1<<bitdepth).reshape(im0.shape)
print('im0',np.min(im0),np.max(im0),im0.shape,im0.dtype)
# tile it to be at least 64 pix
im0 = np.tile(im0, (2, 2))
print('im0',np.min(im0),np.max(im0),im0.shape,im0.dtype)
# Keep the unscaled image as the reference for the final comparison
im0ref = im0

# Rescale intensities from the bitdepth range to the full 16-bit range.
# Alternatives that were tried instead of this rescaling:
#   im0 = (im0<<6)               (plain bit shift)
#   im0 = (im0<<6) + (im0>>4)    (left bit replication)
max_in = np.float64((1<<bitdepth)-1)
max_16 = np.float64((1<<16)-1)
im0 = np.uint16(np.round(im0 * max_16/max_in))
print('im0',np.min(im0),np.max(im0),im0.shape,im0.dtype)

# Save it as png. The temporary file is only used to reserve a name, so the
# handle is closed right away (an open handle is a leak and can prevent other
# programs from writing to the path on Windows).
tmp0 = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
tmp0.close()
print(f'Using tmp file: {tmp0.name}')
imageio.imwrite(tmp0.name,im0)

# Encode with ffmpeg
tmp1 = tempfile.NamedTemporaryFile(suffix='.mkv', delete=False)
tmp1.close()
# note that adding the following doesn't seem to impact the results
#   -bsf:v hevc_metadata=video_full_range_flag=1
mycmd = ['ffmpeg', '-y', '-i', tmp0.name,
         '-c:v', 'libx265', '-x265-params', 'lossless=1',
         '-pix_fmt', 'gray10be',
         tmp1.name]
p = _run_ffmpeg(mycmd)

# Decode the video back to a 16-bit png with ffmpeg
tmp2 = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
tmp2.close()
mycmd = ['ffmpeg', '-y', '-i', tmp1.name,
         '-pix_fmt', 'gray16be',
         tmp2.name]
p = _run_ffmpeg(mycmd)

# Read the decoded png back
im1 = imageio.imread(tmp2.name)
print('im1',np.min(im1),np.max(im1),im1.shape,im1.dtype)

# Scale back to the bitdepth range (alternative that was tried: im1>>6)
im1pre = im1
im1 = np.uint16(np.round(im1 * max_in/max_16))

# check the result: reference, reconstruction and their signed difference
plt.figure()
plt.imshow(im0ref)
plt.colorbar()
plt.figure()
plt.imshow(im1)
plt.colorbar()
plt.figure()
# Cast to a signed type so negative differences do not wrap around
plt.imshow(np.int32(im1)-np.int32(im0ref))
plt.colorbar()
# L2 norm of the reconstruction error; 0 means a lossless round trip
print('err: ',np.linalg.norm((np.float32(im1)-np.float32(im0ref)).ravel()))
plt.show()
编辑:我现在也在 FFmpeg 用户邮件列表上发布了我的问题:http://ffmpeg.org/pipermail/ffmpeg-user/2021-November/053761.html
同样为方便起见,下面提供了一个简单的脚本来生成使用 16 位和 10 位数据的不同变体:
import numpy as np
import imageio
# Generate a 10-bit gradient test image (values 0 .. 2**bitdepth - 1) and
# save it as four 16-bit PNG variants that differ only in how the 10-bit
# values are mapped into the 16-bit container.
bitdepth = 10
unusedbitdepth = 16-bitdepth
hbd = int(bitdepth/2)

# Square base image containing each value exactly once, then tiled 2x2 so it
# is at least 64 pix wide, as the ffmpeg encoder may only work with images of
# size 64 and up.
side = 1<<hbd
im0 = np.zeros((side, side), dtype=np.uint16)
im0[:] = np.arange(0, 1<<bitdepth).reshape(im0.shape)
im0 = np.tile(im0, (2, 2))

# Variant: values shifted into the most significant bits.
im1 = im0 << unusedbitdepth

# Variant: values rescaled so the full 16-bit range is used.
full_scale_16 = np.float64((1<<16)-1)
full_scale_in = np.float64((1<<bitdepth)-1)
im2 = np.uint16(np.round(im0 * full_scale_16 / full_scale_in))

# Variant: left bit replication, a cost-effective approximation of scaling.
# See http://www.libpng.org/pub/png/spec/1.1/PNG-Encoders.html
im3 = (im0 << unusedbitdepth) + (im0 >> (bitdepth-unusedbitdepth))

# Report the value range of each variant, then write it to disk.
for label, image, filename in (
    ('im0', im0, 'gradient10bit-lsb.png'),
    ('im1', im1, 'gradient10bit-msb.png'),
    ('im2', im2, 'gradient10bit-scaledto16bits.png'),
    ('im3', im3, 'gradient10bit-leftbitreplication.png'),
):
    print(label, np.min(image), np.max(image), image.shape, image.dtype)
    imageio.imwrite(filename, image)
以下是对应的原始 ffmpeg / ImageMagick 命令。
编码:
# Encode the scaled 16-bit PNG with libx265 (lossless=1), requesting the
# gray10be pixel format; -y overwrites an existing output file.
ffmpeg -y -i gradient10bit-scaledto16bits.png -c:v libx265 -x265-params lossless=1 -pix_fmt gray10be gradient10bit-scaledto16bits.mkv
解码回png:
# Decode the MKV back to a PNG, requesting the gray16be pixel format.
ffmpeg -y -i gradient10bit-scaledto16bits.mkv -pix_fmt gray16be recons-gradient10bit-scaledto16bits.png
比较:
# Compare the original and reconstructed PNGs using the "mae" metric and
# write the difference image to diff-scaledto16bits.png.
magick compare -verbose -metric mae gradient10bit-scaledto16bits.png recons-gradient10bit-scaledto16bits.png diff-scaledto16bits.png
非常感谢,
汤姆