PyAV库使用介绍

PyAV

Posted by Shaozi on March 26, 2020

目前使用最广泛的视频编解码库为FFmepg,虽然该库有Python的封装ffmpeg-python,但是该封装功能简单,只能替代在python中调用命令行工具罢了。PyAV同样是FFmpeg封装,不过功能更为强大,能够灵活的编解码视频和音频,并且支持Python常用的数据格式(如numpy)。由于PyAV在ffmpeg的基础上进行开发,所以并不会提升编解码的效率。但是能够方便Python的开发者对视频或者音频数据进行处理。在模型的训练测试中能够发挥很好的作用。

安装

# 通过conda-forge,该方法会同时安装需要的ffmpeg库,比较方便
conda install av -c conda-forge

# 如果想使用现有的ffmpeg库,可以使用pip安装
pip install av

# 另外也可以通过源码安装
git clone git@github.com:mikeboers/PyAV
cd PyAV
source scripts/activate.sh
## Either install the testing dependencies:
pip install --upgrade -r tests/requirements.txt
## or have it all, including FFmpeg, built/installed for you:
./scripts/build-deps
## Build PyAV.
make

使用示例

抽取所有帧

import av

container = av.open(path_to_video)

for frame in container.decode(video=0):
    frame.to_image().save('frame-%04d.jpg' % frame.index)

抽取关键帧

import av
import av.datasets


content = 'video_pth'
with av.open(content) as container:
    # Signal that we only want to look at keyframes.
    stream = container.streams.video[0]
    stream.codec_context.skip_frame = 'NONKEY'

    for frame in container.decode(stream):

        print(frame)

        # We use `frame.pts` as `frame.index` won't make must sense with the `skip_frame`.
        frame.to_image().save(
            'night-sky.{:04d}.jpg'.format(frame.pts),
            quality=80,
        )

合成视频

import numpy as np
import av

duration = 4
fps = 24
total_frames = duration * fps
container = av.open('test.mp4', mode='w')
stream = container.add_stream('mpeg4', rate=fps)
stream.width = 480
stream.height = 320
stream.pix_fmt = 'yuv420p'

for frame_i in range(total_frames):

    img = np.empty((480, 320, 3))
    img[:, :, 0] = 0.5 + 0.5 * np.sin(2 * np.pi * (0 / 3 + frame_i / total_frames))
    img[:, :, 1] = 0.5 + 0.5 * np.sin(2 * np.pi * (1 / 3 + frame_i / total_frames))
    img[:, :, 2] = 0.5 + 0.5 * np.sin(2 * np.pi * (2 / 3 + frame_i / total_frames))

    img = np.round(255 * img).astype(np.uint8)
    img = np.clip(img, 0, 255)

    frame = av.VideoFrame.from_ndarray(img, format='rgb24')
    for packet in stream.encode(frame):
        container.mux(packet)
# Flush stream
for packet in stream.encode():
    container.mux(packet)
# Close the file
container.close()

提取音频

import av
in_container = av.open(video_path)
in_stream = in_container.streams.get(audio=0)[0]

out_container = av.open(self.audio_pth, 'w')
out_stream = out_container.add_stream('mp3')

for frame in in_container.decode(in_stream):
    print('\rAudio frame: pts %d, time %f'%(frame.pts, frame.time), end = '')
    frame.pts = None
    for packet in out_stream.encode(frame):
        out_container.mux(packet)
    for packet in out_stream.encode(None):
        out_container.mux(packet)
    out_container.close()

提取音频帧

import av
import numpy as np
in_container = av.open(audio_path)
stream = in_container.streams.get(audio=0)[0]
for frame in in_container.decode(stream):
    print('\rFrame info: pts %d, time %f'%(frame.pts, frame.time), end = '')
    frame_value = frame.to_ndarray()[0]

访问网络视频流

import av
import cv2

#video = av.open('rtsp://admin:ts123456@10.21.38.241:554', 'r')
video = av.open('url', 'r')
index = 0
try:
    for frame in video.decode():
        img = frame.to_nd_array(format='bgr24')
        cv2.imshow("Test", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except Exception as e:
    print('fate erro:{}'.format(e))
cv2.destroyAllWindows()

以上代码示例基本满足通常使用需求,更多内容可以参考文档和git主页