Green frames during playback after encoding raw yuv420p frames ([]byte) to mp4.
ymosleh opened this issue · comments
Hello,
I am running into an issue where I am observing green blank frames during playback using FFplay and VLC when encoding raw yuv frames into an mp4 container.
The frames are received from a video ingestion service that for testing purposes is currently streaming frames from an mp4 video file. The frames are sent to a video analytics service that does some video inferencing and are then sent to a storage service where the frames are stored into a MinIO database.
I am adding an encoding feature to the storage service that takes the processed frames from the analytics service and encodes them to an mp4 to be saved into MinIO. I am using your Go bindings since the storage service is written in Go. I am using the ffmpeg muxer example (muxing.c) as a template for my implementation.
Here is my code:
1.) Initialize the encoder:
// Encoder groups the libav handles and bookkeeping that the functions in this
// listing share while encoding frames and muxing them into an output file.
type Encoder struct {
	// Codec selection and encoder state.
	codec       *avcodec.Codec
	codecCtx    *avcodec.Context
	cid         avcodec.CodecId
	codecParams *avcodec.CodecParameters

	// Output container, its single stream and the I/O context it writes to.
	fmtCtx *avformat.Context
	outFmt *avformat.OutputFormat
	stream *avformat.Stream
	ctxAvIO *avformat.AvIOContext

	// Reusable frame handed to the encoder for every input image.
	imgFrame *avutil.Frame
	Dict     *avutil.Dictionary
	pixFmt   avutil.PixelFormat

	// frameNum counts the frames submitted so far; it is also used as the pts.
	frameNum int64

	// Picture dimensions in pixels.
	height, width int
}
// init configures the encoder for H.264 / yuv420p output, creates the output
// format context and video stream, opens the codec, allocates the reusable
// frame, opens the output file and writes the container header.
//
// It returns 0 on success and -1 on the first failing step (the failure is
// logged). It reads the package-level fileName and resets encodeStop.
func (enc *Encoder) init() int {
	enc.cid = avcodec.CodecId(avcodec.AV_CODEC_ID_H264)
	enc.pixFmt = avutil.PixelFormatFromString("yuv420p")
	// NOTE(review): 540 wide by 960 high is a portrait picture; confirm the two
	// values are not swapped relative to the incoming frames.
	enc.height = 960
	enc.width = 540
	enc.frameNum = 0
	encodeStop = false

	var opts *avutil.Dictionary // options.
	avutil.AvDictSet(&opts, "title", "FFMPEG Test", 0)

	// Set output format based on filename - mp4.
	if enc.outFmt = avformat.AvGuessFormat("", fileName, ""); enc.outFmt == nil {
		glog.Errorf("Failed to create out.mp4")
		return -1
	}

	// Allocate output format context.
	if retErr := avformat.AvformatAllocOutputContext2(&enc.fmtCtx, enc.outFmt, "", fileName); retErr < 0 {
		glog.Errorf("Failed to allocate output format context")
		return -1
	}

	if ret := enc.addStream(); ret < 0 {
		glog.Errorf("Failed in adding video stream")
		return -1
	}

	// Open the encoder device.
	if retErr := enc.codecCtx.AvcodecOpen2(enc.codec, &opts); retErr < 0 {
		glog.Errorf("Failed to open encoder device, error value: %s", avutil.AvStrerr(retErr))
		return -1
	}
	glog.Infof("Success in opening encoder device")

	// Alloc avframe - yuv420.
	enc.imgFrame = enc.allocImage(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height())
	if enc.imgFrame == nil {
		glog.Errorf("Failed to allocate image")
		return -1
	}

	// Fill the stream's parameters struct from the opened codec context.
	if retErr := avcodec.AvcodecParametersFromContext(enc.stream.CodecParameters(), enc.codecCtx); retErr < 0 {
		glog.Errorf("Failed to set codec params struct. Error value: %s", avutil.AvStrerr(retErr))
		return -1
	}

	glog.Infof("Output stream format: \n")
	enc.fmtCtx.AvDumpFormat(0, fileName, 1)

	// Open media stream output file. Make sure directory is not read only!
	if enc.fmtCtx.Flags()&avformat.AVFMT_NOFILE == 0 {
		if retErr := avformat.AvIOOpen(&enc.ctxAvIO, "/home/user/saved_videos/"+fileName, avformat.AVIO_FLAG_WRITE); retErr < 0 {
			// This is a failure path, so log it at error level like the others.
			glog.Errorf("Failed to open %s for writing encoded frames. Err: %s", fileName, avutil.AvStrerr(retErr))
			return -1
		}
		enc.fmtCtx.SetPb(enc.ctxAvIO)
	}

	// Write the stream header to the output media file.
	if retErr := enc.fmtCtx.AvformatWriteHeader(&opts); retErr < 0 {
		glog.Errorf("Failed to write header data: %s", avutil.AvStrerr(retErr))
		return -1
	}
	return 0
}
1a.) addStream function:
// addStream looks up the encoder for enc.cid, creates the output stream on
// enc.fmtCtx, allocates the codec context and fills in the video encoding
// parameters. It returns 0 on success and -1 on failure (the failure is logged).
func (enc *Encoder) addStream() int {
	// Find the H264 encoder.
	enc.codec = avcodec.AvcodecFindEncoder(enc.cid)
	if enc.codec == nil {
		glog.Errorf("Failed to find encoder: %s", avcodec.AvcodecGetName(enc.cid))
		return -1
	}
	glog.Infof("Success in finding encoder: %s", avcodec.AvcodecGetName(enc.cid))

	// Create output stream.
	if enc.stream = enc.fmtCtx.AvformatNewStream(nil); enc.stream == nil {
		glog.Errorf("Failed to create a new media output stream")
		return -1
	}

	// Allocate the encoder context.
	enc.codecCtx = enc.codec.AvcodecAllocContext3()
	if enc.codecCtx == nil {
		glog.Errorf("Failed to allocate an encoding context")
		return -1
	}
	glog.Infof("Success in allocating encoding context")

	// For now handle video.
	switch codecType := enc.codecCtx.CodecType(); codecType {
	case avutil.AVMEDIA_TYPE_VIDEO:
		// Set the encoder params: 150 kbit/s, 20 fps, GOP of 12, up to 2 B-frames.
		const bitrateKbps int64 = 150
		enc.codecCtx.SetBitRate(bitrateKbps * 1000)
		enc.codecCtx.SetWidth(enc.width)
		enc.codecCtx.SetHeight(enc.height)
		enc.codecCtx.SetTimeBase(avutil.NewRational(1, 20))
		enc.codecCtx.SetSampleAspectRatio(avutil.NewRational(1, 1))
		enc.codecCtx.SetFramerate(avutil.NewRational(20, 1))
		enc.codecCtx.SetGopSize(12)
		enc.codecCtx.SetMaxBFrames(2)
		enc.codecCtx.SetPixFmt(enc.pixFmt)
	case avutil.AVMEDIA_TYPE_AUDIO:
		glog.Infof("Muxing audio into mp4 not yet supported")
	default:
		// Log the codec type itself, not the method value.
		glog.Infof("Encoder does not have codec type: %v", codecType)
		return -1
	}

	// Some formats want stream headers to be separate. The masked value is the
	// flag bit itself, so compare against zero: comparing against 1 never
	// matches and left the global-header codec flag unset.
	// NOTE(review): FFmpeg's muxing.c tests the *output format's* flags
	// (oc->oformat->flags) here; confirm enc.fmtCtx.Flags() exposes those.
	if enc.fmtCtx.Flags()&avformat.AVFMT_GLOBALHEADER != 0 {
		enc.codecCtx.SetFlags(enc.codecCtx.Flags() | avcodec.AV_CODEC_FLAG_GLOBAL_HEADER)
	}

	glog.Infof("Success in adding stream!")
	return 0
}
1b.) AllocImage function:
// allocImage allocates a frame with the given pixel format and dimensions and
// asks libav to allocate its data buffers. It returns nil if either
// allocation fails; a frame whose buffers could not be allocated is freed
// before returning so it does not leak.
func (enc *Encoder) allocImage(pformat avutil.PixelFormat, width int, height int) *avutil.Frame {
	img := avutil.AvFrameAlloc()
	if img == nil {
		glog.Errorf("Failed to allocate video frame")
		return nil
	}
	img.SetFormat(int(pformat))
	img.SetWidth(width)
	img.SetHeight(height)

	// Alloc frame data buffers.
	if retErr := avutil.AvFrameGetBuffer(img, 0); retErr < 0 {
		glog.Errorf("Failed to allocate video buffers. Error value: %s", avutil.AvStrerr(retErr))
		avutil.AvFrameFree(img)
		return nil
	}
	return img
}
2.) Encode function:
// EncodeFrame encodes one raw frame and writes the resulting packets to the
// output file. Once writeFrame reports end of stream it also writes the
// trailer, releases the codec resources and closes the output.
//
// It returns 1 when encoding has already been stopped, -1 when writing the
// frame failed, and 0 otherwise (including the call that finalizes the file).
func (enc *Encoder) EncodeFrame(Frame []byte) int {
	if encodeStop {
		return 1
	}

	// Encode and write to mp4 container.
	status := enc.writeFrame(setFrame(enc, Frame))
	switch {
	case status == 1:
		glog.Infof("Finished encoding - EOF")
		encodeStop = true
	case status < 0:
		return -1
	}

	if !encodeStop {
		return 0
	}

	// Encoding is finished: write the stream trailer to finalize the file.
	if rc := enc.fmtCtx.AvWriteTrailer(); rc < 0 {
		glog.Errorf("Failed to write trailer. Error value: %s ", avutil.AvStrerr(rc))
	}

	// Close codec.
	enc.closeStream()

	// Close output stream file.
	if enc.fmtCtx.Flags()&avformat.AVFMT_NOFILE == 0 {
		if rc := avformat.AvIOClosep(&enc.ctxAvIO); rc < 0 {
			glog.Errorf("Failed to close output stream. Error value: %s", avutil.AvStrerr(rc))
		}
	}
	enc.fmtCtx.AvformatFreeContext()
	return 0
}
2a.) setFrame function:
// setFrame prepares enc.imgFrame for the next encode call and returns it.
// After more than 90 frames have been submitted it returns nil instead.
// Any libav failure here terminates the process.
func setFrame(enc *Encoder, Frame []byte) *avutil.Frame {
	// Check if we encoded max number of frames.
	if enc.frameNum > 90 {
		return nil
	}

	// The encoder may still hold a reference to the previous contents.
	if rc := avutil.AvFrameMakeWritable(enc.imgFrame); rc < 0 {
		glog.Errorf("failed to make avframe writable")
		os.Exit(1)
	}

	// NOTE(review): both arrays are built by value with only element 0 set, so
	// whatever AvImageFillArrays writes into them cannot reach enc.imgFrame
	// through these copies - possibly related to the green output; confirm
	// against the binding.
	planes := [8]*uint8{enc.imgFrame.Data()}
	strides := [8]int32{int32(enc.imgFrame.Linesize())}
	rc := avutil.AvImageFillArrays(planes, strides, Frame,
		enc.codecCtx.PixFmt(), enc.imgFrame.Width(), enc.imgFrame.Height(), 1)
	if rc < 0 {
		glog.Errorf("Failed to fill AVFrame: %s", avutil.AvStrerr(rc))
		os.Exit(1)
	}

	// The running frame counter doubles as the presentation timestamp.
	enc.imgFrame.SetPts(enc.frameNum)
	glog.Infof("Number of frames encoded: %d", enc.frameNum)
	enc.frameNum++
	return enc.imgFrame
}
2b.) writeFrame function:
// writeFrame sends frame to the encoder (the caller passes nil once its frame
// limit is reached), then drains every packet the encoder has ready and
// writes each one to the output file.
//
// It returns 1 when the encoder reports end of stream, 0 when it needs more
// input, and -1 on any error (the error is logged).
func (enc *Encoder) writeFrame(frame *avutil.Frame) int {
	// Send frame to encoder.
	if avret := avcodec.AvcodecSendFrame(enc.codecCtx, frame); avret < 0 {
		glog.Errorf("Failed to send frame to encoder: %s", avutil.AvStrerr(avret))
		return -1
	}

	// One packet is reused for the whole drain loop rather than allocating a
	// new one per iteration; it is unreferenced after every write.
	// NOTE(review): the packet struct itself is still never freed; release it
	// with the binding's av_packet_free wrapper once its name is confirmed.
	pkt := avcodec.AvPacketAlloc()
	if pkt == nil {
		glog.Errorf("Failed to allocate packet")
		return -1
	}

	// Get encoded frames in Av packets.
	for {
		avret := avcodec.AvcodecReceivePacket(enc.codecCtx, pkt)
		switch {
		case avret == avutil.AVERROR_EOF:
			return 1
		case avret == avutil.AVERROR_EAGAIN:
			return 0
		case avret < 0:
			glog.Errorf("Error encoding frame: %s", avutil.AvStrerr(avret))
			return -1
		}

		// Rescale output packet timestamp values from codec to stream timebase.
		pkt.AvPacketRescaleTs(enc.codecCtx.TimeBase(), enc.stream.TimeBase())
		pkt.SetStreamIndex(enc.stream.Index())

		// Write compressed frames to media file.
		avret = enc.fmtCtx.AvInterleavedWriteFrame((*avformat.Packet)(unsafe.Pointer(pkt)))
		pkt.AvPacketUnref()
		if avret < 0 {
			glog.Errorf("Failed to write frame to media output stream: %s", avutil.AvStrerr(avret))
			return -1
		}
	}
}
2c.) closeStream function:
// closeStream releases the codec context and the reusable frame.
//
// The codec is closed before its context is freed; closing it afterwards
// would touch memory that has already been released. Both fields are set to
// nil afterwards so a repeated call is a no-op and no dangling pointer stays
// on the encoder.
func (enc *Encoder) closeStream() {
	if enc.codecCtx != nil {
		enc.codecCtx.AvcodecClose()
		avcodec.AvcodecFreeContext(enc.codecCtx)
		enc.codecCtx = nil
	}
	if enc.imgFrame != nil {
		avutil.AvFrameFree(enc.imgFrame)
		enc.imgFrame = nil
	}
}
The code seems to work, as there are no runtime errors. However, I am at a loss as to why I am getting green output when attempting to play the newly created mp4 file. I would appreciate any suggestions, feedback or help.
Kind Regards.
I think you have to use av_image_copy after using av_image_fill_arrays; otherwise it just initializes the buffers, and that's why you're getting a green image.
Unfortunately av_image_copy is not implemented in the lib, but you can fork the project, copy what AvImageFillArrays does and submit a PR once you've got your code working.
Thank you, let me give your suggestion a try and see how it goes.
Hello,
Thank you again for your help. I added the av_image_copy Go bindings, but I am getting a seg fault when I try to use them. Here is the implementation in picture.go:
// AvImageCopy wraps C.av_image_copy: it copies the image described by srcData
// and srcLinesize into dstData and dstLinesize for the given pixel format and
// dimensions. All four arrays are received by value, and the address of each
// copy's first element is what is handed to C.
func AvImageCopy(dstData [8]*uint8, dstLinesize [8]int32, srcData [8]*uint8, srcLinesize [8]int32, pixFmt PixelFormat, width, height int) {
	var (
		dstPlanes  = (**C.uint8_t)(unsafe.Pointer(&dstData[0]))
		dstStrides = (*C.int)(unsafe.Pointer(&dstLinesize[0]))
		srcPlanes  = (**C.uint8_t)(unsafe.Pointer(&srcData[0]))
		srcStrides = (*C.int)(unsafe.Pointer(&srcLinesize[0]))
		format     = (C.enum_AVPixelFormat)(pixFmt)
	)
	C.av_image_copy(dstPlanes, dstStrides, srcPlanes, srcStrides, format, C.int(width), C.int(height))
}
Here is how I am calling the function:
// Call-site fragment: stage the incoming Frame bytes in a separate buffer,
// then ask libav to describe and copy that buffer into enc.imgFrame.
numBytes := avutil.AvImageGetBufferSize(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height(), 1)
buffer := avutil.AvAllocateImageBuffer(numBytes)
// Copy frame bytes to buffer to avoid 'cgo arg has Go pointer to Go pointer' when passing Frame []byte to AvImageCopy.
// NOTE(review): numBytes bytes are read from Frame without checking len(Frame);
// a shorter Frame would be read past its end.
C.memcpy(unsafe.Pointer(&buffer[0]), unsafe.Pointer(&Frame[0]), C.size_t(numBytes))
// Init frame buffers.
// Both array arguments are fresh composite literals with only element 0 set,
// so they are temporaries rather than the frame's own data/linesize arrays.
avret := avutil.AvImageFillArrays([8]*uint8{enc.imgFrame.Data()},
[8]int32{int32(enc.imgFrame.Linesize())},
buffer,
enc.codecCtx.PixFmt(),
enc.imgFrame.Width(),
enc.imgFrame.Height(),
1)
if avret < 0 {
glog.Errorf("Failed to fill AVFrame: %s", avutil.AvStrerr(avret))
os.Exit(1)
}
// Copy frame data. Seg fault happens on this function call.
// NOTE(review): every [8] array below has only element 0 populated; elements
// 1..7 are nil pointers / zero linesizes. A planar format such as yuv420p
// presumably needs the chroma plane entries too, which would explain the
// crash - confirm against the av_image_copy documentation.
avutil.AvImageCopy([8]*uint8{enc.imgFrame.Data()}, // Destination data
[8]int32{int32(enc.imgFrame.Linesize())}, // Destination linesize
[8]*uint8{&buffer[0]}, // Source data
[8]int32{int32(enc.imgFrame.Linesize())}, // Can I re-use destination linesize as src linesize?
enc.codecCtx.PixFmt(),
enc.imgFrame.Width(),
enc.imgFrame.Height())
I am wondering if it is a problem to use the same linesize array as both the destination and source argument for AvImageCopy? Is there anything else I may be missing?
In all honesty, I have doubts regarding the following signature
func AvImageFillArrays(dstData [8]*uint8, dstLinesize [8]int32, src []uint8, pixFmt PixelFormat, width, height, align int) int
It was implemented in the parent fork of this project but I've never tested it.
However, I recently added something similar for audio but did it successfully another way, maybe you could replicate that behavior instead.
Check out this commit and see how I'm inputting **uint8 instead of [8]*uint8 in the function. I use it like this (f being a *avutil.Frame):
avutil.AvSamplesAlloc(f.DataItem(0), f.LinesizePtr(), 2, 960, avutil.AV_SAMPLE_FMT_S16, 0)
I think you should use something similar instead of something like [8]*uint8{enc.imgFrame.Data()}
I made the changes based on your feedback. However, I am still observing a seg fault.
Here are the changes I made:
picture.go:
// AvImageCopy wraps C.av_image_copy. The destination and source plane-pointer
// arrays and linesize arrays are passed as raw pointers and reinterpreted as
// their C counterparts without any copying on the Go side.
// NOTE(review): the linesize pointers are *int reinterpreted as *C.int;
// confirm callers pass pointers that really address C int storage.
func AvImageCopy(dstData **uint8, dstLinesize *int, srcData **uint8, srcLinesize *int, pixFmt PixelFormat, width, height int) {
	var (
		dstPlanes  = (**C.uint8_t)(unsafe.Pointer(dstData))
		dstStrides = (*C.int)(unsafe.Pointer(dstLinesize))
		srcPlanes  = (**C.uint8_t)(unsafe.Pointer(srcData))
		srcStrides = (*C.int)(unsafe.Pointer(srcLinesize))
		format     = (C.enum_AVPixelFormat)(pixFmt)
	)
	C.av_image_copy(dstPlanes, dstStrides, srcPlanes, srcStrides, format, C.int(width), C.int(height))
}
// AvImageFillArrays wraps C.av_image_fill_arrays and returns its result as a
// Go int. dstData, dstLinesize and src are reinterpreted as the corresponding
// C pointer types; pixFmt, width, height and align are converted and passed
// through unchanged.
func AvImageFillArrays(dstData **uint8, dstLinesize *int, src *uint8,
	pixFmt PixelFormat, width, height, align int) int {
	var (
		planes  = (**C.uint8_t)(unsafe.Pointer(dstData))
		strides = (*C.int)(unsafe.Pointer(dstLinesize))
		source  = (*C.uint8_t)(unsafe.Pointer(src))
		format  = (C.enum_AVPixelFormat)(pixFmt)
	)
	rc := C.av_image_fill_arrays(planes, strides, source, format, C.int(width), C.int(height), C.int(align))
	return int(rc)
}
Here is how I am using these functions:
// Call-site fragment: stage the incoming Frame bytes in a libav-allocated
// buffer, point enc.imgFrame at it, then try to copy the image.
numBytes := uintptr(avutil.AvImageGetBufferSize(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height(), 1))
buffer := avutil.AvMalloc(numBytes)
glog.Infof("TEST - Number of bytes: %d", numBytes)
// Copy raw frame bytes to buffer to avoid 'cgo arg has Go pointer to Go pointer' when sending frames to AvImageCopy.
// NOTE(review): numBytes bytes are read from Frame without checking len(Frame),
// and the AvMalloc result is not checked before it is written to.
C.memcpy(buffer, unsafe.Pointer(&Frame[0]), C.size_t(numBytes))
// Init frame buffers.
// The frame's own data and linesize pointers are passed here.
// NOTE(review): av_image_fill_arrays presumably repoints them into buffer,
// replacing the buffers the frame was allocated with - confirm.
avret := avutil.AvImageFillArrays(enc.imgFrame.DataItem(0),
enc.imgFrame.LinesizePtr(),
(*uint8)(buffer),
enc.codecCtx.PixFmt(),
enc.imgFrame.Width(),
enc.imgFrame.Height(),
1)
if avret < 0 {
glog.Errorf("Failed to fill AVFrame: %s", avutil.AvStrerr(avret))
os.Exit(1)
}
// tmpFrame is only used below as the source of a linesize array; nothing in
// this fragment frees it.
var tmpFrame *avutil.Frame
tmpFrame = avutil.AvFrameAlloc()
tmpFrame.SetFormat(int(enc.codecCtx.PixFmt()))
tmpFrame.SetWidth(enc.imgFrame.Width())
tmpFrame.SetHeight(enc.imgFrame.Height())
// Alloc frame data buffers.
if retErr := avutil.AvFrameGetBuffer(tmpFrame, 0); retErr < 0 {
glog.Errorf("Failed to allocate video buffers. Error value: %s", avutil.AvStrerr(retErr))
return nil
}
glog.Infof("yuv420 frame linesize: %d", tmpFrame.Linesize())
// Copy data
// NOTE(review): the source argument casts buffer (the pixel bytes themselves)
// to **uint8, so the callee would read image data as if it were an array of
// plane pointers - a likely cause of the seg fault. The source should be an
// array of plane pointers, such as a frame's data array.
avutil.AvImageCopy(enc.imgFrame.DataItem(0),
enc.imgFrame.LinesizePtr(),
(**uint8)(unsafe.Pointer(buffer)),
tmpFrame.LinesizePtr(),
enc.codecCtx.PixFmt(),
enc.imgFrame.Width(),
enc.imgFrame.Height())
Thanks again for the help.
I'd try this workflow instead:
- f := avutil.AvFrameAlloc()
- avutil.AvFrameGetBuffer(f, 0)
- avutil.AvImageAlloc(f.DataItem(0), f.LinesizePtr(), [...]) (the signature of avutil.AvImageAlloc would have to be changed as well)
- avutil.AvImageCopy(enc.imgFrame.DataItem(0), enc.imgFrame.LinesizePtr(), f.DataItem(0), f.LinesizePtr(), [...])
It uses AvImageAlloc instead of AvImageFillArrays, which is only a subset of AvImageAlloc. It also doesn't use buffer anymore; make sure that f.DataItem(0) is used in avutil.AvImageCopy instead of (**uint8)(unsafe.Pointer(buffer)).
Hello and thanks again for your response.
Unless I am missing something, it looks like f (a *avutil.Frame) does not have any frame data associated with it. I see that we allocate f and then copy f to enc.imgFrame, but I do not see how the frame data is set for f.DataItem(0).
Currently, I receive a frame as a byte array and I want to copy this frame data into enc.imgFrame.DataItem(0). What is the best way to accomplish this without using a buffer?
OK, I understand your use case better now.
The question is, what is in your Frame? You say []byte, but where is it coming from?
As you said, the key is to transform this []byte into a valid uint8_t** that libav can understand. I have never had this use case.
The frames are coming from an mp4 file that is being read in by OpenCV in another process. The frames are actually already encoded to JPEG format. After some further research I realized I was wasting cycles re-encoding the frame data, and instead just sent the frames to the muxer directly. This approach worked and I was able to generate the mp4 video with no problems during playback.
I just had to change the pixel format to AV_PIX_FMT_YUVJ420P and use the MJPEG codec. Here is the working code snippet that converts Frame []byte to *uint8:
// writeFrames2pkts wraps one already-encoded frame in a packet and writes it
// straight to the output file, bypassing the encoder. The packet's pts and
// dts are the running frame counter, rescaled from the codec time base to the
// stream time base.
//
// It returns 0 on success and -1 on failure (the failure is logged).
func (enc *Encoder) writeFrames2pkts(Frame []byte) int {
	// &Frame[0] below would panic on an empty slice.
	if len(Frame) == 0 {
		glog.Errorf("Failed to write frame to media output stream: empty frame")
		return -1
	}

	// Size the packet from the payload actually received. Using the raw-image
	// buffer size here made memcpy read past the end of a shorter Frame.
	frameSize := len(Frame)
	buffer := avutil.AvMalloc(uintptr(frameSize))
	if buffer == nil {
		glog.Errorf("Failed to write frame to media output stream: out of memory")
		return -1
	}
	// The packet only borrows this buffer, so it is released on every path.
	defer avutil.AvFree(buffer)

	// Copy into C memory so no Go pointer is handed to libav.
	C.memcpy(buffer, unsafe.Pointer(&Frame[0]), C.size_t(frameSize))

	// NOTE(review): the packet struct is never freed here; release it with
	// the binding's av_packet_free wrapper once its name is confirmed.
	pkt := avcodec.AvPacketAlloc()
	if pkt == nil {
		glog.Errorf("Failed to write frame to media output stream: packet allocation failed")
		return -1
	}
	pkt.SetSize(frameSize)
	pkt.SetData((*uint8)(unsafe.Pointer(buffer)))
	pkt.SetPts(enc.frameNum)
	pkt.SetDts(enc.frameNum)
	pkt.AvPacketRescaleTs(enc.codecCtx.TimeBase(), enc.stream.TimeBase())
	pkt.SetStreamIndex(enc.stream.Index())

	avret := enc.fmtCtx.AvInterleavedWriteFrame((*avformat.Packet)(unsafe.Pointer(pkt)))
	pkt.AvPacketUnref()
	if avret < 0 {
		glog.Errorf("Failed to write frame to media output stream: %s", avutil.AvStrerr(avret))
		return -1
	}

	enc.frameNum++
	return 0
}
Thank you so much for the help and for your work on the FFmpeg Go bindings!
Cheers ❤️