Green frames during playback after encoding raw yuv420p frames ([]byte) to mp4.

Question

Green frames during playback after encoding raw yuv420p frames ([]byte) to mp4.

ymosleh opened this issue 3 years ago · comments

Hello,
I am running into an issue where I am observing green blank frames during playback using FFplay and VLC when encoding raw yuv frames into an mp4 container.

The frames are received from a video ingestion service that for testing purposes is currently streaming frames from an mp4 video file. The frames are sent to a video analytics service that does some video inferencing and are then sent to a storage service where the frames are stored into a MinIO database.

I am adding an encoding feature to the storage service that takes the processed frames from the analytics service and encodes them to an mp4 to be saved into MinIO. I am using your Go bindings since the storage service is written in Go. I am using the ffmpeg muxer example (muxing.c) as a template for my implementation.

Here is my code:

1.) Initialize the encoder:

// Encoder groups the libav handles and settings used to encode frames and
// mux them into an output file.
type Encoder struct {
	// Encoder looked up for cid, and the encoding context allocated from it.
	codec    *avcodec.Codec
	codecCtx *avcodec.Context
	// Codec identifier used for the encoder lookup.
	cid avcodec.CodecId
	// NOTE(review): not referenced by the methods shown alongside this type.
	codecParams *avcodec.CodecParameters
	// Output format context, the output format guessed from the file name,
	// and the output stream created on the format context.
	fmtCtx *avformat.Context
	outFmt *avformat.OutputFormat
	stream *avformat.Stream
	// I/O context opened for the output file.
	ctxAvIO *avformat.AvIOContext
	// Reusable frame that is handed to the encoder.
	imgFrame *avutil.Frame
	// NOTE(review): not referenced by the methods shown alongside this type.
	Dict *avutil.Dictionary
	// Pixel format, frame counter and picture dimensions.
	pixFmt   avutil.PixelFormat
	frameNum int64 // Number of frames encoded
	height   int
	width    int
}

	// Init encoder.
// init configures the encoder for H.264 / yuv420p output and prepares the
// output container: it guesses the output format from fileName, allocates the
// format context, adds the video stream, opens the encoder, allocates the
// reusable frame, copies the codec parameters to the stream, opens the output
// file and writes the container header.
//
// It returns 0 on success and -1 on failure; every failure is logged.
// NOTE(review): resources acquired before a failing step are not released
// here, and opts is never freed in this function.
func (enc *Encoder) init() int {
	enc.cid = avcodec.CodecId(avcodec.AV_CODEC_ID_H264)
	enc.pixFmt = avutil.PixelFormatFromString("yuv420p")
	// NOTE(review): this yields a 540x960 (portrait) picture; confirm that
	// width and height are not swapped with respect to the incoming frames.
	enc.height = 960
	enc.width = 540
	enc.frameNum = 0
	encodeStop = false

	var opts *avutil.Dictionary // options.
	avutil.AvDictSet(&opts, "title", "FFMPEG Test", 0)

	// Set output format based on filename - mp4.
	if enc.outFmt = avformat.AvGuessFormat("", fileName, ""); enc.outFmt == nil {
		glog.Errorf("Failed to create out.mp4")
		return -1
	}

	// Allocate output format context.
	if retErr := avformat.AvformatAllocOutputContext2(&enc.fmtCtx, enc.outFmt, "", fileName); retErr < 0 {
		glog.Errorf("Failed to allocate output format context")
		return -1
	}

	if ret := enc.addStream(); ret < 0 {
		glog.Errorf("Failed in adding video stream")
		return -1
	}

	// Open the encoder device.
	if retErr := enc.codecCtx.AvcodecOpen2(enc.codec, &opts); retErr < 0 {
		glog.Errorf("Failed to open encoder device, error value: %s", avutil.AvStrerr(retErr))
		return -1
	}
	glog.Infof("Success in opening encoder device")

	// Alloc avframe - yuv420.
	enc.imgFrame = enc.allocImage(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height())
	if enc.imgFrame == nil {
		glog.Errorf("Failed to allocate image")
		return -1
	}

	// Fill the parameters struct based on the values from the supplied codec context.
	if retErr := avcodec.AvcodecParametersFromContext(enc.stream.CodecParameters(), enc.codecCtx); retErr < 0 {
		glog.Errorf("Failed to set codec params struct. Error value: %s", avutil.AvStrerr(retErr))
		return -1
	}

	glog.Infof("Output stream format: \n")
	enc.fmtCtx.AvDumpFormat(0, fileName, 1)

	// Open media stream output file. Make sure directory is not read only!
	// NOTE(review): FFmpeg's muxing.c tests AVFMT_NOFILE on the output format
	// (oformat->flags), not on the format context flags; confirm which one
	// enc.fmtCtx.Flags() returns.
	if enc.fmtCtx.Flags()&avformat.AVFMT_NOFILE == 0 {
		if retErr := avformat.AvIOOpen(&enc.ctxAvIO, "/home/user/saved_videos/"+fileName, avformat.AVIO_FLAG_WRITE); retErr < 0 {
			// Logged at error level, like every other failure in this function.
			glog.Errorf("Failed to open %s for writing encoded frames. Err: %s", fileName, avutil.AvStrerr(retErr))
			return -1
		}
		enc.fmtCtx.SetPb(enc.ctxAvIO)
	}

	// Allocate the stream private data in the AVDictionary object and write the stream header to an output media file.
	if retErr := enc.fmtCtx.AvformatWriteHeader(&opts); retErr < 0 {
		glog.Errorf("Failed to write header data: %s", avutil.AvStrerr(retErr))
		return -1
	}

	return 0
} // END Init encoder.

1a.) addStream function:

// addStream looks up the encoder for enc.cid, creates the output stream on
// enc.fmtCtx, allocates the encoding context and sets the video encoding
// parameters on it.
//
// It returns 0 on success and -1 on failure; every failure is logged.
func (enc *Encoder) addStream() int {
	// Find the H264 encoder.
	enc.codec = avcodec.AvcodecFindEncoder(enc.cid)
	if enc.codec == nil {
		glog.Errorf("Failed to find encoder: %s", avcodec.AvcodecGetName(enc.cid))
		return -1
	}
	glog.Infof("Success in finding encoder: %s", avcodec.AvcodecGetName(enc.cid))

	// Create output stream.
	if enc.stream = enc.fmtCtx.AvformatNewStream(nil); enc.stream == nil {
		glog.Errorf("Failed to create a new media output stream")
		return -1
	}

	// Allocate the encoder context.
	enc.codecCtx = enc.codec.AvcodecAllocContext3()
	if enc.codecCtx == nil {
		glog.Errorf("Failed to allocate an encoding context")
		return -1
	}
	glog.Infof("Success in allocating encoding context")

	// For now handle video.
	switch codecType := enc.codecCtx.CodecType(); codecType {
	case avutil.AVMEDIA_TYPE_VIDEO:
		// set the encoder params.
		var bitrate int64 = 150 // multiplied by 1000 below
		enc.codecCtx.SetBitRate(bitrate * 1000)
		enc.codecCtx.SetWidth(enc.width)
		enc.codecCtx.SetHeight(enc.height)
		enc.codecCtx.SetTimeBase(avutil.NewRational(1, 20))
		enc.codecCtx.SetSampleAspectRatio(avutil.NewRational(1, 1))
		enc.codecCtx.SetFramerate(avutil.NewRational(20, 1))
		enc.codecCtx.SetGopSize(12)
		enc.codecCtx.SetMaxBFrames(2)
		enc.codecCtx.SetPixFmt(enc.pixFmt)
	case avutil.AVMEDIA_TYPE_AUDIO:
		glog.Infof("Muxing audio into mp4 not yet supported")
	default:
		// Log the switched value; the method value enc.codecCtx.CodecType
		// (without a call) would print a function address instead.
		glog.Infof("Encoder does not have codec type: %v", codecType)
		return -1
	}

	// Some formats want stream headers to be separate.
	// The masked value is either 0 or the flag itself, so it has to be
	// compared against 0: AVFMT_GLOBALHEADER is not bit 0, and "== 1" could
	// never be true.
	// NOTE(review): FFmpeg's muxing.c tests this flag on the output format
	// (oformat->flags), not on the format context flags; confirm which one
	// enc.fmtCtx.Flags() returns.
	if enc.fmtCtx.Flags()&avformat.AVFMT_GLOBALHEADER != 0 {
		enc.codecCtx.SetFlags(enc.codecCtx.Flags() | avcodec.AV_CODEC_FLAG_GLOBAL_HEADER)
	}
	glog.Infof("Success in adding stream!")
	return 0
}

1b.) AllocImage function:

// allocImage allocates a frame with the given pixel format and dimensions and
// asks libav to allocate its data buffers.
//
// It returns the frame, or nil if the frame or its buffers could not be
// allocated. When buffer allocation fails the frame is freed before returning
// so it is not leaked.
func (enc *Encoder) allocImage(pformat avutil.PixelFormat, width int, height int) *avutil.Frame {
	img := avutil.AvFrameAlloc()
	if img == nil {
		glog.Errorf("Failed to allocate video frame")
		return nil
	}
	img.SetFormat(int(pformat))
	img.SetWidth(width)
	img.SetHeight(height)

	// Alloc frame data buffers.
	if retErr := avutil.AvFrameGetBuffer(img, 0); retErr < 0 {
		glog.Errorf("Failed to allocate video buffers. Error value: %s", avutil.AvStrerr(retErr))
		avutil.AvFrameFree(img)
		return nil
	}

	return img
}

2.) Encode function:

	// Main encode function.
// EncodeFrame loads Frame into the encoder's frame, encodes it and writes the
// resulting packets to the output. Once writeFrame reports end of stream, the
// trailer is written and the codec, output file and format context are
// released.
//
// It returns 1 if encoding had already been stopped before the call, -1 if
// writing the frame failed, and 0 otherwise (including the call that
// finalizes the output).
func (enc *Encoder) EncodeFrame(Frame []byte) int {
	if encodeStop {
		return 1
	}

	// Encode and write to mp4 container.
	switch status := enc.writeFrame(setFrame(enc, Frame)); {
	case status == 1:
		glog.Infof("Finished encoding - EOF")
		encodeStop = true
	case status < 0:
		return -1
	}

	if !encodeStop {
		return 0
	}

	// Encoding is finished: write the stream trailer to finalize the output file.
	if rc := enc.fmtCtx.AvWriteTrailer(); rc < 0 {
		glog.Errorf("Failed to write trailer. Error value: %s ", avutil.AvStrerr(rc))
	}

	// Close codec.
	enc.closeStream()

	// Close output stream file.
	if enc.fmtCtx.Flags()&avformat.AVFMT_NOFILE == 0 {
		if rc := avformat.AvIOClosep(&enc.ctxAvIO); rc < 0 {
			glog.Errorf("Failed to close output stream. Error value: %s", avutil.AvStrerr(rc))
		}
	}
	enc.fmtCtx.AvformatFreeContext()
	return 0
} // END EncodeFrame.

2a.) setFrame function:

// setFrame prepares enc.imgFrame for the next encode call: it makes the frame
// writable, calls AvImageFillArrays with Frame as the source, stamps the frame
// with the current frame number as PTS and increments the counter.
//
// It returns nil once more than 90 frames have been counted, otherwise
// enc.imgFrame. The process exits with status 1 if the frame cannot be made
// writable or AvImageFillArrays fails.
func setFrame(enc *Encoder, Frame []byte) *avutil.Frame {
	// Check if we encoded max number of frames.
	if enc.frameNum > 90 {
		return nil
	}

	// Ensure that the frame data is writable, avoiding data copy if possible.
	rc := avutil.AvFrameMakeWritable(enc.imgFrame)
	if rc < 0 {
		glog.Errorf("failed to make avframe writable")
		os.Exit(1)
	}

	// Fill AVFrame with image data.
	// NOTE(review): both array literals below are fresh Go arrays with only
	// element 0 set, and Go passes arrays by value. Anything
	// AvImageFillArrays stores in them therefore cannot reach enc.imgFrame,
	// and no pixel data from Frame appears to be copied into the frame's
	// buffers - a probable cause of the green output; confirm.
	planes := [8]*uint8{enc.imgFrame.Data()}
	strides := [8]int32{int32(enc.imgFrame.Linesize())}
	fillRet := avutil.AvImageFillArrays(
		planes,
		strides,
		Frame,
		enc.codecCtx.PixFmt(),
		enc.imgFrame.Width(),
		enc.imgFrame.Height(),
		1,
	)
	if fillRet < 0 {
		glog.Errorf("Failed to fill AVFrame: %s", avutil.AvStrerr(fillRet))
		os.Exit(1)
	}

	enc.imgFrame.SetPts(enc.frameNum)
	glog.Infof("Number of frames encoded: %d", enc.frameNum)
	enc.frameNum++
	return enc.imgFrame
}

2b.) writeFrame function:

// writeFrame sends frame to the encoder, then drains every packet the encoder
// has ready, rescales its timestamps from the codec time base to the stream
// time base and writes it to the output.
//
// It returns 1 when the encoder reports end of stream, 0 when it needs more
// input, and -1 on any error (the error is logged).
func (enc *Encoder) writeFrame(frame *avutil.Frame) int {
	// Send frame to encoder.
	avret := avcodec.AvcodecSendFrame(enc.codecCtx, frame)
	if avret < 0 {
		glog.Errorf("Failed to send frame to encoder: %s", avutil.AvStrerr(avret))
		return -1
	}

	// One packet is enough for the whole drain loop: it is unreferenced after
	// every write, so it can be reused instead of allocating (and leaking) a
	// new packet per iteration.
	// NOTE(review): the packet itself is still not released; call the
	// binding's av_packet_free wrapper here once its name is confirmed.
	pkt := avcodec.AvPacketAlloc()
	if pkt == nil {
		glog.Errorf("Failed to allocate packet")
		return -1
	}

	// Get encoded frames in Av packets.
	for {
		avret = avcodec.AvcodecReceivePacket(enc.codecCtx, pkt)
		if avret == avutil.AVERROR_EAGAIN || avret == avutil.AVERROR_EOF {
			break
		}
		if avret < 0 {
			glog.Errorf("Error encoding frame: %s", avutil.AvStrerr(avret))
			return -1
		}

		// Rescale output packet timestamp values from codec to stream timebase.
		pkt.AvPacketRescaleTs(enc.codecCtx.TimeBase(), enc.stream.TimeBase())
		pkt.SetStreamIndex(enc.stream.Index())

		// Write compressed frames to media file.
		avret = enc.fmtCtx.AvInterleavedWriteFrame((*avformat.Packet)(unsafe.Pointer(pkt)))
		pkt.AvPacketUnref()
		if avret < 0 {
			glog.Errorf("Failed to write frame to media output stream: %s", avutil.AvStrerr(avret))
			return -1
		}
	}

	// Check for EOF.
	if avret == avutil.AVERROR_EOF {
		return 1
	}
	return 0
}

2c.) closeStream function:

	// Free resources.
// closeStream closes the codec and releases the encoding context and the
// reusable frame.
//
// The codec is closed before its context is freed; closing after the free
// would touch memory that has already been released. The pointers are cleared
// afterwards so a repeated call is a no-op.
func (enc *Encoder) closeStream() {
	if enc.codecCtx != nil {
		enc.codecCtx.AvcodecClose()
		avcodec.AvcodecFreeContext(enc.codecCtx)
		enc.codecCtx = nil
	}
	if enc.imgFrame != nil {
		avutil.AvFrameFree(enc.imgFrame)
		enc.imgFrame = nil
	}
}

The code seems to work, as there are no runtime errors. However, I am at a loss as to why I am getting green output when attempting to play the newly created mp4 file. I would appreciate any suggestions, feedback or help.

Kind Regards.

Quentin Renard · Answer 1 · Mon Dec 07 2020 17:25:38 GMT+0800 (China Standard Time)

I think you have to use av_image_copy after using av_image_fill_arrays otherwise it just initializes buffers and that's why you're getting a green image.

Unfortunately av_image_copy is not implemented in the lib, but you can fork the project, copy what AvImageFillArrays does and submit a PR once you got your code working.

ymosleh · Answer 2 · Tue Dec 08 2020 14:16:13 GMT+0800 (China Standard Time)

Thank you, let me give your suggestion a try and see how it goes.

ymosleh · Answer 3 · Wed Dec 09 2020 00:30:21 GMT+0800 (China Standard Time)

Hello,
Thank you again for your help, I added the av_image_copy Go bindings, but I am getting a seg fault when I try to use it. Here is the implementation in picture.go:

// AvImageCopy copies the image described by srcData/srcLinesize into
// dstData/dstLinesize by calling av_image_copy with the given pixel format,
// width and height. The addresses of the first array elements are what is
// handed to C.
func AvImageCopy(dstData [8]*uint8, dstLinesize [8]int32, srcData [8]*uint8, srcLinesize [8]int32, pixFmt PixelFormat, width, height int) {
	C.av_image_copy(
		(**C.uint8_t)(unsafe.Pointer(&dstData[0])),
		(*C.int)(unsafe.Pointer(&dstLinesize[0])),
		(**C.uint8_t)(unsafe.Pointer(&srcData[0])),
		(*C.int)(unsafe.Pointer(&srcLinesize[0])),
		(C.enum_AVPixelFormat)(pixFmt),
		C.int(width),
		C.int(height),
	)
}

Here is how I am calling the function:

	// Size a staging buffer for one image in the codec's pixel format and
	// dimensions (align 1) and allocate it.
	numBytes := avutil.AvImageGetBufferSize(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height(), 1)
	buffer   := avutil.AvAllocateImageBuffer(numBytes)

	// Copy frame bytes to buffer to avoid 'cgo arg has Go pointer to Go pointer' when passing Frame []byte to AvImageCopy.
	// NOTE(review): numBytes bytes are read from Frame without checking len(Frame).
	C.memcpy(unsafe.Pointer(&buffer[0]), unsafe.Pointer(&Frame[0]), C.size_t(numBytes)) 

	// Init frame buffers.
	// Both array literals are fresh arrays with only element 0 set, and Go
	// passes arrays by value, so whatever the callee stores in them cannot
	// reach enc.imgFrame.
	avret := avutil.AvImageFillArrays([8]*uint8{enc.imgFrame.Data()},
							[8]int32{int32(enc.imgFrame.Linesize())},
							buffer,
							enc.codecCtx.PixFmt(),
							enc.imgFrame.Width(),
							enc.imgFrame.Height(),
							1)
       if avret < 0 {
	   glog.Errorf("Failed to fill AVFrame: %s",  avutil.AvStrerr(avret))
	   os.Exit(1)
	}

	// Copy frame data. Seg fault happens on this function call.
	// Only index 0 of each array below is populated; entries 1..7 are nil / 0.
	// NOTE(review): a planar format such as yuv420p has three planes, so the
	// chroma plane pointers passed to av_image_copy would be nil - a likely
	// cause of the segfault; confirm.
	// NOTE(review): buffer was sized with align 1, so its row stride may not
	// match the destination frame's linesize that is reused as the source
	// linesize here; confirm.
	avutil.AvImageCopy([8]*uint8{enc.imgFrame.Data()}, 	     // Destination data
			   [8]int32{int32(enc.imgFrame.Linesize())}, // Destination linesize
			   [8]*uint8{&buffer[0]},                    // Source data
			   [8]int32{int32(enc.imgFrame.Linesize())}, // Can I re-use destination linesize as src linesize?
			   enc.codecCtx.PixFmt(),
			   enc.imgFrame.Width(),
			   enc.imgFrame.Height())

I am wondering if it is a problem to use the same linesize array as the destination and source argument for AvImageCopy? Anything else I may be missing?

Quentin Renard · Answer 4 · Wed Dec 09 2020 16:40:14 GMT+0800 (China Standard Time)

In all honesty, I have doubts regarding the following signature

func AvImageFillArrays(dstData [8]*uint8, dstLinesize [8]int32, src []uint8, pixFmt PixelFormat, width, height, align int) int

It was implemented in the parent fork of this project but I've never tested it.

However, I recently added something similar for audio but did it successfully another way, maybe you could replicate that behavior instead.

Check out this commit and see how I'm inputting **uint8 instead of [8]*uint8 in the function. I use it like this (f being a *avutil.Frame):

avutil.AvSamplesAlloc(f.DataItem(0), f.LinesizePtr(), 2, 960, avutil.AV_SAMPLE_FMT_S16, 0)

I think you should use something similar instead of something like [8]*uint8{enc.imgFrame.Data()}

ymosleh · Answer 5 · Thu Dec 10 2020 11:29:55 GMT+0800 (China Standard Time)

I made the changes based on your feedback. However, I am still observing a seg fault.
Here are the changes I made:

picture.go:

// AvImageCopy copies the image described by srcData/srcLinesize into
// dstData/dstLinesize by calling av_image_copy with the given pixel format,
// width and height. The pointers are reinterpreted as the corresponding C
// pointer types and passed through unchanged.
func AvImageCopy(dstData **uint8, dstLinesize *int, srcData **uint8, srcLinesize *int, pixFmt PixelFormat, width, height int) {
	C.av_image_copy(
		(**C.uint8_t)(unsafe.Pointer(dstData)),
		(*C.int)(unsafe.Pointer(dstLinesize)),
		(**C.uint8_t)(unsafe.Pointer(srcData)),
		(*C.int)(unsafe.Pointer(srcLinesize)),
		(C.enum_AVPixelFormat)(pixFmt),
		C.int(width),
		C.int(height),
	)
}

// AvImageFillArrays calls av_image_fill_arrays with dstData/dstLinesize as the
// destination arrays and src as the source buffer, for the given pixel format,
// width, height and alignment. It returns the C function's result converted to
// int.
func AvImageFillArrays(dstData **uint8, dstLinesize *int, src *uint8,
	pixFmt PixelFormat, width, height, align int) int {
	ret := C.av_image_fill_arrays(
		(**C.uint8_t)(unsafe.Pointer(dstData)),
		(*C.int)(unsafe.Pointer(dstLinesize)),
		(*C.uint8_t)(unsafe.Pointer(src)),
		(C.enum_AVPixelFormat)(pixFmt),
		C.int(width),
		C.int(height),
		C.int(align),
	)
	return int(ret)
}

Here is how I am using these functions:

	// Size a staging buffer for one image in the codec's pixel format and
	// dimensions (align 1) and allocate it with AvMalloc.
	// NOTE(review): no matching free for buffer is visible in this snippet.
	numBytes := uintptr(avutil.AvImageGetBufferSize(enc.codecCtx.PixFmt(), enc.codecCtx.Width(), enc.codecCtx.Height(), 1))
	buffer   := avutil.AvMalloc(numBytes)
	glog.Infof("TEST - Number of bytes: %d", numBytes)

	// Copy raw frame bytes to buffer to avoid 'cgo arg has Go pointer to Go pointer' when sending frames to AvImageCopy.
	// NOTE(review): numBytes bytes are read from Frame without checking len(Frame).
	C.memcpy(buffer, unsafe.Pointer(&Frame[0]), C.size_t(numBytes)) 

	// Init frame buffers.
	// NOTE(review): av_image_fill_arrays sets the destination data pointers
	// and linesizes so that they describe src. If DataItem(0)/LinesizePtr()
	// alias the frame's own arrays, this repoints enc.imgFrame at buffer and
	// drops the pointers set up by AvFrameGetBuffer; confirm.
	avret := avutil.AvImageFillArrays(enc.imgFrame.DataItem(0),
					 enc.imgFrame.LinesizePtr(),
					 (*uint8)(buffer),
					 enc.codecCtx.PixFmt(),
					 enc.imgFrame.Width(),
					 enc.imgFrame.Height(),
                                          1)

	if avret < 0 {
		glog.Errorf("Failed to fill AVFrame: %s",  avutil.AvStrerr(avret))
		os.Exit(1)
	}

	// Temporary frame with the same format and dimensions as enc.imgFrame;
	// below, only its linesize array is used (as the source linesize).
	// NOTE(review): tmpFrame is not freed in this snippet.
	var tmpFrame *avutil.Frame
	tmpFrame = avutil.AvFrameAlloc()
	tmpFrame.SetFormat(int(enc.codecCtx.PixFmt()))
	tmpFrame.SetWidth(enc.imgFrame.Width())
	tmpFrame.SetHeight(enc.imgFrame.Height())

	// Alloc frame data buffers. 
	if retErr := avutil.AvFrameGetBuffer(tmpFrame, 0); retErr < 0 {
		glog.Errorf("Failed to allocate video buffers. Error value: %s", avutil.AvStrerr(retErr))
		return nil
	}
	glog.Infof("yuv420 frame linesize: %d", tmpFrame.Linesize())

	// Copy data 
	// The source argument reinterprets buffer - which holds the pixel bytes
	// copied from Frame above - as a pointer to plane pointers.
	// NOTE(review): av_image_copy expects src_data to be an array of plane
	// pointers, so the first bytes of the image would be dereferenced as
	// addresses - a likely cause of the segfault; confirm.
	avutil.AvImageCopy(enc.imgFrame.DataItem(0), 			 
				enc.imgFrame.LinesizePtr(), 			
				(**uint8)(unsafe.Pointer(buffer)),    
				tmpFrame.LinesizePtr(), 				 
				enc.codecCtx.PixFmt(),
				enc.imgFrame.Width(),
				enc.imgFrame.Height())

Thanks again for the help.

Quentin Renard · Answer 6 · Thu Dec 10 2020 16:24:43 GMT+0800 (China Standard Time)

I'd try this workflow instead:

f := avutil.AvFrameAlloc()
avutil.AvFrameGetBuffer(f, 0)
avutil.AvImageAlloc(f.DataItem(0), f.LinesizePtr(), [...]) (signature of avutil.AvImageAlloc would have to be changed as well)
avutil.AvImageCopy(enc.imgFrame.DataItem(0), enc.imgFrame.LinesizePtr(), f.DataItem(0), f.LinesizePtr(), [...])

It uses AvImageAlloc instead of AvImageFillArrays, which only does a subset of what AvImageAlloc does. It also no longer uses buffer, and it makes sure that f.DataItem(0) is passed to avutil.AvImageCopy instead of (**uint8)(unsafe.Pointer(buffer)).

ymosleh · Answer 7 · Fri Dec 11 2020 15:16:17 GMT+0800 (China Standard Time)

Hello and thanks again for your response.

Unless I am missing something, it looks like f *avutil.Frame does not have any frame data associated with it. I see that we allocate f and then copy f to enc.imgFrame, but I do not see how the frame data is set for f.DataItem(0).

Currently, I receive a frame as a byte array and I want to copy this frame data into the enc.imgFrame.DataItem(0). What is the best way to accomplish this without using a buffer?

Quentin Renard · Answer 8 · Fri Dec 11 2020 16:39:11 GMT+0800 (China Standard Time)

OK I understand better your use case.

Question is, what is in your Frame? You say []byte but where is it coming from ?

As you said, key is to transform this []byte into a valid **uint8_t that libav can understand. I never had this use case.

ymosleh · Answer 9 · Sat Dec 12 2020 02:36:47 GMT+0800 (China Standard Time)

The frames are coming from an mp4 file that is being read in by OpenCV in another process. The frames are actually already encoded to jpg format. After some further research I realize I was wasting cycles in re-encoding the frame data and instead just sent the frames to the muxer directly. This approach worked and I was able to generate the mp4 video with no problems during playback.

I just had to change the pixel format to AV_PIX_FMT_YUVJ420P and use the MJPEG codec. Here is the working code snippet that converts Frame []byte to *uint8:

// writeFrames2pkts wraps the bytes of Frame in a packet and writes that packet
// to the output, using the current frame number as both PTS and DTS (rescaled
// from the codec time base to the stream time base).
//
// The packet size and the number of bytes copied are taken from len(Frame).
// Sizing them from the raw-image buffer size would read past the end of Frame
// whenever the payload is shorter than an uncompressed picture, and would
// record a wrong packet size.
//
// It returns 0 on success and -1 on failure (the failure is logged).
func (enc *Encoder) writeFrames2pkts(Frame []byte) int {
	size := len(Frame)
	if size == 0 {
		// &Frame[0] below would panic on an empty slice.
		glog.Errorf("Cannot write an empty frame to the media output stream")
		return -1
	}

	// NOTE(review): the packet is only unreferenced below, never released;
	// call the binding's av_packet_free wrapper once its name is confirmed.
	pkt := avcodec.AvPacketAlloc()
	if pkt == nil {
		glog.Errorf("Failed to allocate packet")
		return -1
	}

	// Stage the payload in C memory so that no Go pointer is handed to libav.
	buffer := avutil.AvMalloc(uintptr(size))
	if buffer == nil {
		glog.Errorf("Failed to allocate %d bytes for packet data", size)
		return -1
	}
	defer avutil.AvFree(buffer)

	C.memcpy(buffer, unsafe.Pointer(&Frame[0]), C.size_t(size))

	pkt.SetSize(size)
	pkt.SetData((*uint8)(unsafe.Pointer(buffer)))
	pkt.SetPts(enc.frameNum)
	pkt.SetDts(enc.frameNum)
	pkt.AvPacketRescaleTs(enc.codecCtx.TimeBase(), enc.stream.TimeBase())
	pkt.SetStreamIndex(enc.stream.Index())

	avret := enc.fmtCtx.AvInterleavedWriteFrame((*avformat.Packet)(unsafe.Pointer(pkt)))
	pkt.AvPacketUnref()
	if avret < 0 {
		glog.Errorf("Failed to write frame to media output stream: %s", avutil.AvStrerr(avret))
		return -1
	}

	enc.frameNum++
	return 0
}

Thank you so much for the help and for your work on the FFmpeg Go bindings!

Quentin Renard · Answer 10 · Sun Dec 13 2020 18:59:32 GMT+0800 (China Standard Time)

Cheers ❤️