Floating sync with audio while 2 Basler daA1920-160uc hardware triggered by arduino

Question

Floating sync with audio while 2 Basler daA1920-160uc hardware triggered by arduino

AndreySibiryakov opened this issue 3 months ago · comments

Andrii Sybiriakov commented 3 months ago

Describe what you want to implement and what the issue & the steps to reproduce it are:

Two cameras are triggered by an Arduino pulse and are perfectly synchronized with each other.
Video is recorded at 30fps.
Audio is also recorded, and I'm clapping my hands to check for sync drift later.
I've recorded several 60-second clips with clapping at the beginning and end.
In Adobe Premiere, the audio and video clips are visually synced by the claps at the beginning, but the claps at the end can be 1-2 frames off.
Audio is recorded on a prosumer recorder, so there should be no issues there.
Please help me find the cause of the sync issue.
The code for Arduino and Python is provided below.

from pypylon import pylon
import cv2
import time
import serial
import numpy as np
import subprocess

class FFmpegWriter:
    def __init__(self, filename, fps, width, height):
        self.filename = filename
        self.fps = fps
        self.process = subprocess.Popen([
            'ffmpeg',
            '-y',
            '-f', 'rawvideo',
            '-vcodec', 'rawvideo',
            '-s', f'{width}x{height}',
            '-pix_fmt', 'gray',
            '-r', str(fps),
            '-i', '-',
            '-c:v', 'mjpeg',
            '-q:v', '2',
            '-movflags', '+faststart',
            '-an',
            filename
        ], stdin=subprocess.PIPE)


    def write(self, frame):
        self.process.stdin.write(frame.tobytes())

    def release(self, start_timecode):
        self.process.stdin.close()
        self.process.wait()


def add_custom_text(frame, text):
    # Define the text and position
    # text = "Your Custom Text"
    position = (10, 30)  # Position (x, y) at top left corner

    # Define font, scale, color, and thickness
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    color = (255, 255, 255)  # White color in BGR
    thickness = 2

    # Add the text to the frame
    cv2.putText(frame, text, position, font, font_scale, color, thickness, cv2.LINE_AA)
    return frame


def rotate_image(image, angle):
    if angle == 90:
        return cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
    elif angle == -90:
        return cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
    else:
        return image


tlFactory = pylon.TlFactory.GetInstance()
devices = tlFactory.EnumerateDevices()
if len(devices) < 2:
    raise pylon.RuntimeException("At least two cameras are required.")

camera1 = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateDevice(devices[0]))
camera1.Open()
camera2 = pylon.InstantCamera(pylon.TlFactory.GetInstance().CreateDevice(devices[1]))
camera2.Open()

for camera in [camera1, camera2]:
    camera.LineSelector.SetValue("Line3")
    camera.LineMode.SetValue("Input")
    camera.TriggerSelector.SetValue("FrameStart")
    camera.TriggerSource.SetValue("Line3")
    camera.TriggerMode.SetValue("On")
    camera.TriggerActivation.SetValue("RisingEdge")
    camera.ExposureTime.SetValue(5000)
    camera.PixelFormat.SetValue("Mono8")

window_width = 600
window_height = 960
combined_width = window_width * 2
combined_height = window_height
cv2.namedWindow('Combined Camera Streams', cv2.WINDOW_NORMAL)
cv2.resizeWindow('Combined Camera Streams', combined_width, combined_height)

fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out1 = FFmpegWriter('top.mov', 30.0, 1200, 1920)
out2 = FFmpegWriter('bot.mov', 30.0, 1200, 1920)

recording = False  # Start recording when "s" is pressed
num_frames1 = 0
num_frames2 = 0

ser = serial.Serial('COM4', 9600, timeout=2)
time.sleep(2)

initial_message = ser.readline().decode().strip()
print(f"Arduino says: {initial_message}")

ser.write('s\n'.encode())
time.sleep(0.1)
response = ser.readline().decode().strip()
print(f"Arduino response: {response}")

print('Press "r" to start acquisition, "s" to stop acquisition')

camera1.StartGrabbing(pylon.GrabStrategy_OneByOne)
camera2.StartGrabbing(pylon.GrabStrategy_OneByOne)

try:
    while camera1.IsGrabbing() and camera2.IsGrabbing():
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            print("Quit command received.")
            break

        grab1 = camera1.RetrieveResult(1000, pylon.TimeoutHandling_ThrowException)
        grab2 = camera2.RetrieveResult(1000, pylon.TimeoutHandling_ThrowException)

        if grab1.GrabSucceeded() and grab2.GrabSucceeded():
            frame1 = grab1.Array
            frame2 = grab2.Array

            if recording:
                cv2_message = 'Recording Now'
                out1.write(rotate_image(frame1, -90))
                out2.write(rotate_image(frame2, 90))
                num_frames1 += 1
                num_frames2 += 1
            else:
                cv2_message = 'Preview Only, Not Recording'

            scale_factor = 0.5
            resized_frame_1 = cv2.resize(rotate_image(frame1, -90), None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA)
            resized_frame_2 = cv2.resize(rotate_image(frame2, 90), None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA)
            combined_frame = np.hstack((resized_frame_1, resized_frame_2))
            combined_frame = add_custom_text(combined_frame, cv2_message)
            cv2.imshow('Combined Camera Streams', combined_frame)

        if key == ord('r'):
            out1 = FFmpegWriter('top.mov', 30.0, 1200, 1920)
            out2 = FFmpegWriter('bot.mov', 30.0, 1200, 1920)
            cv2.imshow('Combined Camera Streams', combined_frame)
            recording = True
            start_time = time.time()

        elif key == ord('s'):
            recording = False
            end_time = time.time()
            elapsed_time = end_time - start_time
            frameRate = num_frames1 / elapsed_time
            print(f"Recording finished. Elapsed time: {elapsed_time:.2f} seconds")
            print(f"Number of frames acquired (Camera 1): {num_frames1}")
            print(f"Number of frames acquired (Camera 2): {num_frames2}")
            print(f"Frame rate is: {frameRate:.2f} fps")
            num_frames1 = 0
            num_frames2 = 0
            out1.release()
            out2.release()


except KeyboardInterrupt:
    print('Interrupted by user.')

finally:
    print('Cleaning up...')
    camera1.StopGrabbing()
    camera1.Close()
    cv2.destroyAllWindows()
    ser.write('e\n'.encode())  # Send stop command to Arduino
    time.sleep(0.1)
    stop_response = ser.readline().decode().strip()
    print(f"Arduino stop response: {stop_response}")
    ser.close()
    # stop_read_ltc()
    if recording:
        out1.release()
        out2.release()
    print('Done.')

#include <TimerOne.h>

const int triggerPin = 2;  // Make sure this matches your hardware connection
const unsigned long pulseWidth = 500; // microseconds (adjust based on camera documentation)
const unsigned long frameInterval = 33333; // Frame interval for 60 fps in microseconds 16667, for 30 fps is 33334

volatile bool generateTriggers = false;

void setup() {
  pinMode(triggerPin, OUTPUT);
  digitalWrite(triggerPin, LOW);

  Serial.begin(9600);
  
  Timer1.initialize(frameInterval); // Initialize Timer1 with the frame interval
  Timer1.attachInterrupt(triggerPulse); // Attach the interrupt service routine

  Serial.println("Setup complete. Send 's' to start or 'e' to stop triggering.");
}

void loop() {
  if (Serial.available() > 0) {
    char command = Serial.read();
    if (command == 's') {
      generateTriggers = true;
      Serial.println("Trigger generation started.");
    } else if (command == 'e') {
      generateTriggers = false;
      digitalWrite(triggerPin, LOW);
      Serial.println("Trigger generation stopped.");
    }
  }
}

void triggerPulse() {
  if (generateTriggers) {
    digitalWrite(triggerPin, HIGH);
    delayMicroseconds(pulseWidth);
    digitalWrite(triggerPin, LOW);
  }
}

Is your camera operational in Basler pylon viewer on your platform

Yes

Hardware setup & camera model(s) used

cameras: Basler daA1920-160uc
connection: 1.5m usb cables
system: Windows 10
cpu: i5 10400f
gpu: 1080
ssd: Samsung 980

Runtime information:

python: 3.11.3 (tags/v3.11.3:f3909b8, Apr  4 2023, 23:49:59) [MSC v.1934 64 bit (AMD64)]
platform: win32/AMD64/10
pypylon: 3.0.1 / 7.4.0.38864

Thies Möller · Answer 1 · Fri Jul 05 2024 14:58:59 GMT+0800 (China Standard Time)

Some thoughts to your detailed description (thanks):

You start the Arduino before the cameras.
So there will be trigger pulses all the time.
As you start the cameras sequentially, the first camera might already have one or two frames captured.

Recommendation is to activate the cameras first then send serial command to Arduino to start the trigger.

You don't release the grab results, this will leave you without buffers after a while.

The CV.waitkey should be after releasing the buffers so you fully service the grab engine as fast as possible.

The crystal on your Arduino is possibly way more off than your prosumer recording equipment. https://jorisvr.nl/article/arduino-frequency#:~:text=Frequency%20stability&text=The%20crystal%20is%20quite%20stable,resonator%20is%20much%20more%20variable.

To debug, you can record the timestamps of the images grab1.TimeStamp and compare the diff of last and first timestamp to the time of your prosumer time

Andrii Sybiriakov · Answer 2 · Fri Jul 05 2024 15:31:48 GMT+0800 (China Standard Time)

Thank you for the help, Thies!