osxaudio: Interpolate clock by counting elapsed time since render calls

When advancing the ringbuffer, store the processed CoreAudio sample time, then interpolate the clock in the _get_delay() calls to smooth the clock. CoreAudio's "latency" report is always a constant and otherwise leads to the clock generating a latency-time staircase. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/5140>
2025-02-16 19:25:18 +00:00 · 2023-08-03 17:46:57 +10:00 · 2023-08-03 17:46:57 +10:00 · 461f943b52
commit 461f943b52
parent e22c7fb3e4
4 changed files with 123 additions and 1 deletions
--- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudiosink.c
+++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudiosink.c
@ -544,7 +544,13 @@ gst_osx_audio_sink_io_proc (GstOsxAudioRingBuffer * buf,
      gst_audio_ring_buffer_clear (GST_AUDIO_RING_BUFFER (buf), readseg);

      /* we wrote one segment */
+      CORE_AUDIO_TIMING_LOCK (buf->core_audio);
      gst_audio_ring_buffer_advance (GST_AUDIO_RING_BUFFER (buf), 1);
+      /* FIXME: Update the timestamp and reported frames in smaller increments
+       * when the segment size is larger than the total inNumberFrames */
+      gst_core_audio_update_timing (buf->core_audio, inTimeStamp,
+          inNumberFrames);
+      CORE_AUDIO_TIMING_UNLOCK (buf->core_audio);

      buf->segoffset = 0;
    }
--- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudiosrc.c
+++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudiosrc.c
@ -395,7 +395,13 @@ gst_osx_audio_src_io_proc (GstOsxAudioRingBuffer * buf,
          writeseg, ts);

      /* we wrote one segment */
+      CORE_AUDIO_TIMING_LOCK (buf->core_audio);
      gst_audio_ring_buffer_advance (GST_AUDIO_RING_BUFFER (buf), 1);
+      /* FIXME: Update the timestamp and reported frames in smaller increments
+       * when the segment size is larger than the total inNumberFrames */
+      gst_core_audio_update_timing (buf->core_audio, inTimeStamp,
+          inNumberFrames);
+      CORE_AUDIO_TIMING_UNLOCK (buf->core_audio);

      buf->segoffset = 0;
    }
--- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudio.c
+++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudio.c
@ -35,10 +35,20 @@ G_DEFINE_TYPE (GstCoreAudio, gst_core_audio, G_TYPE_OBJECT);
 #include "gstosxcoreaudiohal.c"
 #endif

+static void
+gst_core_audio_finalize (GObject * object)
+{
+  GstCoreAudio *core_audio = GST_CORE_AUDIO (object);
+  g_mutex_clear (&core_audio->timing_lock);
+
+  G_OBJECT_CLASS (gst_core_audio_parent_class)->finalize (object);
+}

 static void
 gst_core_audio_class_init (GstCoreAudioClass * klass)
 {
+  GObjectClass *object_klass = G_OBJECT_CLASS (klass);
+  object_klass->finalize = gst_core_audio_finalize;
 }

 static void
@ -54,6 +64,8 @@ gst_core_audio_init (GstCoreAudio * core_audio)
  core_audio->hog_pid = -1;
  core_audio->disabled_mixing = FALSE;
 #endif
+
+  g_mutex_init (&core_audio->timing_lock);
 }

 static gboolean
@ -202,8 +214,94 @@ gboolean
 gst_core_audio_get_samples_and_latency (GstCoreAudio * core_audio,
    gdouble rate, guint * samples, gdouble * latency)
 {
-  return gst_core_audio_get_samples_and_latency_impl (core_audio, rate,
+  uint64_t now_ns = AudioConvertHostTimeToNanos (AudioGetCurrentHostTime ());
+  gboolean ret = gst_core_audio_get_samples_and_latency_impl (core_audio, rate,
      samples, latency);
+
+  if (!ret)
+    return FALSE;
+
+  CORE_AUDIO_TIMING_LOCK (core_audio);
+
+  uint32_t samples_remain = 0;
+  uint64_t anchor_ns =
+      AudioConvertHostTimeToNanos (core_audio->anchor_hosttime);
+
+  if (core_audio->is_src) {
+    int64_t captured_ns =
+        core_audio->rate_scalar * (int64_t) (now_ns - anchor_ns);
+
+    /* src, the anchor time is the timestamp of the first sample in the last
+     * packet received, and we increment up from there, unless the device gets stopped. */
+    if (captured_ns > 0) {
+      if (core_audio->io_proc_active) {
+        samples_remain = (uint32_t) (captured_ns * rate / GST_SECOND);
+      } else {
+        samples_remain = core_audio->anchor_pend_samples;
+      }
+    } else {
+      /* Time went backward. This shouldn't happen for sources, but report something anyway */
+      samples_remain =
+          (uint32_t) (-captured_ns * rate / GST_SECOND) +
+          core_audio->anchor_pend_samples;
+    }
+
+    GST_DEBUG_OBJECT (core_audio,
+        "now_ns %" G_GUINT64_FORMAT " anchor %" G_GUINT64_FORMAT " elapsed ns %"
+        G_GINT64_FORMAT " rate %f captured_ns %" G_GINT64_FORMAT
+        " anchor_pend_samples %u samples_remain %u", now_ns, anchor_ns,
+        now_ns - anchor_ns, rate, captured_ns, core_audio->anchor_pend_samples,
+        samples_remain);
+  } else {
+    /* Sink, the anchor time is the time the most recent buffer will commence play out,
+     * and we count down to 0 for unplayed samples beyond that */
+    int64_t unplayed_ns =
+        core_audio->rate_scalar * (int64_t) (anchor_ns - now_ns);
+    if (unplayed_ns > 0) {
+      samples_remain =
+          (uint32_t) (unplayed_ns * rate / GST_SECOND) +
+          core_audio->anchor_pend_samples;
+    } else {
+      uint32_t samples_played = (uint32_t) (-unplayed_ns * rate / GST_SECOND);
+      if (samples_played < core_audio->anchor_pend_samples) {
+        samples_remain = core_audio->anchor_pend_samples - samples_played;
+      }
+    }
+
+    GST_DEBUG_OBJECT (core_audio,
+        "now_ns %" G_GUINT64_FORMAT " anchor %" G_GUINT64_FORMAT " elapsed ns %"
+        G_GINT64_FORMAT " rate %f unplayed_ns %" G_GINT64_FORMAT
+        " anchor_pend_samples %u", now_ns, anchor_ns, now_ns - anchor_ns, rate,
+        unplayed_ns, core_audio->anchor_pend_samples);
+  }
+
+  CORE_AUDIO_TIMING_UNLOCK (core_audio);
+
+  GST_DEBUG_OBJECT (core_audio, "samples = %u latency %f", samples_remain,
+      *latency);
+
+  *samples = samples_remain;
+  return TRUE;
+}
+
+void
+gst_core_audio_update_timing (GstCoreAudio * core_audio,
+    const AudioTimeStamp * inTimeStamp, unsigned int inNumberFrames)
+{
+  AudioTimeStampFlags target_flags =
+      kAudioTimeStampSampleHostTimeValid | kAudioTimeStampRateScalarValid;
+
+  if ((inTimeStamp->mFlags & target_flags) == target_flags) {
+    core_audio->anchor_hosttime = inTimeStamp->mHostTime;
+    core_audio->anchor_pend_samples = inNumberFrames;
+    core_audio->rate_scalar = inTimeStamp->mRateScalar;
+
+    GST_DEBUG_OBJECT (core_audio,
+        "anchor hosttime_ns %" G_GUINT64_FORMAT
+        " scalar_rate %f anchor_pend_samples %u",
+        AudioConvertHostTimeToNanos (core_audio->anchor_hosttime),
+        core_audio->rate_scalar, core_audio->anchor_pend_samples);
+  }
 }

 gboolean
--- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudio.h
+++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudio.h
@ -77,6 +77,9 @@ G_BEGIN_DECLS
 typedef struct _GstCoreAudio GstCoreAudio;
 typedef struct _GstCoreAudioClass GstCoreAudioClass;

+#define CORE_AUDIO_TIMING_LOCK(core_audio) (g_mutex_lock(&(core_audio->timing_lock)))
+#define CORE_AUDIO_TIMING_UNLOCK(core_audio) (g_mutex_unlock(&(core_audio->timing_lock)))
+
 struct _GstCoreAudio
 {
  GObject object;
@ -107,6 +110,11 @@ struct _GstCoreAudio
  AudioStreamBasicDescription original_format, stream_format;
  AudioDeviceIOProcID procID;
 #endif
+
+  GMutex timing_lock;
+  uint64_t anchor_hosttime;
+  uint32_t anchor_pend_samples;
+  float rate_scalar;
 };

 struct _GstCoreAudioClass
@ -143,6 +151,10 @@ gboolean gst_core_audio_get_samples_and_latency              (GstCoreAudio * cor
                                                              guint *samples,
                                                              gdouble *latency);

+void gst_core_audio_update_timing                            (GstCoreAudio * core_audio,
+                                                              const AudioTimeStamp * inTimeStamp,
+                                                              unsigned int inNumberFrames);
+
 void  gst_core_audio_set_volume                              (GstCoreAudio *core_audio,
                                                              gfloat volume);