diff --git a/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.c b/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.c
index 15de9c1d55..c231910584 100644
--- a/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.c
+++ b/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.c
@@ -40,6 +40,7 @@
 #include <string.h>
 
 #include <gst/audio/audio.h>
+#include <gst/audio/gstdsd.h>
 #include "gstaudioringbuffer.h"
 
 GST_DEBUG_CATEGORY_STATIC (gst_audio_ring_buffer_debug);
@@ -313,6 +314,60 @@ gst_audio_ring_buffer_parse_caps (GstAudioRingBufferSpec * spec, GstCaps * caps)
     gst_structure_get_int (structure, "channels", &info.channels);
     spec->type = GST_AUDIO_RING_BUFFER_FORMAT_TYPE_FLAC;
     info.bpf = 1;
+  } else if (g_str_equal (mimetype, GST_DSD_MEDIA_TYPE)) {
+
+    /* Notes about what the "rate" means in DSD:
+     *
+     * In DSD, "sample formats" don't actually exist. There is only the DSD bit;
+     * this is what could be considered the closest equivalent to a "sample format".
+     * But since it is impractical to deal with individual bits in software, the
+     * bits are typically grouped into words (8/16/32 bit words). These are the
+     * DSDU8, DSDU16LE etc. "grouping formats".
+     *
+     * The "rate" in DSD information refers to the number of DSD _bytes_ per second
+     * (not bits per second, because, as said, per-bit handling in software does
+     * not usually make sense). The way the GstAudioRingBuffer works however requires
+     * the rate to be interpreted as the number of DSD _words_ per second. This is
+     * in part because that's how ALSA uses the rate.
+     *
+     * If the word format is DSDU8, then there's no difference to just using the
+     * original byte rate. But if for example it is DSDU16LE, then the ringbuffer's
+     * rate needs to be half of the rate from GstDsdInfo. For this reason, it is
+     * essential to divide the rate from the DSD info by the word length (in bytes).
+     *
+     * Furthermore, the BPF is set to the stride (= format width * num channels).
+     * The GstAudioRingBuffer can only handle interleaved DSD. This means that
+     * there is a "stride", that is, the DSD word of channel #1 is stored first,
+     * followed by the DSD word of channel #2 etc. and then again we get a DSD
+     * word from channel #1, and so forth. This is similar to how interleaved
+     * PCM works. The stride is then the size (in bytes) of the DSD words for
+     * each channel that are played at the same time. Using this as the BPF is
+     * very important. Otherwise, timestamp and duration figures can be off,
+     * the segment sizes may not be an integer multiple of the DSD stride, etc.
+     */
+
+    GstDsdInfo dsd_info;
+    guint format_width;
+
+    if (!gst_dsd_info_from_caps (&dsd_info, caps))
+      goto parse_error;
+
+    format_width = gst_dsd_format_get_width (dsd_info.format);
+
+    info.rate = dsd_info.rate / format_width;
+    info.channels = dsd_info.channels;
+    info.bpf = format_width * dsd_info.channels;
+
+    GST_INFO ("using DSD word rate %d instead of DSD byte rate %d "
+        "for ringbuffer", info.rate, dsd_info.rate);
+
+    memcpy (info.position, dsd_info.positions,
+        sizeof (GstAudioChannelPosition) * dsd_info.channels);
+
+    GST_AUDIO_RING_BUFFER_SPEC_DSD_FORMAT (spec) =
+        GST_DSD_INFO_FORMAT (&dsd_info);
+
+    spec->type = GST_AUDIO_RING_BUFFER_FORMAT_TYPE_DSD;
   } else {
     goto parse_error;
   }
@@ -655,13 +710,19 @@ gst_audio_ring_buffer_acquire (GstAudioRingBuffer * buf,
   g_free (buf->empty_seg);
   buf->empty_seg = g_malloc (segsize);
 
-  if (buf->spec.type == GST_AUDIO_RING_BUFFER_FORMAT_TYPE_RAW) {
-    gst_audio_format_info_fill_silence (buf->spec.info.finfo, buf->empty_seg,
-        segsize);
-  } else {
-    /* FIXME, non-raw formats get 0 as the empty sample */
-    memset (buf->empty_seg, 0, segsize);
+  switch (buf->spec.type) {
+    case GST_AUDIO_RING_BUFFER_FORMAT_TYPE_RAW:
+      gst_audio_format_info_fill_silence (buf->spec.info.finfo, buf->empty_seg,
+          segsize);
+      break;
+    case GST_AUDIO_RING_BUFFER_FORMAT_TYPE_DSD:
+      memset (buf->empty_seg, GST_DSD_SILENCE_PATTERN_BYTE, segsize);
+      break;
+    default:
+      /* FIXME, non-raw formats get 0 as the empty sample */
+      memset (buf->empty_seg, 0, segsize);
   }
+
   GST_DEBUG_OBJECT (buf, "acquired device");
 
 done:
diff --git a/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.h b/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.h
index f33ceb14c9..cde57cb457 100644
--- a/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.h
+++ b/subprojects/gst-plugins-base/gst-libs/gst/audio/gstaudioringbuffer.h
@@ -27,6 +27,8 @@
 #ifndef __GST_AUDIO_RING_BUFFER_H__
 #define __GST_AUDIO_RING_BUFFER_H__
 
+#include <gst/audio/gstdsdformat.h>
+
 G_BEGIN_DECLS
 
 #define GST_TYPE_AUDIO_RING_BUFFER             (gst_audio_ring_buffer_get_type())
@@ -88,6 +90,7 @@ typedef enum {
  * @GST_AUDIO_RING_BUFFER_FORMAT_TYPE_MPEG2_AAC_RAW: samples in MPEG-2 AAC raw format (Since: 1.12)
  * @GST_AUDIO_RING_BUFFER_FORMAT_TYPE_MPEG4_AAC_RAW: samples in MPEG-4 AAC raw format (Since: 1.12)
  * @GST_AUDIO_RING_BUFFER_FORMAT_TYPE_FLAC: samples in FLAC format (Since: 1.12)
+ * @GST_AUDIO_RING_BUFFER_FORMAT_TYPE_DSD: samples in DSD format (Since: 1.24)
  *
  * The format of the samples in the ringbuffer.
  */
@@ -107,7 +110,8 @@ typedef enum
   GST_AUDIO_RING_BUFFER_FORMAT_TYPE_MPEG4_AAC,
   GST_AUDIO_RING_BUFFER_FORMAT_TYPE_MPEG2_AAC_RAW,
   GST_AUDIO_RING_BUFFER_FORMAT_TYPE_MPEG4_AAC_RAW,
-  GST_AUDIO_RING_BUFFER_FORMAT_TYPE_FLAC
+  GST_AUDIO_RING_BUFFER_FORMAT_TYPE_FLAC,
+  GST_AUDIO_RING_BUFFER_FORMAT_TYPE_DSD
 } GstAudioRingBufferFormatType;
 
 /**
@@ -121,8 +125,13 @@ typedef enum
  * @segtotal: the total number of segments
  * @seglatency: number of segments queued in the lower level device,
  *  defaults to segtotal
+ * @dsd_format: the #GstDsdFormat (Since: 1.24)
  *
  * The structure containing the format specification of the ringbuffer.
+ *
+ * When @type is GST_AUDIO_RING_BUFFER_FORMAT_TYPE_DSD, the @dsd_format
+ * is valid (otherwise it is unused). Also, when DSD is the sample type,
+ * only the rate, channels, position, and bpf fields in @info are populated.
  */
 struct _GstAudioRingBufferSpec
 {
@@ -152,8 +161,15 @@ struct _GstAudioRingBufferSpec
   gint     seglatency;          /* number of segments queued in the lower
 				 * level device, defaults to segtotal. */
 
-  /*< private >*/
-  gpointer _gst_reserved[GST_PADDING];
+  /* Union preserves padded struct size for backwards compat
+   * Consumer code should use the accessor macros for fields */
+  union {
+    struct { /* < skip > */
+      GstDsdFormat  dsd_format;
+    } abi;
+    /*< private >*/
+    gpointer _gst_reserved[GST_PADDING];
+  } ABI;
 };
 
 #define GST_AUDIO_RING_BUFFER_SPEC_FORMAT_TYPE(spec)   ((spec)->type)
@@ -163,6 +179,7 @@ struct _GstAudioRingBufferSpec
 #define GST_AUDIO_RING_BUFFER_SPEC_SEGSIZE(spec)       ((spec)->segsize)
 #define GST_AUDIO_RING_BUFFER_SPEC_SEGTOTAL(spec)      ((spec)->segtotal)
 #define GST_AUDIO_RING_BUFFER_SPEC_SEGLATENCY(spec)    ((spec)->seglatency)
+#define GST_AUDIO_RING_BUFFER_SPEC_DSD_FORMAT(spec)    ((spec)->ABI.abi.dsd_format)
 
 #define GST_AUDIO_RING_BUFFER_GET_COND(buf) (&(((GstAudioRingBuffer *)buf)->cond))
 #define GST_AUDIO_RING_BUFFER_WAIT(buf)     (g_cond_wait (GST_AUDIO_RING_BUFFER_GET_COND (buf), GST_OBJECT_GET_LOCK (buf)))