mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-19 06:46:38 +00:00
opusenc: Encode exactly the number of samples we got as input and put correct timestamps on them
The first frame has lookahead fewer samples; the last frame might have some padding, or we might have to encode another frame of silence to get all our input into the encoded data. This is because of a) the lookahead at the beginning of the encoding, which shifts all data by that number of samples, and b) the padding needed to fill the very last frame completely. Ideally we would use LPC to calculate something better than silence for the padding to make the encoding as smooth as possible. With this change we get exactly the same number of samples back out of an opusenc ! opusdec pipeline. https://bugzilla.gnome.org/show_bug.cgi?id=757153
This commit is contained in:
parent
c7d785a512
commit
f66afc47ac
2 changed files with 57 additions and 12 deletions
|
@ -412,6 +412,7 @@ gst_opus_enc_start (GstAudioEncoder * benc)
|
|||
|
||||
GST_DEBUG_OBJECT (enc, "start");
|
||||
enc->encoded_samples = 0;
|
||||
enc->consumed_samples = 0;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
@ -766,6 +767,7 @@ gst_opus_enc_setup (GstOpusEnc * enc)
|
|||
lookahead);
|
||||
|
||||
/* lookahead is samples, the Opus header wants it in 48kHz samples */
|
||||
enc->lookahead = enc->pending_lookahead = lookahead;
|
||||
lookahead = lookahead * 48000 / enc->sample_rate;
|
||||
|
||||
gst_opus_header_create_caps (&caps, NULL, lookahead, enc->sample_rate,
|
||||
|
@ -807,6 +809,7 @@ gst_opus_enc_sink_event (GstAudioEncoder * benc, GstEvent * event)
|
|||
}
|
||||
case GST_EVENT_SEGMENT:
|
||||
enc->encoded_samples = 0;
|
||||
enc->consumed_samples = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -899,13 +902,13 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
GstClockTime duration;
|
||||
|
||||
guint max_payload_size;
|
||||
gint frame_samples;
|
||||
gint frame_samples, input_samples, output_samples;
|
||||
|
||||
g_mutex_lock (&enc->property_lock);
|
||||
|
||||
bytes = enc->frame_samples * enc->n_channels * 2;
|
||||
max_payload_size = enc->max_payload_size;
|
||||
frame_samples = enc->frame_samples;
|
||||
frame_samples = input_samples = enc->frame_samples;
|
||||
|
||||
g_mutex_unlock (&enc->property_lock);
|
||||
|
||||
|
@ -915,20 +918,23 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
bsize = map.size;
|
||||
|
||||
if (G_UNLIKELY (bsize % bytes)) {
|
||||
gint64 diff;
|
||||
|
||||
GST_DEBUG_OBJECT (enc, "draining; adding silence samples");
|
||||
g_assert (bsize < bytes);
|
||||
|
||||
/* If encoding part of a frame, and we have no set stop time on
|
||||
* the output segment, we update the segment stop time to reflect
|
||||
* the last sample. This will let oggmux set the last page's
|
||||
* granpos to tell a decoder the dummy samples should be clipped.
|
||||
*/
|
||||
input_samples = bsize / (enc->n_channels * 2);
|
||||
segment = &GST_AUDIO_ENCODER_OUTPUT_SEGMENT (enc);
|
||||
if (!GST_CLOCK_TIME_IS_VALID (segment->stop)) {
|
||||
int input_samples = bsize / (enc->n_channels * 2);
|
||||
GST_DEBUG_OBJECT (enc,
|
||||
"No stop time and partial frame, updating segment");
|
||||
duration =
|
||||
gst_util_uint64_scale (enc->encoded_samples + input_samples,
|
||||
gst_util_uint64_scale_ceil (enc->consumed_samples + input_samples,
|
||||
GST_SECOND, enc->sample_rate);
|
||||
segment->stop = segment->start + duration;
|
||||
GST_DEBUG_OBJECT (enc, "new output segment %" GST_SEGMENT_FORMAT,
|
||||
|
@ -937,6 +943,21 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
gst_event_new_segment (segment));
|
||||
}
|
||||
|
||||
diff =
|
||||
(enc->encoded_samples + frame_samples) - (enc->consumed_samples +
|
||||
input_samples);
|
||||
if (diff >= 0) {
|
||||
GST_DEBUG_OBJECT (enc,
|
||||
"%" G_GINT64_FORMAT " extra samples of padding in this frame",
|
||||
diff);
|
||||
output_samples = frame_samples - diff;
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (enc,
|
||||
"Need to add %" G_GINT64_FORMAT " extra samples in the next frame",
|
||||
-diff);
|
||||
output_samples = frame_samples;
|
||||
}
|
||||
|
||||
size = ((bsize / bytes) + 1) * bytes;
|
||||
mdata = g_malloc0 (size);
|
||||
memcpy (mdata, bdata, bsize);
|
||||
|
@ -944,10 +965,34 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
} else {
|
||||
data = bdata;
|
||||
size = bsize;
|
||||
|
||||
/* Adjust for lookahead here */
|
||||
if (enc->pending_lookahead) {
|
||||
if (input_samples > enc->pending_lookahead) {
|
||||
output_samples = input_samples - enc->pending_lookahead;
|
||||
enc->pending_lookahead = 0;
|
||||
} else {
|
||||
enc->pending_lookahead -= input_samples;
|
||||
output_samples = 0;
|
||||
}
|
||||
} else {
|
||||
output_samples = input_samples;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
GST_DEBUG_OBJECT (enc, "nothing to drain");
|
||||
goto done;
|
||||
if (enc->encoded_samples < enc->consumed_samples) {
|
||||
data = mdata = g_malloc0 (bytes);
|
||||
size = bytes;
|
||||
output_samples = enc->consumed_samples - enc->encoded_samples;
|
||||
input_samples = 0;
|
||||
GST_DEBUG_OBJECT (enc, "draining %d samples", output_samples);
|
||||
} else if (enc->encoded_samples == enc->consumed_samples) {
|
||||
GST_DEBUG_OBJECT (enc, "nothing to drain");
|
||||
goto done;
|
||||
} else {
|
||||
g_assert_not_reached ();
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
g_assert (size == bytes);
|
||||
|
@ -963,9 +1008,6 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
|
||||
gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
|
||||
|
||||
GST_DEBUG_OBJECT (enc, "encoding %d samples (%d bytes)",
|
||||
frame_samples, (int) bytes);
|
||||
|
||||
outsize =
|
||||
opus_multistream_encode (enc->state, (const gint16 *) data,
|
||||
frame_samples, omap.data, max_payload_size * enc->n_channels);
|
||||
|
@ -987,10 +1029,12 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
|
|||
GST_DEBUG_OBJECT (enc, "Output packet is %u bytes", outsize);
|
||||
gst_buffer_set_size (outbuf, outsize);
|
||||
|
||||
|
||||
ret =
|
||||
gst_audio_encoder_finish_frame (GST_AUDIO_ENCODER (enc), outbuf,
|
||||
frame_samples);
|
||||
enc->encoded_samples += frame_samples;
|
||||
output_samples);
|
||||
enc->encoded_samples += output_samples;
|
||||
enc->consumed_samples += input_samples;
|
||||
|
||||
done:
|
||||
|
||||
|
|
|
@ -79,7 +79,8 @@ struct _GstOpusEnc {
|
|||
gint n_channels;
|
||||
gint sample_rate;
|
||||
|
||||
guint64 encoded_samples;
|
||||
guint64 encoded_samples, consumed_samples;
|
||||
guint16 lookahead, pending_lookahead;
|
||||
|
||||
guint8 channel_mapping_family;
|
||||
guint8 encoding_channel_mapping[256];
|
||||
|
|
Loading…
Reference in a new issue