diff --git a/common b/common index 1ccbe098d6..c3cafe123f 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 1ccbe098d6379612fcef09f4000da23585af980a +Subproject commit c3cafe123f3a363d337a29ad32fdd6d3631f52c0 diff --git a/configure.ac b/configure.ac index 7dc3ff3db0..2388b417db 100644 --- a/configure.ac +++ b/configure.ac @@ -768,9 +768,9 @@ AG_GST_CHECK_FEATURE(JACK, Jack, jack, [ AC_SUBST(JACK_LIBS) dnl upcomming jack2 (1.9.7 will have the new api as well - AG_GST_PKG_CHECK_MODULES(JACK_0_120_2, jack >= 0.120.2 jack < 1.0) - if test x$HAVE_JACK_0_120_2 = xyes; then - AC_DEFINE(HAVE_JACK_0_120_2, 1, [defined if jack >= 0.120.2 is available]) + AG_GST_PKG_CHECK_MODULES(JACK_0_120_1, jack >= 0.120.1 jack < 1.0) + if test x$HAVE_JACK_0_120_1 = xyes; then + AC_DEFINE(HAVE_JACK_0_120_1, 1, [defined if jack >= 0.120.1 is available]) fi ]) diff --git a/ext/flac/gstflacdec.c b/ext/flac/gstflacdec.c index b879ec14b5..5132dcf22a 100644 --- a/ext/flac/gstflacdec.c +++ b/ext/flac/gstflacdec.c @@ -1304,6 +1304,53 @@ gst_flac_dec_sink_event (GstPad * pad, GstEvent * event) return res; } +static gboolean +gst_flac_dec_chain_parse_headers (GstFlacDec * dec) +{ + guint8 marker[4]; + guint avail, off; + + avail = gst_adapter_available (dec->adapter); + if (avail < 4) + return FALSE; + + gst_adapter_copy (dec->adapter, marker, 0, 4); + if (strncmp ((const gchar *) marker, "fLaC", 4) != 0) { + GST_ERROR_OBJECT (dec, "Unexpected header, expected fLaC header"); + return TRUE; /* abort header parsing */ + } + + GST_DEBUG_OBJECT (dec, "fLaC header : len 4 @ %7u", 0); + + off = 4; + while (avail > (off + 1 + 3)) { + gboolean is_last; + guint8 mb_hdr[4]; + guint len, block_type; + + gst_adapter_copy (dec->adapter, mb_hdr, off, 4); + + is_last = ((mb_hdr[0] & 0x80) == 0x80); + block_type = mb_hdr[0] & 0x7f; + len = GST_READ_UINT24_BE (mb_hdr + 1); + GST_DEBUG_OBJECT (dec, "Metadata block type %u: len %7u + 4 @ %7u%s", + block_type, len, off, (is_last) ? 
" (last)" : ""); + off += 4 + len; + + if (is_last) + break; + + if (off >= avail) { + GST_LOG_OBJECT (dec, "Need more data: next offset %u > avail %u", off, + avail); + return FALSE; + } + } + + /* want metadata blocks plus at least one frame */ + return (off + FLAC__MAX_BLOCK_SIZE >= avail); +} + static GstFlowReturn gst_flac_dec_chain (GstPad * pad, GstBuffer * buf) { @@ -1368,6 +1415,15 @@ gst_flac_dec_chain (GstPad * pad, GstBuffer * buf) dec->last_flow = GST_FLOW_OK; if (!dec->framed) { + if (G_UNLIKELY (!dec->got_headers)) { + if (!gst_flac_dec_chain_parse_headers (dec)) { + GST_LOG_OBJECT (dec, "don't have metadata blocks yet, need more data"); + goto out; + } + GST_INFO_OBJECT (dec, "have all metadata blocks now"); + dec->got_headers = TRUE; + } + /* wait until we have at least 64kB because libflac's StreamDecoder * interface is a bit dumb it seems (if we don't have as much data as * it wants it will call our read callback repeatedly and the only @@ -1403,6 +1459,8 @@ gst_flac_dec_chain (GstPad * pad, GstBuffer * buf) GST_DEBUG_OBJECT (dec, "don't have all headers yet"); } +out: + return dec->last_flow; } @@ -1992,6 +2050,7 @@ gst_flac_dec_sink_activate_push (GstPad * sinkpad, gboolean active) if (active) { gst_flac_dec_setup_decoder (dec); dec->streaming = TRUE; + dec->got_headers = FALSE; } return TRUE; } diff --git a/ext/flac/gstflacdec.h b/ext/flac/gstflacdec.h index 4aebd20c7c..aa56b4b801 100644 --- a/ext/flac/gstflacdec.h +++ b/ext/flac/gstflacdec.h @@ -48,6 +48,8 @@ struct _GstFlacDec { gboolean framed; gboolean streaming; + gboolean got_headers; /* if we've parsed the headers (unframed push mode only) */ + GstPad *sinkpad; GstPad *srcpad; diff --git a/ext/flac/gstflacenc.c b/ext/flac/gstflacenc.c index 35a6bf3a9b..a3de543334 100644 --- a/ext/flac/gstflacenc.c +++ b/ext/flac/gstflacenc.c @@ -473,6 +473,7 @@ gst_flac_enc_set_metadata (GstFlacEnc * flacenc, guint64 total_samples) const GstTagList *user_tags; GstTagList *copy; gint entries = 1; + gint 
n_images, n_preview_images; g_return_if_fail (flacenc != NULL); user_tags = gst_tag_setter_get_tag_list (GST_TAG_SETTER (flacenc)); @@ -481,33 +482,81 @@ gst_flac_enc_set_metadata (GstFlacEnc * flacenc, guint64 total_samples) } copy = gst_tag_list_merge (user_tags, flacenc->tags, gst_tag_setter_get_tag_merge_mode (GST_TAG_SETTER (flacenc))); - flacenc->meta = g_new0 (FLAC__StreamMetadata *, 3); + n_images = gst_tag_list_get_tag_size (copy, GST_TAG_IMAGE); + n_preview_images = gst_tag_list_get_tag_size (copy, GST_TAG_PREVIEW_IMAGE); + + flacenc->meta = + g_new0 (FLAC__StreamMetadata *, 3 + n_images + n_preview_images); flacenc->meta[0] = FLAC__metadata_object_new (FLAC__METADATA_TYPE_VORBIS_COMMENT); gst_tag_list_foreach (copy, add_one_tag, flacenc); + if (n_images + n_preview_images > 0) { + GstBuffer *buffer; + GstCaps *caps; + GstStructure *structure; + GstTagImageType image_type = GST_TAG_IMAGE_TYPE_NONE; + gint i; + + for (i = 0; i < n_images + n_preview_images; i++) { + if (i < n_images) { + if (!gst_tag_list_get_buffer_index (copy, GST_TAG_IMAGE, i, &buffer)) + continue; + } else { + if (!gst_tag_list_get_buffer_index (copy, GST_TAG_PREVIEW_IMAGE, + i - n_images, &buffer)) + continue; + } + + flacenc->meta[entries] = + FLAC__metadata_object_new (FLAC__METADATA_TYPE_PICTURE); + + caps = gst_buffer_get_caps (buffer); + structure = gst_caps_get_structure (caps, 0); + + gst_structure_get (structure, "image-type", GST_TYPE_TAG_IMAGE_TYPE, + &image_type, NULL); + /* Convert to ID3v2 APIC image type */ + if (image_type == GST_TAG_IMAGE_TYPE_NONE) + image_type = (i < n_images) ? 
0x00 : 0x01; + else + image_type = image_type + 2; + + FLAC__metadata_object_picture_set_data (flacenc->meta[entries], + GST_BUFFER_DATA (buffer), GST_BUFFER_SIZE (buffer), TRUE); + /* FIXME: There's no way to set the picture type in libFLAC */ + flacenc->meta[entries]->data.picture.type = image_type; + FLAC__metadata_object_picture_set_mime_type (flacenc->meta[entries], + (char *) gst_structure_get_name (structure), TRUE); + + gst_caps_unref (caps); + gst_buffer_unref (buffer); + entries++; + } + } + if (flacenc->seekpoints && total_samples != GST_CLOCK_TIME_NONE) { gboolean res; guint samples; - flacenc->meta[1] = + flacenc->meta[entries] = FLAC__metadata_object_new (FLAC__METADATA_TYPE_SEEKTABLE); if (flacenc->seekpoints > 0) { res = FLAC__metadata_object_seektable_template_append_spaced_points - (flacenc->meta[1], flacenc->seekpoints, total_samples); + (flacenc->meta[entries], flacenc->seekpoints, total_samples); } else { samples = -flacenc->seekpoints * flacenc->sample_rate; res = FLAC__metadata_object_seektable_template_append_spaced_points_by_samples - (flacenc->meta[1], samples, total_samples); + (flacenc->meta[entries], samples, total_samples); } if (!res) { GST_DEBUG_OBJECT (flacenc, "adding seekpoint template %d failed", flacenc->seekpoints); - FLAC__metadata_object_delete (flacenc->meta[1]); - flacenc->meta[1] = NULL; + FLAC__metadata_object_delete (flacenc->meta[entries]); /* free the seektable allocated above; meta[1] may now hold a picture block */ + flacenc->meta[entries] = NULL; } else { entries++; } diff --git a/ext/jack/gstjackaudiosink.c b/ext/jack/gstjackaudiosink.c index 13f69b7365..da1c4ac53c 100644 --- a/ext/jack/gstjackaudiosink.c +++ b/ext/jack/gstjackaudiosink.c @@ -593,7 +593,7 @@ gst_jack_ring_buffer_delay (GstRingBuffer * buf) { GstJackAudioSink *sink; guint i, res = 0; -#ifdef HAVE_JACK_0_120_2 +#ifdef HAVE_JACK_0_120_1 jack_latency_range_t range; #else guint latency; @@ -604,7 +604,7 @@ gst_jack_ring_buffer_delay (GstRingBuffer * buf) client = gst_jack_audio_client_get_client (sink->client); for (i = 0; i < sink->port_count; i++) { -#ifdef 
HAVE_JACK_0_120_1 jack_port_get_latency_range (sink->ports[i], JackPlaybackLatency, &range); if (range.max > res) res = range.max; diff --git a/ext/jack/gstjackaudiosrc.c b/ext/jack/gstjackaudiosrc.c index b4840574a3..68d11f85a0 100644 --- a/ext/jack/gstjackaudiosrc.c +++ b/ext/jack/gstjackaudiosrc.c @@ -604,7 +604,7 @@ gst_jack_ring_buffer_delay (GstRingBuffer * buf) { GstJackAudioSrc *src; guint i, res = 0; -#ifdef HAVE_JACK_0_120_2 +#ifdef HAVE_JACK_0_120_1 jack_latency_range_t range; #else guint latency; @@ -615,7 +615,7 @@ gst_jack_ring_buffer_delay (GstRingBuffer * buf) client = gst_jack_audio_client_get_client (src->client); for (i = 0; i < src->port_count; i++) { -#ifdef HAVE_JACK_0_120_2 +#ifdef HAVE_JACK_0_120_1 jack_port_get_latency_range (src->ports[i], JackCaptureLatency, &range); if (range.max > res) res = range.max; diff --git a/ext/pulse/pulsesink.c b/ext/pulse/pulsesink.c index 5a60cda0e6..d93e5c890a 100644 --- a/ext/pulse/pulsesink.c +++ b/ext/pulse/pulsesink.c @@ -783,12 +783,8 @@ gst_pulseringbuffer_acquire (GstRingBuffer * buf, GstRingBufferSpec * spec) /* create a stream */ GST_LOG_OBJECT (psink, "creating stream with name %s", name); - if (psink->proplist) { - if (!(pbuf->stream = pa_stream_new_with_proplist (pbuf->context, - name, &pbuf->sample_spec, &channel_map, psink->proplist))) - goto stream_failed; - } else if (!(pbuf->stream = pa_stream_new (pbuf->context, - name, &pbuf->sample_spec, &channel_map))) + if (!(pbuf->stream = pa_stream_new_with_proplist (pbuf->context, name, + &pbuf->sample_spec, &channel_map, psink->proplist))) goto stream_failed; /* install essential callbacks */ @@ -1081,6 +1077,13 @@ gst_pulseringbuffer_start (GstRingBuffer * buf) GST_DEBUG_OBJECT (psink, "starting"); pbuf->paused = FALSE; + + /* EOS needs running clock */ + if (GST_BASE_SINK_CAST (psink)->eos || + g_atomic_int_get (&GST_BASE_AUDIO_SINK (psink)->abidata. 
+ ABI.eos_rendering)) + gst_pulsering_set_corked (pbuf, FALSE, FALSE); + pa_threaded_mainloop_unlock (mainloop); return TRUE; diff --git a/gst/flv/gstflvdemux.c b/gst/flv/gstflvdemux.c index 79f83bc2b8..820f689ec6 100644 --- a/gst/flv/gstflvdemux.c +++ b/gst/flv/gstflvdemux.c @@ -70,7 +70,8 @@ static GstStaticPadTemplate video_src_template = GST_PAD_SOMETIMES, GST_STATIC_CAPS ("video/x-flash-video; " "video/x-flash-screen; " - "video/x-vp6-flash; " "video/x-vp6-alpha; " "video/x-h264;") + "video/x-vp6-flash; " "video/x-vp6-alpha; " + "video/x-h264, stream-format=avc;") ); GST_DEBUG_CATEGORY_STATIC (flvdemux_debug); @@ -1057,7 +1058,9 @@ gst_flv_demux_video_negotiate (GstFlvDemux * demux, guint32 codec_tag) caps = gst_caps_new_simple ("video/x-vp6-alpha", NULL); break; case 7: - caps = gst_caps_new_simple ("video/x-h264", NULL); + caps = + gst_caps_new_simple ("video/x-h264", "stream-format", G_TYPE_STRING, + "avc", NULL); break; default: GST_WARNING_OBJECT (demux, "unsupported video codec tag %u", codec_tag); diff --git a/gst/flv/gstflvmux.c b/gst/flv/gstflvmux.c index c128d34132..005570d6ae 100644 --- a/gst/flv/gstflvmux.c +++ b/gst/flv/gstflvmux.c @@ -63,7 +63,8 @@ static GstStaticPadTemplate videosink_templ = GST_STATIC_PAD_TEMPLATE ("video", GST_PAD_REQUEST, GST_STATIC_CAPS ("video/x-flash-video; " "video/x-flash-screen; " - "video/x-vp6-flash; " "video/x-vp6-alpha; " "video/x-h264;") + "video/x-vp6-flash; " "video/x-vp6-alpha; " + "video/x-h264, stream-format=avc;") ); static GstStaticPadTemplate audiosink_templ = GST_STATIC_PAD_TEMPLATE ("audio", diff --git a/gst/matroska/matroska-mux.c b/gst/matroska/matroska-mux.c index 4b7fdaf7f5..7be2245557 100644 --- a/gst/matroska/matroska-mux.c +++ b/gst/matroska/matroska-mux.c @@ -197,7 +197,11 @@ static GstStaticPadTemplate audiosink_templ = "raversion = (int) { 1, 2, 8 }, " COMMON_AUDIO_CAPS "; " "audio/x-wma, " "wmaversion = (int) [ 1, 3 ], " "block_align = (int) [ 0, 65535 ], bitrate = (int) [ 0, 524288 ], " - 
COMMON_AUDIO_CAPS) + COMMON_AUDIO_CAPS ";" + "audio/x-alaw, " + "channels = (int) {1, 2}, " "rate = (int) [ 8000, 192000 ]; " + "audio/x-mulaw, " + "channels = (int) {1, 2}, " "rate = (int) [ 8000, 192000 ]") ); static GstStaticPadTemplate subtitlesink_templ = @@ -1632,41 +1636,61 @@ gst_matroska_mux_audio_pad_setcaps (GstPad * pad, GstCaps * caps) context->codec_priv_size = priv_data_size; } - } else if (!strcmp (mimetype, "audio/x-wma")) { + } else if (!strcmp (mimetype, "audio/x-wma") + || !strcmp (mimetype, "audio/x-alaw") + || !strcmp (mimetype, "audio/x-mulaw")) { guint8 *codec_priv; guint codec_priv_size; - guint16 format; + guint16 format = 0; gint block_align; gint bitrate; - gint wmaversion; - gint depth; - if (!gst_structure_get_int (structure, "wmaversion", &wmaversion) - || !gst_structure_get_int (structure, "block_align", &block_align) - || !gst_structure_get_int (structure, "bitrate", &bitrate) - || samplerate == 0 || channels == 0) { - GST_WARNING_OBJECT (mux, "Missing wmaversion/block_align/bitrate/" - "channels/rate on WMA caps"); + if (samplerate == 0 || channels == 0) { + GST_WARNING_OBJECT (mux, "Missing channels/samplerate on caps"); goto refuse_caps; } - switch (wmaversion) { - case 1: - format = GST_RIFF_WAVE_FORMAT_WMAV1; - break; - case 2: - format = GST_RIFF_WAVE_FORMAT_WMAV2; - break; - case 3: - format = GST_RIFF_WAVE_FORMAT_WMAV3; - break; - default: - GST_WARNING_OBJECT (mux, "Unexpected WMA version: %d", wmaversion); - goto refuse_caps; - } + if (!strcmp (mimetype, "audio/x-wma")) { + gint wmaversion; + gint depth; - if (gst_structure_get_int (structure, "depth", &depth)) - audiocontext->bitdepth = depth; + if (!gst_structure_get_int (structure, "wmaversion", &wmaversion) + || !gst_structure_get_int (structure, "block_align", &block_align) + || !gst_structure_get_int (structure, "bitrate", &bitrate)) { + GST_WARNING_OBJECT (mux, "Missing wmaversion/block_align/bitrate" + " on WMA caps"); + goto refuse_caps; + } + + switch 
(wmaversion) { + case 1: + format = GST_RIFF_WAVE_FORMAT_WMAV1; + break; + case 2: + format = GST_RIFF_WAVE_FORMAT_WMAV2; + break; + case 3: + format = GST_RIFF_WAVE_FORMAT_WMAV3; + break; + default: + GST_WARNING_OBJECT (mux, "Unexpected WMA version: %d", wmaversion); + goto refuse_caps; + } + + if (gst_structure_get_int (structure, "depth", &depth)) + audiocontext->bitdepth = depth; + } else if (!strcmp (mimetype, "audio/x-alaw") + || !strcmp (mimetype, "audio/x-mulaw")) { + audiocontext->bitdepth = 8; + if (!strcmp (mimetype, "audio/x-alaw")) + format = GST_RIFF_WAVE_FORMAT_ALAW; + else + format = GST_RIFF_WAVE_FORMAT_MULAW; + + block_align = channels; + bitrate = block_align * samplerate; + } + g_assert (format != 0); codec_priv_size = WAVEFORMATEX_SIZE; if (buf) diff --git a/gst/qtdemux/qtdemux.c b/gst/qtdemux/qtdemux.c index b043920f5f..4bcd7a56bf 100644 --- a/gst/qtdemux/qtdemux.c +++ b/gst/qtdemux/qtdemux.c @@ -7557,6 +7557,10 @@ qtdemux_tag_add_year (GstQTDemux * qtdemux, const char *tag, const char *dummy, return; y = QT_UINT16 ((guint8 *) node->data + 12); + if (y == 0) { + GST_DEBUG_OBJECT (qtdemux, "year: %u is not a valid year", y); + return; + } GST_DEBUG_OBJECT (qtdemux, "year: %u", y); date = g_date_new_dmy (1, 1, y); @@ -7580,6 +7584,12 @@ qtdemux_tag_add_classification (GstQTDemux * qtdemux, const char *tag, offset = 12; entity = (guint8 *) node->data + offset; + if (entity[0] == 0 || entity[1] == 0 || entity[2] == 0 || entity[3] == 0) { + GST_DEBUG_OBJECT (qtdemux, + "classification info: %c%c%c%c invalid classification entity", + entity[0], entity[1], entity[2], entity[3]); + return; + } offset += 4; table = QT_UINT16 ((guint8 *) node->data + offset); diff --git a/gst/rtpmanager/gstrtpjitterbuffer.c b/gst/rtpmanager/gstrtpjitterbuffer.c index 1d253eb829..861e1e3dde 100644 --- a/gst/rtpmanager/gstrtpjitterbuffer.c +++ b/gst/rtpmanager/gstrtpjitterbuffer.c @@ -881,8 +881,6 @@ static void gst_rtp_jitter_buffer_flush_stop (GstRtpJitterBuffer * 
jitterbuffer) { GstRtpJitterBufferPrivate *priv; - GstClock *clock; - GstClockTime ts; priv = jitterbuffer->priv; @@ -904,18 +902,6 @@ gst_rtp_jitter_buffer_flush_stop (GstRtpJitterBuffer * jitterbuffer) GST_DEBUG_OBJECT (jitterbuffer, "flush and reset jitterbuffer"); rtp_jitter_buffer_flush (priv->jbuf); rtp_jitter_buffer_reset_skew (priv->jbuf); - /* sync_time for scheduling timeouts needs proper element base_time - * However, following a seek new base_time only trickles down upon PLAYING - * upon which time quite some processing has already passed - * (which also needs correct base time) */ - clock = gst_element_get_clock (GST_ELEMENT_CAST (jitterbuffer)); - if (clock) { - ts = gst_clock_get_time (clock); - GST_DEBUG_OBJECT (jitterbuffer, "new base time %" GST_TIME_FORMAT, - GST_TIME_ARGS (ts)); - gst_object_unref (clock); - gst_element_set_base_time (GST_ELEMENT_CAST (jitterbuffer), ts); - } JBUF_UNLOCK (priv); } diff --git a/gst/rtsp/gstrtspsrc.c b/gst/rtsp/gstrtspsrc.c index 5ea5a4b383..4c6241eccc 100644 --- a/gst/rtsp/gstrtspsrc.c +++ b/gst/rtsp/gstrtspsrc.c @@ -1026,9 +1026,8 @@ gst_rtspsrc_create_stream (GstRTSPSrc * src, GstSDPMessage * sdp, gint idx) * configure the transport of the stream and is used to identity the stream in * the RTP-Info header field returned from PLAY. 
*/ control_url = gst_sdp_media_get_attribute_val (media, "control"); - if (control_url == NULL) { + if (control_url == NULL) control_url = gst_sdp_message_get_attribute_val_n (sdp, "control", 0); - } GST_DEBUG_OBJECT (src, "stream %d, (%p)", stream->id, stream); GST_DEBUG_OBJECT (src, " pt: %d", stream->pt); @@ -1665,6 +1664,16 @@ gst_rtspsrc_flush (GstRTSPSrc * src, gboolean flush) gst_rtspsrc_push_event (src, event, FALSE); gst_rtspsrc_loop_send_cmd (src, cmd, flush); + /* set up manager before data-flow resumes */ + /* to manage jitterbuffer buffer mode */ + if (src->manager) { + gst_element_set_base_time (GST_ELEMENT_CAST (src->manager), base_time); + /* and to have base_time trickle further down, + * e.g. to jitterbuffer for its timeout handling */ + if (base_time != -1) + gst_element_set_state (GST_ELEMENT_CAST (src->manager), state); + } + /* make running time start start at 0 again */ for (walk = src->streams; walk; walk = g_list_next (walk)) { GstRTSPStream *stream = (GstRTSPStream *) walk->data; @@ -1681,9 +1690,6 @@ gst_rtspsrc_flush (GstRTSPSrc * src, gboolean flush) /* for tcp interleaved case */ if (base_time != -1) gst_element_set_base_time (GST_ELEMENT_CAST (src), base_time); - /* to manage jitterbuffer buffer mode */ - if (src->manager) - gst_element_set_base_time (GST_ELEMENT_CAST (src->manager), base_time); } static GstRTSPResult diff --git a/gst/videomixer/blend.c b/gst/videomixer/blend.c index 9ca6168ff6..e2d736ff43 100644 --- a/gst/videomixer/blend.c +++ b/gst/videomixer/blend.c @@ -41,9 +41,9 @@ GST_DEBUG_CATEGORY_STATIC (gst_videomixer_blend_debug); /* Below are the implementations of everything */ /* A32 is for AYUV, ARGB and BGRA */ -#define BLEND_A32(name, LOOP) \ +#define BLEND_A32(name, method, LOOP) \ static void \ -blend_##name (const guint8 * src, gint xpos, gint ypos, \ +method##_ ##name (const guint8 * src, gint xpos, gint ypos, \ gint src_width, gint src_height, gdouble src_alpha, \ guint8 * dest, gint dest_width, gint 
dest_height) \ { \ @@ -83,25 +83,31 @@ blend_##name (const guint8 * src, gint xpos, gint ypos, \ LOOP (dest, src, src_height, src_width, src_stride, dest_stride, s_alpha); \ } -#define BLEND_A32_LOOP(name) \ +#define BLEND_A32_LOOP(name, method) \ static inline void \ -_blend_loop_##name (guint8 * dest, const guint8 * src, gint src_height, \ +_##method##_loop_##name (guint8 * dest, const guint8 * src, gint src_height, \ gint src_width, gint src_stride, gint dest_stride, guint s_alpha) \ { \ s_alpha = MIN (255, s_alpha); \ - orc_blend_##name (dest, dest_stride, src, src_stride, \ + orc_##method##_##name (dest, dest_stride, src, src_stride, \ s_alpha, src_width, src_height); \ } -BLEND_A32_LOOP (argb); -BLEND_A32_LOOP (bgra); +BLEND_A32_LOOP (argb, blend); +BLEND_A32_LOOP (bgra, blend); +BLEND_A32_LOOP (argb, overlay); +BLEND_A32_LOOP (bgra, overlay); #if G_BYTE_ORDER == LITTLE_ENDIAN -BLEND_A32 (argb, _blend_loop_argb); -BLEND_A32 (bgra, _blend_loop_bgra); +BLEND_A32 (argb, blend, _blend_loop_argb); +BLEND_A32 (bgra, blend, _blend_loop_bgra); +BLEND_A32 (argb, overlay, _overlay_loop_argb); +BLEND_A32 (bgra, overlay, _overlay_loop_bgra); #else -BLEND_A32 (argb, _blend_loop_bgra); -BLEND_A32 (bgra, _blend_loop_argb); +BLEND_A32 (argb, blend, _blend_loop_bgra); +BLEND_A32 (bgra, blend, _blend_loop_argb); +BLEND_A32 (argb, overlay, _overlay_loop_bgra); +BLEND_A32 (bgra, overlay, _overlay_loop_argb); #endif #define A32_CHECKER_C(name, RGB, A, C1, C2, C3) \ @@ -666,6 +672,8 @@ PACKED_422_FILL_COLOR (uyvy, 16, 24, 0, 8); /* Init function */ BlendFunction gst_video_mixer_blend_argb; BlendFunction gst_video_mixer_blend_bgra; +BlendFunction gst_video_mixer_overlay_argb; +BlendFunction gst_video_mixer_overlay_bgra; /* AYUV/ABGR is equal to ARGB, RGBA is equal to BGRA */ BlendFunction gst_video_mixer_blend_y444; BlendFunction gst_video_mixer_blend_y42b; @@ -724,6 +732,8 @@ gst_video_mixer_init_blend (void) gst_video_mixer_blend_argb = blend_argb; gst_video_mixer_blend_bgra = 
blend_bgra; + gst_video_mixer_overlay_argb = overlay_argb; + gst_video_mixer_overlay_bgra = overlay_bgra; gst_video_mixer_blend_i420 = blend_i420; gst_video_mixer_blend_y444 = blend_y444; gst_video_mixer_blend_y42b = blend_y42b; diff --git a/gst/videomixer/blend.h b/gst/videomixer/blend.h index 4f770cbbc9..ef60c915a7 100644 --- a/gst/videomixer/blend.h +++ b/gst/videomixer/blend.h @@ -31,6 +31,11 @@ extern BlendFunction gst_video_mixer_blend_bgra; #define gst_video_mixer_blend_ayuv gst_video_mixer_blend_argb #define gst_video_mixer_blend_abgr gst_video_mixer_blend_argb #define gst_video_mixer_blend_rgba gst_video_mixer_blend_bgra +extern BlendFunction gst_video_mixer_overlay_argb; +extern BlendFunction gst_video_mixer_overlay_bgra; +#define gst_video_mixer_overlay_ayuv gst_video_mixer_overlay_argb +#define gst_video_mixer_overlay_abgr gst_video_mixer_overlay_argb +#define gst_video_mixer_overlay_rgba gst_video_mixer_overlay_bgra extern BlendFunction gst_video_mixer_blend_i420; #define gst_video_mixer_blend_yv12 gst_video_mixer_blend_i420 extern BlendFunction gst_video_mixer_blend_y41b; diff --git a/gst/videomixer/blendorc-dist.c b/gst/videomixer/blendorc-dist.c index 06d012c106..8ead5bb3e3 100644 --- a/gst/videomixer/blendorc-dist.c +++ b/gst/videomixer/blendorc-dist.c @@ -32,6 +32,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) +#define inline __inline #else #include typedef signed char orc_int8; @@ -80,6 +81,10 @@ void orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); void orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); +void orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, + int s1_stride, int p1, int n, int m); +void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, + int s1_stride, int p1, int n, int m); /* begin Orc C target 
preamble */ @@ -518,7 +523,7 @@ orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var39.x4[2] = p1; var39.x4[3] = p1; /* 16: loadpl */ - var40.i = 0x000000ff; /* 255 or 1.25987e-321f */ + var40.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ for (i = 0; i < n; i++) { /* 0: loadl */ @@ -570,16 +575,16 @@ orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff; /* 13: div255w */ var53.x4[0] = - ((uint16_t) (((orc_uint16) (var52.x4[0] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) + (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8; var53.x4[1] = - ((uint16_t) (((orc_uint16) (var52.x4[1] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) + (((orc_uint16) (var52.x4[1] + 128)) >> 8))) >> 8; var53.x4[2] = - ((uint16_t) (((orc_uint16) (var52.x4[2] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) + (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8; var53.x4[3] = - ((uint16_t) (((orc_uint16) (var52.x4[3] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) + (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8; /* 14: addw */ var54.x4[0] = var50.x4[0] + var53.x4[0]; @@ -639,7 +644,7 @@ _backup_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex) var39.x4[2] = ex->params[24]; var39.x4[3] = ex->params[24]; /* 16: loadpl */ - var40.i = 0x000000ff; /* 255 or 1.25987e-321f */ + var40.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ for (i = 0; i < n; i++) { /* 0: loadl */ @@ -691,16 +696,16 @@ _backup_orc_blend_argb (OrcExecutor * ORC_RESTRICT ex) var52.x4[3] = (var51.x4[3] * var47.x4[3]) & 0xffff; /* 13: div255w */ var53.x4[0] = - ((uint16_t) (((orc_uint16) (var52.x4[0] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[0] + 128)) + (((orc_uint16) (var52.x4[0] + 128)) >> 8))) >> 8; var53.x4[1] = - ((uint16_t) (((orc_uint16) (var52.x4[1] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[1] + 128)) + (((orc_uint16) (var52.x4[1] + 128)) >> 8))) 
>> 8; var53.x4[2] = - ((uint16_t) (((orc_uint16) (var52.x4[2] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[2] + 128)) + (((orc_uint16) (var52.x4[2] + 128)) >> 8))) >> 8; var53.x4[3] = - ((uint16_t) (((orc_uint16) (var52.x4[3] + 128)) + + ((orc_uint16) (((orc_uint16) (var52.x4[3] + 128)) + (((orc_uint16) (var52.x4[3] + 128)) >> 8))) >> 8; /* 14: addw */ var54.x4[0] = var50.x4[0] + var53.x4[0]; @@ -848,7 +853,7 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var40.x4[2] = p1; var40.x4[3] = p1; /* 17: loadpl */ - var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */ + var41.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */ for (i = 0; i < n; i++) { /* 0: loadl */ @@ -902,16 +907,16 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff; /* 14: div255w */ var55.x4[0] = - ((uint16_t) (((orc_uint16) (var54.x4[0] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) + (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8; var55.x4[1] = - ((uint16_t) (((orc_uint16) (var54.x4[1] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) + (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8; var55.x4[2] = - ((uint16_t) (((orc_uint16) (var54.x4[2] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) + (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8; var55.x4[3] = - ((uint16_t) (((orc_uint16) (var54.x4[3] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) + (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8; /* 15: addw */ var56.x4[0] = var52.x4[0] + var55.x4[0]; @@ -972,7 +977,7 @@ _backup_orc_blend_bgra (OrcExecutor * ORC_RESTRICT ex) var40.x4[2] = ex->params[24]; var40.x4[3] = ex->params[24]; /* 17: loadpl */ - var41.i = 0xff000000; /* -16777216 or 2.11371e-314f */ + var41.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */ for (i = 0; i < n; i++) { /* 0: loadl */ @@ -1026,16 +1031,16 @@ _backup_orc_blend_bgra (OrcExecutor * 
ORC_RESTRICT ex) var54.x4[3] = (var53.x4[3] * var49.x4[3]) & 0xffff; /* 14: div255w */ var55.x4[0] = - ((uint16_t) (((orc_uint16) (var54.x4[0] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[0] + 128)) + (((orc_uint16) (var54.x4[0] + 128)) >> 8))) >> 8; var55.x4[1] = - ((uint16_t) (((orc_uint16) (var54.x4[1] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[1] + 128)) + (((orc_uint16) (var54.x4[1] + 128)) >> 8))) >> 8; var55.x4[2] = - ((uint16_t) (((orc_uint16) (var54.x4[2] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[2] + 128)) + (((orc_uint16) (var54.x4[2] + 128)) >> 8))) >> 8; var55.x4[3] = - ((uint16_t) (((orc_uint16) (var54.x4[3] + 128)) + + ((orc_uint16) (((orc_uint16) (var54.x4[3] + 128)) + (((orc_uint16) (var54.x4[3] + 128)) >> 8))) >> 8; /* 15: addw */ var56.x4[0] = var52.x4[0] + var55.x4[0]; @@ -1145,3 +1150,1063 @@ orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, func (ex); } #endif + + +/* orc_overlay_argb */ +#ifdef DISABLE_ORC +void +orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, + int p1, int n, int m) +{ + int i; + int j; + orc_union32 *ORC_RESTRICT ptr0; + const orc_union32 *ORC_RESTRICT ptr4; + orc_union64 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union16 var45; + orc_int8 var46; + orc_union32 var47; + orc_union64 var48; + orc_union64 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union32 var53; + orc_union64 var54; + orc_union64 var55; + orc_union32 var56; + orc_union16 var57; + orc_int8 var58; + orc_union16 var59; + orc_union32 var60; + orc_union64 var61; + orc_union64 var62; + orc_union64 var63; + orc_union64 var64; + orc_union64 var65; + orc_union64 var66; + orc_union64 var67; + orc_union64 var68; + orc_union32 var69; + orc_union32 var70; + orc_union32 var71; + orc_union32 var72; + orc_union32 var73; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, 
s1_stride * j); + + /* 5: loadpw */ + var41.x4[0] = p1; + var41.x4[1] = p1; + var41.x4[2] = p1; + var41.x4[3] = p1; + /* 10: loadpl */ + var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ + /* 27: loadpl */ + var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */ + /* 30: loadpl */ + var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var44 = ptr4[i]; + /* 1: convlw */ + var45.i = var44.i; + /* 2: convwb */ + var46 = var45.i; + /* 3: splatbl */ + var47.i = + ((var46 & 0xff) << 24) | ((var46 & 0xff) << 16) | ((var46 & 0xff) << + 8) | (var46 & 0xff); + /* 4: convubw */ + var48.x4[0] = (orc_uint8) var47.x4[0]; + var48.x4[1] = (orc_uint8) var47.x4[1]; + var48.x4[2] = (orc_uint8) var47.x4[2]; + var48.x4[3] = (orc_uint8) var47.x4[3]; + /* 6: mullw */ + var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff; + var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff; + var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff; + var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff; + /* 7: shruw */ + var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8; + var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8; + var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8; + var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8; + /* 8: convubw */ + var51.x4[0] = (orc_uint8) var44.x4[0]; + var51.x4[1] = (orc_uint8) var44.x4[1]; + var51.x4[2] = (orc_uint8) var44.x4[2]; + var51.x4[3] = (orc_uint8) var44.x4[3]; + /* 9: mullw */ + var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff; + var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff; + var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff; + var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff; + /* 11: convubw */ + var54.x4[0] = (orc_uint8) var53.x4[0]; + var54.x4[1] = (orc_uint8) var53.x4[1]; + var54.x4[2] = (orc_uint8) var53.x4[2]; + var54.x4[3] = (orc_uint8) var53.x4[3]; + /* 12: subw */ + var55.x4[0] = var54.x4[0] - var50.x4[0]; + var55.x4[1] = var54.x4[1] - var50.x4[1]; + var55.x4[2] = var54.x4[2] - var50.x4[2]; + var55.x4[3] = 
var54.x4[3] - var50.x4[3]; + /* 13: loadl */ + var56 = ptr0[i]; + /* 14: convlw */ + var57.i = var56.i; + /* 15: convwb */ + var58 = var57.i; + /* 16: convubw */ + var59.i = (orc_uint8) var58; + /* 17: splatbl */ + var60.i = + ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) << + 8) | (var58 & 0xff); + /* 18: convubw */ + var61.x4[0] = (orc_uint8) var60.x4[0]; + var61.x4[1] = (orc_uint8) var60.x4[1]; + var61.x4[2] = (orc_uint8) var60.x4[2]; + var61.x4[3] = (orc_uint8) var60.x4[3]; + /* 19: mullw */ + var62.x4[0] = (var61.x4[0] * var55.x4[0]) & 0xffff; + var62.x4[1] = (var61.x4[1] * var55.x4[1]) & 0xffff; + var62.x4[2] = (var61.x4[2] * var55.x4[2]) & 0xffff; + var62.x4[3] = (var61.x4[3] * var55.x4[3]) & 0xffff; + /* 20: div255w */ + var63.x4[0] = + ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) + + (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8; + var63.x4[1] = + ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) + + (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8; + var63.x4[2] = + ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) + + (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8; + var63.x4[3] = + ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) + + (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8; + /* 21: convubw */ + var64.x4[0] = (orc_uint8) var56.x4[0]; + var64.x4[1] = (orc_uint8) var56.x4[1]; + var64.x4[2] = (orc_uint8) var56.x4[2]; + var64.x4[3] = (orc_uint8) var56.x4[3]; + /* 22: mullw */ + var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff; + var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff; + var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff; + var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff; + /* 23: addw */ + var66.x4[0] = var65.x4[0] + var52.x4[0]; + var66.x4[1] = var65.x4[1] + var52.x4[1]; + var66.x4[2] = var65.x4[2] + var52.x4[2]; + var66.x4[3] = var65.x4[3] + var52.x4[3]; + /* 24: addw */ + var67.x4[0] = var63.x4[0] + var50.x4[0]; + var67.x4[1] = var63.x4[1] + var50.x4[1]; + var67.x4[2] = var63.x4[2] + var50.x4[2]; + 
var67.x4[3] = var63.x4[3] + var50.x4[3]; + /* 25: divluw */ + var68.x4[0] = + ((var67.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) / + ((orc_uint16) var67.x4[0] & 0xff)); + var68.x4[1] = + ((var67.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) / + ((orc_uint16) var67.x4[1] & 0xff)); + var68.x4[2] = + ((var67.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) / + ((orc_uint16) var67.x4[2] & 0xff)); + var68.x4[3] = + ((var67.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) / + ((orc_uint16) var67.x4[3] & 0xff)); + /* 26: convwb */ + var69.x4[0] = var68.x4[0]; + var69.x4[1] = var68.x4[1]; + var69.x4[2] = var68.x4[2]; + var69.x4[3] = var68.x4[3]; + /* 28: andl */ + var70.i = var69.i & var42.i; + /* 29: convwb */ + var71.x4[0] = var67.x4[0]; + var71.x4[1] = var67.x4[1]; + var71.x4[2] = var67.x4[2]; + var71.x4[3] = var67.x4[3]; + /* 31: andl */ + var72.i = var71.i & var43.i; + /* 32: orl */ + var73.i = var70.i | var72.i; + /* 33: storel */ + ptr0[i] = var73; + } + } + +} + +#else +static void +_backup_orc_overlay_argb (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union32 *ORC_RESTRICT ptr0; + const orc_union32 *ORC_RESTRICT ptr4; + orc_union64 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union16 var45; + orc_int8 var46; + orc_union32 var47; + orc_union64 var48; + orc_union64 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union32 var53; + orc_union64 var54; + orc_union64 var55; + orc_union32 var56; + orc_union16 var57; + orc_int8 var58; + orc_union16 var59; + orc_union32 var60; + orc_union64 var61; + orc_union64 var62; + orc_union64 var63; + orc_union64 var64; + orc_union64 var65; + orc_union64 var66; + orc_union64 var67; + orc_union64 var68; + orc_union32 var69; + orc_union32 var70; + orc_union32 var71; + orc_union32 var72; + orc_union32 var73; + + for (j = 
0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 5: loadpw */ + var41.x4[0] = ex->params[24]; + var41.x4[1] = ex->params[24]; + var41.x4[2] = ex->params[24]; + var41.x4[3] = ex->params[24]; + /* 10: loadpl */ + var53.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ + /* 27: loadpl */ + var42.i = (int) 0xffffff00; /* -256 or 2.122e-314f */ + /* 30: loadpl */ + var43.i = (int) 0x000000ff; /* 255 or 1.25987e-321f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var44 = ptr4[i]; + /* 1: convlw */ + var45.i = var44.i; + /* 2: convwb */ + var46 = var45.i; + /* 3: splatbl */ + var47.i = + ((var46 & 0xff) << 24) | ((var46 & 0xff) << 16) | ((var46 & 0xff) << + 8) | (var46 & 0xff); + /* 4: convubw */ + var48.x4[0] = (orc_uint8) var47.x4[0]; + var48.x4[1] = (orc_uint8) var47.x4[1]; + var48.x4[2] = (orc_uint8) var47.x4[2]; + var48.x4[3] = (orc_uint8) var47.x4[3]; + /* 6: mullw */ + var49.x4[0] = (var48.x4[0] * var41.x4[0]) & 0xffff; + var49.x4[1] = (var48.x4[1] * var41.x4[1]) & 0xffff; + var49.x4[2] = (var48.x4[2] * var41.x4[2]) & 0xffff; + var49.x4[3] = (var48.x4[3] * var41.x4[3]) & 0xffff; + /* 7: shruw */ + var50.x4[0] = ((orc_uint16) var49.x4[0]) >> 8; + var50.x4[1] = ((orc_uint16) var49.x4[1]) >> 8; + var50.x4[2] = ((orc_uint16) var49.x4[2]) >> 8; + var50.x4[3] = ((orc_uint16) var49.x4[3]) >> 8; + /* 8: convubw */ + var51.x4[0] = (orc_uint8) var44.x4[0]; + var51.x4[1] = (orc_uint8) var44.x4[1]; + var51.x4[2] = (orc_uint8) var44.x4[2]; + var51.x4[3] = (orc_uint8) var44.x4[3]; + /* 9: mullw */ + var52.x4[0] = (var51.x4[0] * var50.x4[0]) & 0xffff; + var52.x4[1] = (var51.x4[1] * var50.x4[1]) & 0xffff; + var52.x4[2] = (var51.x4[2] * var50.x4[2]) & 0xffff; + var52.x4[3] = (var51.x4[3] * var50.x4[3]) & 0xffff; + /* 11: convubw */ + var54.x4[0] = (orc_uint8) var53.x4[0]; + var54.x4[1] = (orc_uint8) var53.x4[1]; + var54.x4[2] = (orc_uint8) var53.x4[2]; + var54.x4[3] = (orc_uint8) 
var53.x4[3]; + /* 12: subw */ + var55.x4[0] = var54.x4[0] - var50.x4[0]; + var55.x4[1] = var54.x4[1] - var50.x4[1]; + var55.x4[2] = var54.x4[2] - var50.x4[2]; + var55.x4[3] = var54.x4[3] - var50.x4[3]; + /* 13: loadl */ + var56 = ptr0[i]; + /* 14: convlw */ + var57.i = var56.i; + /* 15: convwb */ + var58 = var57.i; + /* 16: convubw */ + var59.i = (orc_uint8) var58; + /* 17: splatbl */ + var60.i = + ((var58 & 0xff) << 24) | ((var58 & 0xff) << 16) | ((var58 & 0xff) << + 8) | (var58 & 0xff); + /* 18: convubw */ + var61.x4[0] = (orc_uint8) var60.x4[0]; + var61.x4[1] = (orc_uint8) var60.x4[1]; + var61.x4[2] = (orc_uint8) var60.x4[2]; + var61.x4[3] = (orc_uint8) var60.x4[3]; + /* 19: mullw */ + var62.x4[0] = (var61.x4[0] * var55.x4[0]) & 0xffff; + var62.x4[1] = (var61.x4[1] * var55.x4[1]) & 0xffff; + var62.x4[2] = (var61.x4[2] * var55.x4[2]) & 0xffff; + var62.x4[3] = (var61.x4[3] * var55.x4[3]) & 0xffff; + /* 20: div255w */ + var63.x4[0] = + ((orc_uint16) (((orc_uint16) (var62.x4[0] + 128)) + + (((orc_uint16) (var62.x4[0] + 128)) >> 8))) >> 8; + var63.x4[1] = + ((orc_uint16) (((orc_uint16) (var62.x4[1] + 128)) + + (((orc_uint16) (var62.x4[1] + 128)) >> 8))) >> 8; + var63.x4[2] = + ((orc_uint16) (((orc_uint16) (var62.x4[2] + 128)) + + (((orc_uint16) (var62.x4[2] + 128)) >> 8))) >> 8; + var63.x4[3] = + ((orc_uint16) (((orc_uint16) (var62.x4[3] + 128)) + + (((orc_uint16) (var62.x4[3] + 128)) >> 8))) >> 8; + /* 21: convubw */ + var64.x4[0] = (orc_uint8) var56.x4[0]; + var64.x4[1] = (orc_uint8) var56.x4[1]; + var64.x4[2] = (orc_uint8) var56.x4[2]; + var64.x4[3] = (orc_uint8) var56.x4[3]; + /* 22: mullw */ + var65.x4[0] = (var64.x4[0] * var63.x4[0]) & 0xffff; + var65.x4[1] = (var64.x4[1] * var63.x4[1]) & 0xffff; + var65.x4[2] = (var64.x4[2] * var63.x4[2]) & 0xffff; + var65.x4[3] = (var64.x4[3] * var63.x4[3]) & 0xffff; + /* 23: addw */ + var66.x4[0] = var65.x4[0] + var52.x4[0]; + var66.x4[1] = var65.x4[1] + var52.x4[1]; + var66.x4[2] = var65.x4[2] + var52.x4[2]; + var66.x4[3] = 
var65.x4[3] + var52.x4[3]; + /* 24: addw */ + var67.x4[0] = var63.x4[0] + var50.x4[0]; + var67.x4[1] = var63.x4[1] + var50.x4[1]; + var67.x4[2] = var63.x4[2] + var50.x4[2]; + var67.x4[3] = var63.x4[3] + var50.x4[3]; + /* 25: divluw */ + var68.x4[0] = + ((var67.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[0]) / + ((orc_uint16) var67.x4[0] & 0xff)); + var68.x4[1] = + ((var67.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[1]) / + ((orc_uint16) var67.x4[1] & 0xff)); + var68.x4[2] = + ((var67.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[2]) / + ((orc_uint16) var67.x4[2] & 0xff)); + var68.x4[3] = + ((var67.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var66.x4[3]) / + ((orc_uint16) var67.x4[3] & 0xff)); + /* 26: convwb */ + var69.x4[0] = var68.x4[0]; + var69.x4[1] = var68.x4[1]; + var69.x4[2] = var68.x4[2]; + var69.x4[3] = var68.x4[3]; + /* 28: andl */ + var70.i = var69.i & var42.i; + /* 29: convwb */ + var71.x4[0] = var67.x4[0]; + var71.x4[1] = var67.x4[1]; + var71.x4[2] = var67.x4[2]; + var71.x4[3] = var67.x4[3]; + /* 31: andl */ + var72.i = var71.i & var43.i; + /* 32: orl */ + var73.i = var70.i | var72.i; + /* 33: storel */ + ptr0[i] = var73; + } + } + +} + +void +orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, + int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static int p_inited = 0; + static OrcProgram *p = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcCompileResult result; + + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "orc_overlay_argb"); + orc_program_set_backup_function (p, _backup_orc_overlay_argb); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_source (p, 4, "s1"); + orc_program_add_constant (p, 4, 0xffffffff, "c1"); + orc_program_add_constant (p, 4, 0x000000ff, "c2"); + orc_program_add_constant (p, 4, 0xffffff00, "c3"); + 
orc_program_add_constant (p, 4, 0x00000008, "c4"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 2, "t2"); + orc_program_add_temporary (p, 1, "t3"); + orc_program_add_temporary (p, 8, "t4"); + orc_program_add_temporary (p, 8, "t5"); + orc_program_add_temporary (p, 8, "t6"); + orc_program_add_temporary (p, 4, "t7"); + orc_program_add_temporary (p, 8, "t8"); + orc_program_add_temporary (p, 8, "t9"); + + orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T4, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shruw", 2, ORC_VAR_T4, ORC_VAR_T4, ORC_VAR_C4, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T7, ORC_VAR_C1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 0, ORC_VAR_T3, ORC_VAR_T2, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T7, ORC_VAR_T3, 
ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "div255w", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T8, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T9, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T4, + ORC_VAR_D1); + orc_program_append_2 (p, "divluw", 2, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T8, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 2, ORC_VAR_T7, ORC_VAR_T6, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T7, + ORC_VAR_D1); + orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + + result = orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->program = p; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = p->code_exec; + func (ex); +} +#endif + + +/* orc_overlay_bgra */ +#ifdef DISABLE_ORC +void +orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, + int p1, int n, int m) +{ + int i; + int j; + orc_union32 *ORC_RESTRICT ptr0; + const orc_union32 *ORC_RESTRICT ptr4; + orc_union64 var42; + orc_union32 var43; + orc_union32 var44; + 
orc_union32 var45; + orc_union32 var46; + orc_union16 var47; + orc_int8 var48; + orc_union32 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union64 var53; + orc_union64 var54; + orc_union32 var55; + orc_union64 var56; + orc_union64 var57; + orc_union32 var58; + orc_union32 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union32 var63; + orc_union64 var64; + orc_union64 var65; + orc_union64 var66; + orc_union64 var67; + orc_union64 var68; + orc_union64 var69; + orc_union64 var70; + orc_union64 var71; + orc_union32 var72; + orc_union32 var73; + orc_union32 var74; + orc_union32 var75; + orc_union32 var76; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 6: loadpw */ + var42.x4[0] = p1; + var42.x4[1] = p1; + var42.x4[2] = p1; + var42.x4[3] = p1; + /* 11: loadpl */ + var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ + /* 29: loadpl */ + var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */ + /* 32: loadpl */ + var44.i = (int) 0xff000000; /* -16777216 or 2.11371e-314f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var45 = ptr4[i]; + /* 1: shrul */ + var46.i = ((orc_uint32) var45.i) >> 24; + /* 2: convlw */ + var47.i = var45.i; + /* 3: convwb */ + var48 = var47.i; + /* 4: splatbl */ + var49.i = + ((var48 & 0xff) << 24) | ((var48 & 0xff) << 16) | ((var48 & 0xff) << + 8) | (var48 & 0xff); + /* 5: convubw */ + var50.x4[0] = (orc_uint8) var49.x4[0]; + var50.x4[1] = (orc_uint8) var49.x4[1]; + var50.x4[2] = (orc_uint8) var49.x4[2]; + var50.x4[3] = (orc_uint8) var49.x4[3]; + /* 7: mullw */ + var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff; + var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff; + var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff; + var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff; + /* 8: shruw */ + var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8; + var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8; + var52.x4[2] = 
((orc_uint16) var51.x4[2]) >> 8; + var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8; + /* 9: convubw */ + var53.x4[0] = (orc_uint8) var45.x4[0]; + var53.x4[1] = (orc_uint8) var45.x4[1]; + var53.x4[2] = (orc_uint8) var45.x4[2]; + var53.x4[3] = (orc_uint8) var45.x4[3]; + /* 10: mullw */ + var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff; + var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff; + var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff; + var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff; + /* 12: convubw */ + var56.x4[0] = (orc_uint8) var55.x4[0]; + var56.x4[1] = (orc_uint8) var55.x4[1]; + var56.x4[2] = (orc_uint8) var55.x4[2]; + var56.x4[3] = (orc_uint8) var55.x4[3]; + /* 13: subw */ + var57.x4[0] = var56.x4[0] - var52.x4[0]; + var57.x4[1] = var56.x4[1] - var52.x4[1]; + var57.x4[2] = var56.x4[2] - var52.x4[2]; + var57.x4[3] = var56.x4[3] - var52.x4[3]; + /* 14: loadl */ + var58 = ptr0[i]; + /* 15: shrul */ + var59.i = ((orc_uint32) var58.i) >> 24; + /* 16: convlw */ + var60.i = var58.i; + /* 17: convwb */ + var61 = var60.i; + /* 18: convubw */ + var62.i = (orc_uint8) var61; + /* 19: splatbl */ + var63.i = + ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) << + 8) | (var61 & 0xff); + /* 20: convubw */ + var64.x4[0] = (orc_uint8) var63.x4[0]; + var64.x4[1] = (orc_uint8) var63.x4[1]; + var64.x4[2] = (orc_uint8) var63.x4[2]; + var64.x4[3] = (orc_uint8) var63.x4[3]; + /* 21: mullw */ + var65.x4[0] = (var64.x4[0] * var57.x4[0]) & 0xffff; + var65.x4[1] = (var64.x4[1] * var57.x4[1]) & 0xffff; + var65.x4[2] = (var64.x4[2] * var57.x4[2]) & 0xffff; + var65.x4[3] = (var64.x4[3] * var57.x4[3]) & 0xffff; + /* 22: div255w */ + var66.x4[0] = + ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) + + (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8; + var66.x4[1] = + ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) + + (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8; + var66.x4[2] = + ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) + + (((orc_uint16) 
(var65.x4[2] + 128)) >> 8))) >> 8; + var66.x4[3] = + ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) + + (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8; + /* 23: convubw */ + var67.x4[0] = (orc_uint8) var58.x4[0]; + var67.x4[1] = (orc_uint8) var58.x4[1]; + var67.x4[2] = (orc_uint8) var58.x4[2]; + var67.x4[3] = (orc_uint8) var58.x4[3]; + /* 24: mullw */ + var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff; + var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff; + var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff; + var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff; + /* 25: addw */ + var69.x4[0] = var68.x4[0] + var54.x4[0]; + var69.x4[1] = var68.x4[1] + var54.x4[1]; + var69.x4[2] = var68.x4[2] + var54.x4[2]; + var69.x4[3] = var68.x4[3] + var54.x4[3]; + /* 26: addw */ + var70.x4[0] = var66.x4[0] + var52.x4[0]; + var70.x4[1] = var66.x4[1] + var52.x4[1]; + var70.x4[2] = var66.x4[2] + var52.x4[2]; + var70.x4[3] = var66.x4[3] + var52.x4[3]; + /* 27: divluw */ + var71.x4[0] = + ((var70.x4[0] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) / + ((orc_uint16) var70.x4[0] & 0xff)); + var71.x4[1] = + ((var70.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) / + ((orc_uint16) var70.x4[1] & 0xff)); + var71.x4[2] = + ((var70.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) / + ((orc_uint16) var70.x4[2] & 0xff)); + var71.x4[3] = + ((var70.x4[3] & 0xff) == + 0) ? 
255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) / + ((orc_uint16) var70.x4[3] & 0xff)); + /* 28: convwb */ + var72.x4[0] = var71.x4[0]; + var72.x4[1] = var71.x4[1]; + var72.x4[2] = var71.x4[2]; + var72.x4[3] = var71.x4[3]; + /* 30: andl */ + var73.i = var72.i & var43.i; + /* 31: convwb */ + var74.x4[0] = var70.x4[0]; + var74.x4[1] = var70.x4[1]; + var74.x4[2] = var70.x4[2]; + var74.x4[3] = var70.x4[3]; + /* 33: andl */ + var75.i = var74.i & var44.i; + /* 34: orl */ + var76.i = var73.i | var75.i; + /* 35: storel */ + ptr0[i] = var76; + } + } + +} + +#else +static void +_backup_orc_overlay_bgra (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union32 *ORC_RESTRICT ptr0; + const orc_union32 *ORC_RESTRICT ptr4; + orc_union64 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union16 var47; + orc_int8 var48; + orc_union32 var49; + orc_union64 var50; + orc_union64 var51; + orc_union64 var52; + orc_union64 var53; + orc_union64 var54; + orc_union32 var55; + orc_union64 var56; + orc_union64 var57; + orc_union32 var58; + orc_union32 var59; + orc_union16 var60; + orc_int8 var61; + orc_union16 var62; + orc_union32 var63; + orc_union64 var64; + orc_union64 var65; + orc_union64 var66; + orc_union64 var67; + orc_union64 var68; + orc_union64 var69; + orc_union64 var70; + orc_union64 var71; + orc_union32 var72; + orc_union32 var73; + orc_union32 var74; + orc_union32 var75; + orc_union32 var76; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 6: loadpw */ + var42.x4[0] = ex->params[24]; + var42.x4[1] = ex->params[24]; + var42.x4[2] = ex->params[24]; + var42.x4[3] = ex->params[24]; + /* 11: loadpl */ + var55.i = (int) 0xffffffff; /* -1 or 2.122e-314f */ + /* 29: loadpl */ + var43.i = (int) 0x00ffffff; /* 16777215 or 8.28905e-317f */ + /* 32: loadpl */ + var44.i = (int) 
0xff000000; /* -16777216 or 2.11371e-314f */ + + for (i = 0; i < n; i++) { + /* 0: loadl */ + var45 = ptr4[i]; + /* 1: shrul */ + var46.i = ((orc_uint32) var45.i) >> 24; + /* 2: convlw */ + var47.i = var45.i; + /* 3: convwb */ + var48 = var47.i; + /* 4: splatbl */ + var49.i = + ((var48 & 0xff) << 24) | ((var48 & 0xff) << 16) | ((var48 & 0xff) << + 8) | (var48 & 0xff); + /* 5: convubw */ + var50.x4[0] = (orc_uint8) var49.x4[0]; + var50.x4[1] = (orc_uint8) var49.x4[1]; + var50.x4[2] = (orc_uint8) var49.x4[2]; + var50.x4[3] = (orc_uint8) var49.x4[3]; + /* 7: mullw */ + var51.x4[0] = (var50.x4[0] * var42.x4[0]) & 0xffff; + var51.x4[1] = (var50.x4[1] * var42.x4[1]) & 0xffff; + var51.x4[2] = (var50.x4[2] * var42.x4[2]) & 0xffff; + var51.x4[3] = (var50.x4[3] * var42.x4[3]) & 0xffff; + /* 8: shruw */ + var52.x4[0] = ((orc_uint16) var51.x4[0]) >> 8; + var52.x4[1] = ((orc_uint16) var51.x4[1]) >> 8; + var52.x4[2] = ((orc_uint16) var51.x4[2]) >> 8; + var52.x4[3] = ((orc_uint16) var51.x4[3]) >> 8; + /* 9: convubw */ + var53.x4[0] = (orc_uint8) var45.x4[0]; + var53.x4[1] = (orc_uint8) var45.x4[1]; + var53.x4[2] = (orc_uint8) var45.x4[2]; + var53.x4[3] = (orc_uint8) var45.x4[3]; + /* 10: mullw */ + var54.x4[0] = (var53.x4[0] * var52.x4[0]) & 0xffff; + var54.x4[1] = (var53.x4[1] * var52.x4[1]) & 0xffff; + var54.x4[2] = (var53.x4[2] * var52.x4[2]) & 0xffff; + var54.x4[3] = (var53.x4[3] * var52.x4[3]) & 0xffff; + /* 12: convubw */ + var56.x4[0] = (orc_uint8) var55.x4[0]; + var56.x4[1] = (orc_uint8) var55.x4[1]; + var56.x4[2] = (orc_uint8) var55.x4[2]; + var56.x4[3] = (orc_uint8) var55.x4[3]; + /* 13: subw */ + var57.x4[0] = var56.x4[0] - var52.x4[0]; + var57.x4[1] = var56.x4[1] - var52.x4[1]; + var57.x4[2] = var56.x4[2] - var52.x4[2]; + var57.x4[3] = var56.x4[3] - var52.x4[3]; + /* 14: loadl */ + var58 = ptr0[i]; + /* 15: shrul */ + var59.i = ((orc_uint32) var58.i) >> 24; + /* 16: convlw */ + var60.i = var58.i; + /* 17: convwb */ + var61 = var60.i; + /* 18: convubw */ + var62.i = 
(orc_uint8) var61; + /* 19: splatbl */ + var63.i = + ((var61 & 0xff) << 24) | ((var61 & 0xff) << 16) | ((var61 & 0xff) << + 8) | (var61 & 0xff); + /* 20: convubw */ + var64.x4[0] = (orc_uint8) var63.x4[0]; + var64.x4[1] = (orc_uint8) var63.x4[1]; + var64.x4[2] = (orc_uint8) var63.x4[2]; + var64.x4[3] = (orc_uint8) var63.x4[3]; + /* 21: mullw */ + var65.x4[0] = (var64.x4[0] * var57.x4[0]) & 0xffff; + var65.x4[1] = (var64.x4[1] * var57.x4[1]) & 0xffff; + var65.x4[2] = (var64.x4[2] * var57.x4[2]) & 0xffff; + var65.x4[3] = (var64.x4[3] * var57.x4[3]) & 0xffff; + /* 22: div255w */ + var66.x4[0] = + ((orc_uint16) (((orc_uint16) (var65.x4[0] + 128)) + + (((orc_uint16) (var65.x4[0] + 128)) >> 8))) >> 8; + var66.x4[1] = + ((orc_uint16) (((orc_uint16) (var65.x4[1] + 128)) + + (((orc_uint16) (var65.x4[1] + 128)) >> 8))) >> 8; + var66.x4[2] = + ((orc_uint16) (((orc_uint16) (var65.x4[2] + 128)) + + (((orc_uint16) (var65.x4[2] + 128)) >> 8))) >> 8; + var66.x4[3] = + ((orc_uint16) (((orc_uint16) (var65.x4[3] + 128)) + + (((orc_uint16) (var65.x4[3] + 128)) >> 8))) >> 8; + /* 23: convubw */ + var67.x4[0] = (orc_uint8) var58.x4[0]; + var67.x4[1] = (orc_uint8) var58.x4[1]; + var67.x4[2] = (orc_uint8) var58.x4[2]; + var67.x4[3] = (orc_uint8) var58.x4[3]; + /* 24: mullw */ + var68.x4[0] = (var67.x4[0] * var66.x4[0]) & 0xffff; + var68.x4[1] = (var67.x4[1] * var66.x4[1]) & 0xffff; + var68.x4[2] = (var67.x4[2] * var66.x4[2]) & 0xffff; + var68.x4[3] = (var67.x4[3] * var66.x4[3]) & 0xffff; + /* 25: addw */ + var69.x4[0] = var68.x4[0] + var54.x4[0]; + var69.x4[1] = var68.x4[1] + var54.x4[1]; + var69.x4[2] = var68.x4[2] + var54.x4[2]; + var69.x4[3] = var68.x4[3] + var54.x4[3]; + /* 26: addw */ + var70.x4[0] = var66.x4[0] + var52.x4[0]; + var70.x4[1] = var66.x4[1] + var52.x4[1]; + var70.x4[2] = var66.x4[2] + var52.x4[2]; + var70.x4[3] = var66.x4[3] + var52.x4[3]; + /* 27: divluw */ + var71.x4[0] = + ((var70.x4[0] & 0xff) == + 0) ? 
255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[0]) / + ((orc_uint16) var70.x4[0] & 0xff)); + var71.x4[1] = + ((var70.x4[1] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[1]) / + ((orc_uint16) var70.x4[1] & 0xff)); + var71.x4[2] = + ((var70.x4[2] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[2]) / + ((orc_uint16) var70.x4[2] & 0xff)); + var71.x4[3] = + ((var70.x4[3] & 0xff) == + 0) ? 255 : ORC_CLAMP_UB (((orc_uint16) var69.x4[3]) / + ((orc_uint16) var70.x4[3] & 0xff)); + /* 28: convwb */ + var72.x4[0] = var71.x4[0]; + var72.x4[1] = var71.x4[1]; + var72.x4[2] = var71.x4[2]; + var72.x4[3] = var71.x4[3]; + /* 30: andl */ + var73.i = var72.i & var43.i; + /* 31: convwb */ + var74.x4[0] = var70.x4[0]; + var74.x4[1] = var70.x4[1]; + var74.x4[2] = var70.x4[2]; + var74.x4[3] = var70.x4[3]; + /* 33: andl */ + var75.i = var74.i & var44.i; + /* 34: orl */ + var76.i = var73.i | var75.i; + /* 35: storel */ + ptr0[i] = var76; + } + } + +} + +void +orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, + int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static int p_inited = 0; + static OrcProgram *p = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcCompileResult result; + + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "orc_overlay_bgra"); + orc_program_set_backup_function (p, _backup_orc_overlay_bgra); + orc_program_add_destination (p, 4, "d1"); + orc_program_add_source (p, 4, "s1"); + orc_program_add_constant (p, 4, 0xffffffff, "c1"); + orc_program_add_constant (p, 4, 0xff000000, "c2"); + orc_program_add_constant (p, 4, 0x00ffffff, "c3"); + orc_program_add_constant (p, 4, 0x00000018, "c4"); + orc_program_add_constant (p, 4, 0x00000008, "c5"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + orc_program_add_temporary (p, 2, "t3"); + orc_program_add_temporary 
(p, 1, "t4"); + orc_program_add_temporary (p, 8, "t5"); + orc_program_add_temporary (p, 8, "t6"); + orc_program_add_temporary (p, 8, "t7"); + orc_program_add_temporary (p, 4, "t8"); + orc_program_add_temporary (p, 8, "t9"); + orc_program_add_temporary (p, 8, "t10"); + + orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T5, ORC_VAR_T8, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shruw", 2, ORC_VAR_T5, ORC_VAR_T5, ORC_VAR_C5, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T10, ORC_VAR_T1, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T10, ORC_VAR_T10, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "loadpl", 0, ORC_VAR_T8, ORC_VAR_C1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T6, ORC_VAR_T8, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subw", 2, ORC_VAR_T6, ORC_VAR_T6, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "loadl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_C4, + ORC_VAR_D1); + orc_program_append_2 (p, "convlw", 0, ORC_VAR_T3, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 0, ORC_VAR_T4, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 0, ORC_VAR_T3, ORC_VAR_T4, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "splatbl", 0, ORC_VAR_T8, ORC_VAR_T4, ORC_VAR_D1, + ORC_VAR_D1); + 
orc_program_append_2 (p, "convubw", 2, ORC_VAR_T7, ORC_VAR_T8, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T6, + ORC_VAR_D1); + orc_program_append_2 (p, "div255w", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convubw", 2, ORC_VAR_T9, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "mullw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T10, + ORC_VAR_D1); + orc_program_append_2 (p, "addw", 2, ORC_VAR_T7, ORC_VAR_T7, ORC_VAR_T5, + ORC_VAR_D1); + orc_program_append_2 (p, "divluw", 2, ORC_VAR_T9, ORC_VAR_T9, ORC_VAR_T7, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 2, ORC_VAR_T1, ORC_VAR_T9, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C3, + ORC_VAR_D1); + orc_program_append_2 (p, "convwb", 2, ORC_VAR_T8, ORC_VAR_T7, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "andl", 0, ORC_VAR_T8, ORC_VAR_T8, ORC_VAR_C2, + ORC_VAR_D1); + orc_program_append_2 (p, "orl", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T8, + ORC_VAR_D1); + orc_program_append_2 (p, "storel", 0, ORC_VAR_D1, ORC_VAR_T1, ORC_VAR_D1, + ORC_VAR_D1); + + result = orc_program_compile (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->program = p; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = p->code_exec; + func (ex); +} +#endif diff --git a/gst/videomixer/blendorc-dist.h b/gst/videomixer/blendorc-dist.h index 8a046ef21d..abc9aefd1c 100644 --- a/gst/videomixer/blendorc-dist.h +++ b/gst/videomixer/blendorc-dist.h @@ -35,6 +35,7 @@ typedef unsigned __int16 orc_uint16; typedef unsigned __int32 orc_uint32; typedef unsigned __int64 orc_uint64; #define ORC_UINT64_C(x) (x##Ui64) 
+#define inline __inline #else #include <limits.h> typedef signed char orc_int8; @@ -62,6 +63,8 @@ void orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n); void orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); void orc_blend_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); void orc_blend_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); +void orc_overlay_argb (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); +void orc_overlay_bgra (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride, int p1, int n, int m); #ifdef __cplusplus } diff --git a/gst/videomixer/blendorc.orc b/gst/videomixer/blendorc.orc index bb4601c166..a28451325d 100644 --- a/gst/videomixer/blendorc.orc +++ b/gst/videomixer/blendorc.orc @@ -95,3 +95,128 @@ x4 convwb t, d_wide orl t, t, a_alpha storel d, t + +.function orc_overlay_argb +.flags 2d +.dest 4 d guint8 +.source 4 s guint8 +.param 2 alpha +.temp 4 t +.temp 2 tw +.temp 1 tb +.temp 8 alpha_s +.temp 8 alpha_s_inv +.temp 8 alpha_d +.temp 4 a +.temp 8 d_wide +.temp 8 s_wide +.const 4 xfs 0xffffffff +.const 4 a_alpha 0x000000ff +.const 4 a_alpha_inv 0xffffff00 + +# calc source alpha as alpha_s = alpha_s * alpha / 256 +loadl t, s +convlw tw, t +convwb tb, tw +splatbl a, tb +x4 convubw alpha_s, a +x4 mullw alpha_s, alpha_s, alpha +x4 shruw alpha_s, alpha_s, 8 +x4 convubw s_wide, t +x4 mullw s_wide, s_wide, alpha_s + +# calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255 +loadpl a, xfs +x4 convubw alpha_s_inv, a +x4 subw alpha_s_inv, alpha_s_inv, alpha_s +loadl t, d +convlw tw, t +convwb tb, tw +convubw tw, tb +splatbl a, tb +x4 convubw alpha_d, a +x4 mullw alpha_d, alpha_d, alpha_s_inv +x4 div255w alpha_d, alpha_d +x4 convubw d_wide, t +x4 mullw d_wide, d_wide, alpha_d + +# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255 +x4 addw d_wide, 
d_wide, s_wide + +# calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255 +x4 addw alpha_d, alpha_d, alpha_s + +# now normalize the pix_d by the final alpha to make it associative +x4 divluw, d_wide, d_wide, alpha_d + +# pack the new alpha into the correct spot +x4 convwb t, d_wide +andl t, t, a_alpha_inv +x4 convwb a, alpha_d +andl a, a, a_alpha +orl t, t, a +storel d, t + +.function orc_overlay_bgra +.flags 2d +.dest 4 d guint8 +.source 4 s guint8 +.param 2 alpha +.temp 4 t +.temp 4 t2 +.temp 2 tw +.temp 1 tb +.temp 8 alpha_s +.temp 8 alpha_s_inv +.temp 8 alpha_d +.temp 4 a +.temp 8 d_wide +.temp 8 s_wide +.const 4 xfs 0xffffffff +.const 4 a_alpha 0xff000000 +.const 4 a_alpha_inv 0x00ffffff + +# calc source alpha as alpha_s = alpha_s * alpha / 256 +loadl t, s +shrul t2, t, 24 +convlw tw, t +convwb tb, tw +splatbl a, tb +x4 convubw alpha_s, a +x4 mullw alpha_s, alpha_s, alpha +x4 shruw alpha_s, alpha_s, 8 +x4 convubw s_wide, t +x4 mullw s_wide, s_wide, alpha_s + +# calc destination alpha as alpha_d = (255-alpha_s) * alpha_d / 255 +loadpl a, xfs +x4 convubw alpha_s_inv, a +x4 subw alpha_s_inv, alpha_s_inv, alpha_s +loadl t, d +shrul t2, t, 24 +convlw tw, t +convwb tb, tw +convubw tw, tb +splatbl a, tb +x4 convubw alpha_d, a +x4 mullw alpha_d, alpha_d, alpha_s_inv +x4 div255w alpha_d, alpha_d +x4 convubw d_wide, t +x4 mullw d_wide, d_wide, alpha_d + +# calc final pixel as pix_d = pix_s*alpha_s + pix_d*alpha_d*(255-alpha_s)/255 +x4 addw d_wide, d_wide, s_wide + +# calc the final destination alpha_d = alpha_s + alpha_d * (255-alpha_s)/255 +x4 addw alpha_d, alpha_d, alpha_s + +# now normalize the pix_d by the final alpha to make it associative +x4 divluw, d_wide, d_wide, alpha_d + +# pack the new alpha into the correct spot +x4 convwb t, d_wide +andl t, t, a_alpha_inv +x4 convwb a, alpha_d +andl a, a, a_alpha +orl t, t, a +storel d, t diff --git a/gst/videomixer/videomixer.c b/gst/videomixer/videomixer.c index 1271d63216..7ab1de9cb5 100644 --- 
a/gst/videomixer/videomixer.c +++ b/gst/videomixer/videomixer.c @@ -89,6 +89,12 @@ #include "videomixer.h" #include "videomixer2.h" +#ifdef DISABLE_ORC +#define orc_memset memset +#else +#include +#endif + GST_DEBUG_CATEGORY_STATIC (gst_videomixer_debug); #define GST_CAT_DEFAULT gst_videomixer_debug @@ -533,6 +539,8 @@ gst_video_mixer_background_get_type (void) {VIDEO_MIXER_BACKGROUND_CHECKER, "Checker pattern", "checker"}, {VIDEO_MIXER_BACKGROUND_BLACK, "Black", "black"}, {VIDEO_MIXER_BACKGROUND_WHITE, "White", "white"}, + {VIDEO_MIXER_BACKGROUND_TRANSPARENT, + "Transparent Background to enable further mixing", "transparent"}, {0, NULL, NULL}, }; @@ -1025,6 +1033,7 @@ gst_videomixer_setcaps (GstPad * pad, GstCaps * caps) GST_INFO_OBJECT (mixer, "set src caps: %" GST_PTR_FORMAT, caps); mixer->blend = NULL; + mixer->overlay = NULL; mixer->fill_checker = NULL; mixer->fill_color = NULL; @@ -1034,114 +1043,133 @@ gst_videomixer_setcaps (GstPad * pad, GstCaps * caps) switch (mixer->fmt) { case GST_VIDEO_FORMAT_AYUV: mixer->blend = gst_video_mixer_blend_ayuv; + mixer->overlay = gst_video_mixer_overlay_ayuv; mixer->fill_checker = gst_video_mixer_fill_checker_ayuv; mixer->fill_color = gst_video_mixer_fill_color_ayuv; ret = TRUE; break; case GST_VIDEO_FORMAT_ARGB: mixer->blend = gst_video_mixer_blend_argb; + mixer->overlay = gst_video_mixer_overlay_argb; mixer->fill_checker = gst_video_mixer_fill_checker_argb; mixer->fill_color = gst_video_mixer_fill_color_argb; ret = TRUE; break; case GST_VIDEO_FORMAT_BGRA: mixer->blend = gst_video_mixer_blend_bgra; + mixer->overlay = gst_video_mixer_overlay_bgra; mixer->fill_checker = gst_video_mixer_fill_checker_bgra; mixer->fill_color = gst_video_mixer_fill_color_bgra; ret = TRUE; break; case GST_VIDEO_FORMAT_ABGR: mixer->blend = gst_video_mixer_blend_abgr; + mixer->overlay = gst_video_mixer_overlay_abgr; mixer->fill_checker = gst_video_mixer_fill_checker_abgr; mixer->fill_color = gst_video_mixer_fill_color_abgr; ret = TRUE; break; case 
GST_VIDEO_FORMAT_RGBA: mixer->blend = gst_video_mixer_blend_rgba; + mixer->overlay = gst_video_mixer_overlay_rgba; mixer->fill_checker = gst_video_mixer_fill_checker_rgba; mixer->fill_color = gst_video_mixer_fill_color_rgba; ret = TRUE; break; case GST_VIDEO_FORMAT_Y444: mixer->blend = gst_video_mixer_blend_y444; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_y444; mixer->fill_color = gst_video_mixer_fill_color_y444; ret = TRUE; break; case GST_VIDEO_FORMAT_Y42B: mixer->blend = gst_video_mixer_blend_y42b; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_y42b; mixer->fill_color = gst_video_mixer_fill_color_y42b; ret = TRUE; break; case GST_VIDEO_FORMAT_YUY2: mixer->blend = gst_video_mixer_blend_yuy2; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_yuy2; mixer->fill_color = gst_video_mixer_fill_color_yuy2; ret = TRUE; break; case GST_VIDEO_FORMAT_UYVY: mixer->blend = gst_video_mixer_blend_uyvy; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_uyvy; mixer->fill_color = gst_video_mixer_fill_color_uyvy; ret = TRUE; break; case GST_VIDEO_FORMAT_YVYU: mixer->blend = gst_video_mixer_blend_yvyu; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_yvyu; mixer->fill_color = gst_video_mixer_fill_color_yvyu; ret = TRUE; break; case GST_VIDEO_FORMAT_I420: mixer->blend = gst_video_mixer_blend_i420; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_i420; mixer->fill_color = gst_video_mixer_fill_color_i420; ret = TRUE; break; case GST_VIDEO_FORMAT_YV12: mixer->blend = gst_video_mixer_blend_yv12; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_yv12; mixer->fill_color = gst_video_mixer_fill_color_yv12; ret = TRUE; break; case GST_VIDEO_FORMAT_Y41B: mixer->blend = gst_video_mixer_blend_y41b; + mixer->overlay = mixer->blend; mixer->fill_checker 
= gst_video_mixer_fill_checker_y41b; mixer->fill_color = gst_video_mixer_fill_color_y41b; ret = TRUE; break; case GST_VIDEO_FORMAT_RGB: mixer->blend = gst_video_mixer_blend_rgb; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_rgb; mixer->fill_color = gst_video_mixer_fill_color_rgb; ret = TRUE; break; case GST_VIDEO_FORMAT_BGR: mixer->blend = gst_video_mixer_blend_bgr; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_bgr; mixer->fill_color = gst_video_mixer_fill_color_bgr; ret = TRUE; break; case GST_VIDEO_FORMAT_xRGB: mixer->blend = gst_video_mixer_blend_xrgb; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_xrgb; mixer->fill_color = gst_video_mixer_fill_color_xrgb; ret = TRUE; break; case GST_VIDEO_FORMAT_xBGR: mixer->blend = gst_video_mixer_blend_xbgr; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_xbgr; mixer->fill_color = gst_video_mixer_fill_color_xbgr; ret = TRUE; break; case GST_VIDEO_FORMAT_RGBx: mixer->blend = gst_video_mixer_blend_rgbx; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_rgbx; mixer->fill_color = gst_video_mixer_fill_color_rgbx; ret = TRUE; break; case GST_VIDEO_FORMAT_BGRx: mixer->blend = gst_video_mixer_blend_bgrx; + mixer->overlay = mixer->blend; mixer->fill_checker = gst_video_mixer_fill_checker_bgrx; mixer->fill_color = gst_video_mixer_fill_color_bgrx; ret = TRUE; @@ -1387,6 +1415,13 @@ static void gst_videomixer_blend_buffers (GstVideoMixer * mix, GstBuffer * outbuf) { GSList *walk; + BlendFunction blend; + if (mix->background == VIDEO_MIXER_BACKGROUND_TRANSPARENT) { + blend = mix->overlay; + } else { + blend = mix->blend; + } + walk = mix->sinkpads; while (walk) { /* We walk with this list because it's ordered */ @@ -1411,7 +1446,7 @@ gst_videomixer_blend_buffers (GstVideoMixer * mix, GstBuffer * outbuf) if (GST_CLOCK_TIME_IS_VALID (stream_time)) 
gst_object_sync_values (G_OBJECT (pad), stream_time); - mix->blend (GST_BUFFER_DATA (mixcol->buffer), + blend (GST_BUFFER_DATA (mixcol->buffer), pad->xpos, pad->ypos, pad->in_width, pad->in_height, pad->alpha, GST_BUFFER_DATA (outbuf), mix->out_width, mix->out_height); } @@ -1580,6 +1615,11 @@ gst_videomixer_collected (GstCollectPads * pads, GstVideoMixer * mix) mix->fill_color (GST_BUFFER_DATA (outbuf), mix->out_width, mix->out_height, 240, 128, 128); break; + case VIDEO_MIXER_BACKGROUND_TRANSPARENT: + orc_memset (GST_BUFFER_DATA (outbuf), 0, + gst_video_format_get_row_stride (mix->fmt, 0, + mix->out_width) * mix->out_height); + break; } gst_videomixer_blend_buffers (mix, outbuf); diff --git a/gst/videomixer/videomixer.h b/gst/videomixer/videomixer.h index 830f288533..fe55a5e20e 100644 --- a/gst/videomixer/videomixer.h +++ b/gst/videomixer/videomixer.h @@ -45,6 +45,7 @@ typedef struct _GstVideoMixerClass GstVideoMixerClass; * @VIDEO_MIXER_BACKGROUND_CHECKER: checker pattern background * @VIDEO_MIXER_BACKGROUND_BLACK: solid color black background * @VIDEO_MIXER_BACKGROUND_WHITE: solid color white background + * @VIDEO_MIXER_BACKGROUND_TRANSPARENT: background is left transparent and layers are composited using "A OVER B" composition rules. This is only applicable to AYUV and ARGB (and variants) as it preserves the alpha channel and allows for further mixing. * * The different backgrounds videomixer can blend over. 
*/ @@ -52,7 +53,8 @@ typedef enum { VIDEO_MIXER_BACKGROUND_CHECKER, VIDEO_MIXER_BACKGROUND_BLACK, - VIDEO_MIXER_BACKGROUND_WHITE + VIDEO_MIXER_BACKGROUND_WHITE, + VIDEO_MIXER_BACKGROUND_TRANSPARENT, } GstVideoMixerBackground; @@ -112,7 +114,7 @@ struct _GstVideoMixer gdouble proportion; GstClockTime earliest_time; - BlendFunction blend; + BlendFunction blend, overlay; FillCheckerFunction fill_checker; FillColorFunction fill_color; diff --git a/gst/videomixer/videomixer2.c b/gst/videomixer/videomixer2.c index 16e5a35f3e..e7f6ba8574 100644 --- a/gst/videomixer/videomixer2.c +++ b/gst/videomixer/videomixer2.c @@ -93,6 +93,12 @@ #include +#ifdef DISABLE_ORC +#define orc_memset memset +#else +#include +#endif + GST_DEBUG_CATEGORY_STATIC (gst_videomixer2_debug); #define GST_CAT_DEFAULT gst_videomixer2_debug @@ -546,6 +552,8 @@ gst_videomixer2_background_get_type (void) {VIDEO_MIXER2_BACKGROUND_CHECKER, "Checker pattern", "checker"}, {VIDEO_MIXER2_BACKGROUND_BLACK, "Black", "black"}, {VIDEO_MIXER2_BACKGROUND_WHITE, "White", "white"}, + {VIDEO_MIXER2_BACKGROUND_TRANSPARENT, + "Transparent Background to enable further mixing", "transparent"}, {0, NULL, NULL}, }; @@ -810,6 +818,7 @@ gst_videomixer2_blend_buffers (GstVideoMixer2 * mix, GSList *l; GstFlowReturn ret; guint outsize; + BlendFunction composite; outsize = gst_video_format_get_size (mix->format, mix->width, mix->height); ret = gst_pad_alloc_buffer_and_set_caps (mix->srcpad, GST_BUFFER_OFFSET_NONE, @@ -820,6 +829,8 @@ gst_videomixer2_blend_buffers (GstVideoMixer2 * mix, GST_BUFFER_TIMESTAMP (*outbuf) = output_start_time; GST_BUFFER_DURATION (*outbuf) = output_end_time - output_start_time; + /* default to blending */ + composite = mix->blend; switch (mix->background) { case VIDEO_MIXER2_BACKGROUND_CHECKER: mix->fill_checker (GST_BUFFER_DATA (*outbuf), mix->width, mix->height); @@ -832,6 +843,13 @@ gst_videomixer2_blend_buffers (GstVideoMixer2 * mix, mix->fill_color (GST_BUFFER_DATA (*outbuf), mix->width, mix->height, 
240, 128, 128); break; + case VIDEO_MIXER2_BACKGROUND_TRANSPARENT: + orc_memset (GST_BUFFER_DATA (*outbuf), 0, + gst_video_format_get_row_stride (mix->format, 0, + mix->width) * mix->height); + /* use overlay to keep background transparent */ + composite = mix->overlay; + break; } for (l = mix->sinkpads; l; l = l->next) { @@ -854,7 +872,7 @@ gst_videomixer2_blend_buffers (GstVideoMixer2 * mix, if (GST_CLOCK_TIME_IS_VALID (stream_time)) gst_object_sync_values (G_OBJECT (pad), stream_time); - mix->blend (GST_BUFFER_DATA (mixcol->buffer), + composite (GST_BUFFER_DATA (mixcol->buffer), pad->xpos, pad->ypos, pad->width, pad->height, pad->alpha, GST_BUFFER_DATA (*outbuf), mix->width, mix->height); } @@ -1387,6 +1405,7 @@ gst_videomixer2_src_setcaps (GstPad * pad, GstCaps * caps) GST_INFO_OBJECT (pad, "set src caps: %" GST_PTR_FORMAT, caps); mix->blend = NULL; + mix->overlay = NULL; mix->fill_checker = NULL; mix->fill_color = NULL; @@ -1416,114 +1435,133 @@ gst_videomixer2_src_setcaps (GstPad * pad, GstCaps * caps) switch (mix->format) { case GST_VIDEO_FORMAT_AYUV: mix->blend = gst_video_mixer_blend_ayuv; + mix->overlay = gst_video_mixer_overlay_ayuv; mix->fill_checker = gst_video_mixer_fill_checker_ayuv; mix->fill_color = gst_video_mixer_fill_color_ayuv; ret = TRUE; break; case GST_VIDEO_FORMAT_ARGB: mix->blend = gst_video_mixer_blend_argb; + mix->overlay = gst_video_mixer_overlay_argb; mix->fill_checker = gst_video_mixer_fill_checker_argb; mix->fill_color = gst_video_mixer_fill_color_argb; ret = TRUE; break; case GST_VIDEO_FORMAT_BGRA: mix->blend = gst_video_mixer_blend_bgra; + mix->overlay = gst_video_mixer_overlay_bgra; mix->fill_checker = gst_video_mixer_fill_checker_bgra; mix->fill_color = gst_video_mixer_fill_color_bgra; ret = TRUE; break; case GST_VIDEO_FORMAT_ABGR: mix->blend = gst_video_mixer_blend_abgr; + mix->overlay = gst_video_mixer_overlay_abgr; mix->fill_checker = gst_video_mixer_fill_checker_abgr; mix->fill_color = gst_video_mixer_fill_color_abgr; ret = 
TRUE; break; case GST_VIDEO_FORMAT_RGBA: mix->blend = gst_video_mixer_blend_rgba; + mix->overlay = gst_video_mixer_overlay_rgba; mix->fill_checker = gst_video_mixer_fill_checker_rgba; mix->fill_color = gst_video_mixer_fill_color_rgba; ret = TRUE; break; case GST_VIDEO_FORMAT_Y444: mix->blend = gst_video_mixer_blend_y444; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_y444; mix->fill_color = gst_video_mixer_fill_color_y444; ret = TRUE; break; case GST_VIDEO_FORMAT_Y42B: mix->blend = gst_video_mixer_blend_y42b; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_y42b; mix->fill_color = gst_video_mixer_fill_color_y42b; ret = TRUE; break; case GST_VIDEO_FORMAT_YUY2: mix->blend = gst_video_mixer_blend_yuy2; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_yuy2; mix->fill_color = gst_video_mixer_fill_color_yuy2; ret = TRUE; break; case GST_VIDEO_FORMAT_UYVY: mix->blend = gst_video_mixer_blend_uyvy; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_uyvy; mix->fill_color = gst_video_mixer_fill_color_uyvy; ret = TRUE; break; case GST_VIDEO_FORMAT_YVYU: mix->blend = gst_video_mixer_blend_yvyu; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_yvyu; mix->fill_color = gst_video_mixer_fill_color_yvyu; ret = TRUE; break; case GST_VIDEO_FORMAT_I420: mix->blend = gst_video_mixer_blend_i420; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_i420; mix->fill_color = gst_video_mixer_fill_color_i420; ret = TRUE; break; case GST_VIDEO_FORMAT_YV12: mix->blend = gst_video_mixer_blend_yv12; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_yv12; mix->fill_color = gst_video_mixer_fill_color_yv12; ret = TRUE; break; case GST_VIDEO_FORMAT_Y41B: mix->blend = gst_video_mixer_blend_y41b; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_y41b; mix->fill_color = 
gst_video_mixer_fill_color_y41b; ret = TRUE; break; case GST_VIDEO_FORMAT_RGB: mix->blend = gst_video_mixer_blend_rgb; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_rgb; mix->fill_color = gst_video_mixer_fill_color_rgb; ret = TRUE; break; case GST_VIDEO_FORMAT_BGR: mix->blend = gst_video_mixer_blend_bgr; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_bgr; mix->fill_color = gst_video_mixer_fill_color_bgr; ret = TRUE; break; case GST_VIDEO_FORMAT_xRGB: mix->blend = gst_video_mixer_blend_xrgb; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_xrgb; mix->fill_color = gst_video_mixer_fill_color_xrgb; ret = TRUE; break; case GST_VIDEO_FORMAT_xBGR: mix->blend = gst_video_mixer_blend_xbgr; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_xbgr; mix->fill_color = gst_video_mixer_fill_color_xbgr; ret = TRUE; break; case GST_VIDEO_FORMAT_RGBx: mix->blend = gst_video_mixer_blend_rgbx; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_rgbx; mix->fill_color = gst_video_mixer_fill_color_rgbx; ret = TRUE; break; case GST_VIDEO_FORMAT_BGRx: mix->blend = gst_video_mixer_blend_bgrx; + mix->overlay = mix->blend; mix->fill_checker = gst_video_mixer_fill_checker_bgrx; mix->fill_color = gst_video_mixer_fill_color_bgrx; ret = TRUE; @@ -1599,7 +1637,7 @@ gst_videomixer2_sink_event (GstCollectPads2 * pads, GstCollectData2 * cdata, GST_DEBUG_OBJECT (pad, "Got %s event on pad %s:%s", GST_EVENT_TYPE_NAME (event), GST_DEBUG_PAD_NAME (pad)); - // return FALSE => event will be forwarded + /* return FALSE => event will be forwarded */ switch (GST_EVENT_TYPE (event)) { case GST_EVENT_NEWSEGMENT:{ GstFormat fmt; diff --git a/gst/videomixer/videomixer2.h b/gst/videomixer/videomixer2.h index 14a82fe84a..2f2339d658 100644 --- a/gst/videomixer/videomixer2.h +++ b/gst/videomixer/videomixer2.h @@ -47,6 +47,7 @@ typedef struct _GstVideoMixer2Class 
GstVideoMixer2Class; * @VIDEO_MIXER2_BACKGROUND_CHECKER: checker pattern background * @VIDEO_MIXER2_BACKGROUND_BLACK: solid color black background * @VIDEO_MIXER2_BACKGROUND_WHITE: solid color white background + * @VIDEO_MIXER2_BACKGROUND_TRANSPARENT: background is left transparent and layers are composited using "A OVER B" composition rules. This is only applicable to AYUV and ARGB (and variants) as it preserves the alpha channel and allows for further mixing. * * The different backgrounds videomixer can blend over. */ @@ -54,7 +55,8 @@ typedef enum { VIDEO_MIXER2_BACKGROUND_CHECKER, VIDEO_MIXER2_BACKGROUND_BLACK, - VIDEO_MIXER2_BACKGROUND_WHITE + VIDEO_MIXER2_BACKGROUND_WHITE, + VIDEO_MIXER2_BACKGROUND_TRANSPARENT, } GstVideoMixer2Background; @@ -106,7 +108,7 @@ struct _GstVideoMixer2 GstClockTime earliest_time; guint64 qos_processed, qos_dropped; - BlendFunction blend; + BlendFunction blend, overlay; FillCheckerFunction fill_checker; FillColorFunction fill_color; }; diff --git a/sys/v4l2/gstv4l2radio.h b/sys/v4l2/gstv4l2radio.h index f1c99a277f..68b7ec3912 100644 --- a/sys/v4l2/gstv4l2radio.h +++ b/sys/v4l2/gstv4l2radio.h @@ -42,7 +42,6 @@ typedef struct _GstV4l2RadioClass GstV4l2RadioClass; /** * GstV4l2Radio: - * @v4l2object: private #GstV4l2Object * * Opaque video4linux2 radio tuner element */ diff --git a/sys/v4l2/gstv4l2src.h b/sys/v4l2/gstv4l2src.h index da8341f4e2..0dd794a711 100644 --- a/sys/v4l2/gstv4l2src.h +++ b/sys/v4l2/gstv4l2src.h @@ -49,7 +49,6 @@ typedef GstFlowReturn (*GstV4l2SrcGetFunc)(GstV4l2Src * v4l2src, GstBuffer ** bu /** * GstV4l2Src: - * @pushsrc: parent #GstPushSrc. * * Opaque object. 
*/ @@ -83,7 +82,7 @@ struct _GstV4l2Src gint fps_d, fps_n; /* framerate if device is open */ GstClockTime duration; /* duration of one frame */ - + GstClockTime ctrl_time; GstV4l2SrcGetFunc get_frame; @@ -92,7 +91,7 @@ struct _GstV4l2Src struct _GstV4l2SrcClass { GstPushSrcClass parent_class; - + GList *v4l2_class_devices; }; diff --git a/sys/v4l2/gstv4l2xoverlay.c b/sys/v4l2/gstv4l2xoverlay.c index c341e19a18..80f114d202 100644 --- a/sys/v4l2/gstv4l2xoverlay.c +++ b/sys/v4l2/gstv4l2xoverlay.c @@ -418,9 +418,11 @@ gst_v4l2_xoverlay_set_window_handle (GstV4l2Object * v4l2object, guintptr id) /** * gst_v4l2_xoverlay_prepare_xwindow_id: - * @param v4l2object - * @param required TRUE if display is required (ie. TRUE for v4l2sink, but + * @v4l2object: the v4l2object + * @required: %TRUE if display is required (ie. TRUE for v4l2sink, but * FALSE for any other element with optional overlay capabilities) + * + * Helper function to create a window if none is set from the application. */ void gst_v4l2_xoverlay_prepare_xwindow_id (GstV4l2Object * v4l2object, diff --git a/tests/check/elements/rtpbin.c b/tests/check/elements/rtpbin.c index 2666fe457b..63c6ae766a 100644 --- a/tests/check/elements/rtpbin.c +++ b/tests/check/elements/rtpbin.c @@ -37,12 +37,6 @@ GST_START_TEST (test_cleanup_send) fail_unless (rtp_sink != NULL); ASSERT_OBJECT_REFCOUNT (rtp_sink, "rtp_sink", 2); - /* request again */ - rtp_sink = gst_element_get_request_pad (rtpbin, "send_rtp_sink_0"); - fail_unless (rtp_sink != NULL); - ASSERT_OBJECT_REFCOUNT (rtp_sink, "rtp_sink", 3); - gst_object_unref (rtp_sink); - /* this static pad should be created automatically now */ rtp_src = gst_element_get_static_pad (rtpbin, "send_rtp_src_0"); fail_unless (rtp_src != NULL); @@ -58,12 +52,6 @@ GST_START_TEST (test_cleanup_send) fail_unless (rtcp_src != NULL); ASSERT_OBJECT_REFCOUNT (rtcp_src, "rtcp_src", 2); - /* second time */ - rtcp_src = gst_element_get_request_pad (rtpbin, "send_rtcp_src_0"); - fail_unless (rtcp_src !=
NULL); - ASSERT_OBJECT_REFCOUNT (rtcp_src, "rtcp_src", 3); - gst_object_unref (rtcp_src); - gst_element_release_request_pad (rtpbin, rtp_sink); /* we should only have our refs to the pads now */ ASSERT_OBJECT_REFCOUNT (rtp_sink, "rtp_sink", 1);