diff --git a/subprojects/gst-plugins-base/gst/compositor/blend.c b/subprojects/gst-plugins-base/gst/compositor/blend.c index f350165419..4e8e7e88c2 100644 --- a/subprojects/gst-plugins-base/gst/compositor/blend.c +++ b/subprojects/gst-plugins-base/gst/compositor/blend.c @@ -244,14 +244,15 @@ A32_COLOR (ayuv, 24, 16, 8, 0); A32_COLOR (vuya, 0, 8, 16, 24); /* Y444, Y42B, I420, YV12, Y41B */ -#define PLANAR_YUV_BLEND(format_name,format_enum,x_round,y_round,MEMCPY,BLENDLOOP) \ +#define PLANAR_YUV_BLEND(format_name,x_round,y_round,MEMCPY,BLENDLOOP,n_bits) \ inline static void \ _blend_##format_name (const guint8 * src, guint8 * dest, \ - gint src_stride, gint dest_stride, gint src_width, gint src_height, \ + gint src_stride, gint dest_stride, gint pstride, gint src_width, gint src_height, \ gdouble src_alpha, GstCompositorBlendMode mode) \ { \ gint i; \ gint b_alpha; \ + gint range; \ \ /* in source mode we just have to copy over things */ \ if (mode == COMPOSITOR_BLEND_MODE_SOURCE) { \ @@ -266,16 +267,18 @@ _blend_##format_name (const guint8 * src, guint8 * dest, \ \ /* If it's completely opaque, we do a fast copy */ \ if (G_UNLIKELY (src_alpha == 1.0)) { \ + gint width_in_bytes = src_width * pstride; \ GST_LOG ("Fast copy (alpha == 1.0)"); \ for (i = 0; i < src_height; i++) { \ - MEMCPY (dest, src, src_width); \ + MEMCPY (dest, src, width_in_bytes); \ src += src_stride; \ dest += dest_stride; \ } \ return; \ } \ \ - b_alpha = CLAMP ((gint) (src_alpha * 255), 0, 255); \ + range = (1 << n_bits) - 1; \ + b_alpha = CLAMP ((gint) (src_alpha * range), 0, range); \ \ BLENDLOOP(dest, dest_stride, src, src_stride, b_alpha, src_width, src_height);\ } \ @@ -299,6 +302,7 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ gint dest_width, dest_height; \ const GstVideoFormatInfo *info; \ gint src_width, src_height; \ + gint pstride; \ \ src_width = GST_VIDEO_FRAME_WIDTH (srcframe); \ src_height = GST_VIDEO_FRAME_HEIGHT (srcframe); \ @@ -350,14 +354,15 @@ 
blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 0); \ src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 0, b_src_width); \ src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, b_src_height); \ + pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 0); \ comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 0, xpos); \ comp_ypos = (ypos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, ypos); \ comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 0, xoffset); \ comp_yoffset = (yoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, yoffset); \ - _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \ - b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \ + _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \ + b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \ src_comp_rowstride, \ - dest_comp_rowstride, src_comp_width, src_comp_height, \ + dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \ src_alpha, mode); \ \ b_src = GST_VIDEO_FRAME_COMP_DATA (srcframe, 1); \ @@ -366,14 +371,15 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 1); \ src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 1, b_src_width); \ src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, b_src_height); \ + pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 1); \ comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 1, xpos); \ comp_ypos = (ypos == 0) ? 0 : ypos >> info->h_sub[1]; \ comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 1, xoffset); \ comp_yoffset = (yoffset == 0) ? 
0 : yoffset >> info->h_sub[1]; \ - _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \ - b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \ + _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \ + b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \ src_comp_rowstride, \ - dest_comp_rowstride, src_comp_width, src_comp_height, \ + dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \ src_alpha, mode); \ \ b_src = GST_VIDEO_FRAME_COMP_DATA (srcframe, 2); \ @@ -382,14 +388,15 @@ blend_##format_name (GstVideoFrame * srcframe, gint xpos, gint ypos, \ dest_comp_rowstride = GST_VIDEO_FRAME_COMP_STRIDE (destframe, 2); \ src_comp_width = GST_VIDEO_FORMAT_INFO_SCALE_WIDTH(info, 2, b_src_width); \ src_comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, b_src_height); \ + pstride = GST_VIDEO_FORMAT_INFO_PSTRIDE (info, 2); \ comp_xpos = (xpos == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 2, xpos); \ comp_ypos = (ypos == 0) ? 0 : ypos >> info->h_sub[2]; \ comp_xoffset = (xoffset == 0) ? 0 : GST_VIDEO_FORMAT_INFO_SCALE_WIDTH (info, 2, xoffset); \ comp_yoffset = (yoffset == 0) ? 
0 : yoffset >> info->h_sub[2]; \ - _blend_##format_name (b_src + comp_xoffset + comp_yoffset * src_comp_rowstride, \ - b_dest + comp_xpos + comp_ypos * dest_comp_rowstride, \ + _blend_##format_name (b_src + comp_xoffset * pstride + comp_yoffset * src_comp_rowstride, \ + b_dest + comp_xpos * pstride + comp_ypos * dest_comp_rowstride, \ src_comp_rowstride, \ - dest_comp_rowstride, src_comp_width, src_comp_height, \ + dest_comp_rowstride, pstride, src_comp_width, src_comp_height, \ src_alpha, mode); \ } @@ -493,26 +500,167 @@ fill_color_##format_name (GstVideoFrame * frame, \ } \ } +#define PLANAR_YUV_HIGH_FILL_CHECKER(format_name, nbits, endian, MEMSET) \ +static void \ +fill_checker_##format_name (GstVideoFrame * frame, guint y_start, guint y_end) \ +{ \ + gint i, j; \ + static const int tab[] = { 80 << (nbits - 8), 160 << (nbits - 8), 80 << (nbits - 8), 160 << (nbits - 8),}; /* levels 80/160 shifted up by (nbits - 8) */ \ + guint8 *p; \ + gint comp_width, comp_height; \ + gint rowstride, comp_yoffset; \ + gint pstride; \ + gint uv; \ + const GstVideoFormatInfo *info; \ + \ + info = frame->info.finfo; \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ + p += comp_yoffset * rowstride; \ + \ + for (i = 0; i < comp_height; i++) { \ + for (j = 0; j < comp_width; j++) { \ + GST_WRITE_UINT16_##endian (p, tab[(((i + y_start) & 0x8) >> 3) + ((j & 0x8) >> 3)]); \ + p += pstride; \ + } \ + p += rowstride - comp_width * pstride; /* advance past the rest of the row stride */ \ + } \ + \ + uv = GUINT16_TO_##endian (1 << (nbits - 1)); /* fill value 2^(nbits-1), converted to the requested endianness */ \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[1]; \ + p += comp_yoffset * rowstride; \ + MEMSET (p, rowstride, uv, comp_width, comp_height); \ + \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \ + pstride = GST_VIDEO_FRAME_COMP_PSTRIDE (frame, 2); \ + comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[2]; \ + p += comp_yoffset * rowstride; \ + MEMSET (p, rowstride, uv, comp_width, comp_height); \ +} + +#define PLANAR_YUV_HIGH_FILL_COLOR(format_name,endian,MEMSET) \ +static void \ +fill_color_##format_name (GstVideoFrame * frame, \ + guint y_start, guint y_end, gint colY, gint colU, gint colV) \ +{ \ + guint8 *p; \ + gint comp_width, comp_height; \ + gint rowstride, comp_yoffset; \ + const GstVideoFormatInfo *info; \ + \ + info = frame->info.finfo; \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 0); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 0); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 0, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 0); \ + comp_yoffset = (y_start == 0) ? 
0 : GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT (info, 0, y_start); \ + p += comp_yoffset * rowstride; \ + MEMSET (p, rowstride, GUINT16_TO_##endian (colY), comp_width, comp_height); \ + \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 1); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 1); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 1, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 1); \ + comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[1]; \ + p += comp_yoffset * rowstride; \ + MEMSET (p, rowstride, GUINT16_TO_##endian (colU), comp_width, comp_height); \ + \ + p = GST_VIDEO_FRAME_COMP_DATA (frame, 2); \ + comp_width = GST_VIDEO_FRAME_COMP_WIDTH (frame, 2); \ + comp_height = GST_VIDEO_FORMAT_INFO_SCALE_HEIGHT(info, 2, y_end - y_start); \ + rowstride = GST_VIDEO_FRAME_COMP_STRIDE (frame, 2); \ + comp_yoffset = (y_start == 0) ? 0 : y_start >> info->h_sub[2]; \ + p += comp_yoffset * rowstride; \ + MEMSET (p, rowstride, GUINT16_TO_##endian (colV), comp_width, comp_height); \ +} + #define GST_ROUND_UP_1(x) (x) -PLANAR_YUV_BLEND (i420, GST_VIDEO_FORMAT_I420, GST_ROUND_UP_2, - GST_ROUND_UP_2, memcpy, compositor_orc_blend_u8); +PLANAR_YUV_BLEND (i420, GST_ROUND_UP_2, + GST_ROUND_UP_2, memcpy, compositor_orc_blend_u8, 8); PLANAR_YUV_FILL_CHECKER (i420, GST_VIDEO_FORMAT_I420, memset); PLANAR_YUV_FILL_COLOR (i420, GST_VIDEO_FORMAT_I420, memset); PLANAR_YUV_FILL_COLOR (yv12, GST_VIDEO_FORMAT_YV12, memset); -PLANAR_YUV_BLEND (y444, GST_VIDEO_FORMAT_Y444, GST_ROUND_UP_1, - GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8); +PLANAR_YUV_BLEND (y444, GST_ROUND_UP_1, + GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 8); PLANAR_YUV_FILL_CHECKER (y444, GST_VIDEO_FORMAT_Y444, memset); PLANAR_YUV_FILL_COLOR (y444, GST_VIDEO_FORMAT_Y444, memset); -PLANAR_YUV_BLEND (y42b, GST_VIDEO_FORMAT_Y42B, GST_ROUND_UP_2, - GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8); +PLANAR_YUV_BLEND (y42b, GST_ROUND_UP_2, + GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 
8); PLANAR_YUV_FILL_CHECKER (y42b, GST_VIDEO_FORMAT_Y42B, memset); PLANAR_YUV_FILL_COLOR (y42b, GST_VIDEO_FORMAT_Y42B, memset); -PLANAR_YUV_BLEND (y41b, GST_VIDEO_FORMAT_Y41B, GST_ROUND_UP_4, - GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8); +PLANAR_YUV_BLEND (y41b, GST_ROUND_UP_4, + GST_ROUND_UP_1, memcpy, compositor_orc_blend_u8, 8); PLANAR_YUV_FILL_CHECKER (y41b, GST_VIDEO_FORMAT_Y41B, memset); PLANAR_YUV_FILL_COLOR (y41b, GST_VIDEO_FORMAT_Y41B, memset); +#define BLEND_HIGH(format_name) \ +compositor_orc_blend_##format_name + +#if G_BYTE_ORDER == G_LITTLE_ENDIAN +PLANAR_YUV_BLEND (i420_10le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u10), 10); +PLANAR_YUV_BLEND (i420_10be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u10_swap), 10); + +PLANAR_YUV_BLEND (i420_12le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u12), 12); +PLANAR_YUV_BLEND (i420_12be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u12_swap), 12); + +PLANAR_YUV_BLEND (i422_10le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u10), 10); +PLANAR_YUV_BLEND (i422_10be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u10_swap), 10); + +PLANAR_YUV_BLEND (i422_12le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u12), 12); +PLANAR_YUV_BLEND (i422_12be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u12_swap), 12); +#else /* G_BYTE_ORDER == G_LITTLE_ENDIAN */ +PLANAR_YUV_BLEND (i420_10le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u10_swap), 10); +PLANAR_YUV_BLEND (i420_10be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u10), 10); + +PLANAR_YUV_BLEND (i420_12le, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u12_swap), 12); +PLANAR_YUV_BLEND (i420_12be, GST_ROUND_UP_2, GST_ROUND_UP_2, memcpy, + BLEND_HIGH (u12), 12); + +PLANAR_YUV_BLEND (i422_10le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u10_swap), 10); +PLANAR_YUV_BLEND (i422_10be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u10), 10); + 
+PLANAR_YUV_BLEND (i422_12le, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u12_swap), 12); +PLANAR_YUV_BLEND (i422_12be, GST_ROUND_UP_2, GST_ROUND_UP_1, memcpy, + BLEND_HIGH (u12), 12); +#endif /* G_BYTE_ORDER == G_LITTLE_ENDIAN */ + +PLANAR_YUV_HIGH_FILL_CHECKER (i420_10le, 10, LE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_COLOR (i420_10le, LE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_CHECKER (i420_10be, 10, BE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_COLOR (i420_10be, BE, compositor_orc_memset_u16_2d); + +PLANAR_YUV_HIGH_FILL_CHECKER (i420_12le, 12, LE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_COLOR (i420_12le, LE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_CHECKER (i420_12be, 12, BE, compositor_orc_memset_u16_2d); +PLANAR_YUV_HIGH_FILL_COLOR (i420_12be, BE, compositor_orc_memset_u16_2d); + /* NV12, NV21 */ #define NV_YUV_BLEND(format_name,MEMCPY,BLENDLOOP) \ inline static void \ @@ -1073,6 +1221,14 @@ BlendFunction gst_compositor_blend_rgbx; /* BGRx, xRGB, xBGR are equal to RGBx */ BlendFunction gst_compositor_blend_yuy2; /* YVYU and UYVY are equal to YUY2 */ +BlendFunction gst_compositor_blend_i420_10le; +BlendFunction gst_compositor_blend_i420_10be; +BlendFunction gst_compositor_blend_i420_12le; +BlendFunction gst_compositor_blend_i420_12be; +BlendFunction gst_compositor_blend_i422_10le; +BlendFunction gst_compositor_blend_i422_10be; +BlendFunction gst_compositor_blend_i422_12le; +BlendFunction gst_compositor_blend_i422_12be; FillCheckerFunction gst_compositor_fill_checker_argb; FillCheckerFunction gst_compositor_fill_checker_bgra; @@ -1094,6 +1250,10 @@ FillCheckerFunction gst_compositor_fill_checker_rgbx; FillCheckerFunction gst_compositor_fill_checker_yuy2; /* YVYU is equal to YUY2 */ FillCheckerFunction gst_compositor_fill_checker_uyvy; +FillCheckerFunction gst_compositor_fill_checker_i420_10le; +FillCheckerFunction gst_compositor_fill_checker_i420_10be; +FillCheckerFunction 
gst_compositor_fill_checker_i420_12le; +FillCheckerFunction gst_compositor_fill_checker_i420_12be; FillColorFunction gst_compositor_fill_color_argb; FillColorFunction gst_compositor_fill_color_bgra; @@ -1117,6 +1277,10 @@ FillColorFunction gst_compositor_fill_color_bgrx; FillColorFunction gst_compositor_fill_color_yuy2; FillColorFunction gst_compositor_fill_color_yvyu; FillColorFunction gst_compositor_fill_color_uyvy; +FillColorFunction gst_compositor_fill_color_i420_10le; +FillColorFunction gst_compositor_fill_color_i420_10be; +FillColorFunction gst_compositor_fill_color_i420_12le; +FillColorFunction gst_compositor_fill_color_i420_12be; void gst_compositor_init_blend (void) @@ -1137,6 +1301,14 @@ gst_compositor_init_blend (void) gst_compositor_blend_rgb = GST_DEBUG_FUNCPTR (blend_rgb); gst_compositor_blend_xrgb = GST_DEBUG_FUNCPTR (blend_xrgb); gst_compositor_blend_yuy2 = GST_DEBUG_FUNCPTR (blend_yuy2); + gst_compositor_blend_i420_10le = GST_DEBUG_FUNCPTR (blend_i420_10le); + gst_compositor_blend_i420_10be = GST_DEBUG_FUNCPTR (blend_i420_10be); + gst_compositor_blend_i420_12le = GST_DEBUG_FUNCPTR (blend_i420_12le); + gst_compositor_blend_i420_12be = GST_DEBUG_FUNCPTR (blend_i420_12be); + gst_compositor_blend_i422_10le = GST_DEBUG_FUNCPTR (blend_i422_10le); + gst_compositor_blend_i422_10be = GST_DEBUG_FUNCPTR (blend_i422_10be); + gst_compositor_blend_i422_12le = GST_DEBUG_FUNCPTR (blend_i422_12le); + gst_compositor_blend_i422_12be = GST_DEBUG_FUNCPTR (blend_i422_12be); gst_compositor_fill_checker_argb = GST_DEBUG_FUNCPTR (fill_checker_argb_c); gst_compositor_fill_checker_bgra = GST_DEBUG_FUNCPTR (fill_checker_bgra_c); @@ -1153,6 +1325,14 @@ gst_compositor_init_blend (void) gst_compositor_fill_checker_rgbx = GST_DEBUG_FUNCPTR (fill_checker_rgbx_c); gst_compositor_fill_checker_yuy2 = GST_DEBUG_FUNCPTR (fill_checker_yuy2_c); gst_compositor_fill_checker_uyvy = GST_DEBUG_FUNCPTR (fill_checker_uyvy_c); + gst_compositor_fill_checker_i420_10le = + GST_DEBUG_FUNCPTR 
(fill_checker_i420_10le); + gst_compositor_fill_checker_i420_10be = + GST_DEBUG_FUNCPTR (fill_checker_i420_10be); + gst_compositor_fill_checker_i420_12le = + GST_DEBUG_FUNCPTR (fill_checker_i420_12le); + gst_compositor_fill_checker_i420_12be = + GST_DEBUG_FUNCPTR (fill_checker_i420_12be); gst_compositor_fill_color_argb = GST_DEBUG_FUNCPTR (fill_color_argb); gst_compositor_fill_color_bgra = GST_DEBUG_FUNCPTR (fill_color_bgra); @@ -1175,4 +1355,12 @@ gst_compositor_init_blend (void) gst_compositor_fill_color_yuy2 = GST_DEBUG_FUNCPTR (fill_color_yuy2); gst_compositor_fill_color_yvyu = GST_DEBUG_FUNCPTR (fill_color_yvyu); gst_compositor_fill_color_uyvy = GST_DEBUG_FUNCPTR (fill_color_uyvy); + gst_compositor_fill_color_i420_10le = + GST_DEBUG_FUNCPTR (fill_color_i420_10le); + gst_compositor_fill_color_i420_10be = + GST_DEBUG_FUNCPTR (fill_color_i420_10be); + gst_compositor_fill_color_i420_12le = + GST_DEBUG_FUNCPTR (fill_color_i420_12le); + gst_compositor_fill_color_i420_12be = + GST_DEBUG_FUNCPTR (fill_color_i420_12be); } diff --git a/subprojects/gst-plugins-base/gst/compositor/blend.h b/subprojects/gst-plugins-base/gst/compositor/blend.h index 2489d1755c..c60c199af5 100644 --- a/subprojects/gst-plugins-base/gst/compositor/blend.h +++ b/subprojects/gst-plugins-base/gst/compositor/blend.h @@ -81,6 +81,14 @@ extern BlendFunction gst_compositor_blend_rgbx; extern BlendFunction gst_compositor_blend_yuy2; #define gst_compositor_blend_uyvy gst_compositor_blend_yuy2; #define gst_compositor_blend_yvyu gst_compositor_blend_yuy2; +extern BlendFunction gst_compositor_blend_i420_10le; +extern BlendFunction gst_compositor_blend_i420_10be; +extern BlendFunction gst_compositor_blend_i420_12le; +extern BlendFunction gst_compositor_blend_i420_12be; +extern BlendFunction gst_compositor_blend_i422_10le; +extern BlendFunction gst_compositor_blend_i422_10be; +extern BlendFunction gst_compositor_blend_i422_12le; +extern BlendFunction gst_compositor_blend_i422_12be; extern 
FillCheckerFunction gst_compositor_fill_checker_argb; #define gst_compositor_fill_checker_abgr gst_compositor_fill_checker_argb @@ -104,6 +112,14 @@ extern FillCheckerFunction gst_compositor_fill_checker_xrgb; extern FillCheckerFunction gst_compositor_fill_checker_yuy2; #define gst_compositor_fill_checker_yvyu gst_compositor_fill_checker_yuy2; extern FillCheckerFunction gst_compositor_fill_checker_uyvy; +extern FillCheckerFunction gst_compositor_fill_checker_i420_10le; +#define gst_compositor_fill_checker_i422_10le gst_compositor_fill_checker_i420_10le +extern FillCheckerFunction gst_compositor_fill_checker_i420_10be; +#define gst_compositor_fill_checker_i422_10be gst_compositor_fill_checker_i420_10be +extern FillCheckerFunction gst_compositor_fill_checker_i420_12le; +#define gst_compositor_fill_checker_i422_12le gst_compositor_fill_checker_i420_12le +extern FillCheckerFunction gst_compositor_fill_checker_i420_12be; +#define gst_compositor_fill_checker_i422_12be gst_compositor_fill_checker_i420_12be extern FillColorFunction gst_compositor_fill_color_argb; extern FillColorFunction gst_compositor_fill_color_abgr; @@ -127,6 +143,14 @@ extern FillColorFunction gst_compositor_fill_color_bgrx; extern FillColorFunction gst_compositor_fill_color_yuy2; extern FillColorFunction gst_compositor_fill_color_yvyu; extern FillColorFunction gst_compositor_fill_color_uyvy; +extern FillColorFunction gst_compositor_fill_color_i420_10le; +#define gst_compositor_fill_color_i422_10le gst_compositor_fill_color_i420_10le +extern FillColorFunction gst_compositor_fill_color_i420_10be; +#define gst_compositor_fill_color_i422_10be gst_compositor_fill_color_i420_10be +extern FillColorFunction gst_compositor_fill_color_i420_12le; +#define gst_compositor_fill_color_i422_12le gst_compositor_fill_color_i420_12le +extern FillColorFunction gst_compositor_fill_color_i420_12be; +#define gst_compositor_fill_color_i422_12be gst_compositor_fill_color_i420_12be void gst_compositor_init_blend (void); diff 
--git a/subprojects/gst-plugins-base/gst/compositor/compositor.c b/subprojects/gst-plugins-base/gst/compositor/compositor.c index 8d88230e9d..6b34648bc1 100644 --- a/subprojects/gst-plugins-base/gst/compositor/compositor.c +++ b/subprojects/gst-plugins-base/gst/compositor/compositor.c @@ -103,9 +103,19 @@ GST_DEBUG_CATEGORY_STATIC (gst_compositor_debug); #define GST_CAT_DEFAULT gst_compositor_debug +#if G_BYTE_ORDER == G_LITTLE_ENDIAN #define FORMATS " { AYUV, VUYA, BGRA, ARGB, RGBA, ABGR, Y444, Y42B, YUY2, UYVY, "\ - " YVYU, I420, YV12, NV12, NV21, Y41B, RGB, BGR, xRGB, xBGR, "\ + " YVYU, I422_12LE, I422_12BE, I422_10LE, I422_10BE, "\ + " I420_12LE, I420_12BE, I420_10LE, I420_10BE, " \ + " I420, YV12, NV12, NV21, Y41B, RGB, BGR, xRGB, xBGR, "\ " RGBx, BGRx } " +#else +#define FORMATS " { AYUV, VUYA, BGRA, ARGB, RGBA, ABGR, Y444, Y42B, YUY2, UYVY, "\ + " YVYU, I422_12BE, I422_12LE, I422_10BE, I422_10LE, "\ + " I420_12BE, I420_12LE, I420_10BE, I420_10LE, "\ + " I420, YV12, NV12, NV21, Y41B, RGB, BGR, xRGB, xBGR, "\ + " RGBx, BGRx } " +#endif static GstStaticPadTemplate src_factory = GST_STATIC_PAD_TEMPLATE ("src", GST_PAD_SRC, @@ -876,6 +886,54 @@ set_functions (GstCompositor * self, const GstVideoInfo * info) self->fill_checker = gst_compositor_fill_checker_yvyu; self->fill_color = gst_compositor_fill_color_yvyu; break; + case GST_VIDEO_FORMAT_I422_12LE: + self->blend = gst_compositor_blend_i422_12le; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i422_12le; + self->fill_color = gst_compositor_fill_color_i422_12le; + break; + case GST_VIDEO_FORMAT_I422_12BE: + self->blend = gst_compositor_blend_i422_12be; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i422_12be; + self->fill_color = gst_compositor_fill_color_i422_12be; + break; + case GST_VIDEO_FORMAT_I422_10LE: + self->blend = gst_compositor_blend_i422_10le; + self->overlay = self->blend; + self->fill_checker = 
gst_compositor_fill_checker_i422_10le; + self->fill_color = gst_compositor_fill_color_i422_10le; + break; + case GST_VIDEO_FORMAT_I422_10BE: + self->blend = gst_compositor_blend_i422_10be; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i422_10be; + self->fill_color = gst_compositor_fill_color_i422_10be; + break; + case GST_VIDEO_FORMAT_I420_12LE: + self->blend = gst_compositor_blend_i420_12le; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i420_12le; + self->fill_color = gst_compositor_fill_color_i420_12le; + break; + case GST_VIDEO_FORMAT_I420_12BE: + self->blend = gst_compositor_blend_i420_12be; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i420_12be; + self->fill_color = gst_compositor_fill_color_i420_12be; + break; + case GST_VIDEO_FORMAT_I420_10LE: + self->blend = gst_compositor_blend_i420_10le; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i420_10le; + self->fill_color = gst_compositor_fill_color_i420_10le; + break; + case GST_VIDEO_FORMAT_I420_10BE: + self->blend = gst_compositor_blend_i420_10be; + self->overlay = self->blend; + self->fill_checker = gst_compositor_fill_checker_i420_10be; + self->fill_color = gst_compositor_fill_color_i420_10be; + break; case GST_VIDEO_FORMAT_I420: self->blend = gst_compositor_blend_i420; self->overlay = self->blend; diff --git a/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.c b/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.c index ce028e28de..d5318b24ae 100644 --- a/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.c +++ b/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.c @@ -98,8 +98,22 @@ typedef union void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int n); +void compositor_orc_memset_u16_2d (guint8 * 
ORC_RESTRICT d1, int d1_stride, + int p1, int n, int m); void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride, @@ -370,6 +384,112 @@ compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, #endif +/* compositor_orc_memset_u16_2d */ +#ifdef DISABLE_ORC +void +compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride, int p1, + int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + orc_union16 var32; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + + /* 0: loadpw */ + var32.i = p1; + + for (i = 0; i < n; i++) { + /* 1: storew */ + ptr0[i] = var32; + } + } + +} + +#else +static void +_backup_compositor_orc_memset_u16_2d (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT 
ptr0; + orc_union16 var32; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + + /* 0: loadpw */ + var32.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 1: storew */ + ptr0[i] = var32; + } + } + +} + +void +compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride, int p1, + int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 28, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 109, 101, 109, 115, 101, 116, 95, 117, 49, 54, 95, 50, 100, + 11, 2, 2, 16, 2, 97, 0, 24, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_compositor_orc_memset_u16_2d); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_memset_u16_2d"); + orc_program_set_backup_function (p, _backup_compositor_orc_memset_u16_2d); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_parameter (p, 2, "p1"); + + orc_program_append_2 (p, "storew", 0, ORC_VAR_D1, ORC_VAR_P1, ORC_VAR_D1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + /* compositor_orc_blend_u8 */ #ifdef DISABLE_ORC void @@ -562,6 +682,1242 @@ compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, #endif +/* compositor_orc_blend_u10 */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int 
s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 5: loadpl */ + var36.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 10; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 10; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u10 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 5: loadpl */ + var36.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + 
var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 10; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 10; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +void +compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 48, 11, 2, 2, 12, + 2, 2, 14, 1, 10, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32, + 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, + 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u10); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u10"); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u10); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, "s1"); + orc_program_add_constant (p, 1, 0x0000000a, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, 
ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + +/* compositor_orc_blend_u12 */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 5: loadpl */ + var36.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) 
var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 12; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 12; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u12 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 5: loadpl */ + var36.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 12; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 12; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +void +compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void 
(*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 50, 11, 2, 2, 12, + 2, 2, 14, 1, 12, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32, + 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, + 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u12); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u12"); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u12); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, "s1"); + orc_program_add_constant (p, 1, 0x0000000c, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + 
ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + +/* compositor_orc_blend_u16 */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 5: loadpl */ + var36.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 16; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 16; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u16 (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var34; + orc_union16 var35; + orc_union32 var36; + orc_union16 var37; + orc_union32 var38; + orc_union32 var39; + orc_union32 var40; + 
orc_union32 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 5: loadpl */ + var36.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var34 = ptr0[i]; + /* 1: convuwl */ + var38.i = (orc_uint16) var34.i; + /* 2: loadw */ + var35 = ptr4[i]; + /* 3: convuwl */ + var39.i = (orc_uint16) var35.i; + /* 4: subl */ + var40.i = ((orc_uint32) var39.i) - ((orc_uint32) var38.i); + /* 6: mulll */ + var41.i = (((orc_uint32) var40.i) * ((orc_uint32) var36.i)) & 0xffffffff; + /* 7: shll */ + var42.i = ((orc_uint32) var38.i) << 16; + /* 8: addl */ + var43.i = ((orc_uint32) var42.i) + ((orc_uint32) var41.i); + /* 9: shrul */ + var44.i = ((orc_uint32) var43.i) >> 16; + /* 10: convsuslw */ + var37.i = ORC_CLAMP_UW (var44.i); + /* 11: storew */ + ptr0[i] = var37; + } + } + +} + +void +compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 24, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 54, 11, 2, 2, 12, + 2, 2, 14, 1, 16, 0, 0, 0, 16, 2, 20, 4, 20, 4, 154, 32, + 0, 154, 33, 4, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, + 103, 33, 32, 33, 126, 33, 33, 16, 166, 0, 33, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u16); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u16"); + orc_program_set_backup_function (p, _backup_compositor_orc_blend_u16); 
+ orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, "s1"); + orc_program_add_constant (p, 1, 0x00000010, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_D1, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + +/* compositor_orc_blend_u10_swap */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + 
orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 7: loadpl */ + var37.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 10; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 10; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u10_swap (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 7: loadpl */ + var37.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: 
swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 10; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 10; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +void +compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 48, 95, 115, 119, 97, + 112, 11, 2, 2, 12, 2, 2, 14, 1, 10, 0, 0, 0, 16, 2, 20, + 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33, + 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32, + 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u10_swap); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u10_swap"); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u10_swap); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, "s1"); + orc_program_add_constant (p, 1, 0x0000000a, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + 
orc_program_add_temporary (p, 4, "t2"); + orc_program_add_temporary (p, 2, "t3"); + + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + +/* compositor_orc_blend_u12_swap */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + 
orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 7: loadpl */ + var37.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 12; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 12; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u12_swap (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 7: loadpl */ + var37.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: 
convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 12; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 12; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +void +compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 50, 95, 115, 119, 97, + 112, 11, 2, 2, 12, 2, 2, 14, 1, 12, 0, 0, 0, 16, 2, 20, + 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33, + 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32, + 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u12_swap); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u12_swap"); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u12_swap); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, "s1"); + orc_program_add_constant (p, 1, 0x0000000c, "c1"); + 
orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + orc_program_add_temporary (p, 2, "t3"); + + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + +/* compositor_orc_blend_u16_swap */ +#ifdef DISABLE_ORC +void +compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + int i; + int j; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + 
orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (d1, d1_stride * j); + ptr4 = ORC_PTR_OFFSET (s1, s1_stride * j); + + /* 7: loadpl */ + var37.i = p1; + + for (i = 0; i < n; i++) { + /* 0: loadw */ + var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 16; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 16; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +#else +static void +_backup_compositor_orc_blend_u16_swap (OrcExecutor * ORC_RESTRICT ex) +{ + int i; + int j; + int n = ex->n; + int m = ex->params[ORC_VAR_A1]; + orc_union16 *ORC_RESTRICT ptr0; + const orc_union16 *ORC_RESTRICT ptr4; + orc_union16 var35; + orc_union16 var36; + orc_union32 var37; + orc_union16 var38; + orc_union16 var39; + orc_union32 var40; + orc_union16 var41; + orc_union32 var42; + orc_union32 var43; + orc_union32 var44; + orc_union32 var45; + orc_union32 var46; + orc_union32 var47; + orc_union16 var48; + + for (j = 0; j < m; j++) { + ptr0 = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * j); + ptr4 = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * j); + + /* 7: loadpl */ + var37.i = ex->params[24]; + + for (i = 0; i < n; i++) { + /* 0: loadw */ 
+ var35 = ptr0[i]; + /* 1: swapw */ + var39.i = ORC_SWAP_W (var35.i); + /* 2: convuwl */ + var40.i = (orc_uint16) var39.i; + /* 3: loadw */ + var36 = ptr4[i]; + /* 4: swapw */ + var41.i = ORC_SWAP_W (var36.i); + /* 5: convuwl */ + var42.i = (orc_uint16) var41.i; + /* 6: subl */ + var43.i = ((orc_uint32) var42.i) - ((orc_uint32) var40.i); + /* 8: mulll */ + var44.i = (((orc_uint32) var43.i) * ((orc_uint32) var37.i)) & 0xffffffff; + /* 9: shll */ + var45.i = ((orc_uint32) var40.i) << 16; + /* 10: addl */ + var46.i = ((orc_uint32) var45.i) + ((orc_uint32) var44.i); + /* 11: shrul */ + var47.i = ((orc_uint32) var46.i) >> 16; + /* 12: convsuslw */ + var48.i = ORC_CLAMP_UW (var47.i); + /* 13: swapw */ + var38.i = ORC_SWAP_W (var48.i); + /* 14: storew */ + ptr0[i] = var38; + } + } + +} + +void +compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride, + const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m) +{ + OrcExecutor _ex, *ex = &_ex; + static volatile int p_inited = 0; + static OrcCode *c = 0; + void (*func) (OrcExecutor *); + + if (!p_inited) { + orc_once_mutex_lock (); + if (!p_inited) { + OrcProgram *p; + +#if 1 + static const orc_uint8 bc[] = { + 1, 7, 9, 29, 99, 111, 109, 112, 111, 115, 105, 116, 111, 114, 95, 111, + 114, 99, 95, 98, 108, 101, 110, 100, 95, 117, 49, 54, 95, 115, 119, 97, + 112, 11, 2, 2, 12, 2, 2, 14, 1, 16, 0, 0, 0, 16, 2, 20, + 4, 20, 4, 20, 2, 183, 34, 0, 154, 32, 34, 183, 34, 4, 154, 33, + 34, 129, 33, 33, 32, 120, 33, 33, 24, 124, 32, 32, 16, 103, 33, 32, + 33, 126, 33, 33, 16, 166, 34, 33, 183, 0, 34, 2, 0, + }; + p = orc_program_new_from_static_bytecode (bc); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u16_swap); +#else + p = orc_program_new (); + orc_program_set_2d (p); + orc_program_set_name (p, "compositor_orc_blend_u16_swap"); + orc_program_set_backup_function (p, + _backup_compositor_orc_blend_u16_swap); + orc_program_add_destination (p, 2, "d1"); + orc_program_add_source (p, 2, 
"s1"); + orc_program_add_constant (p, 1, 0x00000010, "c1"); + orc_program_add_parameter (p, 2, "p1"); + orc_program_add_temporary (p, 4, "t1"); + orc_program_add_temporary (p, 4, "t2"); + orc_program_add_temporary (p, 2, "t3"); + + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_D1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_T3, ORC_VAR_S1, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "convuwl", 0, ORC_VAR_T2, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); + orc_program_append_2 (p, "subl", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1, + ORC_VAR_D1); + orc_program_append_2 (p, "mulll", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1, + ORC_VAR_D1); + orc_program_append_2 (p, "shll", 0, ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "addl", 0, ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2, + ORC_VAR_D1); + orc_program_append_2 (p, "shrul", 0, ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1, + ORC_VAR_D1); + orc_program_append_2 (p, "convsuslw", 0, ORC_VAR_T3, ORC_VAR_T2, + ORC_VAR_D1, ORC_VAR_D1); + orc_program_append_2 (p, "swapw", 0, ORC_VAR_D1, ORC_VAR_T3, ORC_VAR_D1, + ORC_VAR_D1); +#endif + + orc_program_compile (p); + c = orc_program_take_code (p); + orc_program_free (p); + } + p_inited = TRUE; + orc_once_mutex_unlock (); + } + ex->arrays[ORC_VAR_A2] = c; + ex->program = 0; + + ex->n = n; + ORC_EXECUTOR_M (ex) = m; + ex->arrays[ORC_VAR_D1] = d1; + ex->params[ORC_VAR_D1] = d1_stride; + ex->arrays[ORC_VAR_S1] = (void *) s1; + ex->params[ORC_VAR_S1] = s1_stride; + ex->params[ORC_VAR_P1] = p1; + + func = c->exec; + func (ex); +} +#endif + + /* compositor_orc_blend_argb */ #ifdef DISABLE_ORC void diff --git a/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.h b/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.h index 75b6fb76a3..9a991e1344 100644 --- a/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.h 
+++ b/subprojects/gst-plugins-base/gst/compositor/compositororc-dist.h @@ -1,8 +1,7 @@ /* autogenerated from compositororc.orc */ -#ifndef _COMPOSITORORC_H_ -#define _COMPOSITORORC_H_ +#pragma once #include @@ -82,7 +81,14 @@ typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 void compositor_orc_splat_u32 (guint32 * ORC_RESTRICT d1, int p1, int n); void compositor_orc_memcpy_u32 (guint32 * ORC_RESTRICT d1, const guint32 * ORC_RESTRICT s1, int n); +void compositor_orc_memset_u16_2d (guint8 * ORC_RESTRICT d1, int d1_stride, int p1, int n, int m); void compositor_orc_blend_u8 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u10 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u12 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u16 (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u10_swap (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u12_swap (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); +void compositor_orc_blend_u16_swap (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); void compositor_orc_blend_argb (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); void compositor_orc_source_argb (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int s1_stride, int p1, int n, int m); void compositor_orc_blend_bgra (guint8 * ORC_RESTRICT d1, int d1_stride, const guint8 * ORC_RESTRICT s1, int 
s1_stride, int p1, int n, int m); @@ -96,5 +102,3 @@ void compositor_orc_overlay_bgra_addition (guint8 * ORC_RESTRICT d1, int d1_stri } #endif -#endif - diff --git a/subprojects/gst-plugins-base/gst/compositor/compositororc.orc b/subprojects/gst-plugins-base/gst/compositor/compositororc.orc index b91d1acdc5..572f8ae525 100644 --- a/subprojects/gst-plugins-base/gst/compositor/compositororc.orc +++ b/subprojects/gst-plugins-base/gst/compositor/compositororc.orc @@ -10,6 +10,13 @@ copyl d1, p1 copyl d1, s1 +.function compositor_orc_memset_u16_2d +.flags 2d +.dest 2 d1 guint8 +.param 2 p1 + +storew d1, p1 + .function compositor_orc_blend_u8 .flags 2d .dest 1 d1 guint8 @@ -28,6 +35,125 @@ addw t2, t1, t2 shruw t2, t2, c1 convsuswb d1, t2 +.function compositor_orc_blend_u10 +.flags 2d +.dest 2 d1 guint8 +.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.const 1 c1 10 + +convuwl t1, d1 +convuwl t2, s1 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw d1, t2 + +.function compositor_orc_blend_u12 +.flags 2d +.dest 2 d1 guint8 +.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.const 1 c1 12 + +convuwl t1, d1 +convuwl t2, s1 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw d1, t2 + +.function compositor_orc_blend_u16 +.flags 2d +.dest 2 d1 guint8 +.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.const 1 c1 16 + +convuwl t1, d1 +convuwl t2, s1 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw d1, t2 + +.function compositor_orc_blend_u10_swap +.flags 2d +.dest 2 d1 guint8 +.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.temp 2 t3 +.const 1 c1 10 + +swapw t3 d1 +convuwl t1, t3 +swapw t3 s1 +convuwl t2, t3 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw t3, t2 +swapw d1 t3 + +.function compositor_orc_blend_u12_swap +.flags 2d +.dest 2 d1 guint8 
+.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.temp 2 t3 +.const 1 c1 12 + +swapw t3 d1 +convuwl t1, t3 +swapw t3 s1 +convuwl t2, t3 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw t3, t2 +swapw d1 t3 + +.function compositor_orc_blend_u16_swap +.flags 2d +.dest 2 d1 guint8 +.source 2 s1 guint8 +.param 2 p1 +.temp 4 t1 +.temp 4 t2 +.temp 2 t3 +.const 1 c1 16 + +swapw t3 d1 +convuwl t1, t3 +swapw t3 s1 +convuwl t2, t3 +subl t2, t2, t1 +mulll t2, t2, p1 +shll t1, t1, c1 +addl t2, t1, t2 +shrul t2, t2, c1 +convsuslw t3, t2 +swapw d1 t3 .function compositor_orc_blend_argb .flags 2d