diff --git a/ext/cog/Makefile.am b/ext/cog/Makefile.am index cb3f6349bc..c0a63af0e8 100644 --- a/ext/cog/Makefile.am +++ b/ext/cog/Makefile.am @@ -18,6 +18,7 @@ libgstcog_la_SOURCES = \ cog.h \ cogframe.c \ cogframe.h \ + cogtables.c \ cogutils.h \ cogvirtframe.c \ cogvirtframe.h \ @@ -32,6 +33,10 @@ libgstcog_la_SOURCES = \ gstcolorconvert.c \ gstlogoinsert.c +noinst_PROGRAMS = generate_tables +generate_tables_CFLAGS = $(GST_CFLAGS) +generate_tables_LDADD = $(GST_LIBS) + nodist_libgstcog_la_SOURCES = cogorc.c cogorc.h CLEANFILES = cogorc.c cogorc.h diff --git a/ext/cog/cog.orc b/ext/cog/cog.orc index 29b0c947ba..7aeea8eaed 100644 --- a/ext/cog/cog.orc +++ b/ext/cog/cog.orc @@ -318,6 +318,24 @@ mergebw t2, s2, s3 mergewl d1, t1, t2 +.function cogorc_combine2_u8 +.dest 1 d1 +.source 1 s1 +.source 1 s2 +.param 2 p1 +.param 2 p2 +.temp 2 t1 +.temp 2 t2 + +convubw t1, s1 +mullw t1, t1, p1 +convubw t2, s2 +mullw t2, t2, p2 +addw t1, t1, t2 +shruw t1, t1, 8 +convuuswb d1, t1 + + .function cogorc_combine4_u8 .dest 1 d1 .source 1 s1 diff --git a/ext/cog/cogvirtframe.c b/ext/cog/cogvirtframe.c index 3a9d0c5b19..284e42c7d6 100644 --- a/ext/cog/cogvirtframe.c +++ b/ext/cog/cogvirtframe.c @@ -14,6 +14,7 @@ #include "cogorc.h" +extern gint8 cog_resample_table_4tap[256][4]; CogFrame * cog_frame_new_virtual (CogMemoryDomain * domain, CogFrameFormat format, @@ -499,17 +500,54 @@ cog_virt_frame_new_vert_downsample (CogFrame * vf, int n_taps) } void -get_taps (double *taps, double x) +cog_virt_frame_render_resample_vert_1tap (CogFrame * frame, void *_dest, + int component, int i) { - taps[3] = x * x * (x - 1); - taps[2] = x * (-x * x + x + 1); - x = 1 - x; - taps[1] = x * (-x * x + x + 1); - taps[0] = x * x * (x - 1); + uint8_t *dest = _dest; + uint8_t *src1; + int n_src; + double *scale = (double *) frame->virt_priv; + double x; + int src_i; + + x = (*scale) * i; + src_i = floor (x); + + n_src = frame->virt_frame1->components[component].height; + src1 = cog_virt_frame_get_line (frame->virt_frame1, component, + CLAMP (src_i + 0, 0, n_src - 1)); + + orc_memcpy (dest, src1, frame->components[component].width); } void -cog_virt_frame_render_resample_vert (CogFrame * frame, void *_dest, +cog_virt_frame_render_resample_vert_2tap (CogFrame * frame, void *_dest, + int component, int i) +{ + uint8_t *dest = _dest; + uint8_t *src1; + uint8_t *src2; + int n_src; + double *scale = (double *) frame->virt_priv; + double x; + int src_i; + + x = (*scale) * i; + src_i = floor (x); + x -= floor (x); + + n_src = frame->virt_frame1->components[component].height; + src1 = cog_virt_frame_get_line (frame->virt_frame1, component, + CLAMP (src_i + 0, 0, n_src - 1)); + src2 = cog_virt_frame_get_line (frame->virt_frame1, component, + CLAMP (src_i + 1, 0, n_src - 1)); + + cogorc_combine2_u8 (dest, src1, src2, + rint (256 * (1 - x)), rint (256 * x), frame->components[component].width); +} + +void +cog_virt_frame_render_resample_vert_4tap (CogFrame * frame, void *_dest, int component, int i) { uint8_t *dest = _dest; @@ -518,14 +556,14 @@ cog_virt_frame_render_resample_vert (CogFrame * frame, void *_dest, uint8_t *src3; uint8_t *src4; int n_src; - double taps[4]; double *scale = (double *) frame->virt_priv; double x; int src_i; + int y; x = (*scale) * i; src_i = floor (x); - get_taps (taps, x - floor (x)); + y = 256 * (x - floor (x)); n_src = frame->virt_frame1->components[component].height; src1 = cog_virt_frame_get_line (frame->virt_frame1, component, @@ -538,9 +576,10 @@ cog_virt_frame_render_resample_vert (CogFrame * frame, void *_dest, CLAMP (src_i + 2, 0, n_src - 1)); cogorc_combine4_u8 (dest, src1, src2, src3, src4, - rint (taps[0] * 64.0), rint (taps[1] * 64.0), - rint (taps[2] * 64.0), rint (taps[3] * 64.0), - frame->components[component].width); + cog_resample_table_4tap[y][0], + cog_resample_table_4tap[y][1], + cog_resample_table_4tap[y][2], + cog_resample_table_4tap[y][3], frame->components[component].width); } CogFrame * @@ -548,10 +587,17 @@ cog_virt_frame_new_vert_resample (CogFrame * vf, int height) { CogFrame *virt_frame; double *scale; + int taps = 4; virt_frame = cog_frame_new_virtual (NULL, vf->format, vf->width, height); virt_frame->virt_frame1 = vf; - virt_frame->render_line = cog_virt_frame_render_resample_vert; + if (taps == 1) { + virt_frame->render_line = cog_virt_frame_render_resample_vert_1tap; + } else if (taps == 2) { + virt_frame->render_line = cog_virt_frame_render_resample_vert_2tap; + } else { + virt_frame->render_line = cog_virt_frame_render_resample_vert_4tap; + } scale = malloc (sizeof (double)); virt_frame->virt_priv = scale; @@ -562,34 +608,91 @@ cog_virt_frame_new_vert_resample (CogFrame * vf, int height) } void -cog_virt_frame_render_resample_horiz (CogFrame * frame, void *_dest, +cog_virt_frame_render_resample_horiz_1tap (CogFrame * frame, void *_dest, int component, int i) { uint8_t *dest = _dest; uint8_t *src; int j; int n_src; - double taps[4]; - double *scale = (double *) frame->virt_priv; - int src_i; + int scale = frame->param1; + int acc; n_src = frame->virt_frame1->components[component].width; src = cog_virt_frame_get_line (frame->virt_frame1, component, i); + acc = 0; for (j = 0; j < frame->components[component].width; j++) { - double x; - double y = 0; + dest[j] = src[(acc >> 8)]; + acc += scale; + } +} - x = (*scale) * j; - src_i = floor (x); - get_taps (taps, x - floor (x)); +void +cog_virt_frame_render_resample_horiz_4tap (CogFrame * frame, void *_dest, + int component, int i) +{ + uint8_t *dest = _dest; + uint8_t *src; + int j; + int n_src; + int scale = frame->param1; + int acc; - y = 0; - y += taps[0] * src[CLAMP (src_i - 1, 0, n_src - 1)]; - y += taps[1] * src[CLAMP (src_i + 0, 0, n_src - 1)]; - y += taps[2] * src[CLAMP (src_i + 1, 0, n_src - 1)]; - y += taps[3] * src[CLAMP (src_i + 2, 0, n_src - 1)]; - dest[j] = CLAMP (rint (y), 0, 255); + n_src = frame->virt_frame1->components[component].width; + src = cog_virt_frame_get_line (frame->virt_frame1, component, i); + + acc = 0; + for (j = 0; j < 1; j++) { + int src_i; + int y; + int z; + + src_i = acc >> 8; + y = acc & 255; + + z = 32; + z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][1] * src[CLAMP (src_i + 0, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][2] * src[CLAMP (src_i + 1, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][3] * src[CLAMP (src_i + 2, 0, n_src - 1)]; + z >>= 6; + dest[j] = CLAMP (z, 0, 255); + acc += scale; + } + for (; j < frame->components[component].width - 2; j++) { + int src_i; + int y; + int z; + + src_i = acc >> 8; + y = acc & 255; + + z = 32; + z += cog_resample_table_4tap[y][0] * src[src_i - 1]; + z += cog_resample_table_4tap[y][1] * src[src_i + 0]; + z += cog_resample_table_4tap[y][2] * src[src_i + 1]; + z += cog_resample_table_4tap[y][3] * src[src_i + 2]; + z >>= 6; + dest[j] = CLAMP (z, 0, 255); + acc += scale; + } + for (; j < frame->components[component].width; j++) { + int src_i; + int y; + int z; + + src_i = acc >> 8; + y = acc & 255; + + z = 32; + z += cog_resample_table_4tap[y][0] * src[CLAMP (src_i - 1, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][1] * src[CLAMP (src_i + 0, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][2] * src[CLAMP (src_i + 1, 0, n_src - 1)]; + z += cog_resample_table_4tap[y][3] * src[CLAMP (src_i + 2, 0, n_src - 1)]; + z >>= 6; + dest[j] = CLAMP (z, 0, 255); + acc += scale; } } @@ -598,15 +701,21 @@ cog_virt_frame_new_horiz_resample (CogFrame * vf, int width) { CogFrame *virt_frame; double *scale; + int taps = 4; virt_frame = cog_frame_new_virtual (NULL, vf->format, width, vf->height); virt_frame->virt_frame1 = vf; - virt_frame->render_line = cog_virt_frame_render_resample_horiz; + if (taps == 1) { + virt_frame->render_line = cog_virt_frame_render_resample_horiz_1tap; + } else { + virt_frame->render_line = cog_virt_frame_render_resample_horiz_4tap; + } scale = malloc (sizeof (double)); virt_frame->virt_priv = scale; *scale = (double) vf->width / width; + virt_frame->param1 = 256.0 * (*scale); return virt_frame; }