bayer2rgb: Support video/x-bayer 10/12/14/16 bit depths

Add support for 10/12/14/16 bit depths. This consists of multiple parts.
First is the parsing of caps, which extracts the bit depth and endianness
from the video/x-bayer format string.

Second, gst_bayer2rgb_split_and_upsample_horiz() is split into two similar
functions, one for 8bit bayer handling and another for 16bit bayer handling.
The content is basically identical, except one uses 8bpp and the other 16bpp
inputs and outputs, and they each use different ORC code to match. The 16bpp
variant also handles endian swapping. There is now a wrapper called
gst_bayer2rgb_split_and_upsample_horiz() which selects the correct function
based on bpp from the parser.

Third, gst_bayer2rgb_process() is extended to handle both 8bit and 16bit
bayer data. Again there are matching ORC functions to handle the 16bit
data. This time, however, the 16bit data needs special handling. ORC is
not able to emit opcodes for 'x2 mergelq', so the trick here is to store
the BG and GR longs into a separate 'dtmp' temporary buffer, and then run
one more ORC post-processing step which rescales the less-than-16bpp
samples to the output range and reorders them into the destination frame
using 'mergelq'.

Example usage (16-bit little-endian bayer input):
```
$ gst-launch-1.0 videotestsrc ! \
    video/x-bayer,width=512,height=512,format=bggr16le ! \
    bayer2rgb ! \
    video/x-raw,format=RGBA64_LE ! \
    videoconvert ! \
    autovideosink
```

Example usage (12-bit little-endian bayer produced by rgb2bayer):
```
$ gst-launch-1.0 videotestsrc ! \
    video/x-raw,width=512,height=512,format=ARGB ! \
    rgb2bayer ! \
    video/x-bayer,format=bggr12le ! \
    bayer2rgb ! \
    video/x-raw,format=RGBA64_LE ! \
    videoconvert ! \
    autovideosink
```

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/4686>
This commit is contained in:
Marek Vasut 2023-05-20 19:33:27 +02:00
parent 9d1a750117
commit 4c92d4096e
5 changed files with 4548 additions and 72 deletions

View file

@ -4012,12 +4012,12 @@
"long-name": "Bayer to RGB decoder for cameras",
"pad-templates": {
"sink": {
"caps": "video/x-bayer:\n format: { bggr, grbg, gbrg, rggb }\n width: [ 1, 2147483647 ]\n height: [ 1, 2147483647 ]\n framerate: [ 0/1, 2147483647/1 ]\n",
"caps": "video/x-bayer:\n format: { bggr, rggb, grbg, gbrg, bggr10le, rggb10le, grbg10le, gbrg10le, bggr10be, rggb10be, grbg10be, gbrg10be, bggr12le, rggb12le, grbg12le, gbrg12le, bggr12be, rggb12be, grbg12be, gbrg12be, bggr14le, rggb14le, grbg14le, gbrg14le, bggr14be, rggb14be, grbg14be, gbrg14be, bggr16le, rggb16le, grbg16le, gbrg16le, bggr16be, rggb16be, grbg16be, gbrg16be }\n width: [ 1, 2147483647 ]\n height: [ 1, 2147483647 ]\n framerate: [ 0/1, 2147483647/1 ]\n",
"direction": "sink",
"presence": "always"
},
"src": {
"caps": "video/x-raw:\n format: { RGBx, xRGB, BGRx, xBGR, RGBA, ARGB, BGRA, ABGR }\n width: [ 1, 2147483647 ]\n height: [ 1, 2147483647 ]\n framerate: [ 0/1, 2147483647/1 ]\n",
"caps": "video/x-raw:\n format: { RGBx, xRGB, BGRx, xBGR, RGBA, ARGB, BGRA, ABGR, RGBA64_LE, ARGB64_LE, BGRA64_LE, ABGR64_LE, RGBA64_BE, ARGB64_BE, BGRA64_BE, ABGR64_BE }\n width: [ 1, 2147483647 ]\n height: [ 1, 2147483647 ]\n framerate: [ 0/1, 2147483647/1 ]\n",
"direction": "src",
"presence": "always"
}

View file

@ -89,6 +89,8 @@
#include "gstbayerelements.h"
#include "gstbayerorc.h"
#define DIV_ROUND_UP(s,v) (((s) + ((v)-1)) / (v))
#define GST_CAT_DEFAULT gst_bayer2rgb_debug
GST_DEBUG_CATEGORY_STATIC (GST_CAT_DEFAULT);
@ -124,6 +126,8 @@ struct _GstBayer2RGB
int g_off; /* offset for green */
int b_off; /* offset for blue */
int format;
int bpp; /* bits per pixel, 8/10/12/14/16 */
int bigendian;
};
struct _GstBayer2RGBClass
@ -131,10 +135,32 @@ struct _GstBayer2RGBClass
GstBaseTransformClass parent;
};
#define SRC_CAPS \
GST_VIDEO_CAPS_MAKE ("{ RGBx, xRGB, BGRx, xBGR, RGBA, ARGB, BGRA, ABGR }")
#define BAYER_CAPS_GEN(mask, bits, endian) \
" "#mask#bits#endian
#define SINK_CAPS "video/x-bayer,format=(string){bggr,grbg,gbrg,rggb}," \
#define BAYER_CAPS_ORD(bits, endian) \
BAYER_CAPS_GEN(bggr, bits, endian)"," \
BAYER_CAPS_GEN(rggb, bits, endian)"," \
BAYER_CAPS_GEN(grbg, bits, endian)"," \
BAYER_CAPS_GEN(gbrg, bits, endian)
#define BAYER_CAPS_BITS(bits) \
BAYER_CAPS_ORD(bits, le)"," \
BAYER_CAPS_ORD(bits, be)
#define BAYER_CAPS_ALL \
BAYER_CAPS_ORD(,)"," \
BAYER_CAPS_BITS(10)"," \
BAYER_CAPS_BITS(12)"," \
BAYER_CAPS_BITS(14)"," \
BAYER_CAPS_BITS(16)
#define SRC_CAPS \
GST_VIDEO_CAPS_MAKE ("{ RGBx, xRGB, BGRx, xBGR, RGBA, ARGB, BGRA, ABGR, " \
"RGBA64_LE, ARGB64_LE, BGRA64_LE, ABGR64_LE, " \
"RGBA64_BE, ARGB64_BE, BGRA64_BE, ABGR64_BE }")
#define SINK_CAPS "video/x-bayer,format=(string){" BAYER_CAPS_ALL " }, "\
"width=(int)[1,MAX],height=(int)[1,MAX],framerate=(fraction)[0/1,MAX]"
enum
@ -252,23 +278,47 @@ gst_bayer2rgb_set_caps (GstBaseTransform * base, GstCaps * incaps,
gst_structure_get_int (structure, "height", &bayer2rgb->height);
format = gst_structure_get_string (structure, "format");
if (g_str_equal (format, "bggr")) {
if (g_str_has_prefix (format, "bggr")) {
bayer2rgb->format = GST_BAYER_2_RGB_FORMAT_BGGR;
} else if (g_str_equal (format, "gbrg")) {
} else if (g_str_has_prefix (format, "gbrg")) {
bayer2rgb->format = GST_BAYER_2_RGB_FORMAT_GBRG;
} else if (g_str_equal (format, "grbg")) {
} else if (g_str_has_prefix (format, "grbg")) {
bayer2rgb->format = GST_BAYER_2_RGB_FORMAT_GRBG;
} else if (g_str_equal (format, "rggb")) {
} else if (g_str_has_prefix (format, "rggb")) {
bayer2rgb->format = GST_BAYER_2_RGB_FORMAT_RGGB;
} else {
return FALSE;
}
if (strlen (format) == 4) { /* 8bit bayer */
bayer2rgb->bpp = 8;
} else if (strlen (format) == 8) { /* 10/12/14/16 le/be bayer */
bayer2rgb->bpp = (gint) g_ascii_strtoull (format + 4, NULL, 10);
if (bayer2rgb->bpp & 1) /* odd bayer2rgb->bpp bayer formats not supported */
return FALSE;
if (bayer2rgb->bpp < 10 || bayer2rgb->bpp > 16) /* bayer 10,12,14,16 only */
return FALSE;
if (g_str_has_suffix (format, "le"))
bayer2rgb->bigendian = 0;
else if (g_str_has_suffix (format, "be"))
bayer2rgb->bigendian = 1;
else
return FALSE;
} else
return FALSE;
/* To cater for different RGB formats, we need to set params for later */
gst_video_info_from_caps (&info, outcaps);
bayer2rgb->r_off = GST_VIDEO_INFO_COMP_OFFSET (&info, 0);
bayer2rgb->g_off = GST_VIDEO_INFO_COMP_OFFSET (&info, 1);
bayer2rgb->b_off = GST_VIDEO_INFO_COMP_OFFSET (&info, 2);
bayer2rgb->r_off =
GST_VIDEO_INFO_COMP_OFFSET (&info,
0) / DIV_ROUND_UP (GST_VIDEO_INFO_COMP_DEPTH (&info, 0), 8);
bayer2rgb->g_off =
GST_VIDEO_INFO_COMP_OFFSET (&info,
1) / DIV_ROUND_UP (GST_VIDEO_INFO_COMP_DEPTH (&info, 1), 8);
bayer2rgb->b_off =
GST_VIDEO_INFO_COMP_OFFSET (&info,
2) / DIV_ROUND_UP (GST_VIDEO_INFO_COMP_DEPTH (&info, 2), 8);
bayer2rgb->info = info;
@ -283,6 +333,8 @@ gst_bayer2rgb_reset (GstBayer2RGB * filter)
filter->r_off = 0;
filter->g_off = 0;
filter->b_off = 0;
filter->bpp = 8;
filter->bigendian = 0;
gst_video_info_init (&filter->info);
}
@ -326,22 +378,25 @@ gst_bayer2rgb_get_unit_size (GstBaseTransform * base, GstCaps * caps,
gsize * size)
{
GstStructure *structure;
GstBayer2RGB *bayer2rgb;
int width;
int height;
const char *name;
structure = gst_caps_get_structure (caps, 0);
bayer2rgb = GST_BAYER2RGB (base);
if (gst_structure_get_int (structure, "width", &width) &&
gst_structure_get_int (structure, "height", &height)) {
name = gst_structure_get_name (structure);
/* Our name must be either video/x-bayer video/x-raw */
if (strcmp (name, "video/x-raw")) {
*size = GST_ROUND_UP_4 (width) * height;
*size =
GST_ROUND_UP_4 (width) * height * DIV_ROUND_UP (bayer2rgb->bpp, 8);
return TRUE;
} else {
/* For output, calculate according to format (always 32 bits) */
*size = width * height * 4;
/* For output, calculate according to format */
*size = width * height * DIV_ROUND_UP (bayer2rgb->bpp, 8);
return TRUE;
}
@ -352,7 +407,7 @@ gst_bayer2rgb_get_unit_size (GstBaseTransform * base, GstCaps * caps,
}
static void
gst_bayer2rgb_split_and_upsample_horiz (guint8 * dest0, guint8 * dest1,
gst_bayer2rgb8_split_and_upsample_horiz (guint8 * dest0, guint8 * dest1,
const guint8 * src, GstBayer2RGB * bayer2rgb)
{
int n = bayer2rgb->width;
@ -433,20 +488,99 @@ gst_bayer2rgb_split_and_upsample_horiz (guint8 * dest0, guint8 * dest1,
}
}
/*
 * Convert one 16-bit bayer sample from the stream byte order to the host
 * byte order.
 *
 * val:  raw sample exactly as loaded from the input buffer
 * swap: non-zero when the stream is big-endian, zero when it is
 *       little-endian
 *
 * Returns the sample in host byte order.
 *
 * The little-endian case used to return val untouched, which is only
 * correct on little-endian hosts. GUINT16_FROM_LE() is a no-op on those
 * hosts, so behaviour there is unchanged.
 *
 * NOTE(review): the bayer16_orc_horiz_upsample_{le,be} kernels swap
 * unconditionally for big-endian input and never for little-endian input,
 * i.e. they still assume a little-endian host.
 */
static guint16
gswab16 (guint16 val, guint8 swap)
{
  return swap ? GUINT16_FROM_BE (val) : GUINT16_FROM_LE (val);
}
/*
 * Split one line of 10..16 bit bayer samples into two horizontally
 * upsampled temporary lines.
 *
 * dest0 receives the samples found at even columns of src with the odd
 * columns filled in, dest1 receives the samples found at odd columns with
 * the even columns filled in. Samples are converted with gswab16() (or by
 * the byte-swapping ORC kernel) when bayer2rgb->bigendian is set.
 *
 * The first two and the last two columns are produced in C; everything in
 * between is produced by bayer16_orc_horiz_upsample_{le,be}.
 *
 * NOTE(review): src[2] is read unconditionally, so this presumably relies
 * on the line being at least three samples wide -- confirm against callers.
 */
static void
gst_bayer2rgb16_split_and_upsample_horiz (guint16 * dest0, guint16 * dest1,
    const guint16 * src, GstBayer2RGB * bayer2rgb)
{
  const int width = bayer2rgb->width;
  const int big_endian = bayer2rgb->bigendian;
  const guint16 first = gswab16 (src[0], big_endian);
  const guint16 second = gswab16 (src[1], big_endian);
  void (*upsample) (guint16 *, guint16 *, const guint16 *, int) =
      big_endian ? bayer16_orc_horiz_upsample_be :
      bayer16_orc_horiz_upsample_le;
  int col;

  /* Left edge: column 0 is copied, column 1 of dest0 is the rounded mean
   * of its two horizontal neighbours, dest1 replicates the only odd sample
   * available so far. */
  dest0[0] = first;
  dest1[0] = second;
  dest0[1] = (first + gswab16 (src[2], big_endian) + 1) >> 1;
  dest1[1] = second;

  /* Middle part, two output columns per iteration. */
  upsample (dest0 + 2, dest1 + 2, src + 1, (width - 4) >> 1);

  /* Right edge: no neighbour to average with, so each of the last two
   * columns takes the sample at its own position for the matching line and
   * the sample just left of it for the other line. */
  for (col = width - 2; col < width; col++) {
    const guint16 here = gswab16 (src[col], big_endian);
    const guint16 left = gswab16 (src[col - 1], big_endian);
    const int odd = (col & 1) != 0;

    dest0[col] = odd ? left : here;
    dest1[col] = odd ? here : left;
  }
}
/*
 * Dispatch the horizontal split/upsample step on the input bit depth.
 *
 * 8 bits per sample goes to the byte-based implementation; every other
 * depth goes to the 16-bit implementation, with the byte pointers
 * reinterpreted as pointers to 16-bit samples.
 */
static void
gst_bayer2rgb_split_and_upsample_horiz (guint8 * dest0, guint8 * dest1,
    const guint8 * src, GstBayer2RGB * bayer2rgb)
{
  if (bayer2rgb->bpp != 8) {
    guint16 *out0 = (guint16 *) dest0;
    guint16 *out1 = (guint16 *) dest1;
    const guint16 *in = (const guint16 *) src;

    gst_bayer2rgb16_split_and_upsample_horiz (out0, out1, in, bayer2rgb);
    return;
  }

  gst_bayer2rgb8_split_and_upsample_horiz (dest0, dest1, src, bayer2rgb);
}
/* Signature shared by the 8-bit bayer_orc_merge_* kernels: one destination
 * line, six source lines and an iteration count. */
typedef void (*process_func) (guint8 * d0, const guint8 * s0, const guint8 * s1,
const guint8 * s2, const guint8 * s3, const guint8 * s4, const guint8 * s5,
int n);
/* Signature shared by the bayer16_orc_merge_* kernels: same six source
 * lines, but the result is written to two separate destination buffers. */
typedef void (*process_func16) (guint16 * d0, guint16 * d1, const guint8 * s0,
const guint8 * s1, const guint8 * s2, const guint8 * s3, const guint8 * s4,
const guint8 * s5, int n);
/* Address of line (x modulo 8) inside buffer t, where every line occupies
 * width * bytes-per-sample; bytes-per-sample is bpp rounded up to whole
 * bytes. The offset is added to t as-is, so t is expected to be a byte
 * pointer. */
#define LINE(t, x, b) ((t) + (((x) & 7) * ((b)->width * DIV_ROUND_UP((b)->bpp, 8))))
static void
gst_bayer2rgb_process (GstBayer2RGB * bayer2rgb, uint8_t * dest,
int dest_stride, uint8_t * src)
{
const int src_stride = GST_ROUND_UP_4 (bayer2rgb->width);
const int src_stride =
GST_ROUND_UP_4 (bayer2rgb->width) * DIV_ROUND_UP (bayer2rgb->bpp, 8);
const int bayersrc16 = bayer2rgb->bpp > 8;
int j;
guint8 *tmp;
guint32 *dtmp;
process_func merge[2] = { NULL, NULL };
process_func16 merge16[2] = { NULL, NULL };
int r_off, g_off, b_off;
/*
* Handle emission of either RGBA64 or RGBA (32bpp) . The default is
* emission of RGBA64 in case the input bayer data are >8 bit, since
* there is no loss of precision that way.
*
* The emission of RGBA (32bpp) as done here is done by shifting the
* debayered data by the bpp-8 bits right, to fit into the 8 bits per
* channel output buffer. This retains precision during calculation,
* and the calculation is a bit more expensive in terms of CPU cycles
* and memory. An alternative approach would be to downgrade the input
* bayer data in gst_bayer2rgb16_split_and_upsample_horiz() already,
* and then perform this second part of debayering as if those input
* data were 8bpp bayer data. This would increase speed, but decrease
* precision.
*/
const int bayerdst16 = (dest_stride / bayer2rgb->width / 4) == 2;
/* We exploit some symmetry in the functions here. The base functions
* are all named for the BGGR arrangement. For RGGB, we swap the
* red offset and blue offset in the output. For GRBG, we swap the
@ -463,25 +597,38 @@ gst_bayer2rgb_process (GstBayer2RGB * bayer2rgb, uint8_t * dest,
if (r_off == 2 && g_off == 1 && b_off == 0) {
merge[0] = bayer_orc_merge_bg_bgra;
merge[1] = bayer_orc_merge_gr_bgra;
merge16[0] = bayer16_orc_merge_bg_bgra;
merge16[1] = bayer16_orc_merge_gr_bgra;
} else if (r_off == 3 && g_off == 2 && b_off == 1) {
merge[0] = bayer_orc_merge_bg_abgr;
merge[1] = bayer_orc_merge_gr_abgr;
merge16[0] = bayer16_orc_merge_bg_abgr;
merge16[1] = bayer16_orc_merge_gr_abgr;
} else if (r_off == 1 && g_off == 2 && b_off == 3) {
merge[0] = bayer_orc_merge_bg_argb;
merge[1] = bayer_orc_merge_gr_argb;
merge16[0] = bayer16_orc_merge_bg_argb;
merge16[1] = bayer16_orc_merge_gr_argb;
} else if (r_off == 0 && g_off == 1 && b_off == 2) {
merge[0] = bayer_orc_merge_bg_rgba;
merge[1] = bayer_orc_merge_gr_rgba;
merge16[0] = bayer16_orc_merge_bg_rgba;
merge16[1] = bayer16_orc_merge_gr_rgba;
}
if (bayer2rgb->format == GST_BAYER_2_RGB_FORMAT_GRBG ||
bayer2rgb->format == GST_BAYER_2_RGB_FORMAT_GBRG) {
process_func tmp = merge[0];
merge[0] = merge[1];
merge[1] = tmp;
process_func16 tmp16 = merge16[0];
merge16[0] = merge16[1];
merge16[1] = tmp16;
}
tmp = g_malloc (2 * 4 * bayer2rgb->width);
#define LINE(t, x, b) ((t) + (((x) & 7) * ((b)->width)))
tmp = g_malloc (DIV_ROUND_UP (bayer2rgb->bpp, 8) * 2 * 4 * bayer2rgb->width);
if (bayersrc16 || bayerdst16)
dtmp = g_malloc (sizeof (*dtmp) * 2 * bayer2rgb->width);
/* Pre-process source line 1 into bottom two lines 6 and 7 as PREVIOUS line */
gst_bayer2rgb_split_and_upsample_horiz ( /* src line 1 */
@ -571,16 +718,40 @@ gst_bayer2rgb_process (GstBayer2RGB * bayer2rgb, uint8_t * dest,
* inputs from lines 0,1,2,3,4,5 i.e. b0,g0,g1,r1,b2,g2 and the merge
* function would be bayer_orc_merge_gr_* .
*/
merge[j & 1] (dest + j * dest_stride, /* output line j */
LINE (tmp, j * 2 - 2, bayer2rgb), /* PREVIOUS: even: BG g0 , odd: GR b0 */
LINE (tmp, j * 2 - 1, bayer2rgb), /* PREVIOUS: even: BG r0 , odd: GR g0 */
LINE (tmp, j * 2 + 0, bayer2rgb), /* CURRENT: even: BG b1 , odd: GR g1 */
LINE (tmp, j * 2 + 1, bayer2rgb), /* CURRENT: even: BG g1 , odd: GR r1 */
LINE (tmp, j * 2 + 2, bayer2rgb), /* NEXT: even: BG g2 , odd: GR b2 */
LINE (tmp, j * 2 + 3, bayer2rgb), /* NEXT: even: BG r2 , odd: GR g2 */
bayer2rgb->width >> 1);
if (bayersrc16) {
merge16[j & 1] ((guint16 *) dtmp, /* temporary buffer BG */
(guint16 *) (dtmp + bayer2rgb->width), /* temporary buffer GR */
LINE (tmp, j * 2 - 2, bayer2rgb), /* PREVIOUS: even: BG g0 , odd: GR b0 */
LINE (tmp, j * 2 - 1, bayer2rgb), /* PREVIOUS: even: BG r0 , odd: GR g0 */
LINE (tmp, j * 2 + 0, bayer2rgb), /* CURRENT: even: BG b1 , odd: GR g1 */
LINE (tmp, j * 2 + 1, bayer2rgb), /* CURRENT: even: BG g1 , odd: GR r1 */
LINE (tmp, j * 2 + 2, bayer2rgb), /* NEXT: even: BG g2 , odd: GR b2 */
LINE (tmp, j * 2 + 3, bayer2rgb), /* NEXT: even: BG r2 , odd: GR g2 */
bayer2rgb->width >> 1);
if (bayerdst16)
bayer16to16_orc_reorder (dest + j * dest_stride,
dtmp, dtmp + bayer2rgb->width, bayer2rgb->bpp, bayer2rgb->width);
else
bayer16to8_orc_reorder (dest + j * dest_stride,
dtmp, dtmp + bayer2rgb->width, bayer2rgb->bpp - 8,
bayer2rgb->width);
} else {
merge[j & 1] (bayerdst16 ? (guint8 *) dtmp : (dest + j * dest_stride), /* output line j */
LINE (tmp, j * 2 - 2, bayer2rgb), /* PREVIOUS: even: BG g0 , odd: GR b0 */
LINE (tmp, j * 2 - 1, bayer2rgb), /* PREVIOUS: even: BG r0 , odd: GR g0 */
LINE (tmp, j * 2 + 0, bayer2rgb), /* CURRENT: even: BG b1 , odd: GR g1 */
LINE (tmp, j * 2 + 1, bayer2rgb), /* CURRENT: even: BG g1 , odd: GR r1 */
LINE (tmp, j * 2 + 2, bayer2rgb), /* NEXT: even: BG g2 , odd: GR b2 */
LINE (tmp, j * 2 + 3, bayer2rgb), /* NEXT: even: BG r2 , odd: GR g2 */
bayer2rgb->width >> 1);
if (bayerdst16)
bayer8to16_orc_reorder (dest + j * dest_stride, dtmp, bayer2rgb->width);
}
}
if (bayersrc16)
g_free (dtmp);
g_free (tmp);
}

File diff suppressed because it is too large Load diff

View file

@ -1,13 +1,13 @@
/* autogenerated from gstbayerorc.orc */
#ifndef _GSTBAYERORC_H_
#define _GSTBAYERORC_H_
#pragma once
#include <glib.h>
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
@ -16,47 +16,64 @@ extern "C" {
#define _ORC_INTEGER_TYPEDEFS_
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
#include <stdint.h>
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
typedef int8_t orc_int8;
typedef int16_t orc_int16;
typedef int32_t orc_int32;
typedef int64_t orc_int64;
typedef uint8_t orc_uint8;
typedef uint16_t orc_uint16;
typedef uint32_t orc_uint32;
typedef uint64_t orc_uint64;
#define ORC_UINT64_C(x) UINT64_C(x)
#elif defined(_MSC_VER)
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
typedef signed __int8 orc_int8;
typedef signed __int16 orc_int16;
typedef signed __int32 orc_int32;
typedef signed __int64 orc_int64;
typedef unsigned __int8 orc_uint8;
typedef unsigned __int16 orc_uint16;
typedef unsigned __int32 orc_uint32;
typedef unsigned __int64 orc_uint64;
#define ORC_UINT64_C(x) (x##Ui64)
#define inline __inline
#else
#include <limits.h>
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
typedef signed char orc_int8;
typedef short orc_int16;
typedef int orc_int32;
typedef unsigned char orc_uint8;
typedef unsigned short orc_uint16;
typedef unsigned int orc_uint32;
#if INT_MAX == LONG_MAX
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
typedef long long orc_int64;
typedef unsigned long long orc_uint64;
#define ORC_UINT64_C(x) (x##ULL)
#else
typedef long orc_int64;
typedef unsigned long orc_uint64;
typedef long orc_int64;
typedef unsigned long orc_uint64;
#define ORC_UINT64_C(x) (x##UL)
#endif
#endif
typedef union { orc_int16 i; orc_int8 x2[2]; } orc_union16;
typedef union { orc_int32 i; float f; orc_int16 x2[2]; orc_int8 x4[4]; } orc_union32;
typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16 x4[4]; } orc_union64;
typedef union
{
orc_int16 i;
orc_int8 x2[2];
} orc_union16;
typedef union
{
orc_int32 i;
float f;
orc_int16 x2[2];
orc_int8 x4[4];
} orc_union32;
typedef union
{
orc_int64 i;
double f;
orc_int32 x2[2];
float x2f[2];
orc_int16 x4[4];
} orc_union64;
#endif
#ifndef ORC_RESTRICT
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
@ -80,20 +97,95 @@ typedef union { orc_int64 i; double f; orc_int32 x2[2]; float x2f[2]; orc_int16
#endif
#endif
void bayer_orc_horiz_upsample_unaligned (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, int n);
void bayer_orc_horiz_upsample (guint8 * ORC_RESTRICT d1, guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, int n);
void bayer_orc_merge_bg_bgra (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_bgra (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_abgr (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_abgr (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_rgba (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_rgba (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_argb (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_argb (guint8 * ORC_RESTRICT d1, const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_horiz_upsample_unaligned (guint8 * ORC_RESTRICT d1,
guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, int n);
void bayer_orc_horiz_upsample (guint8 * ORC_RESTRICT d1,
guint8 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1, int n);
void bayer_orc_merge_bg_bgra (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_bgra (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_abgr (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_abgr (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_rgba (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_rgba (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_bg_argb (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer_orc_merge_gr_argb (guint8 * ORC_RESTRICT d1,
const guint8 * ORC_RESTRICT s1, const guint8 * ORC_RESTRICT s2,
const guint8 * ORC_RESTRICT s3, const guint8 * ORC_RESTRICT s4,
const guint8 * ORC_RESTRICT s5, const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_horiz_upsample_le (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, int n);
void bayer16_orc_horiz_upsample_be (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint16 * ORC_RESTRICT s1, int n);
void bayer16_orc_merge_bg_bgra (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_gr_bgra (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_bg_abgr (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_gr_abgr (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_bg_rgba (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_gr_rgba (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_bg_argb (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16_orc_merge_gr_argb (guint16 * ORC_RESTRICT d1,
guint16 * ORC_RESTRICT d2, const guint8 * ORC_RESTRICT s1,
const guint8 * ORC_RESTRICT s2, const guint8 * ORC_RESTRICT s3,
const guint8 * ORC_RESTRICT s4, const guint8 * ORC_RESTRICT s5,
const guint8 * ORC_RESTRICT s6, int n);
void bayer16to16_orc_reorder (guint8 * ORC_RESTRICT d1,
const guint32 * ORC_RESTRICT s1, const guint32 * ORC_RESTRICT s2, int p1,
int n);
void bayer16to8_orc_reorder (guint8 * ORC_RESTRICT d1,
const guint32 * ORC_RESTRICT s1, const guint32 * ORC_RESTRICT s2, int p1,
int n);
void bayer8to16_orc_reorder (guint8 * ORC_RESTRICT d1,
const guint32 * ORC_RESTRICT s1, int n);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -248,3 +248,283 @@ x2 mergebw gb, g, b
x2 mergewl d, ar, gb
# 10..16 bit bayer handling
# Horizontal upsample of a line of 16-bit bayer samples, no byte swapping.
# Each iteration looks at the current 32-bit element of s (two 16-bit
# samples) and at the following element, and writes one 32-bit element
# (two samples) to each of d0 and d1.
.function bayer16_orc_horiz_upsample_le
.dest 4 d0 guint16
.dest 4 d1 guint16
.source 4 s guint16
.temp 4 t
.temp 2 b
.temp 2 c
.temp 2 d
.temp 2 e
# c and b are the two 16-bit halves of the current element
splitlw c, b, s
# t is the element after the current one, e and d are its halves
loadoffl t, s, 1
splitlw e, d, t
# d0 = { c, average (c, e) }
avguw e, c, e
mergewl d0, c, e
# d1 = { average (b, d), d }
avguw b, b, d
mergewl d1, b, d
# Same as bayer16_orc_horiz_upsample_le, except that every 16-bit sample is
# byte-swapped right after it is extracted and before it is averaged.
.function bayer16_orc_horiz_upsample_be
.dest 4 d0 guint16
.dest 4 d1 guint16
.source 4 s guint16
.temp 4 t
.temp 2 b
.temp 2 c
.temp 2 d
.temp 2 e
# c and b are the two 16-bit halves of the current element, byte-swapped
splitlw c, b, s
swapw b, b
swapw c, c
# t is the element after the current one, e and d are its swapped halves
loadoffl t, s, 1
splitlw e, d, t
swapw d, d
swapw e, e
# d0 = { c, average (c, e) }
avguw e, c, e
mergewl d0, c, e
# d1 = { average (b, d), d }
avguw b, b, d
mergewl d1, b, d
# 16-bit merge for a BG row, BGRA component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (B, G) pairs and d2 the (R, A) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_bg_bgra
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 g0 guint8
.source 4 r0 guint8
.source 4 b1 guint8
.source 4 g1 guint8
.source 4 g2 guint8
.source 4 r2 guint8
.temp 4 r
.temp 4 g
.temp 4 t
# red and green: average of the previous-line and next-line inputs
x2 avguw r, r0, r2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the low lane and the unmodified current-line
# green in the high lane
andl g, g, 65535
andl t, t, 4294901760
orl g, t, g
x2 mergewl d1, b1, g
x2 mergewl d2, r, 65535
# 16-bit merge for a GR row, BGRA component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (B, G) pairs and d2 the (R, A) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_gr_bgra
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 b0 guint8
.source 4 g0 guint8
.source 4 g1 guint8
.source 4 r1 guint8
.source 4 b2 guint8
.source 4 g2 guint8
.temp 4 b
.temp 4 g
.temp 4 t
# blue and green: average of the previous-line and next-line inputs
x2 avguw b, b0, b2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the high lane and the unmodified current-line
# green in the low lane (mirror image of the BG variant)
andl g, g, 4294901760
andl t, t, 65535
orl g, t, g
x2 mergewl d1, b, g
x2 mergewl d2, r1, 65535
# 16-bit merge for a BG row, ABGR component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (A, B) pairs and d2 the (G, R) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_bg_abgr
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 g0 guint8
.source 4 r0 guint8
.source 4 b1 guint8
.source 4 g1 guint8
.source 4 g2 guint8
.source 4 r2 guint8
.temp 4 r
.temp 4 g
.temp 4 t
# red and green: average of the previous-line and next-line inputs
x2 avguw r, r0, r2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the low lane and the unmodified current-line
# green in the high lane
andl g, g, 65535
andl t, t, 4294901760
orl g, t, g
x2 mergewl d1, 65535, b1
x2 mergewl d2, g, r
# 16-bit merge for a GR row, ABGR component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (A, B) pairs and d2 the (G, R) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_gr_abgr
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 b0 guint8
.source 4 g0 guint8
.source 4 g1 guint8
.source 4 r1 guint8
.source 4 b2 guint8
.source 4 g2 guint8
.temp 4 b
.temp 4 g
.temp 4 t
# blue and green: average of the previous-line and next-line inputs
x2 avguw b, b0, b2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the high lane and the unmodified current-line
# green in the low lane (mirror image of the BG variant)
andl g, g, 4294901760
andl t, t, 65535
orl g, t, g
x2 mergewl d1, 65535, b
x2 mergewl d2, g, r1
# 16-bit merge for a BG row, RGBA component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (R, G) pairs and d2 the (B, A) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_bg_rgba
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 g0 guint8
.source 4 r0 guint8
.source 4 b1 guint8
.source 4 g1 guint8
.source 4 g2 guint8
.source 4 r2 guint8
.temp 4 r
.temp 4 g
.temp 4 t
# red and green: average of the previous-line and next-line inputs
x2 avguw r, r0, r2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the low lane and the unmodified current-line
# green in the high lane
andl g, g, 65535
andl t, t, 4294901760
orl g, t, g
x2 mergewl d1, r, g
x2 mergewl d2, b1, 65535
# 16-bit merge for a GR row, RGBA component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (R, G) pairs and d2 the (B, A) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_gr_rgba
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 b0 guint8
.source 4 g0 guint8
.source 4 g1 guint8
.source 4 r1 guint8
.source 4 b2 guint8
.source 4 g2 guint8
.temp 4 b
.temp 4 g
.temp 4 t
# blue and green: average of the previous-line and next-line inputs
x2 avguw b, b0, b2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the high lane and the unmodified current-line
# green in the low lane (mirror image of the BG variant)
andl g, g, 4294901760
andl t, t, 65535
orl g, t, g
x2 mergewl d1, r1, g
x2 mergewl d2, b, 65535
# 16-bit merge for a BG row, ARGB component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (A, R) pairs and d2 the (G, B) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_bg_argb
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 g0 guint8
.source 4 r0 guint8
.source 4 b1 guint8
.source 4 g1 guint8
.source 4 g2 guint8
.source 4 r2 guint8
.temp 4 r
.temp 4 g
.temp 4 t
# red and green: average of the previous-line and next-line inputs
x2 avguw r, r0, r2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the low lane and the unmodified current-line
# green in the high lane
andl g, g, 65535
andl t, t, 4294901760
orl g, t, g
x2 mergewl d1, 65535, r
x2 mergewl d2, g, b1
# 16-bit merge for a GR row, ARGB component order.
# Sources are declared guint8 but each element is 4 bytes and is processed
# as two 16-bit lanes (x2 word opcodes). Output is split over two buffers:
# d1 receives the (A, R) pairs and d2 the (G, B) pairs; A is the constant
# 65535.
.function bayer16_orc_merge_gr_argb
.dest 8 d1 guint16
.dest 8 d2 guint16
.source 4 b0 guint8
.source 4 g0 guint8
.source 4 g1 guint8
.source 4 r1 guint8
.source 4 b2 guint8
.source 4 g2 guint8
.temp 4 b
.temp 4 g
.temp 4 t
# blue and green: average of the previous-line and next-line inputs
x2 avguw b, b0, b2
x2 avguw g, g0, g2
# average that green once more with the current-line green
copyl t, g1
x2 avguw g, g, t
# keep the averaged green in the high lane and the unmodified current-line
# green in the low lane (mirror image of the BG variant)
andl g, g, 4294901760
andl t, t, 65535
orl g, t, g
x2 mergewl d1, 65535, r1
x2 mergewl d2, g, b
# Combine the two half-pixel buffers written by the bayer16_orc_merge_*
# kernels into 64-bit output pixels, rescaling every 16-bit component.
# s1 and s2 each supply two 16-bit components per iteration. 'shift' is the
# right-shift applied after multiplying by 0xffff; the C caller passes the
# input bit depth here.
.function bayer16to16_orc_reorder
.dest 8 d guint8
.source 4 s1 guint32
.source 4 s2 guint32
.param 4 shift
.temp 4 u
.temp 4 v
.temp 8 q
# u = saturate16 ((component * 0xffff) >> shift) for both components of s1
x2 muluwl q, s1, 0xffff
x2 shrul q, q, shift
x2 convuuslw u, q
# v = the same for both components of s2
x2 muluwl q, s2, 0xffff
x2 shrul q, q, shift
x2 convuuslw v, q
# one output pixel: the two components from s1 followed by the two from s2
mergelq d, u, v
# Combine the two half-pixel buffers written by the bayer16_orc_merge_*
# kernels into 32-bit output pixels with 8 bits per component.
# 'shift' is the right-shift applied to every 16-bit component before it is
# narrowed to a byte with saturation; the C caller passes bit depth - 8.
.function bayer16to8_orc_reorder
.dest 4 d guint8
.source 4 s1 guint32
.source 4 s2 guint32
.param 4 shift
.temp 2 u
.temp 2 v
.temp 4 l
# u = both components of s1, shifted down and narrowed to bytes
x2 shruw l, s1, shift
x2 convuuswb u, l
# v = the same for both components of s2
x2 shruw l, s2, shift
x2 convuuswb v, l
# one output pixel: the two bytes from s1 followed by the two from s2
mergewl d, u, v
# Widen 8-bit-per-component pixels to 16 bits per component: every one of
# the four bytes of s is duplicated into both bytes of a 16-bit word, so a
# 4-byte input pixel becomes an 8-byte output pixel.
.function bayer8to16_orc_reorder
.dest 8 d guint8
.source 4 s guint32
x4 splatbw d, s