mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-25 09:40:37 +00:00
video-overlay-composition: Optimize premultiplication/unpremultiplication loops
Pull in video frame fields into local variables. Without this the compiler must assume that they could've changed on every use and read them from memory again. This reduces the inner loop from 6 memory reads per pixel to 4, while the number of writes stays at 3.
This commit is contained in:
parent
76aa92be51
commit
f441b5216e
1 changed file with 36 additions and 16 deletions
|
@ -841,12 +841,17 @@ static void
|
|||
gst_video_overlay_rectangle_premultiply_0 (GstVideoFrame * frame)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
|
||||
int width = GST_VIDEO_FRAME_WIDTH (frame);
|
||||
int height = GST_VIDEO_FRAME_HEIGHT (frame);
|
||||
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
|
||||
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
|
||||
for (j = 0; j < height; ++j) {
|
||||
guint8 *line;
|
||||
|
||||
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
|
||||
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
|
||||
line = data;
|
||||
line += stride * j;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int a = line[0];
|
||||
line[1] = line[1] * a / 255;
|
||||
line[2] = line[2] * a / 255;
|
||||
|
@ -860,12 +865,17 @@ static void
|
|||
gst_video_overlay_rectangle_premultiply_3 (GstVideoFrame * frame)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
|
||||
int width = GST_VIDEO_FRAME_WIDTH (frame);
|
||||
int height = GST_VIDEO_FRAME_HEIGHT (frame);
|
||||
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
|
||||
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
|
||||
for (j = 0; j < height; ++j) {
|
||||
guint8 *line;
|
||||
|
||||
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
|
||||
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
|
||||
line = data;
|
||||
line += stride * j;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int a = line[3];
|
||||
line[0] = line[0] * a / 255;
|
||||
line[1] = line[1] * a / 255;
|
||||
|
@ -899,12 +909,17 @@ static void
|
|||
gst_video_overlay_rectangle_unpremultiply_0 (GstVideoFrame * frame)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
|
||||
int width = GST_VIDEO_FRAME_WIDTH (frame);
|
||||
int height = GST_VIDEO_FRAME_HEIGHT (frame);
|
||||
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
|
||||
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
|
||||
for (j = 0; j < height; ++j) {
|
||||
guint8 *line;
|
||||
|
||||
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
|
||||
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
|
||||
line = data;
|
||||
line += stride * j;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int a = line[0];
|
||||
if (a) {
|
||||
line[1] = MIN ((line[1] * 255 + a / 2) / a, 255);
|
||||
|
@ -920,12 +935,17 @@ static void
|
|||
gst_video_overlay_rectangle_unpremultiply_3 (GstVideoFrame * frame)
|
||||
{
|
||||
int i, j;
|
||||
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
|
||||
int width = GST_VIDEO_FRAME_WIDTH (frame);
|
||||
int height = GST_VIDEO_FRAME_HEIGHT (frame);
|
||||
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
|
||||
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
|
||||
for (j = 0; j < height; ++j) {
|
||||
guint8 *line;
|
||||
|
||||
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
|
||||
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
|
||||
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
|
||||
line = data;
|
||||
line += stride * j;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int a = line[3];
|
||||
if (a) {
|
||||
line[0] = MIN ((line[0] * 255 + a / 2) / a, 255);
|
||||
|
|
Loading…
Reference in a new issue