video-overlay-composition: Optimize premultiplication/unpremultiplication loops

Pull the video frame fields into local variables. Without this, the
compiler must assume that they could have changed on every use and
read them from memory again.

This reduces the inner loop from 6 memory reads per pixel to 4, while the
number of writes stays at 3.
This commit is contained in:
Sebastian Dröge 2018-12-07 19:13:59 +02:00
parent 76aa92be51
commit f441b5216e

View file

@ -841,12 +841,17 @@ static void
gst_video_overlay_rectangle_premultiply_0 (GstVideoFrame * frame)
{
int i, j;
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
int width = GST_VIDEO_FRAME_WIDTH (frame);
int height = GST_VIDEO_FRAME_HEIGHT (frame);
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
for (j = 0; j < height; ++j) {
guint8 *line;
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
line = data;
line += stride * j;
for (i = 0; i < width; ++i) {
int a = line[0];
line[1] = line[1] * a / 255;
line[2] = line[2] * a / 255;
@ -860,12 +865,17 @@ static void
gst_video_overlay_rectangle_premultiply_3 (GstVideoFrame * frame)
{
int i, j;
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
int width = GST_VIDEO_FRAME_WIDTH (frame);
int height = GST_VIDEO_FRAME_HEIGHT (frame);
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
for (j = 0; j < height; ++j) {
guint8 *line;
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
line = data;
line += stride * j;
for (i = 0; i < width; ++i) {
int a = line[3];
line[0] = line[0] * a / 255;
line[1] = line[1] * a / 255;
@ -899,12 +909,17 @@ static void
gst_video_overlay_rectangle_unpremultiply_0 (GstVideoFrame * frame)
{
int i, j;
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
int width = GST_VIDEO_FRAME_WIDTH (frame);
int height = GST_VIDEO_FRAME_HEIGHT (frame);
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
for (j = 0; j < height; ++j) {
guint8 *line;
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
line = data;
line += stride * j;
for (i = 0; i < width; ++i) {
int a = line[0];
if (a) {
line[1] = MIN ((line[1] * 255 + a / 2) / a, 255);
@ -920,12 +935,17 @@ static void
gst_video_overlay_rectangle_unpremultiply_3 (GstVideoFrame * frame)
{
int i, j;
for (j = 0; j < GST_VIDEO_FRAME_HEIGHT (frame); ++j) {
int width = GST_VIDEO_FRAME_WIDTH (frame);
int height = GST_VIDEO_FRAME_HEIGHT (frame);
int stride = GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0);
guint8 *data = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
for (j = 0; j < height; ++j) {
guint8 *line;
line = GST_VIDEO_FRAME_PLANE_DATA (frame, 0);
line += GST_VIDEO_FRAME_PLANE_STRIDE (frame, 0) * j;
for (i = 0; i < GST_VIDEO_FRAME_WIDTH (frame); ++i) {
line = data;
line += stride * j;
for (i = 0; i < width; ++i) {
int a = line[3];
if (a) {
line[0] = MIN ((line[0] * 255 + a / 2) / a, 255);