From 45b2be990fed79bfb4c039c743d1a54d0e46f3da Mon Sep 17 00:00:00 2001 From: Helvetix Victorinox <Helvetix@src.gnome.org> Date: Tue, 8 Jul 2003 23:15:16 +0000 Subject: [PATCH] I hate cvs. Re-adding app/composite --- app/composite/Makefile.am | 39 + app/composite/gimp-composite-generic.c | 1153 ++++++++ app/composite/gimp-composite-generic.h | 29 + app/composite/gimp-composite-mmx.c | 2441 +++++++++++++++++ app/composite/gimp-composite-mmx.h | 51 + app/composite/gimp-composite-util.h | 30 + app/composite/gimp-composite.c | 172 ++ app/composite/gimp-composite.h | 182 ++ app/composite/gimp-composite.html | 82 + app/composite/make-gimp-composite-dispatch.py | 460 ++++ app/composite/ns.py | 185 ++ app/composite/tester.c | 466 ++++ 12 files changed, 5290 insertions(+) create mode 100644 app/composite/Makefile.am create mode 100644 app/composite/gimp-composite-generic.c create mode 100644 app/composite/gimp-composite-generic.h create mode 100644 app/composite/gimp-composite-mmx.c create mode 100644 app/composite/gimp-composite-mmx.h create mode 100644 app/composite/gimp-composite-util.h create mode 100644 app/composite/gimp-composite.c create mode 100644 app/composite/gimp-composite.h create mode 100644 app/composite/gimp-composite.html create mode 100755 app/composite/make-gimp-composite-dispatch.py create mode 100755 app/composite/ns.py create mode 100644 app/composite/tester.c diff --git a/app/composite/Makefile.am b/app/composite/Makefile.am new file mode 100644 index 0000000000..09e9a79c17 --- /dev/null +++ b/app/composite/Makefile.am @@ -0,0 +1,39 @@ +## Process this file with automake to produce Makefile.in + +noinst_LIBRARIES = libgimpcomposite.a + +libgimpcomposite_a_sources = \ + gimp-composite.c \ + gimp-composite-generic.c \ + gimp-composite-generic.h \ + gimp-composite.h \ + gimp-composite-mmx.c \ + gimp-composite-mmx.h \ + gimp-composite-util.h + +libgimpcomposite_a_built_sources = gimp-composite-dispatch.c + +libgimpcomposite_a_SOURCES = $(libgimpcomposite_a_built_sources) $(libgimpcomposite_a_sources) + +INCLUDES = \ + -I$(top_srcdir)/app \ + -I$(top_srcdir)/app/composite \ + $(GLIB_CFLAGS) \ + -I$(includedir) + +AM_CPPFLAGS = \ + -DG_LOG_DOMAIN=\"Gimp-Compositing\" \ + @GIMP_THREAD_FLAGS@ \ + @GIMP_MP_FLAGS@ + +AM_CCASFLAGS = \ + -I$(top_builddir) \ + -I$(top_srcdir) \ + -I$(top_srcdir)/app + +EXTRA_DIST = makefile.msc + +gimp-composite.c: gimp-composite-dispatch.c + +gimp-composite-dispatch.c: gimp-composite-generic.o make-gimp-composite-dispatch.py + ./make-gimp-composite-dispatch.py gimp-composite-generic.o > gimp-composite-dispatch.c diff --git a/app/composite/gimp-composite-generic.c b/app/composite/gimp-composite-generic.c new file mode 100644 index 0000000000..69ffed84d9 --- /dev/null +++ b/app/composite/gimp-composite-generic.c @@ -0,0 +1,1153 @@ +/* The GIMP -- an image manipulation program + * Copyright (C) 1995 Spencer Kimball and Peter Mattis + * + * -*- mode: c tab-width: 2; -*- + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + */ + +/* + * This file is supposed to contain the generic (read: C) implementation + * of the pixelfiddeling paint-functions. + */ + +#include <string.h> + +#include "glib/grand.h" +#include "glib/gtypes.h" + +#include "libgimpcolor/gimpcolortypes.h" +#include "libgimpcolor/gimpcolorspace.h" + +#include "gimp-composite.h" + +#define OPAQUE_OPACITY 255 +#define TRANSPARENT_OPACITY 0 + +#define INT_MULT(a,b,t) ((t) = (a) * (b) + 0x80, ((((t) >> 8) + (t)) >> 8)) + +/* This version of INT_MULT3 is very fast, but suffers from some + slight roundoff errors. It returns the correct result 99.987 + percent of the time */ +#define INT_MULT3(a,b,c,t) ((t) = (a) * (b) * (c)+ 0x7F5B, ((((t) >> 7) + (t)) >> 16)) +/* + This version of INT_MULT3 always gives the correct result, but runs at + approximatly one third the speed. */ +/* #define INT_MULT3(a,b,c,t) (((a) * (b) * (c)+ 32512) / 65025.0) + */ + +#define INT_BLEND(a,b,alpha,tmp) (INT_MULT((a)-(b), alpha, tmp) + (b)) + +#define RANDOM_TABLE_SIZE 4096 + +/* A drawable has an alphachannel if contains either 4 or 2 bytes data + * aka GRAYA and RGBA and thus the macro below works. This will have + * to change if we support bigger formats. We'll do it so for now because + * masking is always cheaper than passing parameters over the stack. */ +/* FIXME: Move to a global place */ + +#define HAS_ALPHA(bytes) (~bytes & 1) + + +static guchar add_lut[511]; +static gint32 random_table[RANDOM_TABLE_SIZE]; + +/* + * + * Pixel format type conversion + * + * XXX This implementation will not work for >8 bit colours. + * XXX This implementation is totally wrong. + */ +void +gimp_composite_convert_any_any_any_generic(GimpCompositeContext *ctx) +{ + int i; + int j; + char *D = ctx->D; + char *A = ctx->A; + int bpp_A = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + int bpp_D = gimp_composite_pixel_bpp[ctx->pixelformat_D]; + + for (i = 0; i < ctx->n_pixels; i++) { + for (j = 0; j < bpp_A; j++) { + D[j] = A[j]; + } + D[j] = GIMP_COMPOSITE_ALPHA_OPAQUE; + A += bpp_A; + D += bpp_D; + } +} + +void +gimp_composite_color_any_any_any_generic(guchar * dest, const guchar * color, guint w, guint bytes) +{ + /* dest % bytes and color % bytes must be 0 or we will crash + when bytes = 2 or 4. + Is this safe to assume? Lets find out. + This is 4-7X as fast as the simple version. 
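+     The non-SPARC cases below store whole 16- and 32-bit words through
+     cast pointers, which is where the alignment requirement above comes
+     from; the SPARC cases use byte stores, presumably to avoid
+     unaligned-access traps.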
+ */ + +#if defined(sparc) || defined(__sparc__) + guchar c0, c1, c2, c3; +#else + guchar c0, c1, c2; + guint32 *longd, longc; + guint16 *shortd, shortc; +#endif + + switch (bytes) + { + case 1: + memset(dest, *color, w); + break; + + case 2: +#if defined(sparc) || defined(__sparc__) + c0 = color[0]; + c1 = color[1]; + while (w--) + { + dest[0] = c0; + dest[1] = c1; + dest += 2; + } +#else + shortc = ((guint16 *) color)[0]; + shortd = (guint16 *) dest; + while (w--) + { + *shortd = shortc; + shortd++; + } +#endif /* sparc || __sparc__ */ + break; + + case 3: + c0 = color[0]; + c1 = color[1]; + c2 = color[2]; + while (w--) + { + dest[0] = c0; + dest[1] = c1; + dest[2] = c2; + dest += 3; + } + break; + + case 4: +#if defined(sparc) || defined(__sparc__) + c0 = color[0]; + c1 = color[1]; + c2 = color[2]; + c3 = color[3]; + while (w--) + { + dest[0] = c0; + dest[1] = c1; + dest[2] = c2; + dest[3] = c3; + dest += 4; + } +#else + longc = ((guint32 *) color)[0]; + longd = (guint32 *) dest; + while (w--) + { + *longd = longc; + longd++; + } +#endif /* sparc || __sparc__ */ + break; + + default: + while (w--) + { + memcpy(dest, color, bytes); + dest += bytes; + } + } +} + +void +gimp_composite_blend_any_any_any_generic(GimpCompositeContext *ctx) +{ + guchar *src1 = ctx->A; + guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guchar blend = ctx->blend.blend; + guint bytes = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint w = ctx->n_pixels; + guint b; + const guchar blend2 = (255 - blend); + + while (w--) + { + for (b = 0; b < bytes; b++) + dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255; + + src1 += bytes; + src2 += bytes; + dest += bytes; + } +} + + +#if 0 +void +gimp_composite_shade_generic(const guchar *src, guchar *dest, const guchar *col, guchar blend, guint w, guint bytes, guint has_alpha) +{ + const guchar blend2 = (255 - blend); + const guint alpha = (has_alpha) ? bytes - 1 : bytes; + guint b; + + while (w--) + { + for (b = 0; b < alpha; b++) + dest[b] = (src[b] * blend2 + col[b] * blend) / 255; + + if (has_alpha) + dest[alpha] = src[alpha]; /* alpha channel */ + + src += bytes; + dest += bytes; + } +} +#endif + +void +gimp_composite_darken_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + guchar s1, s2; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + s1 = src1[b]; + s2 = src2[b]; + dest[b] = (s1 < s2) ? s1 : s2; + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + +void +gimp_composite_lighten_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? 
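+                     /* if either input carries an alpha channel, it is the
+                        last byte of the wider pixel; otherwise every byte
+                        is colour data */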
MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + guchar s1, s2; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + s1 = src1[b]; + s2 = src2[b]; + dest[b] = (s1 < s2) ? s2 : s1; + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_hue_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + guint r1, g1, b1; + guint r2, g2, b2; + + /* assumes inputs are only 4 byte RGBA pixels */ + while (length--) + { + r1 = src1[0]; + g1 = src1[1]; + b1 = src1[2]; + r2 = src2[0]; + g2 = src2[1]; + b2 = src2[2]; + gimp_rgb_to_hsv_int(&r1, &g1, &b1); + gimp_rgb_to_hsv_int(&r2, &g2, &b2); + + r1 = r2; + + /* set the destination */ + gimp_hsv_to_rgb_int(&r1, &g1, &b1); + + dest[0] = r1; + dest[1] = g1; + dest[2] = b1; + + if (has_alpha1 && has_alpha2) + dest[3] = MIN(src1[3], src2[3]); + else if (has_alpha2) + dest[3] = src2[3]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_saturation_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + guint r1, g1, b1; + guint r2, g2, b2; + + /* assumes inputs are only 4 byte RGBA pixels */ + while (length--) + { + r1 = src1[0]; + g1 = src1[1]; + b1 = src1[2]; + r2 = src2[0]; + g2 = src2[1]; + b2 = src2[2]; + gimp_rgb_to_hsv_int(&r1, &g1, &b1); + gimp_rgb_to_hsv_int(&r2, &g2, &b2); + + g1 = g2; + + /* set the destination */ + gimp_hsv_to_rgb_int(&r1, &g1, &b1); + + dest[0] = r1; + dest[1] = g1; + dest[2] = b1; + + if (has_alpha1 && has_alpha2) + dest[3] = MIN(src1[3], src2[3]); + else if (has_alpha2) + dest[3] = src2[3]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_value_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + guint r1, g1, b1; + guint r2, g2, b2; + + /* assumes inputs are only 4 byte RGBA pixels */ + while (length--) + { + r1 = src1[0]; + g1 = src1[1]; + b1 = src1[2]; + r2 = src2[0]; + g2 = src2[1]; + b2 = src2[2]; + gimp_rgb_to_hsv_int(&r1, &g1, &b1); + gimp_rgb_to_hsv_int(&r2, &g2, &b2); + + b1 = b2; + + /* set the destination */ + gimp_hsv_to_rgb_int(&r1, &g1, &b1); + + dest[0] = r1; + dest[1] = g1; + dest[2] = b1; + + if (has_alpha1 && has_alpha2) + dest[3] = MIN(src1[3], src2[3]); + else if (has_alpha2) + dest[3] = src2[3]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_color_only_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + 
const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + guint r1, g1, b1; + guint r2, g2, b2; + + /* assumes inputs are only 4 byte RGBA pixels */ + while (length--) + { + r1 = src1[0]; + g1 = src1[1]; + b1 = src1[2]; + r2 = src2[0]; + g2 = src2[1]; + b2 = src2[2]; + gimp_rgb_to_hls_int(&r1, &g1, &b1); + gimp_rgb_to_hls_int(&r2, &g2, &b2); + + /* transfer hue and saturation to the source pixel */ + r1 = r2; + b1 = b2; + + /* set the destination */ + gimp_hls_to_rgb_int(&r1, &g1, &b1); + + dest[0] = r1; + dest[1] = g1; + dest[2] = b1; + + if (has_alpha1 && has_alpha2) + dest[3] = MIN(src1[3], src2[3]); + else if (has_alpha2) + dest[3] = src2[3]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + +void +gimp_composite_multiply_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmp; + + if (has_alpha1 && has_alpha2) { + while (length--) + { + for (b = 0; b < alpha; b++) + dest[b] = INT_MULT(src1[b], src2[b], tmp); + + dest[alpha] = MIN(src1[alpha], src2[alpha]); + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } + } else if (has_alpha2) { + while (length--) + { + for (b = 0; b < alpha; b++) + dest[b] = INT_MULT(src1[b], src2[b], tmp); + + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } + } else { + while (length--) + { + for (b = 0; b < alpha; b++) + dest[b] = INT_MULT(src1[b], src2[b], tmp); + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } + } +} + + +void +gimp_composite_divide_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, result; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + result = ((src1[b] * 256) / (1 + src2[b])); + dest[b] = MIN(result, 255); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_screen_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? 
MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmp; + + while (length--) + { + for (b = 0; b < alpha; b++) + dest[b] = 255 - INT_MULT((255 - src1[b]), (255 - src2[b]), tmp); + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_overlay_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmp; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + dest[b] = INT_MULT(src1[b], src1[b] + INT_MULT(2 * src2[b], 255 - src1[b], tmp), tmp); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_dodge_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmp; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + tmp = src1[b] << 8; + tmp /= 256 - src2[b]; + dest[b] = (guchar) CLAMP(tmp, 0, 255); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_burn_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + + /* FIXME: Is the burn effect supposed to be dependant on the sign of this + * temporary variable? 
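+   * It seems to matter only for the CLAMP below: tmp itself stays
+   * non-negative, but "255 - tmp" can go negative and needs signed
+   * arithmetic to be clipped to 0 rather than wrapping.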
*/ + gint tmp; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + tmp = (255 - src1[b]) << 8; + tmp /= src2[b] + 1; + dest[b] = (guchar) CLAMP(255 - tmp, 0, 255); + } + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_hardlight_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmp; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + if (src2[b] > 128) + { + tmp = ((gint) 255 - src1[b]) * ((gint) 255 - ((src2[b] - 128) << 1)); + dest[b] = (guchar) CLAMP(255 - (tmp >> 8), 0, 255); + } + else + { + tmp = (gint) src1[b] * ((gint) src2[b] << 1); + dest[b] = (guchar) CLAMP(tmp >> 8, 0, 255); + } + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_softlight_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = gimp_composite_pixel_alphap[ctx->pixelformat_A]; + const guint has_alpha2 = gimp_composite_pixel_alphap[ctx->pixelformat_B]; + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b, tmpS, tmpM, tmp1, tmp2, tmp3; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + /* Mix multiply and screen */ + tmpM = INT_MULT(src1[b], src2[b], tmpM); + tmpS = 255 - INT_MULT((255 - src1[b]), (255 - src2[b]), tmp1); + dest[b] = INT_MULT((255 - src1[b]), tmpM, tmp2) + INT_MULT(src1[b], tmpS, tmp3); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_grain_extract_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = gimp_composite_pixel_alphap[ctx->pixelformat_A]; + const guint has_alpha2 = gimp_composite_pixel_alphap[ctx->pixelformat_B]; + const guint alpha = (has_alpha1 || has_alpha2) ? 
MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + gint diff; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + diff = src1[b] - src2[b] + 128; + dest[b] = (guchar) CLAMP(diff, 0, 255); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_grain_merge_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = HAS_ALPHA(bytes1); + const guint has_alpha2 = HAS_ALPHA(bytes2); + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + gint sum; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + /* Add, re-center and clip. */ + sum = src1[b] + src2[b] - 128; + dest[b] = (guchar) CLAMP(sum, 0, 255); + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + +void +gimp_composite_addition_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *A = ctx->A; + const guchar *B = ctx->B; + guchar *D = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = gimp_composite_pixel_alphap[ctx->pixelformat_A]; + const guint has_alpha2 = gimp_composite_pixel_alphap[ctx->pixelformat_B]; + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + + if (has_alpha1 && has_alpha2) { + while (length--) + { + for (b = 0; b < alpha; b++) + D[b] = add_lut[A[b] + B[b]]; + D[alpha] = MIN(A[alpha], B[alpha]); + A += bytes1; + B += bytes2; + D += bytes2; + } + } else if (has_alpha2) { + while (length--) + { + for (b = 0; b < alpha; b++) + D[b] = add_lut[A[b] + B[b]]; + D[alpha] = B[alpha]; + A += bytes1; + B += bytes2; + D += bytes2; + } + } else { + while (length--) + { + for (b = 0; b < alpha; b++) + D[b] = add_lut[A[b] + B[b]]; + A += bytes1; + B += bytes2; + D += bytes2; + } + } +} + + +void +gimp_composite_subtract_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = gimp_composite_pixel_alphap[ctx->pixelformat_A]; + const guint has_alpha2 = gimp_composite_pixel_alphap[ctx->pixelformat_B]; + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + gint diff; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + diff = src1[b] - src2[b]; + dest[b] = (diff < 0) ? 
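+                      /* diff can never exceed 255, so only the negative
+                         side needs clamping */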
0 : diff; + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_difference_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + const guchar *src2 = ctx->B; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + guint bytes2 = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + const guint has_alpha1 = gimp_composite_pixel_alphap[ctx->pixelformat_A]; + const guint has_alpha2 = gimp_composite_pixel_alphap[ctx->pixelformat_B]; + const guint alpha = (has_alpha1 || has_alpha2) ? MAX(bytes1, bytes2) - 1 : bytes1; + guint b; + gint diff; + + while (length--) + { + for (b = 0; b < alpha; b++) + { + diff = src1[b] - src2[b]; + dest[b] = (diff < 0) ? -diff : diff; + } + + if (has_alpha1 && has_alpha2) + dest[alpha] = MIN(src1[alpha], src2[alpha]); + else if (has_alpha2) + dest[alpha] = src2[alpha]; + + src1 += bytes1; + src2 += bytes2; + dest += bytes2; + } +} + + +void +gimp_composite_dissolve_any_any_any_generic(GimpCompositeContext * ctx) +{ + GRand *gr; + gint alpha; + gint b; + gint combined_opacity; + gint db = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + gint length = ctx->n_pixels; + gint opacity = ctx->dissolve.opacity; + gint sb = gimp_composite_pixel_bpp[ctx->pixelformat_B]; + gint x = ctx->dissolve.x; + gint y = ctx->dissolve.y; + guchar *mask = ctx->M; + gint32 rand_val; + guchar *dest = ctx->D; + guchar *src = ctx->B; + guint has_alpha = gimp_composite_pixel_alpha[ctx->pixelformat_B]; + + /* + * if destination does not have an alpha channel, add one to it. + */ + if (!gimp_composite_pixel_alphap[ctx->pixelformat_D]) { + ctx->pixelformat_D = gimp_composite_pixel_alpha[ctx->pixelformat_D]; + /*gimp_composite_convert_any_any_any_generic(ctx);*/ + } + + gr = g_rand_new_with_seed(random_table[y % RANDOM_TABLE_SIZE]); + + for (b = 0; b < x; b ++) + g_rand_int (gr); + + alpha = db - 1; + + /* + * XXX NB: The mask is assumed to be a linear array of bytes, no + * accounting for the mask being of a particular pixel format. + */ + while (length--) + { + /* preserve the intensity values */ + for (b = 0; b < alpha; b++) + dest[b] = src[b]; + + /* dissolve if random value is > opacity */ + rand_val = g_rand_int_range(gr, 0, 256); + + if (mask) { + if (has_alpha) + combined_opacity = opacity * src[alpha] * (*mask) / (255 * 255); + else + combined_opacity = opacity * (*mask) / 255; + + mask++; + } else { + if (has_alpha) + combined_opacity = opacity * src[alpha] / 255; + else + combined_opacity = opacity; + } + + dest[alpha] = (rand_val > combined_opacity) ? 0 : OPAQUE_OPACITY; + + dest += db; + src += sb; + } + + g_rand_free(gr); + + ctx->combine = gimp_composite_pixel_alphap[ctx->pixelformat_A] ? 
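+    /* tell the caller how to combine the result, depending on whether
+       A carries an alpha channel as well */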
COMBINE_INTEN_A_INTEN_A : COMBINE_INTEN_INTEN_A; +} + +void +gimp_composite_replace_any_any_any_generic(GimpCompositeContext *ctx) +{ + ctx->D = ctx->B; + ctx->combine = REPLACE_INTEN; +} + + +void +gimp_composite_swap_any_any_any_generic(GimpCompositeContext * ctx) +{ + guint length; + guchar *src = ctx->A; + guchar *dest = ctx->B; + guint bytes1 = gimp_composite_pixel_bpp[ctx->pixelformat_A]; + length = ctx->n_pixels * bytes1; + + while (length--) + { + *src = *src ^ *dest; + *dest = *dest ^ *src; + *src = *src ^ *dest; + src++; + dest++; + } +} + +void +gimp_composite_normal_any_any_any_generic(GimpCompositeContext * ctx) +{ + ctx->D = ctx->B; +} + + +void +gimp_composite_normal_rgba8_any_any_generic(GimpCompositeContext * ctx) +{ + ctx->D = ctx->B; +} + + +void +gimp_composite_erase_rgba8_any_any_generic(GimpCompositeContext *ctx) +{ + ctx->D = ctx->B; + ctx->combine = (gimp_composite_pixel_alphap[ctx->pixelformat_A] && gimp_composite_pixel_alphap[ctx->pixelformat_B]) ? ERASE_INTEN : 0; +} + +void +gimp_composite_anti_erase_any_any_any_generic(GimpCompositeContext *ctx) +{ + ctx->D = ctx->B; + ctx->combine = (gimp_composite_pixel_alphap[ctx->pixelformat_A] && gimp_composite_pixel_alphap[ctx->pixelformat_B]) ? ANTI_ERASE_INTEN : 0; +} + +void +gimp_composite_color_erase_any_any_any_generic(GimpCompositeContext *ctx) +{ + ctx->D = ctx->B; + ctx->combine = (gimp_composite_pixel_alphap[ctx->pixelformat_A] && gimp_composite_pixel_alphap[ctx->pixelformat_B]) ? COLOR_ERASE_INTEN : 0; +} + + +void +gimp_composite_scale_any_any_any_generic(GimpCompositeContext * ctx) +{ + const guchar *src1 = ctx->A; + guchar *dest = ctx->D; + guint length = ctx->n_pixels; + guint bytes1 = (ctx->pixelformat_A == GIMP_PIXELFORMAT_V8) ? 1 + : (ctx->pixelformat_A == GIMP_PIXELFORMAT_VA8) ? 2 + : (ctx->pixelformat_A == GIMP_PIXELFORMAT_RGB8) ? 3 : (ctx->pixelformat_A == GIMP_PIXELFORMAT_RGBA8) ? 
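+    /* bytes per pixel of the source format; an unrecognised format
+       yields 0 and the loop below processes nothing */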
4 : 0; + gint tmp; + + length = ctx->n_pixels * bytes1; + + while (length--) + { + *dest++ = (guchar) INT_MULT(*src1, ctx->scale.scale, tmp); + src1++; + } +} + +void +gimp_composite_generic_init() +{ + guint i; + GRand *gr; +#define RANDOM_SEED 314159265 + + /* generate a table of random seeds */ + gr = g_rand_new_with_seed(RANDOM_SEED); + + for (i = 0; i < RANDOM_TABLE_SIZE; i++) + random_table[i] = g_rand_int(gr); + + for (i = 0; i < 256; i++) + add_lut[i] = i; + + for (i = 256; i <= 510; i++) + add_lut[i] = 255; +} diff --git a/app/composite/gimp-composite-generic.h b/app/composite/gimp-composite-generic.h new file mode 100644 index 0000000000..ac9baac73d --- /dev/null +++ b/app/composite/gimp-composite-generic.h @@ -0,0 +1,29 @@ + +extern void gimp_composite_color_generic (GimpCompositeContext *); +extern void gimp_composite_blend_pixels (GimpCompositeContext *); +extern void gimp_composite_shade_generic (GimpCompositeContext *); +extern void gimp_composite_darken_generic(GimpCompositeContext *); +extern void gimp_composite_lighten_generic(GimpCompositeContext *); +extern void gimp_composite_hue_only_generic (GimpCompositeContext *); +extern void gimp_composite_saturation_generic (GimpCompositeContext *); +extern void gimp_composite_value_generic(GimpCompositeContext *); +extern void gimp_composite_color_only_generic(GimpCompositeContext *); +extern void gimp_composite_multiply_generic(GimpCompositeContext *); +extern void gimp_composite_divide_generic(GimpCompositeContext *); +extern void gimp_composite_screen_generic(GimpCompositeContext *); +extern void gimp_composite_overlay_generic(GimpCompositeContext *); +extern void gimp_composite_dodge_generic(GimpCompositeContext *); +extern void gimp_composite_burn_generic (GimpCompositeContext *); +extern void gimp_composite_hardlight_generic(GimpCompositeContext *); +extern void gimp_composite_softlight_generic(GimpCompositeContext *); +extern void gimp_composite_grain_extract_generic(GimpCompositeContext *); +extern void gimp_composite_grain_merge_generic(GimpCompositeContext *); +extern void gimp_composite_addition_generic(GimpCompositeContext *); +extern void gimp_composite_subtract_generic(GimpCompositeContext *); +extern void gimp_composite_difference_generic(GimpCompositeContext *); +extern void gimp_composite_dissolve_generic(GimpCompositeContext *); +extern void gimp_composite_replace_generic(GimpCompositeContext *); +extern void gimp_composite_generic_init(GimpCompositeContext *); +extern void gimp_composite_swap_generic (GimpCompositeContext *); +extern void gimp_composite_scale_generic (GimpCompositeContext *); + diff --git a/app/composite/gimp-composite-mmx.c b/app/composite/gimp-composite-mmx.c new file mode 100644 index 0000000000..d43b7143e3 --- /dev/null +++ b/app/composite/gimp-composite-mmx.c @@ -0,0 +1,2441 @@ +/* The GIMP -- an image manipulation program + * Copyright (C) 1995 Spencer Kimball and Peter Mattis + * + * -*- mode: c tab-width: 2; -*- + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Much of the content of this file are derivative works of David + * Monniaux which are Copyright (C) 1999, 2001 David Monniaux + * Tip-o-the-hat to David for pioneering this effort. + * + * All of these functions use the mmx registers and expect them to + * remain intact across multiple asm() constructs. This may not work + * in the future, if the compiler allocates mmx registers for it's own + * use. XXX + */ + +#include <stdio.h> +#include <sys/types.h> + +#include "gimp-composite.h" +#include "gimp-composite-mmx.h" + +#undef USE_SSE + +#ifdef USE_SSE +#define pminub(src,dst,tmp) "pminub " "%%" #src ", %%" #dst +#define pmaxub(src,dst,tmp) "pmaxub " "%%" #src ", %%" #dst +#else +#define pminub(src,dst,tmp) "movq %%" #dst ", %%" #tmp ";" "psubusb %%" #src ", %%" #tmp ";" "psubb %%" #tmp ", %%" #dst + +#define pmaxub(a,b,tmp) "movq %%" #a ", %%" #tmp ";" "psubusb %%" #b ", %%" #tmp ";" "paddb %%" #tmp ", %%" #b +#endif + + +/* + * "\t" pdivwX(mm4,mm5,mm7) "\n" + * "\tpsrlq $32,%%mm4\n" + * "\tpsrlq $32,%%mm5\n" + * "\t" pdivwX(mm4,mm5,mm5) "\n" + * "\tpsllq $32,%%mm5\n" + * "\tpor %%mm5,%%mm7\n" + */ +/* + * Clobbers eax, ecx edx + */ +/* + * Double-word divide. Adjusted for subsequent unsigned packing + * (high-order bit of each word is cleared) + */ +#define pdivwX(dividend,divisor,quotient) "movd %%" #dividend ",%%eax; " \ + "movd %%" #divisor ",%%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "roll $16, %%eax; " \ + "roll $16, %%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "btr $15, %%eax; " \ + "roll $16, %%eax; " \ + "btr $15, %%eax; " \ + "movd %%eax,%%" #quotient ";" + +/* + * Quadword divide. No adjustment for subsequent unsigned packing + * (high-order bit of each word is left alone) + */ +#define pdivwqX(dividend,divisor,quotient) "movd %%" #dividend ",%%eax; " \ + "movd %%" #divisor ",%%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "roll $16, %%eax; " \ + "roll $16, %%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "roll $16, %%eax; " \ + "movd %%eax,%%" #quotient "; " \ + "psrlq $32,%%" #dividend ";" \ + "psrlq $32,%%" #divisor ";" \ + "movd %%" #dividend ",%%eax; " \ + "movd %%" #divisor ",%%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "roll $16, %%eax; " \ + "roll $16, %%ecx; " \ + "xorl %%edx,%%edx; " \ + "divw %%cx; " \ + "roll $16, %%eax; " \ + "movd %%eax,%%" #divisor ";" \ + "psllq $32,%%" #divisor ";" \ + "por %%" #divisor ",%%" #quotient ";" + +/* + * Quadword divide. 
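+ * Like pdivwqX above, each 16-bit word of the dividend is divided by the
+ * matching word of the divisor with divw, clobbering eax, ecx and edx.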
Adjusted for subsequent unsigned packing + * (high-order bit of each word is cleared) + */ +#define pdivwuqX(dividend,divisor,quotient) \ + pdivwX(dividend,divisor,quotient) \ + "psrlq $32,%%" #dividend ";" \ + "psrlq $32,%%" #divisor ";" \ + pdivwX(dividend,divisor,quotient) \ + "movd %%eax,%%" #divisor ";" \ + "psllq $32,%%" #divisor ";" \ + "por %%" #divisor ",%%" #quotient ";" + +/* equivalent to INT_MULT() macro */ +/* + * opr2 = INT_MULT(opr1, opr2, t) + * + * Operates across quad-words + * Result is left in opr2 + * + * opr1 = opr1 * opr + w128 + */ +#define pmulwX(opr1,opr2,w128) \ + "\tpmullw %%"#opr2", %%"#opr1"; " \ + "\tpaddw %%"#w128", %%"#opr1"; " \ + "\tmovq %%"#opr1", %%"#opr2"; " \ + "\tpsrlw $8, %%"#opr2"; " \ + "\tpaddw %%"#opr1", %%"#opr2"; " \ + "\tpsrlw $8, %%"#opr2"\n" + + + + +#define ASM(x) debug(#x); asm(x) + +#define DEBUG(x) + + +void +debug_display_mmx() +{ +#define mask32(x) ((x)& (unsigned long long) 0xFFFFFFFF) +#define print64(reg) { unsigned long long reg; asm("movq %%" #reg ",%0" : "=m" (reg)); printf(#reg"=%08llx %08llx", mask32(reg>>32), mask32(reg)); } + printf("--------------------------------------------\n"); + print64(mm0); printf(" "); print64(mm1); printf("\n"); + print64(mm2); printf(" "); print64(mm3); printf("\n"); + print64(mm4); printf(" "); print64(mm5); printf("\n"); + print64(mm6); printf(" "); print64(mm7); printf("\n"); + printf("--------------------------------------------\n"); +} + + +unsigned long rgba8_alpha_mask[2] = { 0xFF000000, 0xFF000000 }; +unsigned long rgba8_b1[2] = { 0x01010101, 0x01010101 }; +unsigned long rgba8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; +unsigned long rgba8_w1[2] = { 0x00010001, 0x00010001 }; +unsigned long rgba8_w128[2] = { 0x00800080, 0x00800080 }; +unsigned long rgba8_w256[2] = { 0x01000100, 0x01000100 }; +unsigned long rgba8_w255[2] = { 0X00FF00FF, 0X00FF00FF }; + +unsigned long va8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00 }; +unsigned long va8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; +unsigned long va8_w1[2] = { 0x00010001, 0x00010001 }; +unsigned long va8_w255[2] = { 0X00FF00FF, 0X00FF00FF }; +/* + * + */ +void +gimp_composite_addition_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + "\tmovq %%mm2, %%mm4\n" + "\tpaddusb %%mm3, %%mm4\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\t" pminub(mm3, mm2, mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + if (op.n_pixels) { + asm(" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + "\tmovq %%mm2, %%mm4\n" + "\tpaddusb %%mm3, %%mm4\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\t" pminub(mm3, mm2, mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + asm("emms"); +} + +void gimp_composite_burn_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm1" + : + : "m" (rgba8_alpha_mask) + : "%mm1"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0),%%mm0; addl 
$8,%0\n" + "\tmovq (%1),%%mm1; addl $8,%1\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + + "\t" pdivwqX(mm4,mm5,mm7) "\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpckhbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\t" pdivwqX(mm4,mm5,mm6) "\n" + + "\tmovq %5,%%mm4\n" + "\tmovq %%mm4,%%mm5\n" + "\tpsubusw %%mm6,%%mm4\n" + "\tpsubusw %%mm7,%%mm5\n" + + "\tpackuswb %%mm4,%%mm5\n" + + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + + "\tmovq %6,%%mm7\n" + "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ + + "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ + "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ + + "\tmovq %%mm7,(%2); addl $8,%2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : "m" (rgba8_b255), "m" (rgba8_w1), "m" (rgba8_w255), "m" (rgba8_alpha_mask) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0),%%mm0\n" + "\tmovd (%1),%%mm1\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + + "\t" pdivwqX(mm4,mm5,mm7) "\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpckhbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\t" pdivwqX(mm4,mm5,mm6) "\n" + + "\tmovq %5,%%mm4\n" + "\tmovq %%mm4,%%mm5\n" + "\tpsubusw %%mm6,%%mm4\n" + "\tpsubusw %%mm7,%%mm5\n" + + "\tpackuswb %%mm4,%%mm5\n" + + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + + "\tmovq %6,%%mm7\n" + "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ + + "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ + "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ + + "\tmovd %%mm7,(%2)\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D), "m" (rgba8_b255), "m" (rgba8_w1), "m" (rgba8_w255), "m" (rgba8_alpha_mask) + : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + asm("emms"); +} + +void +xxxgimp_composite_coloronly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); + +} + +void +gimp_composite_darken_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op 
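+  /* work on a local copy so the pixel pointers and count can be
+     advanced without touching the caller's context */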
= *_op; + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + "\t" pminub(mm3, mm2, mm4) "\n" + "\tmovq %%mm2, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + "\t" pminub(mm3, mm2, mm4) "\n" + "\tmovd %%mm2, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + +void +gimp_composite_difference_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + "\tmovq %%mm2, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpsubusb %%mm3, %%mm4\n" + "\tpsubusb %%mm2, %%mm5\n" + "\tpaddb %%mm5, %%mm4\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\tpminub %%mm3, %%mm2\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + "\tmovq %%mm2, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpsubusb %%mm3, %%mm4\n" + "\tpsubusb %%mm2, %%mm5\n" + "\tpaddb %%mm5, %%mm4\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\tpminub %%mm3, %%mm2\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + + +void +xxxgimp_composite_dissolve_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile ("\tmovq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile ("\tmovd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +void +gimp_composite_divide_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0, %%mm0\n" + "\tmovq %1, %%mm7\n" + : + : "m" (rgba8_alpha_mask), "m" (rgba8_w1) + : "%mm0", "%mm7"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm0; addl $8, %0\n" + "\tmovq (%1), %%mm1; addl $8, %1\n" + + "\tpxor %%mm2,%%mm2\n" + "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tpaddw %%mm7,%%mm3\n" /* mm3 = B+1 */ + + "\t" pdivwuqX(mm2,mm3,mm5) "\n" /* mm5 = (A*256)/(B+1) */ + + "\tpxor %%mm2,%%mm2\n" + "\tpunpckhbw %%mm0,%%mm2\n" /* mm2 = A*256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm6,%%mm6\n" + "\tpunpckhbw %%mm6,%%mm3\n" + "\tpaddw %%mm7,%%mm3\n" /* mm3 = B+1 */ + + "\t" pdivwuqX(mm2,mm3,mm4) "\n" /* mm4 = (A*256)/(B+1) */ + + 
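+                    /* quotient words larger than 255 saturate to 255 in the
+                       pack below, matching the MIN (result, 255) clamp in the
+                       generic version */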
"\tpackuswb %%mm4,%%mm5\n" /* expects mm4 and mm5 to be signed values */ + + "\t" pminub(mm0,mm1,mm3) "\n" + "\tmovq %3,%%mm3\n" + "\tmovq %%mm3,%%mm2\n" + + "\tpandn %%mm5,%%mm3\n" + + "\tpand %%mm2,%%mm1\n" + "\tpor %%mm1,%%mm3\n" + + "\tmovq %%mm3,(%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : "m" (rgba8_alpha_mask) + : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm0; addl $8, %0\n" + "\tmovd (%1), %%mm1; addl $8, %1\n" + + "\tpxor %%mm2,%%mm2\n" + "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tpaddw %%mm7,%%mm3\n" /* mm3 = B+1 */ + + "\t" pdivwuqX(mm2,mm3,mm5) "\n" /* mm5 = (A*256)/(B+1) */ + + "\tpxor %%mm2,%%mm2\n" + "\tpunpckhbw %%mm0,%%mm2\n" /* mm2 = A*256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm6,%%mm6\n" + "\tpunpckhbw %%mm6,%%mm3\n" + "\tpaddw %%mm7,%%mm3\n" /* mm3 = B+1 */ + + "\t" pdivwuqX(mm2,mm3,mm4) "\n" /* mm4 = (A*256)/(B+1) */ + + "\tpackuswb %%mm4,%%mm5\n" /* expects mm4 and mm5 to be signed values */ + + "\t" pminub(mm0,mm1,mm3) "\n" + "\tmovq %3,%%mm3\n" + "\tmovq %%mm3,%%mm2\n" + + "\tpandn %%mm5,%%mm3\n" + + "\tpand %%mm2,%%mm1\n" + "\tpor %%mm1,%%mm3\n" + + "\tmovd %%mm3,(%2); addl $8, %2\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D), "m" (rgba8_alpha_mask) + : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +/* + * (src1[b] << 8) / (256 - src2[b]); + */ +void +gimp_composite_dodge_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm0; addl $8, %0\n" + "\tmovq (%1), %%mm1; addl $8, %1\n" + "\tmovq %%mm1, %%mm3\n" + "\tpxor %%mm2, %%mm2\n" + "\tpunpcklbw %%mm2, %%mm3\n" + "\tpunpcklbw %%mm0, %%mm2\n" + + "\tmovq rgba8_w256, %%mm4\n" + "\tpsubw %%mm3, %%mm4\n" + + "\t" pdivwuqX(mm2,mm4,mm5) "\n" + + "\tmovq %%mm1, %%mm3\n" + "\tpxor %%mm2, %%mm2\n" + "\tpunpckhbw %%mm2, %%mm3\n" + "\tpunpckhbw %%mm0, %%mm2\n" + + "\tmovq rgba8_w256, %%mm4\n" + "\tpsubw %%mm3, %%mm4\n" + + "\t" pdivwuqX(mm2,mm4,mm6) "\n" + + "\tpackuswb %%mm6, %%mm5\n" + + "\tmovq rgba8_alpha_mask, %%mm6\n" + "\tmovq %%mm1,%%mm7\n" + "\t" pminub(mm0,mm7,mm2) "\n" + "\tpand %%mm6, %%mm7\n" + "\tpandn %%mm5, %%mm6\n" + + "\tpor %%mm6, %%mm7\n" + + "\tmovq %%mm7, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm0;\n" + "\tmovq (%1), %%mm1;\n" + "\tmovq %%mm1, %%mm3\n" + "\tpxor %%mm2, %%mm2\n" + "\tpunpcklbw %%mm2, %%mm3\n" + "\tpunpcklbw %%mm0, %%mm2\n" + + "\tmovq rgba8_w256, %%mm4\n" + "\tpsubw %%mm3, %%mm4\n" + + "\t" pdivwuqX(mm2,mm4,mm5) "\n" + + "\tmovq %%mm1, %%mm3\n" + "\tpxor %%mm2, %%mm2\n" + "\tpunpckhbw %%mm2, %%mm3\n" + "\tpunpckhbw %%mm0, %%mm2\n" + + "\tmovq rgba8_w256, %%mm4\n" + "\tpsubw %%mm3, %%mm4\n" + + "\t" pdivwuqX(mm2,mm4,mm6) "\n" + + "\tpackuswb %%mm6, %%mm5\n" + + "\tmovq rgba8_alpha_mask, %%mm6\n" + "\tmovq %%mm1,%%mm7\n" + "\t" pminub(mm0,mm7,mm2) "\n" + "\tpand %%mm6, %%mm7\n" + "\tpandn %%mm5, %%mm6\n" + + "\tpor %%mm6, %%mm7\n" + + "\tmovd %%mm7, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +void 
+gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + asm("pxor %%mm6,%%mm6" : : : "%mm6"); + asm("movq %0,%%mm7" : : "m" (rgba8_w128) : "%mm7"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\tpsubw %%mm5, %%mm4\n" + "\tpaddw %%mm7, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\tpsubw %%mm5, %%mm4\n" + "\tpaddw %%mm7, %%mm4\n" + + "\tpackuswb %%mm4, %%mm1\n" + "\tmovq %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1; pandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\tpsubw %%mm5, %%mm4\n" + "\tpaddw %%mm7, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\tpsubw %%mm5, %%mm4\n" + "\tpaddw %%mm7, %%mm4\n" + + "\tpackuswb %%mm4, %%mm1\n" + "\tmovq %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1; pandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + + "\tpor %%mm2, %%mm1\n" + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); + +} + +void +gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0, %%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + asm("pxor %%mm6, %%mm6" : : : "%mm6"); + asm("movq %0, %%mm7" : : "m" (rgba8_w128) : "%mm7"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\tpaddw %%mm5, %%mm4\n" + "\tpsubw %%mm7, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\tpaddw %%mm5, %%mm4\n" + "\tpsubw %%mm7, %%mm4\n" + + "\tpackuswb %%mm4, %%mm1\n" + "\tmovq %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1; pandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\tpaddw %%mm5, %%mm4\n" + "\tpsubw %%mm7, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\tpaddw %%mm5, %%mm4\n" + "\tpsubw %%mm7, %%mm4\n" + + "\tpackuswb %%mm4, 
%%mm1\n" + "\tmovq %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1; pandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + + "\tpor %%mm2, %%mm1\n" + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); + +} + +void +xxxgimp_composite_hardlight_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + + } + + if (op.n_pixels) { + + } + + asm("emms"); + +} + +void +xxxgimp_composite_hueonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + + } + + if (op.n_pixels) { + + } + + asm("emms"); +} + +void +gimp_composite_lighten_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + "\tmovq %%mm2, %%mm4\n" + "\t" pmaxub(mm3,mm4,mm5) "\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\t" pminub(mm2,mm3,mm4) "\n" + "\tpand %%mm0, %%mm3\n" + "\tpor %%mm3, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2\n" + "\tmovd (%1), %%mm3\n" + "\tmovq %%mm2, %%mm4\n" + "\t" pmaxub(mm3,mm4,mm5) "\n" + + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + + "\t" pminub(mm2,mm3,mm4) "\n" + + "\tpand %%mm0, %%mm3\n" + "\tpor %%mm3, %%mm1\n" + "\tmovd %%mm1, (%2)\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +void +gimp_composite_multiply_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + asm("movq %0,%%mm7" : : "m" (rgba8_w128) : "%mm7"); + asm("pxor %%mm6,%%mm6" : : : "%mm6"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tmovq %%mm2, %%mm1\n" + "\tpunpcklbw %%mm6, %%mm1\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\t" pmulwX(mm5,mm1,mm7) "\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\t" pmulwX(mm5,mm4,mm7) "\n" + + "\tpackuswb %%mm4, %%mm1\n" + + "\tmovq %%mm0, %%mm4\n" + "\tpandn %%mm1, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2\n" + "\tmovd (%1), %%mm3\n" + + "\tmovq %%mm2, %%mm1\n" + "\tpunpcklbw %%mm6, %%mm1\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpcklbw %%mm6, %%mm5\n" + + "\t" pmulwX(mm5,mm1,mm7) "\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm4\n" + "\tmovq %%mm3, %%mm5\n" + "\tpunpckhbw %%mm6, %%mm5\n" + + "\t" pmulwX(mm5,mm4,mm7) "\n" + + "\tpackuswb %%mm4, %%mm1\n" + + "\tmovq %%mm0, %%mm4\n" + 
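+                    /* keep the computed product in the colour bytes and
+                       min(alpha_A, alpha_B) in the alpha byte */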
"\tpandn %%mm1, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + + "\tmovd %%mm1, (%2)\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +unsigned long rgba8_lower_ff[2] = { 0x00FF00FF, 0x00FF00FF }; + +void +op_overlay() +{ + asm("movq %mm2, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm3, %mm5"); + asm("punpcklbw %mm6, %mm5"); + asm("pmullw %mm5, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm5, %mm1"); + asm("psrlw $8, %mm1"); + + asm("pcmpeqb %mm4, %mm4"); + asm("psubb %mm2, %mm4"); + asm("punpcklbw %mm6, %mm4"); + asm("pcmpeqb %mm5, %mm5"); + asm("psubb %mm3, %mm5"); + asm("punpcklbw %mm6, %mm5"); + asm("pmullw %mm5, %mm4"); + asm("paddw %mm7, %mm4"); + asm("movq %mm4, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm5, %mm4"); + asm("psrlw $8, %mm4"); + + asm("movq rgba8_lower_ff, %mm5"); + asm("psubw %mm4, %mm5"); + + asm("psubw %mm1, %mm5"); + asm("movq %mm2, %mm4"); + asm("punpcklbw %mm6, %mm4"); + asm("pmullw %mm4, %mm5"); + asm("paddw %mm7, %mm5"); + asm("movq %mm5, %mm4"); + asm("psrlw $8, %mm4"); + asm("paddw %mm4, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm1, %mm5"); + + asm("subl $8, %esp"); + asm("movq %mm5, (%esp)"); + + asm("movq %mm2, %mm1"); + asm("punpckhbw %mm6, %mm1"); + asm("movq %mm3, %mm5"); + asm("punpckhbw %mm6, %mm5"); + asm("pmullw %mm5, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm5, %mm1"); + asm("psrlw $8, %mm1"); + + asm("pcmpeqb %mm4, %mm4"); + asm("psubb %mm2, %mm4"); + asm("punpckhbw %mm6, %mm4"); + asm("pcmpeqb %mm5, %mm5"); + asm("psubb %mm3, %mm5"); + asm("punpckhbw %mm6, %mm5"); + asm("pmullw %mm5, %mm4"); + asm("paddw %mm7, %mm4"); + asm("movq %mm4, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm5, %mm4"); + asm("psrlw $8, %mm4"); + + asm("movq rgba8_lower_ff, %mm5"); + asm("psubw %mm4, %mm5"); + + asm("psubw %mm1, %mm5"); + asm("movq %mm2, %mm4"); + asm("punpckhbw %mm6, %mm4"); + asm("pmullw %mm4, %mm5"); + asm("paddw %mm7, %mm5"); + asm("movq %mm5, %mm4"); + asm("psrlw $8, %mm4"); + asm("paddw %mm4, %mm5"); + asm("psrlw $8, %mm5"); + asm("paddw %mm1, %mm5"); + + asm("movq (%esp), %mm4"); + asm("addl $8, %esp"); + + asm("packuswb %mm5, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); +} + +void +gimp_composite_overlay_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tcall op_overlay\n" + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tcall op_overlay\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + +void +xxxgimp_composite_saturationonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" 
(rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + +void +gimp_composite_scale_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm volatile ("pxor %%mm0,%%mm0\n" + "\tmovl %0,%%eax\n" + "\tmovl %%eax,%%ebx\n" + "\tshl $16,%%ebx\n" + "\torl %%ebx,%%eax\n" + "\tmovd %%eax,%%mm5\n" + "\tmovd %%eax,%%mm3\n" + "\tpsllq $32,%%mm5\n" + "\tpor %%mm5,%%mm3\n" + "\tmovq %1,%%mm7\n" + : /* empty */ + : "m" (op.scale.scale), "m" (rgba8_w128) + : "%eax", "%mm0", "%mm5", "%mm6", "%mm7"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile ("movq (%0),%%mm2; addl $8,%0\n" + "\tmovq %%mm2,%%mm1\n" + "\tpunpcklbw %%mm0,%%mm1\n" + "\tmovq %%mm3,%%mm5\n" + + "\t" pmulwX(mm5,mm1,mm7) "\n" + + "\tmovq %%mm2,%%mm4\n" + "\tpunpckhbw %%mm0,%%mm4\n" + "\tmovq %%mm3,%%mm5\n" + + "\t" pmulwX(mm5,mm4,mm7) "\n" + + "\tpackuswb %%mm4,%%mm1\n" + + "\tmovq %%mm1,(%1); addl $8,%1\n" + : "+r" (op.A), "+r" (op.D) + : /* empty */ + : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + + } + + if (op.n_pixels) { + asm volatile ("movd (%0), %%mm2\n" + "\tmovq %%mm2,%%mm1\n" + "\tpunpcklbw %%mm0,%%mm1\n" + "\tmovq %%mm3,%%mm5\n" + + "\t" pmulwX(mm5,mm1,mm7) "\n" + + "\tpackuswb %%mm0,%%mm1\n" + "\tmovd %%mm1,(%1)\n" + : /* empty */ + : "r" (op.A), "r" (op.D) + : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + asm("emms"); +} + +void +gimp_composite_screen_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + asm("movq %0,%%mm7" : : "m" (rgba8_w128) : "%mm7"); + asm("pxor %mm6, %mm6"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm2, %%mm4\n" + "\tpcmpeqb %%mm5, %%mm5\n" + "\tpsubb %%mm3, %%mm5\n" + + "\tpunpcklbw %%mm6, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm5\n" + "\tpmullw %%mm4, %%mm5\n" + "\tpaddw %%mm7, %%mm5\n" + "\tmovq %%mm5, %%mm1\n" + "\tpsrlw $ 8, %%mm1\n" + "\tpaddw %%mm5, %%mm1\n" + "\tpsrlw $ 8, %%mm1\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm2, %%mm4\n" + "\tpcmpeqb %%mm5, %%mm5\n" + "\tpsubb %%mm3, %%mm5\n" + + "\tpunpckhbw %%mm6, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm5\n" + "\tpmullw %%mm4, %%mm5\n" + "\tpaddw %%mm7, %%mm5\n" + "\tmovq %%mm5, %%mm4\n" + "\tpsrlw $ 8, %%mm4\n" + "\tpaddw %%mm5, %%mm4\n" + "\tpsrlw $ 8, %%mm4\n" + + "\tpackuswb %%mm4, %%mm1\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + + "\t" pminub(mm2,mm3,mm4) "\n" + "\tpand %%mm0, %%mm3\n" + + "\tpor %%mm3, %%mm1\n" + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm2, %%mm4\n" + "\tpcmpeqb 
%%mm5, %%mm5\n" + "\tpsubb %%mm3, %%mm5\n" + + "\tpunpcklbw %%mm6, %%mm4\n" + "\tpunpcklbw %%mm6, %%mm5\n" + "\tpmullw %%mm4, %%mm5\n" + "\tpaddw %%mm7, %%mm5\n" + "\tmovq %%mm5, %%mm1\n" + "\tpsrlw $ 8, %%mm1\n" + "\tpaddw %%mm5, %%mm1\n" + "\tpsrlw $ 8, %%mm1\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm2, %%mm4\n" + "\tpcmpeqb %%mm5, %%mm5\n" + "\tpsubb %%mm3, %%mm5\n" + + "\tpunpckhbw %%mm6, %%mm4\n" + "\tpunpckhbw %%mm6, %%mm5\n" + "\tpmullw %%mm4, %%mm5\n" + "\tpaddw %%mm7, %%mm5\n" + "\tmovq %%mm5, %%mm4\n" + "\tpsrlw $ 8, %%mm4\n" + "\tpaddw %%mm5, %%mm4\n" + "\tpsrlw $ 8, %%mm4\n" + + "\tpackuswb %%mm4, %%mm1\n" + + "\tpcmpeqb %%mm4, %%mm4\n" + "\tpsubb %%mm1, %%mm4\n" + + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + + "\t" pminub(mm2,mm3,mm4) "\n" + "\tpand %%mm0, %%mm3\n" + + "\tpor %%mm3, %%mm1\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +void +xxxgimp_composite_softlight_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + +void +gimp_composite_subtract_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpsubusb %%mm3, %%mm4\n" + + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovq %%mm2, %%mm4\n" + "\tpsubusb %%mm3, %%mm4\n" + + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + + "\t" pminub(mm3,mm2,mm4) "\n" + + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovd %%mm1, (%2); addl $8, %2\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } + + asm("emms"); +} + +void +gimp_composite_swap_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2\n" + "\tmovq (%1), %%mm3\n" + "\tmovq %%mm3, (%0)\n" + "\tmovq %%mm2, (%1)\n" + "\taddl $8, %0\n" + "\taddl $8, %1\n" + : "+r" (op.A), "+r" (op.B) + : /* empty */ + : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2\n" + "\tmovd (%1), %%mm3\n" + "\tmovd %%mm3, (%0)\n" + "\tmovd %%mm2, (%1)\n" + : /* empty */ + : "r" (op.A), "r" (op.B) + : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + +void 
+gimp_composite_valueonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm0" : : "m" (rgba8_alpha_mask) : "%mm0"); + + for (; op.n_pixels >= 2; op.n_pixels -= 2) { + asm volatile (" movq (%0), %%mm2; addl $8, %0\n" + "\tmovq (%1), %%mm3; addl $8, %1\n" + + + "\tmovq %%mm1, (%2); addl $8, %2\n" + : "+r" (op.A), "+S" (op.B), "+D" (op.D) + : /* empty */ + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0), %%mm2;\n" + "\tmovd (%1), %%mm3;\n" + + "\tmovd %%mm1, (%2);\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + asm("emms"); +} + + +unsigned long v8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00}; +unsigned long v8_mul_shift[2] = { 0x00800080, 0x00800080 }; + +#if 0 +void +gimp_composite_addition_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + + asm("subl $ 4, %ecx"); + asm("jl .add_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".add_pixels_1a_1a_loop:"); + + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("paddusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .add_pixels_1a_1a_loop"); + + asm(".add_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .add_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("paddusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".add_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .add_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + asm("movq %mm2, %mm4"); + asm("paddusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".add_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +gimp_composite_burn_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("movq %0,%%mm1" + : + : "m" (va8_alpha_mask) + : "%mm1"); + + for (; op.n_pixels >= 4; op.n_pixels -= 4) { + asm volatile (" movq (%0),%%mm0; addl $8,%0\n" + "\tmovq (%1),%%mm1; addl $8,%1\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + + "\t" pdivwqX(mm4,mm5,mm7) "\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq 
%%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpckhbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\t" pdivwqX(mm4,mm5,mm6) "\n" + + "\tmovq %5,%%mm4\n" + "\tmovq %%mm4,%%mm5\n" + "\tpsubusw %%mm6,%%mm4\n" + "\tpsubusw %%mm7,%%mm5\n" + + "\tpackuswb %%mm4,%%mm5\n" + + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + + "\tmovq %6,%%mm7\n" + "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ + + "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ + "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ + + "\tmovq %%mm7,(%2); addl $8,%2\n" + : "+r" (op.A), "+r" (op.B), "+r" (op.D) + : "m" (va8_b255), "m" (va8_w1), "m" (va8_w255), "m" (va8_alpha_mask) + : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + } + + if (op.n_pixels) { + asm volatile (" movd (%0),%%mm0\n" + "\tmovd (%1),%%mm1\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + + "\t" pdivwqX(mm4,mm5,mm7) "\n" + + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpckhbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\t" pdivwqX(mm4,mm5,mm6) "\n" + + "\tmovq %5,%%mm4\n" + "\tmovq %%mm4,%%mm5\n" + "\tpsubusw %%mm6,%%mm4\n" + "\tpsubusw %%mm7,%%mm5\n" + + "\tpackuswb %%mm4,%%mm5\n" + + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + + "\tmovq %6,%%mm7\n" + "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ + + "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ + "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ + + "\tmovd %%mm7,(%2)\n" + : /* empty */ + : "r" (op.A), "r" (op.B), "r" (op.D), "m" (va8_b255), "m" (va8_w1), "m" (va8_w255), "m" (va8_alpha_mask) + : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + } + + asm("emms"); +} + +void +xxxgimp_composite_coloronly_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +gimp_composite_darken_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .darken_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".darken_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("movq %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .darken_pixels_1a_1a_loop"); + + asm(".darken_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .darken_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("movq %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".darken_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .darken_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + asm("movq %mm2, %mm4"); + 
asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("movq %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".darken_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +gimp_composite_difference_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .difference_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".difference_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("movq %mm3, %mm5"); + asm("psubusb %mm3, %mm4"); + asm("psubusb %mm2, %mm5"); + asm("movq %mm0, %mm1"); + asm("paddb %mm5, %mm4"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .difference_pixels_1a_1a_loop"); + + asm(".difference_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .difference_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("movq %mm3, %mm5"); + asm("psubusb %mm3, %mm4"); + asm("psubusb %mm2, %mm5"); + asm("movq %mm0, %mm1"); + asm("paddb %mm5, %mm4"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".difference_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .difference_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + asm("movq %mm2, %mm4"); + asm("movq %mm3, %mm5"); + asm("psubusb %mm3, %mm4"); + asm("psubusb %mm2, %mm5"); + asm("movq %mm0, %mm1"); + asm("paddb %mm5, %mm4"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".difference_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +xxxgimp_composite_dissolve_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_divide_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_dodge_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_grainextract_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_grainmerge_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_hardlight_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_hueonly_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_lighten_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .lighten_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".lighten_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, 
%mm4"); + asm("paddb %mm4, %mm3"); + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .lighten_pixels_1a_1a_loop"); + + asm(".lighten_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .lighten_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("paddb %mm4, %mm3"); + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".lighten_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .lighten_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("paddb %mm4, %mm3"); + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".lighten_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +xxxgimp_composite_multiply_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .multiply_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".multiply_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + + asm("movq %mm2, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm3, %mm5"); + asm("punpcklbw %mm6, %mm5"); + asm("pmullw %mm5, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm2, %mm4"); + asm("punpckhbw %mm6, %mm4"); + asm("movq %mm3, %mm5"); + asm("punpckhbw %mm6, %mm5"); + asm("pmullw %mm5, %mm4"); + asm("paddw %mm7, %mm4"); + asm("movq %mm4, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm4"); + asm("psrlw $ 8, %mm4"); + + asm("packuswb %mm4, %mm1"); + + asm("movq %mm0, %mm4"); + asm("pandn %mm1, %mm4"); + asm("movq %mm4, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .multiply_pixels_1a_1a_loop"); + + asm(".multiply_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .multiply_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + + asm("movq %mm2, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm3, %mm5"); + asm("punpcklbw %mm6, %mm5"); + asm("pmullw %mm5, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm2, %mm4"); + asm("punpckhbw %mm6, %mm4"); + asm("movq %mm3, %mm5"); + asm("punpckhbw %mm6, %mm5"); + asm("pmullw %mm5, %mm4"); + asm("paddw %mm7, %mm4"); + asm("movq %mm4, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm4"); + asm("psrlw $ 8, %mm4"); + + asm("packuswb %mm4, %mm1"); + + asm("movq %mm0, %mm4"); + 
asm("pandn %mm1, %mm4"); + asm("movq %mm4, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".multiply_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .multiply_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + + asm("movq %mm2, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm3, %mm5"); + asm("punpcklbw %mm6, %mm5"); + asm("pmullw %mm5, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm2, %mm4"); + asm("punpckhbw %mm6, %mm4"); + asm("movq %mm3, %mm5"); + asm("punpckhbw %mm6, %mm5"); + asm("pmullw %mm5, %mm4"); + asm("paddw %mm7, %mm4"); + asm("movq %mm4, %mm5"); + asm("psrlw $ 8, %mm5"); + asm("paddw %mm5, %mm4"); + asm("psrlw $ 8, %mm4"); + + asm("packuswb %mm4, %mm1"); + + asm("movq %mm0, %mm4"); + asm("pandn %mm1, %mm4"); + asm("movq %mm4, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".multiply_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +gimp_composite_overlay_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .overlay_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".overlay_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + asm("call op_overlay"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .overlay_pixels_1a_1a_loop"); + + asm(".overlay_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .overlay_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + asm("call op_overlay"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".overlay_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .overlay_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + asm("call op_overlay"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".overlay_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +xxxgimp_composite_replace_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_saturationonly_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_screen_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .screen_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".screen_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + + asm("pcmpeqb %mm4, %mm4"); + asm("psubb %mm2, %mm4"); + asm("pcmpeqb %mm5, %mm5"); + asm("psubb %mm3, %mm5"); + + asm("movq %mm4, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm5, %mm3"); + 
asm("punpcklbw %mm6, %mm3"); + asm("pmullw %mm3, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm4, %mm2"); + asm("punpckhbw %mm6, %mm2"); + asm("movq %mm5, %mm3"); + asm("punpckhbw %mm6, %mm3"); + asm("pmullw %mm3, %mm2"); + asm("paddw %mm7, %mm2"); + asm("movq %mm2, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm2"); + asm("psrlw $ 8, %mm2"); + + asm("packuswb %mm2, %mm1"); + + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm1, %mm3"); + + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm5, %mm2"); + asm("paddb %mm2, %mm5"); + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm5, %mm3"); + + asm("pand %mm0, %mm3"); + asm("por %mm3, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .screen_pixels_1a_1a_loop"); + + asm(".screen_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .screen_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + + asm("pcmpeqb %mm4, %mm4"); + asm("psubb %mm2, %mm4"); + asm("pcmpeqb %mm5, %mm5"); + asm("psubb %mm3, %mm5"); + + asm("movq %mm4, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm5, %mm3"); + asm("punpcklbw %mm6, %mm3"); + asm("pmullw %mm3, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm4, %mm2"); + asm("punpckhbw %mm6, %mm2"); + asm("movq %mm5, %mm3"); + asm("punpckhbw %mm6, %mm3"); + asm("pmullw %mm3, %mm2"); + asm("paddw %mm7, %mm2"); + asm("movq %mm2, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm2"); + asm("psrlw $ 8, %mm2"); + + asm("packuswb %mm2, %mm1"); + + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm1, %mm3"); + + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm5, %mm2"); + asm("paddb %mm2, %mm5"); + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm5, %mm3"); + + asm("pand %mm0, %mm3"); + asm("por %mm3, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".screen_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .screen_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + + asm("pcmpeqb %mm4, %mm4"); + asm("psubb %mm2, %mm4"); + asm("pcmpeqb %mm5, %mm5"); + asm("psubb %mm3, %mm5"); + + asm("movq %mm4, %mm1"); + asm("punpcklbw %mm6, %mm1"); + asm("movq %mm5, %mm3"); + asm("punpcklbw %mm6, %mm3"); + asm("pmullw %mm3, %mm1"); + asm("paddw %mm7, %mm1"); + asm("movq %mm1, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm1"); + asm("psrlw $ 8, %mm1"); + + asm("movq %mm4, %mm2"); + asm("punpckhbw %mm6, %mm2"); + asm("movq %mm5, %mm3"); + asm("punpckhbw %mm6, %mm3"); + asm("pmullw %mm3, %mm2"); + asm("paddw %mm7, %mm2"); + asm("movq %mm2, %mm3"); + asm("psrlw $ 8, %mm3"); + asm("paddw %mm3, %mm2"); + asm("psrlw $ 8, %mm2"); + + asm("packuswb %mm2, %mm1"); + + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm1, %mm3"); + + asm("movq %mm0, %mm1"); + asm("pandn %mm3, %mm1"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm5, %mm2"); + asm("paddb %mm2, %mm5"); + asm("pcmpeqb %mm3, %mm3"); + asm("psubb %mm5, %mm3"); + + asm("pand %mm0, %mm3"); + asm("por %mm3, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".screen_pixels_1a_1a_end:"); + + asm("emms"); + asm("popl 
%ebx"); + asm("popl %edi"); +} + +void +xxxgimp_composite_softlight_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_subtract_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + + asm("pushl %edi"); + asm("pushl %ebx"); + asm("movl 12(%esp), %edi"); + asm("movq v8_alpha_mask, %mm0"); + asm("subl $ 4, %ecx"); + asm("jl .substract_pixels_1a_1a_last3"); + asm("movl $ 8, %ebx"); + asm(".substract_pixels_1a_1a_loop:"); + asm("movq (%eax), %mm2"); + asm("movq (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movq %mm1, (%edi)"); + asm("addl %ebx, %eax"); + asm("addl %ebx, %edx"); + asm("addl %ebx, %edi"); + asm("subl $ 4, %ecx"); + asm("jge .substract_pixels_1a_1a_loop"); + + asm(".substract_pixels_1a_1a_last3:"); + asm("test $ 2, %ecx"); + asm("jz .substract_pixels_1a_1a_last1"); + asm("movd (%eax), %mm2"); + asm("movd (%edx), %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("addl $ 4, %eax"); + asm("addl $ 4, %edx"); + asm("addl $ 4, %edi"); + + asm(".substract_pixels_1a_1a_last1:"); + asm("test $ 1, %ecx"); + asm("jz .substract_pixels_1a_1a_end"); + + asm("movw (%eax), %bx"); + asm("movd %ebx, %mm2"); + asm("movw (%edx), %bx"); + asm("movd %ebx, %mm3"); + + asm("movq %mm2, %mm4"); + asm("psubusb %mm3, %mm4"); + asm("movq %mm0, %mm1"); + asm("pandn %mm4, %mm1"); + asm("psubb %mm4, %mm2"); + asm("pand %mm0, %mm2"); + asm("por %mm2, %mm1"); + asm("movd %mm1, %ebx"); + asm("movw %bx, (%edi)"); + + asm(".substract_pixels_1a_1a_end:"); + asm("emms"); + asm("popl %ebx"); + asm("popl %edi"); +} + +void +xxxgimp_composite_swap_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} + +void +xxxgimp_composite_valueonly_va8_va8_va8_mmx(GimpCompositeContext *_op) +{ + GimpCompositeContext op = *_op; + +} +#endif + +void +gimp_composite_mmx_init() +{ + +} diff --git a/app/composite/gimp-composite-mmx.h b/app/composite/gimp-composite-mmx.h new file mode 100644 index 0000000000..944d6a1856 --- /dev/null +++ b/app/composite/gimp-composite-mmx.h @@ -0,0 +1,51 @@ +#ifndef gimp_composite_context_h +#define gimp_composite_context_h +/* + * + */ +extern void gimp_composite_addition_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_burn_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_coloronly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_darken_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_difference_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_dissolve_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_divide_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_dodge_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_grainextract_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_grainmerge_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_hardlight_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_hueonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void 
gimp_composite_lighten_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_multiply_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_overlay_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_replace_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_saturationonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_screen_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_softlight_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_subtract_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_swap_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); +extern void gimp_composite_valueonly_rgba8_rgba8_rgba8_mmx(GimpCompositeContext *); + +extern void gimp_composite_addition_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_burn_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_coloronly_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_darken_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_difference_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_dissolve_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_divide_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_dodge_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_grainextract_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_grainmerge_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_hardlight_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_hueonly_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_lighten_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_multiply_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_overlay_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_replace_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_saturationonly_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_screen_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_softlight_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_subtract_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_swap_va8_va8_va8_mmx(GimpCompositeContext *); +extern void gimp_composite_valueonly_va8_va8_va8_mmx(GimpCompositeContext *); +#endif diff --git a/app/composite/gimp-composite-util.h b/app/composite/gimp-composite-util.h new file mode 100644 index 0000000000..9be706610d --- /dev/null +++ b/app/composite/gimp-composite-util.h @@ -0,0 +1,30 @@ +#ifndef gimp_composite_util +#define gimp_composite_util +/* + * + */ + +typedef struct { + unsigned char r; + unsigned char g; + unsigned char b; + unsigned char a; +} rgba8_t; + +typedef struct { + unsigned char r; + unsigned char g; + unsigned char b; +} rgb8_t; + +typedef struct { + unsigned char v; +} v8_t; + +typedef struct { + unsigned char v; + unsigned char a; +} va8_t; + +extern int gimp_composite_bpp[]; +#endif diff --git a/app/composite/gimp-composite.c b/app/composite/gimp-composite.c new file mode 100644 index 0000000000..acc4de20bc --- /dev/null +++ b/app/composite/gimp-composite.c @@ -0,0 +1,172 @@ +/* The GIMP -- an image manipulation program + * Copyright (C) 1995 Spencer Kimball and Peter Mattis + * + * Gimp image compositing + * Copyright (C) 2003 Helvetix 
Victorinox, a pseudonym, <helvetix@gimp.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ +/* + * $Id$ + */ +#include <stdio.h> + +#include "gimp-composite.h" + +/* + * Details about pixel formats, bits-per-pixel alpha and non alpha + * versions of pixel formats. + */ +/* + * Report on the number of bytes a particular pixel format consumes per pixel. + */ +unsigned char gimp_composite_pixel_bpp[] = { + 1, /* GIMP_PIXELFORMAT_V8 */ + 2, /* GIMP_PIXELFORMAT_VA8 */ + 3, /* GIMP_PIXELFORMAT_RGB8 */ + 4, /* GIMP_PIXELFORMAT_RGBA8 */ +#if GIMP_16BITCOLOR + 2, /* GIMP_PIXELFORMAT_V16 */ + 4, /* GIMP_PIXELFORMAT_VA16 */ + 6, /* GIMP_PIXELFORMAT_RGB16 */ + 8, /* GIMP_PIXELFORMAT_RGBA16 */ +#endif + 0, /* GIMP_PIXELFORMAT_ANY */ +}; + +char *gimp_composite_pixel_name[] = { + "GIMP_PIXELFORMAT_V8", + "GIMP_PIXELFORMAT_VA8", + "GIMP_PIXELFORMAT_RGB8", + "GIMP_PIXELFORMAT_RGBA8", +#if GIMP_16BITCOLOR + "GIMP_PIXELFORMAT_V16", + "GIMP_PIXELFORMAT_VA16", + "GIMP_PIXELFORMAT_RGB16 ", + "GIMP_PIXELFORMAT_RGBA16 ", +#endif + "GIMP_PIXELFORMAT_ANY", +}; +/* + * Report true (non-zero) if a pixel format has alpha. + */ +unsigned char gimp_composite_pixel_alphap[] = { + 0, /* GIMP_PIXELFORMAT_V8 */ + 1, /* GIMP_PIXELFORMAT_VA8 */ + 0, /* GIMP_PIXELFORMAT_RGB8 */ + 1, /* GIMP_PIXELFORMAT_RGBA8 */ +#if GIMP_16BITCOLOR + 0, /* GIMP_PIXELFORMAT_V16 */ + 1, /* GIMP_PIXELFORMAT_VA16 */ + 0, /* GIMP_PIXELFORMAT_RGB16 */ + 1, /* GIMP_PIXELFORMAT_RGBA16 */ +#endif + 0, /* GIMP_PIXELFORMAT_UNKNOWN */ +}; + +/* + * Convert to/from pixel formats with/without alpha. + */ +GimpPixelFormat gimp_composite_pixel_alpha[] = { + GIMP_PIXELFORMAT_VA8, /* GIMP_PIXELFORMAT_V8 */ + GIMP_PIXELFORMAT_V8, /* GIMP_PIXELFORMAT_VA8 */ + GIMP_PIXELFORMAT_RGBA8, /* GIMP_PIXELFORMAT_RGB8 */ + GIMP_PIXELFORMAT_RGB8, /* GIMP_PIXELFORMAT_RGBA8 */ +#if GIMP_16BITCOLOR + GIMP_PIXELFORMAT_VA16, + GIMP_PIXELFORMAT_V16, + GIMP_PIXELFORMAT_RGBA16, + GIMP_PIXELFORMAT_RGB16 +#endif + GIMP_PIXELFORMAT_ANY, /* GIMP_PIXELFORMAT_ANY */ +}; + + +/* + * XXX I don't like to put this here. I think this information, + * specific to the functions, ought to be with the function. 
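+ *
+ * Each row below follows the field order of
+ * struct GimpCompositeOperationEffects (see gimp-composite.h):
+ * { affect_opacity, increase_opacity, decrease_opacity },
+ * with the mode it describes named in the trailing comment.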
+ */ +struct GimpCompositeOperationEffects gimp_composite_operation_effects[] = { + { TRUE, TRUE, FALSE, }, /* GIMP_NORMAL_MODE */ + { TRUE, TRUE, FALSE, }, /* GIMP_DISSOLVE_MODE */ + { TRUE, TRUE, FALSE, }, /* GIMP_BEHIND_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_MULTIPLY_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_SCREEN_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_OVERLAY_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_DIFFERENCE_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_ADDITION_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_SUBTRACT_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_DARKEN_ONLY_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_LIGHTEN_ONLY_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_HUE_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_SATURATION_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_COLOR_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_VALUE_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_DIVIDE_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_DODGE_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_BURN_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_HARDLIGHT_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_SOFTLIGHT_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_GRAIN_EXTRACT_MODE */ + { FALSE, FALSE, FALSE, }, /* GIMP_GRAIN_MERGE_MODE */ + { TRUE, FALSE, TRUE, }, /* GIMP_COLOR_ERASE_MODE */ + { TRUE, FALSE, TRUE, }, /* GIMP_ERASE_MODE */ + { TRUE, TRUE, TRUE, }, /* GIMP_REPLACE_MODE */ + { TRUE, TRUE, FALSE, }, /* GIMP_ANTI_ERASE_MODE */ + + { FALSE, FALSE, FALSE }, /* GIMP_SWAP */ + { FALSE, FALSE, FALSE }, /* GIMP_SCALE */ + { FALSE, FALSE, FALSE }, /* GIMP_CONVERT */ +}; + +void +gimp_composite_unsupported(GimpCompositeContext *ctx) +{ + printf("compositing function %d unsupported\n", ctx->op); +} + +struct { + char announce_function; +} gimp_composite_debug; + +#include "gimp-composite-dispatch.c" + +void +gimp_composite_dispatch(GimpCompositeContext *ctx) +{ + void (*function)(); + + function = gimp_composite_function[ctx->op][ctx->pixelformat_A][ctx->pixelformat_B][ctx->pixelformat_D]; + + if (function) + (*function)(ctx); + else { + printf("unsupported composite operation %d %d %d (see gimp-composite.h)\n", ctx->op, ctx->pixelformat_A, ctx->pixelformat_B); + } +} + +void +gimp_composite_context_print(GimpCompositeContext *ctx) +{ + printf("%p: %s op=%d A=%s(%d):%p B=%s(%d):%p D=%s(%d):%p M=%s(%d):%p n_pixels=%lu\n", + ctx, + gimp_composite_function_name[ctx->op][ctx->pixelformat_A][ctx->pixelformat_B][ctx->pixelformat_D], + ctx->op, + gimp_composite_pixel_name[ctx->pixelformat_A], ctx->pixelformat_A, ctx->A, + gimp_composite_pixel_name[ctx->pixelformat_B], ctx->pixelformat_B, ctx->A, + gimp_composite_pixel_name[ctx->pixelformat_D], ctx->pixelformat_D, ctx->A, + gimp_composite_pixel_name[ctx->pixelformat_M], ctx->pixelformat_M, ctx->A, + ctx->n_pixels); +} diff --git a/app/composite/gimp-composite.h b/app/composite/gimp-composite.h new file mode 100644 index 0000000000..66eeb3c276 --- /dev/null +++ b/app/composite/gimp-composite.h @@ -0,0 +1,182 @@ +/* The GIMP -- an image manipulation program + * Copyright (C) 1995 Spencer Kimball and Peter Mattis + * + * Gimp Image Compositing + * Copyright (C) 2003 Helvetix Victorinox, a pseudonym, <helvetix@gimp.org> + * $Id$ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef gimp_composite_h +#define gimp_composite_h + +#include <sys/types.h> +#include <glib-object.h> +#include "base/base-enums.h" +#include "paint-funcs/paint-funcs-types.h" + +#ifndef NULL +#define NULL ((void) 0) +#endif + +typedef enum { + GIMP_PIXELFORMAT_V8, + GIMP_PIXELFORMAT_VA8, + GIMP_PIXELFORMAT_RGB8, + GIMP_PIXELFORMAT_RGBA8, +#if GIMP_16BITCOLOR + GIMP_PIXELFORMAT_V16, + GIMP_PIXELFORMAT_VA16, + GIMP_PIXELFORMAT_RGB16, + GIMP_PIXELFORMAT_RGBA16, +#endif + GIMP_PIXELFORMAT_ANY, + GIMP_PIXELFORMAT_N +} GimpPixelFormat; + +typedef struct { + u_int8_t v; +} gimp_v8_t; + +typedef struct { + u_int8_t v; + u_int8_t a; +} gimp_va8_t; + +typedef struct { + u_int8_t r; + u_int8_t g; + u_int8_t b; +} gimp_rgb8_t; + +typedef struct { + u_int8_t r; + u_int8_t g; + u_int8_t b; + u_int8_t a; +} gimp_rgba8_t; + +#ifdef GIMP_16BITCOLOUR +typedef struct { + u_int16_t v; +} gimp_v16_t; + +typedef struct { + u_int16_t v; + u_int16_t a; +} gimp_va16_t; + +typedef struct { + u_int16_t r; + u_int16_t g; + u_int16_t b; +} gimp_rgb16_t; + +typedef struct { + u_int16_t r; + u_int16_t g; + u_int16_t b; + u_int16_t a; +} gimp_rgba16_t; +#endif + +extern unsigned char gimp_composite_pixel_bpp[]; /* bytes per-pixel for each of the pixel formats */ +extern unsigned char gimp_composite_pixel_alphap[]; /* does pixel format have alpha? */ +extern GimpPixelFormat gimp_composite_pixel_alpha[]; /* converter between alpha and non-alpha pixel formats */ + +#define GIMP_COMPOSITE_ALPHA_OPAQUE (-1) +#define GIMP_COMPOSITE_ALPHA_TRANSPARENT (0) +/* + * This is the enumeration of all the supported compositing + * operations. Many of them are taken from the GimpLayerModeEffect + * enumeration, but there are (possibly more) implemented. Here is + * where they are all enumerated. + * + * Nota Bene: Unfortunately, the order here is important! 
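+ * The generated dispatch tables are indexed directly by these values;
+ * gimp_composite_dispatch() does
+ *
+ *   function = gimp_composite_function[ctx->op][ctx->pixelformat_A][ctx->pixelformat_B][ctx->pixelformat_D];
+ *
+ * so departing from the order of the GimpLayerModeEffects enumeration
+ * (and of the mode list in make-gimp-composite-dispatch.py) silently
+ * breaks the lookup.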
+ */ +typedef enum { + GIMP_COMPOSITE_NORMAL = GIMP_NORMAL_MODE, + GIMP_COMPOSITE_DISSOLVE = GIMP_DISSOLVE_MODE, + GIMP_COMPOSITE_BEHIND = GIMP_BEHIND_MODE, + GIMP_COMPOSITE_MULTIPLY = GIMP_MULTIPLY_MODE, + GIMP_COMPOSITE_SCREEN = GIMP_SCREEN_MODE, + GIMP_COMPOSITE_OVERLAY = GIMP_OVERLAY_MODE, + GIMP_COMPOSITE_DIFFERENCE = GIMP_DIFFERENCE_MODE, + GIMP_COMPOSITE_ADDITION = GIMP_ADDITION_MODE, + GIMP_COMPOSITE_SUBTRACT = GIMP_SUBTRACT_MODE, + GIMP_COMPOSITE_DARKEN = GIMP_DARKEN_ONLY_MODE, + GIMP_COMPOSITE_LIGHTEN = GIMP_LIGHTEN_ONLY_MODE, + GIMP_COMPOSITE_HUE = GIMP_HUE_MODE, + GIMP_COMPOSITE_SATURATION = GIMP_SATURATION_MODE, + GIMP_COMPOSITE_COLOR_ONLY = GIMP_COLOR_MODE, + GIMP_COMPOSITE_VALUE = GIMP_VALUE_MODE, + GIMP_COMPOSITE_DIVIDE = GIMP_DIVIDE_MODE, + GIMP_COMPOSITE_DODGE = GIMP_DODGE_MODE, + GIMP_COMPOSITE_BURN = GIMP_BURN_MODE, + GIMP_COMPOSITE_HARDLIGHT = GIMP_HARDLIGHT_MODE, + GIMP_COMPOSITE_SOFTLIGHT = GIMP_SOFTLIGHT_MODE, + GIMP_COMPOSITE_GRAIN_EXTRACT = GIMP_GRAIN_EXTRACT_MODE, + GIMP_COMPOSITE_GRAIN_MERGE = GIMP_GRAIN_MERGE_MODE, + GIMP_COMPOSITE_COLOR_ERASE = GIMP_COLOR_ERASE_MODE, + GIMP_COMPOSITE_ERASE = GIMP_ERASE_MODE, + GIMP_COMPOSITE_REPLACE = GIMP_REPLACE_MODE, + GIMP_COMPOSITE_ANTI_ERASE = GIMP_ANTI_ERASE_MODE, + GIMP_COMPOSITE_BLEND, + GIMP_COMPOSITE_SHADE, + GIMP_COMPOSITE_SWAP, + GIMP_COMPOSITE_SCALE, + GIMP_COMPOSITE_CONVERT, + GIMP_COMPOSITE_N +} GimpCompositeOperation; + +struct GimpCompositeOperationEffects { + unsigned char affect_opacity; + unsigned char increase_opacity; + unsigned char decrease_opacity; +}; + +extern struct GimpCompositeOperationEffects gimp_composite_operation_effects[]; + +/* + * This is structure for communicating all that is necessary to a + * compositing operation. + */ +typedef struct { + unsigned char *A; /* Source A */ + unsigned char *B; /* Source B */ + unsigned char *D; /* Destination */ + unsigned char *M; /* Mask */ + unsigned long n_pixels; + + GimpPixelFormat pixelformat_A; + GimpPixelFormat pixelformat_B; + GimpPixelFormat pixelformat_D; + GimpPixelFormat pixelformat_M; + + struct { int opacity; char affect; } replace; + struct { int scale; } scale; + struct { int blend; } blend; + struct { int x; int y; int opacity; } dissolve; + + CombinationMode combine; + GimpCompositeOperation op; +} GimpCompositeContext; + + +extern void gimp_composite_dispatch(GimpCompositeContext *); +extern void gimp_composite_init(); +extern void gimp_composite_context_print(GimpCompositeContext *); +#endif diff --git a/app/composite/gimp-composite.html b/app/composite/gimp-composite.html new file mode 100644 index 0000000000..2a487a9a47 --- /dev/null +++ b/app/composite/gimp-composite.html @@ -0,0 +1,82 @@ + <h1>A GIMP Image Compositing Subsystem</h1> +<quote> + Update. The latest version of this code will be available in the + cvs version of The GIMP "real soon now." Instead of tracking this + software separately, you can simply wait for it to show up in CVS. +</quote> + <p> + On Februrary 26, 2003 I volunteered to help with the GIMP <abbr + title="Intel Multimedia Extensions">MMX</abbr> implemetation + that had been languishing and had recently started to cause + problems when building the current GIMP code. + </p> + <p> + <a href="gimp-composite.tgz">This</a> is release 0.0 of an extensible and customisable image + compositing interface for the GIMP. I'd like to hear feedback. 
+ </p> + <p> + What you get is this: + </p> + <ul> + <li style="margin-bottom: 1em;"> + A general mechanism for incorporating compositing functions based + upon the compositing function and the pixel formats of the inputs and + the outputs of the function. + </li> + <li style="margin-bottom: 1em;"> + Generic implementations of the supported compositing functions as a + foundation for further/future improvements. You can see this code in + gimp-composite-generic.c which is a direct "port" of the existing GIMP + code which does the same. + </li> + <li style="margin-bottom: 1em;"> + The general mechanism allows any compositing function + implementation to be replaced by a different implementation that is, + for example, customised for the target CPU, or pixel formats, hardware + acceleration, and so forth. You can see this sort of code in + gimp-composite-mmx.c which contains implementations of several + compositing functions optimised with MMX assembly code. + </li> + </ul> + + <h2>Caveat</h2> + <p> + While I've been using this code, and working out the various problems + as they occur, this code is still immature and you may experience + problems. If you do, please tell me. + </p> + + <h2>Installation</h2> + <p> + To use this you : + </p> + <ul> + <li style="margin-bottom: 1em;"> + Untar the gimp-composite.tgz tarball, + <pre style="border: 1px solid purple; padding: 1ex;">% tar xzf gimp-composite.tgz</pre> + </li> + <li style="margin-bottom: 1em;"> + Edit <tt>gimp-composite/Makefile</tt> to set the values of three variables + to correspond to your local environment. For example, my values are: + <pre style="border: 1px solid purple; padding: 1ex;"> +GLIBINCLUDE=/home/helvetix/garnome/include/glib-2.0 +GLIBLIB=/home/helvetix/garnome/lib/glib-2.0/ +GIMP=/home/helvetix/Gnome/gimp</pre> + Note that the <tt>GIMP</tt> variable points to a clean cvs checkout of the + gimp-1.3 source code, <b>not</b> the path name of the gimp executable. + </li> + <li style="margin-bottom: 1em;"> + In the gimp-composite/ directory, execute "make install" + <pre style="border: 1px solid purple; padding: 1ex;">% cd gimp-composite ; make install</pre> + </li> + <li style="margin-bottom: 1em;"> + In the gimp-1.3 source directory, execute "autogen.sh && make" + <pre style="border: 1px solid purple; padding: 1ex;">% cd gimp ; ./autogen.sh && make</pre> + </li> + <li style="margin-bottom: 1em;"> + You can install the resultant gimp, or you can run it in place. + </li> + </ul> + <p> + Enjoy! Comments, feedback, complaints to me: HELVETIX Mysterious.ORG + </p> diff --git a/app/composite/make-gimp-composite-dispatch.py b/app/composite/make-gimp-composite-dispatch.py new file mode 100755 index 0000000000..79c33850bb --- /dev/null +++ b/app/composite/make-gimp-composite-dispatch.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python +# -*- mode: python py-indent-offset: 2; -*- +# +# Gimp image compositing +# Copyright (C) 2003 Helvetix Victorinox, <helvetix@gimp.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +import sys +import string +import os +import ns +import pprint +import getopt +import copy + +# +# This programme creates C code for gluing a collection of compositing +# functions into an array indexed by compositing function, and the +# pixel formats of its arguments. +# +# I make some assuptions about the names of the compositing functions. +# +# I look into the namespace of a set of object files and figure out +# from them what compositing functions are implemented. This let's me +# build a table with the right cells populated with either the special +# compositing functions, or to use a generically implemented +# compositing function. + + +# These are in the same order as they appear in the +# ./app/base/base-enums.h GimpLayerModeEffects enumeration, because we +# (probably unwisely) use the value of the enumeration as an index +# into the Big Table. +# +# XXX I'd like some python functions that let me rummage around in C code.... +# +composite_modes=[ + "GIMP_COMPOSITE_NORMAL", + "GIMP_COMPOSITE_DISSOLVE", + "GIMP_COMPOSITE_BEHIND", + "GIMP_COMPOSITE_MULTIPLY", + "GIMP_COMPOSITE_SCREEN", + "GIMP_COMPOSITE_OVERLAY", + "GIMP_COMPOSITE_DIFFERENCE", + "GIMP_COMPOSITE_ADDITION", + "GIMP_COMPOSITE_SUBTRACT", + "GIMP_COMPOSITE_DARKEN", + "GIMP_COMPOSITE_LIGHTEN", + "GIMP_COMPOSITE_HUE", + "GIMP_COMPOSITE_SATURATION", + "GIMP_COMPOSITE_COLOR_ONLY", + "GIMP_COMPOSITE_VALUE", + "GIMP_COMPOSITE_DIVIDE", + "GIMP_COMPOSITE_DODGE", + "GIMP_COMPOSITE_BURN", + "GIMP_COMPOSITE_HARDLIGHT", + "GIMP_COMPOSITE_SOFTLIGHT", + "GIMP_COMPOSITE_GRAIN_EXTRACT", + "GIMP_COMPOSITE_GRAIN_MERGE", + "GIMP_COMPOSITE_COLOR_ERASE", + "GIMP_COMPOSITE_ERASE" , + "GIMP_COMPOSITE_REPLACE" , + "GIMP_COMPOSITE_ANTI_ERASE", + "GIMP_COMPOSITE_BLEND", + "GIMP_COMPOSITE_SHADE", + "GIMP_COMPOSITE_SWAP", + "GIMP_COMPOSITE_SCALE", + "GIMP_COMPOSITE_CONVERT", + ] + +pixel_format=[ + "GIMP_PIXELFORMAT_V8", + "GIMP_PIXELFORMAT_VA8", + "GIMP_PIXELFORMAT_RGB8", + "GIMP_PIXELFORMAT_RGBA8", +# "GIMP_PIXELFORMAT_V16", +# "GIMP_PIXELFORMAT_VA16", +# "GIMP_PIXELFORMAT_RGB16", +# "GIMP_PIXELFORMAT_RGBA16" + "GIMP_PIXELFORMAT_ANY", + ] + + +def pixel_depth_name(pixel_format): + s = string.replace(pixel_format.lower(), "gimp_pixelformat_", "") + return (s) + +pp = pprint.PrettyPrinter(indent=4) + + +def functionnameify(filename): + f = os.path.basename(filename) + f = string.replace(f, ".o", "") + f = string.replace(f, ".c", "") + f = string.replace(f, ".h", "") + f = string.replace(f, "-", "_") + return (f) + +def print_function_table(filename, function_table): + + function_table_declarations = dict() + + function_table_keys = function_table.keys() + function_table_keys.sort() + + for key in function_table_keys: + if not function_table_declarations.has_key(function_table[key][0]): + print "void %s(GimpCompositeContext *);" % (function_table[key][0]) + function_table_declarations[function_table[key][0]] = function_table[key][0] + pass + pass + + print "" + print "void (*%s[%d][%d][%d][%d])() = {" % (functionnameify(filename), + len(composite_modes), + len(pixel_format)-1, + len(pixel_format)-1, + len(pixel_format)-1) + for mode in composite_modes: + print " { /* %s */" % (mode) + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " { /* A = %s */" % (pixel_depth_name(A)) + for B in filter(lambda pf: pf 
!= "GIMP_PIXELFORMAT_ANY", pixel_format): + print " /* %-6s */ {" % (pixel_depth_name(B)), + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if function_table.has_key(key): + print "%s, " % (function_table[key][0]), + else: + print "%s, " % ("NULL"), + pass + pass + print "}," + pass + print " }," + pass + print " }," + pass + + print "};\n" + + return + +def print_function_table_name(filename, function_table): + + print "" + print "char *%s_name[%d][%d][%d][%d] = {" % (functionnameify(filename), + len(composite_modes), + len(pixel_format)-1, + len(pixel_format)-1, + len(pixel_format)-1) + for mode in composite_modes: + print " { /* %s */" % (mode) + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " { /* A = %s */" % (pixel_depth_name(A)) + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " /* %-6s */ {" % (pixel_depth_name(B)), + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if function_table.has_key(key): + print '"%s", ' % (function_table[key][0]), + else: + print '"%s", ' % (""), + pass + pass + print "}," + pass + print " }," + pass + print " }," + pass + + print "};\n" + + return + +def load_function_table(filename): + nmx = ns.nmx(filename) + + gimp_composite_function = dict() + + for mode in composite_modes: + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + + for a in ["GIMP_PIXELFORMAT_ANY", A]: + for b in ["GIMP_PIXELFORMAT_ANY", B]: + for d in ["GIMP_PIXELFORMAT_ANY", D]: + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(a), pixel_depth_name(b), pixel_depth_name(d)) + + f = nmx.exports_re(key + ".*") + if f != None: gimp_composite_function["%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D))] = [f] + pass + pass + pass + pass + pass + pass + pass + + return (gimp_composite_function) + + +def merge_function_tables(tables): + main_table = copy.deepcopy(tables[0][1]) + + for t in tables[1:]: + print >>sys.stderr, t[0] + for mode in composite_modes: + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if t[1].has_key(key): + print >>sys.stderr, "%s = %s::%s" % (key, t[0], t[1][key]) + main_table[key] = t[1][key] + pass + pass + pass + pass + pass + pass + + return (main_table) + + +def print_test_code(tables): + return + + +def main(argv): + + objects = map(ns.nmx, argv) + + objs = objects + objs.reverse() + + gimp_composite_function = dict() + for o in objs: + for mode in composite_modes: + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % 
(string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + + for a in [A, "GIMP_PIXELFORMAT_ANY"]: + for b in [B, "GIMP_PIXELFORMAT_ANY"]: + for d in [D, "GIMP_PIXELFORMAT_ANY"]: + composite_function = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(a), pixel_depth_name(b), pixel_depth_name(d)) + + f = o.exports_re(composite_function + ".*") + if f != None: + gimp_composite_function.update({key : [f, mode, A, B, D]}) + break + pass + if gimp_composite_function.has_key(key): + break; + pass + if gimp_composite_function.has_key(key): + break; + pass + + if not gimp_composite_function.has_key(key): + gimp_composite_function.update({key : ["gimp_composite_unsupported", mode, A, B, D]}) + pass + + pass + pass + pass + pass + pass + + + print "/* THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT */" + print "$Id$" + print '#include "gimp-composite.h"' + print "extern void %s(GimpCompositeContext *);" % ("gimp_composite_unsupported") + done = dict() + for k in gimp_composite_function.keys(): + f = gimp_composite_function[k] + if not done.has_key(f[0]): + print "extern void %s(GimpCompositeContext *);" % (f[0]) + done.update({f[0] : None}) + pass + pass + + if 1: + print "char *gimp_composite_function_name[%d][%d][%d][%d] = {" % (len(composite_modes), len(pixel_format)-1, len(pixel_format)-1, len(pixel_format)-1) + for mode in composite_modes: + print " {" + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " {" + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " {", + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if gimp_composite_function.has_key(key): + print '"%s", ' % (gimp_composite_function[key][0]), + else: + print '"%s", ' % ("gimp_composite_unsupported"), + pass + pass + print "}," + pass + print " }," + + pass + print " }," + pass + + print "};" + pass + + + print "" + print "void (*gimp_composite_function[%d][%d][%d][%d])() = {" % (len(composite_modes), len(pixel_format)-1, len(pixel_format)-1, len(pixel_format)-1) + for mode in composite_modes: + print " { /* %s */" % (mode) + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " { /* A = %s */" % (pixel_depth_name(A)) + for B in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + print " /* %s */ {" % (pixel_depth_name(B)), + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if gimp_composite_function.has_key(key): + print "%s, " % (gimp_composite_function[key][0]), + else: + print "%s, " % ("gimp_composite_unsupported"), + pass + pass + print "}," + pass + print " }," + + pass + print " }," + pass + + print "};" + + + print """ +static int gimp_composite_initialised = 0; + +void +gimp_composite_init() +{ + if (!gimp_composite_initialised) { +""" + for o in objects: + print " %s_init();" % (functionnameify(o.filename)) + pass + + print " gimp_composite_initialised = 1;" + print " }" + print "}" + pass + +def gimp_composite_regression(function_tables): + + print """ +void +gimp_composite_regression() +{ + GimpCompositeContext generic_ctx; + GimpCompositeContext special_ctx; +""" + + generic_table = function_tables[0][1] + + for mode in composite_modes: + for A in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for B in 
filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for D in filter(lambda pf: pf != "GIMP_PIXELFORMAT_ANY", pixel_format): + for f in function_tables[1:]: + key = "%s_%s_%s_%s" % (string.lower(mode), pixel_depth_name(A), pixel_depth_name(B), pixel_depth_name(D)) + if f[1].has_key(key): + print "" + print " special_ctx.op = %s;" % (mode) + print " generic_ctx.op = %s;" % (mode) + print " %s(&special_ctx);" % (f[1][key][0]) + print " %s(&generic_ctx);" % (generic_table[key][0]) + print ' if (gimp_composite_regression_compare(&generic_ctx, &special_ctx)) {' + print ' printf("%s disagrees with %s\\n");' % (f[1][key][0], generic_table[key][0]) + print ' }' + pass + pass + pass + pass + pass + pass + + + print """ +} +""" + +def gimp_composite_init(function_tables): + for o in function_tables: + print "extern void %s_init();" % (functionnameify(o[0])) + pass + + print "" + + print """ +static int gimp_composite_initialised = 0; + +void +gimp_composite_init() +{ + if (!gimp_composite_initialised) { +""" + for o in function_tables: + print " %s_init();" % (functionnameify(o[0])) + pass + + print " gimp_composite_initialised = 1;" + print " }" + print "}" + pass + + +print "/* THIS FILE IS AUTOMATICALLY GENERATED. DO NOT EDIT */" +print "/* $Id$ */" +print '#include "gimp-composite.h"' +print "extern void %s(GimpCompositeContext *);" % ("gimp_composite_unsupported") +print "" + +d = list() +for f in sys.argv[1:]: + dd = load_function_table(f) + d.append((f, dd)) + print_function_table(f, dd) + pass + +main_table = merge_function_tables(d) + +print_function_table("gimp_composite_function", main_table) +print_function_table_name("gimp_composite_function", main_table) + +gimp_composite_init(d) + +#gimp_composite_regression(d) + +sys.exit(0) diff --git a/app/composite/ns.py b/app/composite/ns.py new file mode 100755 index 0000000000..502e75c566 --- /dev/null +++ b/app/composite/ns.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python +# Copyright (C) 2003 Helvetix Victorinox, a pseudonym, <helvetix@gimp.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +# -*- mode: python py-indent-offset: 2; -*- +# +# Look at object files and figure things about the namespaces they +# require and provide. +# +# It is very useful when working on libraries where you really should +# be hygenic about the namespace you occupy and not clutter it with +# conflicting and extraneous names. +# + +import os +import re +import sys +import string +import pprint + +pp = pprint.PrettyPrinter(indent=2) + +# +# for each object file, we keep two lists: exported names and imported names. +# +# nm -A [files...] 
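+# An illustrative line of that output (not taken from a real build of this
+# tree) looks like:
+#
+#   gimp-composite-mmx.o:00000000 T gimp_composite_multiply_rgba8_rgba8_rgba8_mmx
+#
+# update() below splits each line into (object, type, symbol); a type of
+# "U" is recorded as an import, while "C", "D" or "T" is recorded as an
+# export.  Typical use, as in make-gimp-composite-dispatch.py:
+#
+#   x = nmx("gimp-composite-mmx.o")
+#   x.exports_re("gimp_composite_multiply_.*")   # matching symbol, or None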
+# +class nmx: + def __init__(self, objfile=None): + self.objects = dict() + self.filename = None + + if objfile != None: + self.update(objfile) + pass + + return (None) + + def update(self, objfile): + self.filename = objfile + + fp = os.popen("nm -A " + objfile, "r") + + for line in fp.readlines(): + (object, type, symbol) = string.split(line) + object = object[:string.rfind(object, ':')] + + if not self.objects.has_key(object): + self.objects.update({ object : dict({"exports" : dict(), "imports" : dict()})}) + pass + + if type == "U": + self.objects[object]["imports"].update({symbol : dict()}) + elif type in ["C", "D", "T"]: + self.objects[object]["exports"].update({symbol : dict()}) + pass + pass + + fp.close() + return (None) + + def exports(self, name): + for o in self.objects.keys(): + if self.objects[o]["exports"].has_key(name): + return (1) + pass + return (0) + + def exports_re(self, name): + regex = re.compile(name) + + for o in self.objects.keys(): + for p in self.objects[o]["exports"].keys(): + if regex.match(p): + return (p) + pass + pass + return (None) + + pass + + +def nm(nmfile): + objects = dict() + + fp = open(nmfile, "r") + for line in fp.readlines(): + (object, type, symbol) = string.split(line) + object = object[:string.rfind(object, ':')] + + if not objects.has_key(object): + objects.update({ object : dict({"exports" : dict(), "imports" : dict()})}) + pass + + if type == "U": + objects[object]["imports"].update({symbol : dict()}) + elif type in ["C", "D", "T"]: + objects[object]["exports"].update({symbol : dict()}) + pass + + fp.close() + return (objects) + +def resolve_(objects, obj): + + for object in objects.keys(): + if object != obj: + for imported in objects[obj]["imports"].keys(): + if objects[object]["exports"].has_key(imported): + objects[obj]["imports"][imported] = object + pass + pass + + for exported in objects[obj]["exports"].keys(): + if objects[object]["imports"].has_key(exported): + objects[obj]["exports"][exported] = object + pass + pass + pass + pass + + return + +def resolve(objects): + + for object in objects.keys(): + resolve_(objects, object) + + return (objects) + +def report_unreferenced(objects): + for object in objects.keys(): + for symbol in objects[object]["exports"].keys(): + if len(objects[object]["exports"][symbol]) == 0: + print object + ":" + symbol, "unreferenced" + pass + pass + pass + return + +def report_referenced(objects): + for object in objects.keys(): + for symbol in objects[object]["imports"].keys(): + if len(objects[object]["imports"][symbol]) > 0: + print objects[object]["imports"][symbol] + ":" + symbol, object, "referenced" + pass + pass + pass + return + +def make_depend(objects): + for object in objects.keys(): + for symbol in objects[object]["imports"].keys(): + if len(objects[object]["imports"][symbol]) > 0: + print object + ":" + symbol, "referenced", objects[object]["imports"][symbol] + pass + pass + pass + return + + +def main(argv): + ns = nm(argv[0]) + + resolve(ns) + + report_referenced(ns) + report_unreferenced(ns) + pass + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/app/composite/tester.c b/app/composite/tester.c new file mode 100644 index 0000000000..7a643b887d --- /dev/null +++ b/app/composite/tester.c @@ -0,0 +1,466 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/time.h> + +#include "gimp-composite.h" +#include "gimp-composite-util.h" + +#undef use_oldmmx + +extern void xxx_3a(rgba8_t *, rgba8_t *, rgba8_t *, u_long); + +main(int argc, char *argv[]) +{ + double f; + 
GimpCompositeContext ctx; + GimpCompositeContext ctx_generic; + GimpCompositeContext ctx_va8; + GimpCompositeContext ctx_va8_generic; + int iterations; + rgba8_t *d1; + rgba8_t *d2; + rgba8_t *rgba8A; + rgba8_t *rgba8B; + va8_t *va8A; + va8_t *va8B; + va8_t *va8_d1; + va8_t *va8_d2; + struct timeval t0, t1, new_elapsed, old_elapsed; + unsigned long i; + unsigned long n_pixels; + + iterations = atoi(argv[1]); + n_pixels = atol(argv[2]); + + rgba8A = (rgba8_t *) calloc(sizeof(rgba8_t), n_pixels+1); + rgba8B = (rgba8_t *) calloc(sizeof(rgba8_t), n_pixels+1); + va8A = (va8_t *) calloc(sizeof(va8_t), n_pixels+1); + va8B = (va8_t *) calloc(sizeof(va8_t), n_pixels+1); + d1 = (rgba8_t *) calloc(sizeof(rgba8_t), n_pixels+1); + d2 = (rgba8_t *) calloc(sizeof(rgba8_t), n_pixels+1); + va8_d1 = (va8_t *) calloc(sizeof(va8_t), n_pixels+1); + va8_d2 = (va8_t *) calloc(sizeof(va8_t), n_pixels+1); + + srand(314159); + + for (i = 0; i < n_pixels; i++) { +#if 0 + rgba8A[i].r = rand() % 256; + rgba8A[i].g = rand() % 256; + rgba8A[i].b = rand() % 256; + rgba8A[i].a = rand() % 256; + + rgba8B[i].r = rand() % 256; + rgba8B[i].g = rand() % 256; + rgba8B[i].b = rand() % 256; + rgba8B[i].a = rand() % 256; +#else + rgba8A[i].r = 255-i; + rgba8A[i].g = 255-i; + rgba8A[i].b = 255-i; + rgba8A[i].a = 255-i; + + rgba8B[i].r = i; + rgba8B[i].g = i; + rgba8B[i].b = i; + rgba8B[i].a = i; + + va8A[i].v = i; + va8A[i].a = 255-i; + va8B[i].v = i; + va8B[i].a = i; +#endif + } + + gimp_composite_init(); + +#define do_add +#define do_darken +#define do_difference +#define do_lighten +#define do_multiply +#define do_subtract +#define do_screen +#define do_grainextract +#define do_grainmerge +#define do_divide +#define do_dodge +#define do_swap +#define do_scale +#define do_burn + + ctx.A = (unsigned char *) rgba8A; + ctx.pixelformat_A = GIMP_PIXELFORMAT_RGBA8; + ctx.B = (unsigned char *) rgba8B; + ctx.pixelformat_B = GIMP_PIXELFORMAT_RGBA8; + ctx.D = (unsigned char *) d2; + ctx.pixelformat_D = GIMP_PIXELFORMAT_RGBA8; + ctx.M = NULL; + ctx.pixelformat_M = GIMP_PIXELFORMAT_ANY; + ctx.n_pixels = n_pixels; + ctx.scale.scale = 2; + + ctx_generic.A = (unsigned char *) rgba8A; + ctx_generic.pixelformat_A = GIMP_PIXELFORMAT_RGBA8; + ctx_generic.B = (unsigned char *) rgba8B; + ctx_generic.pixelformat_B = GIMP_PIXELFORMAT_RGBA8; + ctx_generic.D = (unsigned char *) d1; + ctx_generic.pixelformat_D = GIMP_PIXELFORMAT_RGBA8; + ctx_generic.M = NULL; + ctx_generic.pixelformat_M = GIMP_PIXELFORMAT_ANY; + ctx_generic.n_pixels = n_pixels; + ctx_generic.scale.scale = 2; + + + ctx_va8.A = (unsigned char *) va8A; + ctx_va8.pixelformat_A = GIMP_PIXELFORMAT_VA8; + ctx_va8.B = (unsigned char *) va8B; + ctx_va8.pixelformat_B = GIMP_PIXELFORMAT_VA8; + ctx_va8.D = (unsigned char *) va8_d2; + ctx_va8.pixelformat_D = GIMP_PIXELFORMAT_VA8; + ctx_va8.M = NULL; + ctx_va8.pixelformat_M = GIMP_PIXELFORMAT_ANY; + ctx_va8.n_pixels = n_pixels; + ctx_va8.scale.scale = 2; + + ctx_va8_generic.A = (unsigned char *) va8A; + ctx_va8_generic.pixelformat_A = GIMP_PIXELFORMAT_VA8; + ctx_va8_generic.B = (unsigned char *) va8B; + ctx_va8_generic.pixelformat_B = GIMP_PIXELFORMAT_VA8; + ctx_va8_generic.D = (unsigned char *) va8_d1; + ctx_va8_generic.pixelformat_D = GIMP_PIXELFORMAT_VA8; + ctx_va8_generic.M = NULL; + ctx_va8_generic.pixelformat_M = GIMP_PIXELFORMAT_ANY; + ctx_va8_generic.n_pixels = n_pixels; + ctx_va8_generic.scale.scale = 2; + + +#define timer_fsecs(tv) ((double) ((tv).tv_sec) + (double) ((tv).tv_usec / 1000000.0)) +#define timer_report(name,t1,t2) printf("%15s 
%15.10f %15.10f %15.10f\n", name, timer_fsecs(t1), timer_fsecs(t2), timer_fsecs(t1)/timer_fsecs(t2)); + +#ifdef do_burn + /* burn */ + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_BURN; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_burn_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("burn rgba8", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("burn rgba8", old_elapsed, new_elapsed); + + gettimeofday(&t0, NULL); + ctx_va8.op = GIMP_COMPOSITE_BURN; + ctx_va8_generic.op = GIMP_COMPOSITE_BURN; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx_va8); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_burn_any_any_any_generic(&ctx_va8_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_va8("burn rgba8", ctx_va8.A, ctx_va8.B, ctx_va8_generic.D, ctx_va8.D, ctx_va8.n_pixels); + timer_report("burn va8", old_elapsed, new_elapsed); +#endif + +#ifdef do_dodge + /* dodge */ + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_DODGE; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_dodge_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("dodge", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("dodge", old_elapsed, new_elapsed); +#endif + +#ifdef do_divide + /* divide */ + ctx.op = GIMP_COMPOSITE_DIVIDE; + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_divide_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("divide", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("divide", old_elapsed, new_elapsed); +#endif + +#ifdef do_grainextract + /* grainextract */ + ctx.op = GIMP_COMPOSITE_GRAIN_EXTRACT; + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_grain_extract_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("grain extract", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("grainextract", old_elapsed, new_elapsed); +#endif + +#ifdef do_grainmerge + ctx.op = GIMP_COMPOSITE_GRAIN_MERGE; + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_grain_merge_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("grain merge", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("grainmerge", old_elapsed, new_elapsed); +#endif + +#ifdef do_scale + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_SCALE; + 
for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_scale_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("scale", ctx.A, NULL, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("scale", old_elapsed, new_elapsed); +#endif + +#ifdef do_screen + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_SCREEN; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_screen_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("screen", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("screen", old_elapsed, new_elapsed); +#endif + +#ifdef do_lighten + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_LIGHTEN; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_lighten_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("lighten", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("lighten", old_elapsed, new_elapsed); +#endif + +#ifdef do_darken + /* darken */ + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_DARKEN; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_darken_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("darken", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("darken", old_elapsed, new_elapsed); +#endif + +#ifdef do_difference + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_DIFFERENCE; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_difference_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("difference", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("difference", old_elapsed, new_elapsed); +#endif + +#ifdef do_multiply + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_MULTIPLY; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_multiply_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("multiply", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("multiply", old_elapsed, new_elapsed); +#endif + +#ifdef do_subtract + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_SUBTRACT; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_subtract_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, 
&t0, &old_elapsed); + comp_rgba8("subtract", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("subtract", old_elapsed, new_elapsed); +#endif + +#ifdef do_add + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_ADDITION; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_addition_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("addition", ctx.A, ctx.B, ctx_generic.D, ctx.D, ctx.n_pixels); + timer_report("add", old_elapsed, new_elapsed); +#endif + +#ifdef do_swap + gettimeofday(&t0, NULL); + ctx.op = GIMP_COMPOSITE_SWAP; + for (i = 0; i < iterations; i++) { gimp_composite_dispatch(&ctx); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &new_elapsed); + gettimeofday(&t0, NULL); + for (i = 0; i < iterations; i++) { gimp_composite_swap_any_any_any_generic(&ctx_generic); } + gettimeofday(&t1, NULL); + timersub(&t1, &t0, &old_elapsed); + comp_rgba8("swap", ctx.A, ctx.B, ctx_generic.A, ctx.A, ctx.n_pixels); + comp_rgba8("swap", ctx.A, ctx.B, ctx_generic.B, ctx.B, ctx.n_pixels); + timer_report("swap", old_elapsed, new_elapsed); +#endif + + return (0); +} + +print_rgba8(rgba8_t *p) +{ + printf("#%02x%02x%02x,%02X", p->r, p->g, p->b, p->a); + fflush(stdout); +} + +print_va8(va8_t *va8) +{ + printf("#%02x,%02X", va8->v, va8->a); + fflush(stdout); +} + +comp_rgba8(char *str, rgba8_t *rgba8A, rgba8_t *rgba8B, rgba8_t *expected, rgba8_t *got, u_long length) +{ + int i; + int failed; + int fail_count; + + fail_count = 0; + + for (i = 0; i < length; i++) { + failed = 0; + + if (expected[i].r != got[i].r) { failed = 1; } + if (expected[i].g != got[i].g) { failed = 1; } + if (expected[i].b != got[i].b) { failed = 1; } + if (expected[i].a != got[i].a) { failed = 1; } + if (failed) { + fail_count++; + printf("%s %8d A=", str, i); print_rgba8(&rgba8A[i]); + if (rgba8B != (rgba8_t *) 0) { + printf(" B="); print_rgba8(&rgba8B[i]); + } + printf(" "); + printf("exp="); + print_rgba8(&expected[i]); + printf(" got="); + print_rgba8(&got[i]); + printf("\n"); + } + if (fail_count > 5) + break; + } + + return (fail_count); +} + +comp_va8(char *str, va8_t *va8A, va8_t *va8B, va8_t *expected, va8_t *got, u_long length) +{ + int i; + int failed; + int fail_count; + + fail_count = 0; + + for (i = 0; i < length; i++) { + failed = 0; + + if (expected[i].v != got[i].v) { failed = 1; } + if (expected[i].a != got[i].a) { failed = 1; } + if (failed) { + fail_count++; + printf("%s %8d A=", str, i); print_va8(&va8A[i]); + if (va8B != (va8_t *) 0) { printf(" B="); print_va8(&va8B[i]); } + printf(" "); + printf("exp="); + print_va8(&expected[i]); + printf(" got="); + print_va8(&got[i]); + printf("\n"); + } + if (fail_count > 5) + break; + } + + return (fail_count); +} + + +dump_rgba8(char *str, rgba8_t *rgba, u_long length) +{ + int i; + + printf("%s\n", str); + + for (i = 0; i < length; i++) { + printf("%5d: ", i); + print_rgba8(&rgba[i]); + printf("\n"); + } +} + +void +xxx_3a(rgba8_t *a, rgba8_t *b, rgba8_t *c, u_long length) +{ + int i; + + for (i = 0; i < length; i++) { + printf("%5d: ", i); + print_rgba8(&a[i]); + printf(" "); + print_rgba8(&b[i]); + printf(" "); + print_rgba8(&c[i]); + printf("\n"); + } +}
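
Editor's note: the generated gimp-composite-dispatch.c fills a four-dimensional
table indexed by compositing operator and the pixel formats of A, B and D
(31 operators from composite_modes, 4 concrete pixel formats).  The
gimp_composite_dispatch() calls exercised by tester.c above are only known
here from those call sites; the real dispatcher lives in gimp-composite.c and
may differ.  A minimal sketch, assuming the table layout produced by
make-gimp-composite-dispatch.py, of how that lookup could be done:

/* Illustrative sketch only -- not part of this patch.
 * Table shape follows the generator output above:
 * [operator][format of A][format of B][format of D].
 * The extern declarations below may already be provided by the headers. */
#include <stddef.h>
#include "gimp-composite.h"

extern void gimp_composite_unsupported (GimpCompositeContext *);
extern void (*gimp_composite_function[31][4][4][4]) ();

void
gimp_composite_dispatch (GimpCompositeContext *ctx)
{
  void (*function) (GimpCompositeContext *);

  /* The operator and pixel-format enum values are used directly as
   * indices, so the table must stay in step with base-enums.h, as the
   * generator's own comments warn. */
  function = (void (*) (GimpCompositeContext *))
    gimp_composite_function[ctx->op]
                           [ctx->pixelformat_A]
                           [ctx->pixelformat_B]
                           [ctx->pixelformat_D];

  if (function != NULL)
    (*function) (ctx);
  else
    gimp_composite_unsupported (ctx);
}

Note that the generator's main() fills empty slots with
gimp_composite_unsupported rather than NULL, so the NULL check above may be
redundant in practice; it is kept for the NULL-filled tables emitted by
print_function_table().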