/* The GIMP -- an image manipulation program
 * Copyright (C) 1995 Spencer Kimball and Peter Mattis
 * Copyright (C) 2005 Frederic Leroy <fredo@starox.org>
 *
 * -*- mode: c tab-width: 2; -*-
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

#include "config.h"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <glib-object.h>
|
|
|
|
#include "base/base-types.h"
|
|
|
|
#include "gimp-composite.h"
|
|
#include "gimp-composite-altivec.h"
|
|
|
|
#ifdef COMPILE_ALTIVEC_IS_OKAY
|
|
|
|
#ifdef HAVE_ALTIVEC_H
|
|
#include <altivec.h>
|
|
#endif
|
|
|
|
/* Paper over differences between official gcc and Apple's weird gcc */
#ifdef HAVE_ALTIVEC_H
#define INIT_VECTOR(v...) {v}
#define CONST_BUFFER(b)   (b)
#else
#define INIT_VECTOR(v...) (v)
#define CONST_BUFFER(b)   ((guchar *)(b))
#endif

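/* Vector constants shared by the compositing routines below.  alphamask
 * selects the alpha byte of each RGBA pixel; combine_high_bytes is a
 * vec_perm control that gathers the high byte of every 16-bit element
 * from two vectors, which doubles as a free ">> 8" while repacking shorts
 * into bytes; the remaining ox* vectors splat the named 16-bit constant
 * across all eight elements. */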
static const vector unsigned char alphamask = (const vector unsigned char)
  INIT_VECTOR(0,0,0,0xff,0,0,0,0xff,0,0,0,0xff,0,0,0,0xff);
static const vector unsigned char combine_high_bytes = (const vector unsigned char)
  INIT_VECTOR(0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30);
static const vector unsigned short ox0080 = (const vector unsigned short)
  INIT_VECTOR(0x80,0x80,0x80,0x80,0x80,0x80,0x80,0x80);
static const vector unsigned short ox0008 = (const vector unsigned short)
  INIT_VECTOR(0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8);
static const vector signed short ox00ff = (const vector signed short)
  INIT_VECTOR(0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff,0x00ff);
static const vector signed short oxff80 = (const vector signed short)
  INIT_VECTOR(0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80,0xff80);

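/* AltiVec has no unaligned loads or stores: vec_ld and vec_st simply
 * ignore the low four bits of the address.  The helpers below use the
 * standard idiom instead: load the two aligned quadwords that straddle
 * the address and merge them with vec_perm, driven by the permute
 * control from vec_lvsl/vec_lvsr.  The aligned special cases avoid
 * touching the quadword past the end of the buffer, which could fault
 * if it lies in an unmapped page. */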
/* Load a vector from an unaligned location in memory */
static inline vector unsigned char
LoadUnaligned (const guchar *v)
{
  if ((long)v & 0x0f)
    {
      vector unsigned char permuteVector = vec_lvsl(0, v);
      vector unsigned char low = vec_ld(0, v);
      vector unsigned char high = vec_ld(16, v);
      return vec_perm(low, high, permuteVector);
    }
  else
    return vec_ld(0, v); /* don't want overflow */
}

/* Load less than a vector from an unaligned location in memory */
static inline vector unsigned char
LoadUnalignedLess (const guchar *v,
                   int           n)
{
  vector unsigned char permuteVector = vec_lvsl(0, v);
  if (((long)v & 0x0f) + n > 15)
    {
      vector unsigned char low = vec_ld(0, v);
      vector unsigned char high = vec_ld(16, v);
      return vec_perm(low, high, permuteVector);
    }
  else
    {
      vector unsigned char tmp = vec_ld(0, v);
      return vec_perm(tmp, tmp, permuteVector); /* don't want overflow */
    }
}

/* Store a vector to an unaligned location in memory */
static inline void
StoreUnaligned (vector unsigned char  v,
                const guchar         *where)
{
  if ((unsigned long)where & 0x0f)
    {
      /* Load the surrounding area */
      vector unsigned char low = vec_ld(0, where);
      vector unsigned char high = vec_ld(16, where);
      /* Prepare the constants that we need */
      vector unsigned char permuteVector = vec_lvsr(0, where);
      vector signed char oxFF = vec_splat_s8(-1);
      vector signed char ox00 = vec_splat_s8(0);
      /* Make a mask for which parts of the vectors to swap out */
      vector unsigned char mask = (vector unsigned char)vec_perm(ox00, oxFF, permuteVector);
      v = vec_perm(v, v, permuteVector);
      /* Insert our data into the low and high vectors */
      low = vec_sel(low, v, mask);
      high = vec_sel(v, high, mask);
      /* Store the two aligned result vectors */
      vec_st(low, 0, CONST_BUFFER(where));
      vec_st(high, 16, CONST_BUFFER(where));
    }
  else
    { /* prevent overflow */
      vec_st(v, 0, CONST_BUFFER(where));
    }
}

/* Store less than a vector to an unaligned location in memory */
static inline void
StoreUnalignedLess (vector unsigned char  v,
                    const guchar         *where,
                    int                   n)
{
  int i;
  vector unsigned char permuteVector = vec_lvsr(0, where);
  v = vec_perm(v, v, permuteVector);
  for (i=0; i<n; i++)
    vec_ste(v, i, CONST_BUFFER(where));
}

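/* Addition: D = A + B per channel, saturating at 255, with
 * alpha_D = min(alpha_A, alpha_B).  Each loop iteration handles four
 * RGBA pixels (16 bytes); the remainder is handled by the ...Less()
 * helpers after the loop. */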
void
gimp_composite_addition_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      d=vec_min(alpha_a, alpha_b);

      a=vec_andc(a, alphamask);
      a=vec_adds(a, d);
      b=vec_andc(b, alphamask);
      d=vec_adds(a, b);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a,alphamask);
  alpha_b=vec_and(b,alphamask);
  d=vec_min(alpha_a,alpha_b);

  a=vec_andc(a,alphamask);
  a=vec_adds(a,d);
  b=vec_andc(b,alphamask);
  d=vec_adds(a,b);

  StoreUnalignedLess(d, D, length);
}

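/* Subtract: D = A - B per channel, saturating at 0, with
 * alpha_D = min(alpha_A, alpha_B). */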
void
gimp_composite_subtract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      d=vec_min(alpha_a, alpha_b);

      a=vec_andc(a, alphamask);
      a=vec_adds(a, d);
      b=vec_andc(b, alphamask);
      d=vec_subs(a, b);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a,alphamask);
  alpha_b=vec_and(b,alphamask);
  d=vec_min(alpha_a,alpha_b);

  a=vec_andc(a,alphamask);
  a=vec_adds(a,d);
  b=vec_andc(b,alphamask);
  d=vec_subs(a,b);

  StoreUnalignedLess(d, D, length);
}

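/* Swap: exchange the contents of the A and B buffers in place, four
 * pixels at a time.  ctx->D is not used. */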
void
gimp_composite_swap_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guint length = ctx->n_pixels;
  vector unsigned char a,b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);
      StoreUnaligned(b, A);
      StoreUnaligned(a, B);
      A+=16;
      B+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);
  StoreUnalignedLess(a, B, length);
  StoreUnalignedLess(b, A, length);
}

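/* Difference: D = |A - B| per channel, computed as the sum of the two
 * saturating differences (A - B) + (B - A), with
 * alpha_D = min(alpha_A, alpha_B). */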
void
gimp_composite_difference_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,e,alpha_a,alpha_b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      d=vec_min(alpha_a, alpha_b);

      a=vec_andc(a, alphamask);
      a=vec_adds(a, d);
      b=vec_andc(b, alphamask);
      d=vec_subs(a, b);
      e=vec_subs(b, a);
      d=vec_add(d,e);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a,alphamask);
  alpha_b=vec_and(b,alphamask);
  d=vec_min(alpha_a,alpha_b);

  a=vec_andc(a,alphamask);
  a=vec_adds(a,d);
  b=vec_andc(b,alphamask);
  d=vec_subs(a,b);
  e=vec_subs(b, a);
  d=vec_add(d,e);

  StoreUnalignedLess(d, D, length);
}

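/* Darken: D = min(A, B) per channel.  The channel-wise minimum already
 * yields min(alpha_A, alpha_B) in the alpha byte, so no separate alpha
 * fixup is needed. */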
void
gimp_composite_darken_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      d=vec_min(a, b);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  d=vec_min(a, b);

  StoreUnalignedLess(d, D, length);
}

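/* Lighten: D = max(A, B) per channel.  The alpha lanes are pre-seeded so
 * that a carries min(alpha_A, alpha_B) and b carries 0 there, which makes
 * the vec_max produce the minimum alpha. */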
void
gimp_composite_lighten_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      d=vec_min(alpha_a, alpha_b);

      a=vec_andc(a, alphamask);
      a=vec_adds(a, d);
      b=vec_andc(b, alphamask);
      d=vec_max(a, b);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a,alphamask);
  alpha_b=vec_and(b,alphamask);
  d=vec_min(alpha_a,alpha_b);

  a=vec_andc(a,alphamask);
  a=vec_adds(a,d);
  b=vec_andc(b,alphamask);
  d=vec_max(a, b);

  StoreUnalignedLess(d, D, length);
}

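/* Multiply: D = A * B / 255 per channel, with
 * alpha_D = min(alpha_A, alpha_B).  The exact rounded division by 255
 * uses the classic trick; in scalar form:
 *
 *   t = a * b + 128;
 *   d = (t + (t >> 8)) >> 8;    (equals a * b / 255, correctly rounded)
 *
 * vec_mule/vec_mulo produce the even/odd 16-bit products, and the final
 * combine_high_bytes permute re-interleaves their high bytes, providing
 * the last ">> 8" for free. */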
void
gimp_composite_multiply_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector unsigned short al,ah;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      al=vec_mule(a,b);
      al=vec_add(al,ox0080);
      ah=vec_mulo(a,b);
      ah=vec_add(ah,ox0080);
      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,vec_sr(ah,ox0008));
      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,b);
  al=vec_add(al,ox0080);
  ah=vec_mulo(a,b);
  ah=vec_add(ah,ox0080);
  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,vec_sr(ah,ox0008));
  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

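/* Blend: D = (A * (255 - blend) + B * blend) / 255.  The scalar blend
 * factor is splatted into vblend through a union; vblendc is its
 * complement (vec_nor of a vector with itself is a bitwise NOT). */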
void
gimp_composite_blend_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  guchar blend = ctx->blend.blend;
  union
    {
      vector unsigned char v;
      unsigned char u8[16];
    } vblend;

  vector unsigned char vblendc;
  vector unsigned char a,b,d;
  vector unsigned short al,ah,bl,bh,one=vec_splat_u16(1);
  guchar tmp;

  for (tmp=0; tmp<16; tmp++)
    vblend.u8[tmp]=blend;
  vblendc=vec_nor(vblend.v,vblend.v);

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      /* dest[b] = (src1[b] * blend2 + src2[b] * blend) / 255;
       * to divide by 255 we use ((n+1)+((n+1)>>8))>>8,
       * which works for every value except 0xffff;
       * happily, the blending formula can't produce that value */

      al=vec_mule(a,vblendc);
      ah=vec_mulo(a,vblendc);

      bl=vec_mule(b,vblend.v);
      bh=vec_mulo(b,vblend.v);

      al=vec_add(al,bl);
      al=vec_add(al,one);
      al=vec_add(al,vec_sr(al,ox0008));

      ah=vec_add(ah,bh);
      ah=vec_add(ah,one);
      ah=vec_add(ah,vec_sr(ah,ox0008));

      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  al=vec_mule(a,vblendc);
  ah=vec_mulo(a,vblendc);

  bl=vec_mule(b,vblend.v);
  bh=vec_mulo(b,vblend.v);

  al=vec_add(al,bl);
  al=vec_add(al,one);
  al=vec_add(al,vec_sr(al,ox0008));

  ah=vec_add(ah,bh);
  ah=vec_add(ah,one);
  ah=vec_add(ah,vec_sr(ah,ox0008));

  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

  StoreUnalignedLess(d, D, length);
}

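/* Screen: D = 255 - (255 - A) * (255 - B) / 255.  Both inputs are
 * complemented with vec_nor, multiplied with the same rounded
 * divide-by-255 as in multiply, and the product complemented back;
 * alpha_D = min(alpha_A, alpha_B). */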
void
gimp_composite_screen_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector unsigned short ah,al;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      a=vec_nor(a,a);
      b=vec_nor(b,b);
      al=vec_mule(a,b);
      al=vec_add(al,ox0080);
      ah=vec_mulo(a,b);
      ah=vec_add(ah,ox0080);

      al=vec_add(al,vec_sr(al,ox0008));
      ah=vec_add(ah,vec_sr(ah,ox0008));

      d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);

      d=vec_nor(d,d);
      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  a=vec_nor(a,a);
  b=vec_nor(b,b);
  al=vec_mule(a,b);
  al=vec_add(al,ox0080);
  ah=vec_mulo(a,b);
  ah=vec_add(ah,ox0080);

  al=vec_add(al,vec_sr(al,ox0008));
  ah=vec_add(ah,vec_sr(ah,ox0008));

  d=vec_perm((vector unsigned char)al,(vector unsigned char)ah,combine_high_bytes);
  d=vec_nor(d,d);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

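/* Grain merge: D = clamp(A + B - 128).  The bytes are unpacked into
 * signed 16-bit lanes (masked back to 0..255), summed, biased by
 * oxff80 (-128), and vec_packsu saturates the result back to unsigned
 * bytes; alpha_D = min(alpha_A, alpha_B). */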
void
gimp_composite_grain_merge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector signed short ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah=vec_unpackh((vector signed char)a);
      ah=vec_and(ah,ox00ff);
      al=vec_unpackl((vector signed char)a);
      al=vec_and(al,ox00ff);
      bh=vec_unpackh((vector signed char)b);
      bh=vec_and(bh,ox00ff);
      bl=vec_unpackl((vector signed char)b);
      bl=vec_and(bl,ox00ff);

      ah=vec_add(ah,bh);
      al=vec_add(al,bl);
      ah=vec_add(ah,oxff80);
      al=vec_add(al,oxff80);

      d=vec_packsu(ah,al);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah=vec_unpackh((vector signed char)a);
  ah=vec_and(ah,ox00ff);
  al=vec_unpackl((vector signed char)a);
  al=vec_and(al,ox00ff);
  bh=vec_unpackh((vector signed char)b);
  bh=vec_and(bh,ox00ff);
  bl=vec_unpackl((vector signed char)b);
  bl=vec_and(bl,ox00ff);

  ah=vec_add(ah,bh);
  al=vec_add(al,bl);
  ah=vec_add(ah,oxff80);
  al=vec_add(al,oxff80);

  d=vec_packsu(ah,al);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

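/* Grain extract: D = clamp(A - B + 128), the inverse of grain merge:
 * subtract in signed 16-bit lanes, then subtract oxff80 (-128) to apply
 * the +128 bias; alpha_D = min(alpha_A, alpha_B). */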
void
gimp_composite_grain_extract_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d,alpha_a,alpha_b,alpha;
  vector signed short ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah=vec_unpackh((vector signed char)a);
      ah=vec_and(ah,ox00ff);
      al=vec_unpackl((vector signed char)a);
      al=vec_and(al,ox00ff);
      bh=vec_unpackh((vector signed char)b);
      bh=vec_and(bh,ox00ff);
      bl=vec_unpackl((vector signed char)b);
      bl=vec_and(bl,ox00ff);

      ah=vec_sub(ah,bh);
      al=vec_sub(al,bl);
      ah=vec_sub(ah,oxff80);
      al=vec_sub(al,oxff80);

      d=vec_packsu(ah,al);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);

      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah=vec_unpackh((vector signed char)a);
  ah=vec_and(ah,ox00ff);
  al=vec_unpackl((vector signed char)a);
  al=vec_and(al,ox00ff);
  bh=vec_unpackh((vector signed char)b);
  bh=vec_and(bh,ox00ff);
  bl=vec_unpackl((vector signed char)b);
  bl=vec_and(bl,ox00ff);

  ah=vec_sub(ah,bh);
  al=vec_sub(al,bl);
  ah=vec_sub(ah,oxff80);
  al=vec_sub(al,oxff80);

  d=vec_packsu(ah,al);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

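/* Divide: D = clamp(A * 256 / (B + 1)) per channel.  AltiVec has no
 * integer divide, so the 16-bit lanes are divided one at a time through
 * a union; lanes 3 and 7 (the alpha positions) are skipped and later
 * replaced with min(alpha_A, alpha_B). */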
void
gimp_composite_divide_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;
  vector unsigned char alpha_a,alpha_b,alpha;
  vector signed short ox0001=vec_splat_s16(1);
  union
    {
      vector signed short v;
      vector unsigned short vu;
      gushort u16[8];
    } ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah.v=vec_unpackh((vector signed char)a);
      ah.v=vec_sl(ah.v,ox0008);
      al.v=vec_unpackl((vector signed char)a);
      al.v=vec_sl(al.v,ox0008);

      bh.v=vec_unpackh((vector signed char)b);
      bh.v=vec_and(bh.v,ox00ff);
      bh.v=vec_add(bh.v,ox0001);
      bl.v=vec_unpackl((vector signed char)b);
      bl.v=vec_and(bl.v,ox00ff);
      bl.v=vec_add(bl.v,ox0001);

      ah.u16[0]=ah.u16[0]/bh.u16[0];
      ah.u16[1]=ah.u16[1]/bh.u16[1];
      ah.u16[2]=ah.u16[2]/bh.u16[2];
      ah.u16[4]=ah.u16[4]/bh.u16[4];
      ah.u16[5]=ah.u16[5]/bh.u16[5];
      ah.u16[6]=ah.u16[6]/bh.u16[6];

      al.u16[0]=al.u16[0]/bl.u16[0];
      al.u16[1]=al.u16[1]/bl.u16[1];
      al.u16[2]=al.u16[2]/bl.u16[2];
      al.u16[4]=al.u16[4]/bl.u16[4];
      al.u16[5]=al.u16[5]/bl.u16[5];
      al.u16[6]=al.u16[6]/bl.u16[6];

      d=vec_packs(ah.vu,al.vu);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah.v=vec_unpackh((vector signed char)a);
  ah.v=vec_sl(ah.v,ox0008);
  al.v=vec_unpackl((vector signed char)a);
  al.v=vec_sl(al.v,ox0008);

  bh.v=vec_unpackh((vector signed char)b);
  bh.v=vec_and(bh.v,ox00ff);
  bh.v=vec_add(bh.v,ox0001);
  bl.v=vec_unpackl((vector signed char)b);
  bl.v=vec_and(bl.v,ox00ff);
  bl.v=vec_add(bl.v,ox0001);

  ah.u16[0]=ah.u16[0]/bh.u16[0];
  ah.u16[1]=ah.u16[1]/bh.u16[1];
  ah.u16[2]=ah.u16[2]/bh.u16[2];
  ah.u16[4]=ah.u16[4]/bh.u16[4];
  ah.u16[5]=ah.u16[5]/bh.u16[5];
  ah.u16[6]=ah.u16[6]/bh.u16[6];

  al.u16[0]=al.u16[0]/bl.u16[0];
  al.u16[1]=al.u16[1]/bl.u16[1];
  al.u16[2]=al.u16[2]/bl.u16[2];
  al.u16[4]=al.u16[4]/bl.u16[4];
  al.u16[5]=al.u16[5]/bl.u16[5];
  al.u16[6]=al.u16[6]/bl.u16[6];

  d=vec_packs(ah.vu,al.vu);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

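/* Dodge: D = clamp(A * 256 / (256 - B)): the same scalar-division scheme
 * as divide, except that B is complemented first with vec_nor, so the
 * divisor becomes (255 - B) + 1. */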
void
gimp_composite_dodge_rgba8_rgba8_rgba8_altivec (GimpCompositeContext *ctx)
{
  const guchar *A = ctx->A;
  const guchar *B = ctx->B;
  guchar *D = ctx->D;
  guint length = ctx->n_pixels;
  vector unsigned char a,b,d;
  vector unsigned char alpha_a,alpha_b,alpha;
  vector signed short ox0001=vec_splat_s16(1);
  union
    {
      vector signed short v;
      vector unsigned short vu;
      gushort u16[8];
    } ah,al,bh,bl;

  while (length >= 4)
    {
      a=LoadUnaligned(A);
      b=LoadUnaligned(B);

      alpha_a=vec_and(a, alphamask);
      alpha_b=vec_and(b, alphamask);
      alpha=vec_min(alpha_a, alpha_b);

      ah.v=vec_unpackh((vector signed char)a);
      ah.v=vec_sl(ah.v,ox0008);
      al.v=vec_unpackl((vector signed char)a);
      al.v=vec_sl(al.v,ox0008);

      b=vec_nor(b,b);
      bh.v=vec_unpackh((vector signed char)b);
      bh.v=vec_and(bh.v,ox00ff);
      bh.v=vec_add(bh.v,ox0001);
      bl.v=vec_unpackl((vector signed char)b);
      bl.v=vec_and(bl.v,ox00ff);
      bl.v=vec_add(bl.v,ox0001);

      ah.u16[0]=ah.u16[0]/bh.u16[0];
      ah.u16[1]=ah.u16[1]/bh.u16[1];
      ah.u16[2]=ah.u16[2]/bh.u16[2];
      ah.u16[4]=ah.u16[4]/bh.u16[4];
      ah.u16[5]=ah.u16[5]/bh.u16[5];
      ah.u16[6]=ah.u16[6]/bh.u16[6];

      al.u16[0]=al.u16[0]/bl.u16[0];
      al.u16[1]=al.u16[1]/bl.u16[1];
      al.u16[2]=al.u16[2]/bl.u16[2];
      al.u16[4]=al.u16[4]/bl.u16[4];
      al.u16[5]=al.u16[5]/bl.u16[5];
      al.u16[6]=al.u16[6]/bl.u16[6];

      d=vec_packs(ah.vu,al.vu);

      d=vec_andc(d, alphamask);
      d=vec_or(d, alpha);

      StoreUnaligned(d, D);
      A+=16;
      B+=16;
      D+=16;
      length-=4;
    }
  /* process last pixels */
  length = length*4;
  a=LoadUnalignedLess(A, length);
  b=LoadUnalignedLess(B, length);

  alpha_a=vec_and(a, alphamask);
  alpha_b=vec_and(b, alphamask);
  alpha=vec_min(alpha_a, alpha_b);

  ah.v=vec_unpackh((vector signed char)a);
  ah.v=vec_sl(ah.v,ox0008);
  al.v=vec_unpackl((vector signed char)a);
  al.v=vec_sl(al.v,ox0008);

  b=vec_nor(b,b);
  bh.v=vec_unpackh((vector signed char)b);
  bh.v=vec_and(bh.v,ox00ff);
  bh.v=vec_add(bh.v,ox0001);
  bl.v=vec_unpackl((vector signed char)b);
  bl.v=vec_and(bl.v,ox00ff);
  bl.v=vec_add(bl.v,ox0001);

  ah.u16[0]=ah.u16[0]/bh.u16[0];
  ah.u16[1]=ah.u16[1]/bh.u16[1];
  ah.u16[2]=ah.u16[2]/bh.u16[2];
  ah.u16[4]=ah.u16[4]/bh.u16[4];
  ah.u16[5]=ah.u16[5]/bh.u16[5];
  ah.u16[6]=ah.u16[6]/bh.u16[6];

  al.u16[0]=al.u16[0]/bl.u16[0];
  al.u16[1]=al.u16[1]/bl.u16[1];
  al.u16[2]=al.u16[2]/bl.u16[2];
  al.u16[4]=al.u16[4]/bl.u16[4];
  al.u16[5]=al.u16[5]/bl.u16[5];
  al.u16[6]=al.u16[6]/bl.u16[6];

  d=vec_packs(ah.vu,al.vu);

  d=vec_andc(d, alphamask);
  d=vec_or(d, alpha);

  StoreUnalignedLess(d, D, length);
}

#endif /* COMPILE_ALTIVEC_IS_OKAY */