[Clang] Add __builtin_reduce_or and __builtin_reduce_and

This patch implements two builtins specified in D111529.
The remaining builtin, __builtin_reduce_add, will be implemented in a separate patch.

Differential Revision: https://reviews.llvm.org/D116736
This commit is contained in:
Jun Zhang 2022-01-05 17:20:57 +08:00
parent 1dab5f6c83
commit 8de0c1feca
No known key found for this signature in database
GPG Key ID: E19904830B621534
5 changed files with 68 additions and 2 deletions

View File

@ -656,6 +656,8 @@ BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
BUILTIN(__builtin_reduce_max, "v.", "nct") BUILTIN(__builtin_reduce_max, "v.", "nct")
BUILTIN(__builtin_reduce_min, "v.", "nct") BUILTIN(__builtin_reduce_min, "v.", "nct")
BUILTIN(__builtin_reduce_xor, "v.", "nct") BUILTIN(__builtin_reduce_xor, "v.", "nct")
BUILTIN(__builtin_reduce_or, "v.", "nct")
BUILTIN(__builtin_reduce_and, "v.", "nct")
BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")

View File

@ -3221,6 +3221,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_reduce_xor: case Builtin::BI__builtin_reduce_xor:
return RValue::get(emitUnaryBuiltin( return RValue::get(emitUnaryBuiltin(
*this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor")); *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
case Builtin::BI__builtin_reduce_or:
return RValue::get(emitUnaryBuiltin(
*this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
case Builtin::BI__builtin_reduce_and:
return RValue::get(emitUnaryBuiltin(
*this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));
case Builtin::BI__builtin_matrix_transpose: { case Builtin::BI__builtin_matrix_transpose: {
auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>(); auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();

View File

@ -2237,8 +2237,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
break; break;
} }
// __builtin_reduce_xor supports vector of integers only. // These builtins support vectors of integers only.
case Builtin::BI__builtin_reduce_xor: { case Builtin::BI__builtin_reduce_xor:
case Builtin::BI__builtin_reduce_or:
case Builtin::BI__builtin_reduce_and: {
if (PrepareBuiltinReduceMathOneArgCall(TheCall)) if (PrepareBuiltinReduceMathOneArgCall(TheCall))
return ExprError(); return ExprError();

View File

@ -68,3 +68,25 @@ void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
// CHECK-NEXT: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[VU1]]) // CHECK-NEXT: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[VU1]])
unsigned r3 = __builtin_reduce_xor(vu1); unsigned r3 = __builtin_reduce_xor(vu1);
} }
// Codegen test for __builtin_reduce_or. Each call below is paired with
// FileCheck directives that require a load of the vector parameter followed
// immediately by a call to the matching llvm.vector.reduce.or intrinsic
// (v8i16 for vi1, v4i32 for vu1).
// NOTE(review): si8 and u4 are declared outside this hunk; the directives
// imply <8 x i16> and <4 x i32> respectively -- confirm against the typedefs.
void test_builtin_reduce_or(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[VI1]])
short r2 = __builtin_reduce_or(vi1);
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
// CHECK-NEXT: call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[VU1]])
unsigned r3 = __builtin_reduce_or(vu1);
}
// Codegen test for __builtin_reduce_and. Each call below is paired with
// FileCheck directives that require a load of the vector parameter followed
// immediately by a call to the matching llvm.vector.reduce.and intrinsic
// (v8i16 for vi1, v4i32 for vu1).
// NOTE(review): si8 and u4 are declared outside this hunk; the directives
// imply <8 x i16> and <4 x i32> respectively -- confirm against the typedefs.
void test_builtin_reduce_and(si8 vi1, u4 vu1) {
// CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
// CHECK-NEXT: call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> [[VI1]])
short r2 = __builtin_reduce_and(vi1);
// CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
// CHECK-NEXT: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[VU1]])
unsigned r3 = __builtin_reduce_and(vu1);
}

View File

@ -52,3 +52,37 @@ void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
i = __builtin_reduce_xor(v); i = __builtin_reduce_xor(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
} }
// Sema diagnostics test for __builtin_reduce_or. In order, the statements
// cover: a result that cannot initialize 'struct Foo', a call with no
// arguments, a call with two arguments, a scalar 'int' argument, and a
// vector-of-float argument.
// Each diagnostic directive refers to the line directly above it (@-1), so
// no line may be inserted between a statement and its directive.
void test_builtin_reduce_or(int i, float4 v, int3 iv) {
struct Foo s = __builtin_reduce_or(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
i = __builtin_reduce_or();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
i = __builtin_reduce_or(iv, iv);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
i = __builtin_reduce_or(i);
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
i = __builtin_reduce_or(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
}
// Sema diagnostics test for __builtin_reduce_and. In order, the statements
// cover: a result that cannot initialize 'struct Foo', a call with no
// arguments, a call with two arguments, a scalar 'int' argument, and a
// vector-of-float argument.
// Each diagnostic directive refers to the line directly above it (@-1), so
// no line may be inserted between a statement and its directive.
void test_builtin_reduce_and(int i, float4 v, int3 iv) {
struct Foo s = __builtin_reduce_and(iv);
// expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
i = __builtin_reduce_and();
// expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
i = __builtin_reduce_and(iv, iv);
// expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
i = __builtin_reduce_and(i);
// expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
i = __builtin_reduce_and(v);
// expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
}