[Clang] Add __builtin_reduce_or and __builtin_reduce_and

This patch implements two builtins specified in D111529. The last __builtin_reduce_add will be seperated into another one. Differential Revision: https://reviews.llvm.org/D116736
2022-01-05 17:20:57 +08:00 · 2022-01-05 17:20:57 +08:00 · 8de0c1feca
parent 1dab5f6c83
commit 8de0c1feca
5 changed files with 68 additions and 2 deletions
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@ -656,6 +656,8 @@ BUILTIN(__builtin_elementwise_trunc, "v.", "nct")
 BUILTIN(__builtin_reduce_max, "v.", "nct")
 BUILTIN(__builtin_reduce_min, "v.", "nct")
 BUILTIN(__builtin_reduce_xor, "v.", "nct")
+BUILTIN(__builtin_reduce_or, "v.", "nct")
+BUILTIN(__builtin_reduce_and, "v.", "nct")

 BUILTIN(__builtin_matrix_transpose, "v.", "nFt")
 BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt")
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@ -3221,6 +3221,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
  case Builtin::BI__builtin_reduce_xor:
    return RValue::get(emitUnaryBuiltin(
        *this, E, llvm::Intrinsic::vector_reduce_xor, "rdx.xor"));
+  case Builtin::BI__builtin_reduce_or:
+    return RValue::get(emitUnaryBuiltin(
+        *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or"));
+  case Builtin::BI__builtin_reduce_and:
+    return RValue::get(emitUnaryBuiltin(
+        *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and"));

  case Builtin::BI__builtin_matrix_transpose: {
    auto *MatrixTy = E->getArg(0)->getType()->castAs<ConstantMatrixType>();
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@ -2237,8 +2237,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
    break;
  }

-  // __builtin_reduce_xor supports vector of integers only.
-  case Builtin::BI__builtin_reduce_xor: {
+  // These builtins support vectors of integers only.
+  case Builtin::BI__builtin_reduce_xor:
+  case Builtin::BI__builtin_reduce_or:
+  case Builtin::BI__builtin_reduce_and: {
    if (PrepareBuiltinReduceMathOneArgCall(TheCall))
      return ExprError();

--- a/clang/test/CodeGen/builtins-reduction-math.c
+++ b/clang/test/CodeGen/builtins-reduction-math.c
@ -68,3 +68,25 @@ void test_builtin_reduce_xor(si8 vi1, u4 vu1) {
  // CHECK-NEXT: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[VU1]])
  unsigned r3 = __builtin_reduce_xor(vu1);
 }
+
+void test_builtin_reduce_or(si8 vi1, u4 vu1) {
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_or(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_or(vu1);
+}
+
+void test_builtin_reduce_and(si8 vi1, u4 vu1) {
+
+  // CHECK:      [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
+  // CHECK-NEXT: call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> [[VI1]])
+  short r2 = __builtin_reduce_and(vi1);
+
+  // CHECK:      [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
+  // CHECK-NEXT: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[VU1]])
+  unsigned r3 = __builtin_reduce_and(vu1);
+}
--- a/clang/test/Sema/builtins-reduction-math.c
+++ b/clang/test/Sema/builtins-reduction-math.c
@ -52,3 +52,37 @@ void test_builtin_reduce_xor(int i, float4 v, int3 iv) {
  i = __builtin_reduce_xor(v);
  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
 }
+
+void test_builtin_reduce_or(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_or(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_or();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_or(iv, iv);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_or(i);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+
+  i = __builtin_reduce_or(v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+}
+
+void test_builtin_reduce_and(int i, float4 v, int3 iv) {
+  struct Foo s = __builtin_reduce_and(iv);
+  // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}}
+
+  i = __builtin_reduce_and();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+
+  i = __builtin_reduce_and(iv, iv);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+
+  i = __builtin_reduce_and(i);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}}
+
+  i = __builtin_reduce_and(v);
+  // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}}
+}