[X86] Don't match x87 register inline asm constraints unless the VT is floating point or its a clobber

The register class picked will be the RFP80 register class which has a f80 VT. The code in SelectionDAGBuilder that generates copies around inline assembly doesn't know how to handle an integer and floating point type of different bit widths. The test case is derived from this https://godbolt.org/z/sEa659 which gcc accepts but clang crashes on. This patch just gives a more graceful error. I'm not sure if the single element struct case is special in gcc. Adding another field to the struct makes gcc reject it. If we want to support this correctly I think we need a change in the frontend to give us the true element type. Right now the frontend just realizes the constraint can take a memory argument so creates an integer type of the same size and bitcasts. Differential Revision: https://reviews.llvm.org/D87485
2020-09-17 10:33:34 -07:00 · 2020-09-17 10:33:34 -07:00 · 3783d3bc7b
parent 0602e8f77f
commit 3783d3bc7b
2 changed files with 59 additions and 16 deletions
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@ -50618,23 +50618,27 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,

  // Not found as a standard register?
  if (!Res.second) {
-    // Map st(0) -> st(7) -> ST0
-    if (Constraint.size() == 7 && Constraint[0] == '{' &&
-        tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
-        Constraint[3] == '(' &&
-        (Constraint[4] >= '0' && Constraint[4] <= '7') &&
-        Constraint[5] == ')' && Constraint[6] == '}') {
-      // st(7) is not allocatable and thus not a member of RFP80. Return
-      // singleton class in cases where we have a reference to it.
-      if (Constraint[4] == '7')
-        return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
-      return std::make_pair(X86::FP0 + Constraint[4] - '0',
-                            &X86::RFP80RegClass);
-    }
+    // Only match x87 registers if the VT is one SelectionDAGBuilder can convert
+    // to/from f80.
+    if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
+      // Map st(0) -> st(7) -> ST0
+      if (Constraint.size() == 7 && Constraint[0] == '{' &&
+          tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
+          Constraint[3] == '(' &&
+          (Constraint[4] >= '0' && Constraint[4] <= '7') &&
+          Constraint[5] == ')' && Constraint[6] == '}') {
+        // st(7) is not allocatable and thus not a member of RFP80. Return
+        // singleton class in cases where we have a reference to it.
+        if (Constraint[4] == '7')
+          return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
+        return std::make_pair(X86::FP0 + Constraint[4] - '0',
+                              &X86::RFP80RegClass);
+      }

-    // GCC allows "st(0)" to be called just plain "st".
-    if (StringRef("{st}").equals_lower(Constraint))
-      return std::make_pair(X86::FP0, &X86::RFP80RegClass);
+      // GCC allows "st(0)" to be called just plain "st".
+      if (StringRef("{st}").equals_lower(Constraint))
+        return std::make_pair(X86::FP0, &X86::RFP80RegClass);
+    }

    // flags -> EFLAGS
    if (StringRef("{flags}").equals_lower(Constraint))
--- a/llvm/test/CodeGen/X86/asm-reject-x87-int.ll
+++ b/llvm/test/CodeGen/X86/asm-reject-x87-int.ll
@ -0,0 +1,39 @@
+; RUN: not llc -o /dev/null %s -mtriple=i386-unknown-unknown 2>&1 | FileCheck %s
+
+; This test was derived from this C code. The frontend sees that the constraint
+; doesn't accept memory, but the argument is a strict. So it tries to bitcast
+; to an integer of the same size. SelectionDAGBuilder doesn't know how to copy
+; between integers and fp80 so it asserts or crashes.
+;
+; gcc accepts the code. But rejects it if the struct is replaced by an int. From
+; the InlineAsm block those two cases look the same in LLVM IR. So if the single
+; elementstruct case is valid, then the frontend needs to emit different IR.
+
+; typedef struct float4 {
+;   float f;
+; } float4;
+;
+; int main() {
+;   float4 f4;
+;   f4.f = 4.0f;
+;   __asm  ("fadd %%st(0), %%st(0)" : "+t" (f4));
+;   return 0;
+; }
+
+%struct.float4 = type { float }
+
+; CHECK: error: couldn't allocate output register for constraint '{st}'
+define dso_local i32 @foo() {
+entry:
+  %retval = alloca i32, align 4
+  %f4 = alloca %struct.float4, align 4
+  store i32 0, i32* %retval, align 4
+  %f = getelementptr inbounds %struct.float4, %struct.float4* %f4, i32 0, i32 0
+  store float 4.000000e+00, float* %f, align 4
+  %0 = bitcast %struct.float4* %f4 to i32*
+  %1 = load i32, i32* %0, align 4
+  %2 = call i32 asm "fadd %st(0), %st(0)", "={st},0,~{dirflag},~{fpsr},~{flags}"(i32 %1)
+  %3 = bitcast %struct.float4* %f4 to i32*
+  store i32 %2, i32* %3, align 4
+  ret i32 0
+}