[AARCH64][RegisterCoalescer] clang miscompiles zero-extension to long long

Implement AArch64 variant of shouldCoalesce() to detect a known failing case and prevent the coalescing of a 32-bit copy into a 64-bit sign-extending load. Do not coalesce in the following case: COPY where source is bottom 32 bits of a 64-register, and destination is a 32-bit subregister of a 64-bit register, ie it causes the rest of the register to be implicitly set to zero. A mir test has been added. In the test case, the 32-bit copy implements a 32 to 64 bit zero extension and relies on the upper 32 bits being zeroed. Coalescing to the result of the 64-bit load meant overwriting the upper 32 bits incorrectly when the loaded byte was negative. Reviewed By: john.brawn Differential Revision: https://reviews.llvm.org/D85956
2020-09-08 08:04:52 +01:00 · 2020-09-08 08:04:52 +01:00 · 8ee1419ab6
parent ea795304ec
commit 8ee1419ab6
3 changed files with 55 additions and 0 deletions
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@ -734,3 +734,19 @@ unsigned AArch64RegisterInfo::getLocalAddressRegister(
    return getBaseRegister();
  return getFrameRegister(MF);
 }
+
+/// SrcRC and DstRC will be morphed into NewRC if this returns true
+bool AArch64RegisterInfo::shouldCoalesce(
+    MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+    const TargetRegisterClass *DstRC, unsigned DstSubReg,
+    const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+  if (MI->isCopy() &&
+      ((DstRC->getID() == AArch64::GPR64RegClassID) ||
+       (DstRC->getID() == AArch64::GPR64commonRegClassID)) &&
+      MI->getOperand(0).getSubReg() && MI->getOperand(1).getSubReg())
+    // Do not coalesce in the case of a 32-bit subregister copy
+    // which implements a 32 to 64 bit zero extension
+    // which relies on the upper 32 bits being zeroed.
+    return false;
+  return true;
+}
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@ -129,6 +129,12 @@ public:

  unsigned getLocalAddressRegister(const MachineFunction &MF) const;
  bool regNeedsCFI(unsigned Reg, unsigned &RegToUseForCFI) const;
+
+  /// SrcRC and DstRC will be morphed into NewRC if this returns true
+  bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+                      unsigned SubReg, const TargetRegisterClass *DstRC,
+                      unsigned DstSubReg, const TargetRegisterClass *NewRC,
+                      LiveIntervals &LIS) const override;
 };

 } // end namespace llvm
--- a/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir
+++ b/llvm/test/CodeGen/AArch64/zext-reg-coalesce.mir
@ -0,0 +1,33 @@
+# RUN: llc -mtriple=aarch64-arm-none-eabi -o - %s \
+# RUN: -run-pass simple-register-coalescing | FileCheck %s
+
+# In this test case, the 32-bit copy implements a 32 to 64 bit zero extension
+# and relies on the upper 32 bits being zeroed.
+# Coalescing to the result of the 64-bit load meant overwriting
+# the upper 32 bits incorrectly when the loaded byte was negative.
+
+--- |
+  @c = local_unnamed_addr global i8 -1, align 4
+
+  define i64 @bug_e(i32 %i32) local_unnamed_addr {
+  ret i64 0
+  }
+...
+---
+name:            bug_e
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0
+
+    %1:gpr32 = COPY $w0
+    %2:gpr64common = ADRP target-flags(aarch64-page) @c
+    %3:gpr64 = LDRSBXui %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (dereferenceable load 1 from @c, align 4)
+    %0:gpr32 = COPY %3.sub_32
+  ; CHECK: {{.*}}.sub_32:gpr64 = COPY {{.*}}.sub_32
+    STRBBui %1, %2, target-flags(aarch64-pageoff, aarch64-nc) @c :: (store 1 into @c, align 4)
+    %8:gpr64all = SUBREG_TO_REG 0, %0, %subreg.sub_32
+    $x0 = COPY %8
+  ; CHECK: $x0 = COPY
+    RET_ReallyLR implicit $x0
+...