forked from OSchip/llvm-project
[PowerPC] Re-enable combine for i64 BSWAP on targets without LDBRX
The combine was disabled in 4e22c7265d because it caused failures on the ppc64be-multistage (bootstrap) bot.
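It turns out that the combine did not correctly update the MachineMemOperand (MMO) for
the high load (it reused the original load's pointer info without the 4-byte offset), which caused aliased stores to be reported as unaliased.
This patch fixes that problem by giving the high load its own MMO (offset 4, size 4) and re-enables the combine.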
This commit is contained in:
parent
f2d5fce86e
commit
3553698de7
|
@ -15251,9 +15251,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
|
||||
|
||||
// Can't split volatile or atomic loads.
|
||||
// FIXME: Disabling this to unblock the big endian bot until I can get it
|
||||
// fixed.
|
||||
if (!LD->isSimple() || !Subtarget.hasLDBRX())
|
||||
if (!LD->isSimple())
|
||||
return SDValue();
|
||||
SDValue BasePtr = LD->getBasePtr();
|
||||
SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
|
||||
|
@ -15261,8 +15259,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
|
|||
Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
|
||||
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
|
||||
DAG.getIntPtrConstant(4, dl));
|
||||
SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
|
||||
LD->getPointerInfo(), LD->getAlignment());
|
||||
MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
|
||||
LD->getMemOperand(), 4, 4);
|
||||
SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
|
||||
Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
|
||||
SDValue Res;
|
||||
if (Subtarget.isLittleEndian())
|
||||
|
|
|
@ -151,19 +151,12 @@ define i64 @LDBRX(i8* %ptr, i64 %off) {
|
|||
;
|
||||
; X64-LABEL: LDBRX:
|
||||
; X64: # %bb.0:
|
||||
; X64-NEXT: ldx r4, r3, r4
|
||||
; X64-NEXT: rotldi r5, r4, 16
|
||||
; X64-NEXT: rotldi r3, r4, 8
|
||||
; X64-NEXT: rldimi r3, r5, 8, 48
|
||||
; X64-NEXT: rotldi r5, r4, 24
|
||||
; X64-NEXT: rldimi r3, r5, 16, 40
|
||||
; X64-NEXT: rotldi r5, r4, 32
|
||||
; X64-NEXT: rldimi r3, r5, 24, 32
|
||||
; X64-NEXT: rotldi r5, r4, 48
|
||||
; X64-NEXT: rldimi r3, r5, 40, 16
|
||||
; X64-NEXT: rotldi r5, r4, 56
|
||||
; X64-NEXT: rldimi r3, r5, 48, 8
|
||||
; X64-NEXT: rldimi r3, r4, 56, 0
|
||||
; X64-NEXT: li r6, 4
|
||||
; X64-NEXT: lwbrx r5, r3, r4
|
||||
; X64-NEXT: add r3, r3, r4
|
||||
; X64-NEXT: lwbrx r3, r3, r6
|
||||
; X64-NEXT: rldimi r5, r3, 32, 0
|
||||
; X64-NEXT: mr r3, r5
|
||||
; X64-NEXT: blr
|
||||
;
|
||||
; PWR7_64-LABEL: LDBRX:
|
||||
|
|
|
@ -4,20 +4,11 @@
|
|||
define void @bs(i64* %p) {
|
||||
; CHECK-LABEL: bs:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld 4, 0(3)
|
||||
; CHECK-NEXT: rotldi 5, 4, 16
|
||||
; CHECK-NEXT: rotldi 6, 4, 8
|
||||
; CHECK-NEXT: rldimi 6, 5, 8, 48
|
||||
; CHECK-NEXT: rotldi 5, 4, 24
|
||||
; CHECK-NEXT: rldimi 6, 5, 16, 40
|
||||
; CHECK-NEXT: rotldi 5, 4, 32
|
||||
; CHECK-NEXT: rldimi 6, 5, 24, 32
|
||||
; CHECK-NEXT: rotldi 5, 4, 48
|
||||
; CHECK-NEXT: rldimi 6, 5, 40, 16
|
||||
; CHECK-NEXT: rotldi 5, 4, 56
|
||||
; CHECK-NEXT: rldimi 6, 5, 48, 8
|
||||
; CHECK-NEXT: rldimi 6, 4, 56, 0
|
||||
; CHECK-NEXT: std 6, 0(3)
|
||||
; CHECK-NEXT: li 4, 4
|
||||
; CHECK-NEXT: lwbrx 5, 0, 3
|
||||
; CHECK-NEXT: lwbrx 4, 3, 4
|
||||
; CHECK-NEXT: rldimi 5, 4, 32, 0
|
||||
; CHECK-NEXT: std 5, 0(3)
|
||||
; CHECK-NEXT: blr
|
||||
%x = load i64, i64* %p, align 8
|
||||
%b = call i64 @llvm.bswap.i64(i64 %x)
|
||||
|
@ -50,19 +41,10 @@ define i64 @volatile_ld(i64* %p) {
|
|||
define i64 @misaligned_ld(i64* %p) {
|
||||
; CHECK-LABEL: misaligned_ld:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: ld 4, 0(3)
|
||||
; CHECK-NEXT: rotldi 5, 4, 16
|
||||
; CHECK-NEXT: rotldi 3, 4, 8
|
||||
; CHECK-NEXT: rldimi 3, 5, 8, 48
|
||||
; CHECK-NEXT: rotldi 5, 4, 24
|
||||
; CHECK-NEXT: rldimi 3, 5, 16, 40
|
||||
; CHECK-NEXT: rotldi 5, 4, 32
|
||||
; CHECK-NEXT: rldimi 3, 5, 24, 32
|
||||
; CHECK-NEXT: rotldi 5, 4, 48
|
||||
; CHECK-NEXT: rldimi 3, 5, 40, 16
|
||||
; CHECK-NEXT: rotldi 5, 4, 56
|
||||
; CHECK-NEXT: rldimi 3, 5, 48, 8
|
||||
; CHECK-NEXT: rldimi 3, 4, 56, 0
|
||||
; CHECK-NEXT: li 4, 4
|
||||
; CHECK-NEXT: lwbrx 4, 3, 4
|
||||
; CHECK-NEXT: lwbrx 3, 0, 3
|
||||
; CHECK-NEXT: rldimi 3, 4, 32, 0
|
||||
; CHECK-NEXT: blr
|
||||
%x = load i64, i64* %p, align 1
|
||||
%b = call i64 @llvm.bswap.i64(i64 %x)
|
||||
|
|
Loading…
Reference in New Issue