[ExpandMemCmp] Correctly set alignment of generated loads

Summary:
This is part of a series of efforts to correct the alignment of memory operations.
(Other related bugs: https://bugs.llvm.org/show_bug.cgi?id=44388 , https://bugs.llvm.org/show_bug.cgi?id=44543 )

This fixes https://bugs.llvm.org/show_bug.cgi?id=43880 by setting the default alignment of the generated loads to 1.

The test CodeGen/AArch64/bcmp-inline-small.ll needed to be changed; it was introduced by https://reviews.llvm.org/D64805 . I talked with @evandro and confirmed that it is okay to change the test.
Two other tests from PowerPC needed changes as well, but the fixes were straightforward.

Reviewers: courbet

Reviewed By: courbet

Subscribers: nlopes, gchatelet, wuzish, nemanjai, kristof.beyls, hiraditya, steven.zhang, danielkiss, llvm-commits, evandro

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D76113
This commit is contained in:
Juneyoung Lee 2020-03-16 22:38:29 +09:00
parent 332edcc6bd
commit 7aecf2323c
5 changed files with 202 additions and 183 deletions

View File

@ -273,6 +273,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
// Get the memory source at offset `OffsetBytes`.
Value *LhsSource = CI->getArgOperand(0);
Value *RhsSource = CI->getArgOperand(1);
Align LhsAlign = LhsSource->getPointerAlignment(DL).valueOrOne();
Align RhsAlign = RhsSource->getPointerAlignment(DL).valueOrOne();
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
LhsSource = Builder.CreateConstGEP1_64(
@ -281,6 +283,8 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
RhsSource = Builder.CreateConstGEP1_64(
ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
OffsetBytes);
LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
}
LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
@ -290,13 +294,13 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
if (auto *C = dyn_cast<Constant>(LhsSource))
Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
if (!Lhs)
Lhs = Builder.CreateLoad(LoadSizeType, LhsSource);
Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
Value *Rhs = nullptr;
if (auto *C = dyn_cast<Constant>(RhsSource))
Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
if (!Rhs)
Rhs = Builder.CreateLoad(LoadSizeType, RhsSource);
Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
// Swap bytes if required.
if (NeedsBSwap) {

View File

@ -4,13 +4,28 @@
declare i32 @bcmp(i8*, i8*, i64) nounwind readonly
declare i32 @memcmp(i8*, i8*, i64) nounwind readonly
define i1 @bcmp_b2(i8* %s1, i8* %s2) {
define i1 @test_b2(i8* %s1, i8* %s2) {
entry:
%bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
%ret = icmp eq i32 %bcmp, 0
ret i1 %ret
; CHECK-LABEL: bcmp_b2:
; CHECK-LABEL: test_b2:
; CHECK-NOT: bl bcmp
; CHECKN: ldr x
; CHECKN-NEXT: ldr x
; CHECKN-NEXT: ldur x
; CHECKN-NEXT: ldur x
; CHECKS-COUNT-30: ldrb w
}
define i1 @test_b2_align8(i8* align 8 %s1, i8* align 8 %s2) {
entry:
%bcmp = call i32 @bcmp(i8* %s1, i8* %s2, i64 15)
%ret = icmp eq i32 %bcmp, 0
ret i1 %ret
; CHECK-LABEL: test_b2_align8:
; CHECK-NOT: bl bcmp
; CHECKN: ldr x
; CHECKN-NEXT: ldr x
@ -20,19 +35,19 @@ entry:
; CHECKS-NEXT: ldr x
; CHECKS-NEXT: ldr w
; CHECKS-NEXT: ldr w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrb w
; CHECKS-NEXT: ldrb w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrh w
; CHECKS-NEXT: ldrb w
; CHECKS-NEXT: ldrb w
}
define i1 @bcmp_bs(i8* %s1, i8* %s2) optsize {
define i1 @test_bs(i8* %s1, i8* %s2) optsize {
entry:
%memcmp = call i32 @memcmp(i8* %s1, i8* %s2, i64 31)
%ret = icmp eq i32 %memcmp, 0
ret i1 %ret
; CHECK-LABEL: bcmp_bs:
; CHECK-LABEL: test_bs:
; CHECKN-NOT: bl memcmp
; CHECKN: ldp x
; CHECKN-NEXT: ldp x

View File

@ -35,8 +35,8 @@ define signext i32 @zeroEqualityTest02(i8* %x, i8* %y) {
define signext i32 @zeroEqualityTest01(i8* %x, i8* %y) {
; CHECK-LABEL: zeroEqualityTest01:
; CHECK: # %bb.0:
; CHECK-NEXT: ld 5, 0(3)
; CHECK-NEXT: ld 6, 0(4)
; CHECK-NEXT: ldx 5, 0, 3
; CHECK-NEXT: ldx 6, 0, 4
; CHECK-NEXT: cmpld 5, 6
; CHECK-NEXT: bne 0, .LBB1_2
; CHECK-NEXT: # %bb.1: # %loadbb1
@ -125,7 +125,7 @@ define signext i32 @equalityFoldTwoConstants() {
define signext i32 @equalityFoldOneConstant(i8* %X) {
; CHECK-LABEL: equalityFoldOneConstant:
; CHECK: # %bb.0:
; CHECK-NEXT: ld 4, 0(3)
; CHECK-NEXT: ldx 4, 0, 3
; CHECK-NEXT: li 5, 1
; CHECK-NEXT: sldi 5, 5, 32
; CHECK-NEXT: cmpld 4, 5

View File

@ -8,8 +8,8 @@
define zeroext i1 @opeq1(
; PPC64LE-LABEL: opeq1:
; PPC64LE: # %bb.0: # %"entry+land.rhs.i"
; PPC64LE-NEXT: ld 3, 0(3)
; PPC64LE-NEXT: ld 4, 0(4)
; PPC64LE-NEXT: ldx 3, 0, 3
; PPC64LE-NEXT: ldx 4, 0, 4
; PPC64LE-NEXT: xor 3, 3, 4
; PPC64LE-NEXT: cntlzd 3, 3
; PPC64LE-NEXT: rldicl 3, 3, 58, 63

View File

@ -9,8 +9,8 @@ define i32 @cmp2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@ -26,8 +26,8 @@ define i32 @cmp2_align2(i8* nocapture readonly align 2 %x, i8* nocapture readonl
; ALL-LABEL: @cmp2_align2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 2
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
; ALL-NEXT: [[TMP5:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = zext i16 [[TMP5]] to i32
@ -49,8 +49,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]]
; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]]
; ALL-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 1
; ALL-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i16 @llvm.bswap.i16(i16 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i16 @llvm.bswap.i16(i16 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i16 [[TMP7]], [[TMP8]]
@ -58,8 +58,8 @@ define i32 @cmp3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 2
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@ -76,8 +76,8 @@ define i32 @cmp4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp4(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP3]])
; ALL-NEXT: [[TMP6:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP4]])
; ALL-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP5]], [[TMP6]]
@ -101,8 +101,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@ -110,8 +110,8 @@ define i32 @cmp5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb1:
; ALL-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 4
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; ALL-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
; ALL-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; ALL-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; ALL-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; ALL-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@ -136,8 +136,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL: loadbb:
; ALL-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
; ALL-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
; ALL-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; ALL-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; ALL-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; ALL-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@ -147,8 +147,8 @@ define i32 @cmp6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; ALL-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
; ALL-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
; ALL-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
; ALL-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
; ALL-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; ALL-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
; ALL-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i32
@ -184,8 +184,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32: loadbb:
; X32-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]]
; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]]
; X32-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 1
; X32-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP4]], align 1
; X32-NEXT: [[TMP7]] = call i32 @llvm.bswap.i32(i32 [[TMP5]])
; X32-NEXT: [[TMP8]] = call i32 @llvm.bswap.i32(i32 [[TMP6]])
; X32-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], [[TMP8]]
@ -195,8 +195,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
; X32-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
; X32-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
; X32-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
; X32-NEXT: [[TMP16]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
; X32-NEXT: [[TMP17]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
; X32-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP16]], [[TMP17]]
@ -208,8 +208,8 @@ define i32 @cmp8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp8(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP3]])
; X64-NEXT: [[TMP6:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP4]])
; X64-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP5]], [[TMP6]]
@ -237,8 +237,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@ -246,8 +246,8 @@ define i32 @cmp9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb1:
; X64-NEXT: [[TMP10:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]]
; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]]
; X64-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP10]], align 1
; X64-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP11]], align 1
; X64-NEXT: [[TMP14:%.*]] = zext i8 [[TMP12]] to i32
; X64-NEXT: [[TMP15:%.*]] = zext i8 [[TMP13]] to i32
; X64-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP15]]
@ -276,8 +276,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@ -287,8 +287,8 @@ define i32 @cmp10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i16*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i16*
; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]]
; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]]
; X64-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP12]], align 1
; X64-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP13]], align 1
; X64-NEXT: [[TMP16:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP14]])
; X64-NEXT: [[TMP17:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP15]])
; X64-NEXT: [[TMP18]] = zext i16 [[TMP16]] to i64
@ -328,8 +328,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@ -339,8 +339,8 @@ define i32 @cmp12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i32*
; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]]
; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]]
; X64-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP12]], align 1
; X64-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 1
; X64-NEXT: [[TMP16:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP14]])
; X64-NEXT: [[TMP17:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP15]])
; X64-NEXT: [[TMP18]] = zext i32 [[TMP16]] to i64
@ -398,8 +398,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64: loadbb:
; X64-NEXT: [[TMP3:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP4:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]]
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]]
; X64-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP3]], align 1
; X64-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP4]], align 1
; X64-NEXT: [[TMP7]] = call i64 @llvm.bswap.i64(i64 [[TMP5]])
; X64-NEXT: [[TMP8]] = call i64 @llvm.bswap.i64(i64 [[TMP6]])
; X64-NEXT: [[TMP9:%.*]] = icmp eq i64 [[TMP7]], [[TMP8]]
@ -409,8 +409,8 @@ define i32 @cmp16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-NEXT: [[TMP11:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i64*
; X64-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP11]] to i64*
; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]]
; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]]
; X64-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP12]], align 1
; X64-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP13]], align 1
; X64-NEXT: [[TMP16]] = call i64 @llvm.bswap.i64(i64 [[TMP14]])
; X64-NEXT: [[TMP17]] = call i64 @llvm.bswap.i64(i64 [[TMP15]])
; X64-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP16]], [[TMP17]]
@ -427,8 +427,8 @@ define i32 @cmp_eq2(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq2(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; ALL-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
; ALL-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@ -445,13 +445,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq3(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
; X32-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X32-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@ -469,15 +469,15 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i16 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -489,13 +489,13 @@ define i32 @cmp_eq3(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq3(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i16*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i16*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i16, i16* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i16 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 2
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 2
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i16
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i16
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i16 [[TMP10]], [[TMP11]]
@ -516,8 +516,8 @@ define i32 @cmp_eq4(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; ALL-LABEL: @cmp_eq4(
; ALL-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; ALL-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; ALL-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; ALL-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; ALL-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; ALL-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; ALL-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@ -534,13 +534,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq5(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X32-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X32-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X32-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X32-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@ -558,15 +558,15 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -578,13 +578,13 @@ define i32 @cmp_eq5(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq5(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i32
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i32
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
@ -605,15 +605,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq6(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@ -631,8 +631,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -640,8 +640,8 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -653,15 +653,15 @@ define i32 @cmp_eq6(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq6(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@ -682,15 +682,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X32-LABEL: @cmp_eq6_align4(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X32-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
; X32-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X32-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X32-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X32-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@ -708,8 +708,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -717,8 +717,8 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -730,15 +730,15 @@ define i32 @cmp_eq6_align4(i8* nocapture readonly align 4 %x, i8* nocapture read
; X64_2LD-LABEL: @cmp_eq6_align4(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 4
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 4
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 4
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i32
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i32
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i32 [[TMP12]], [[TMP13]]
@ -759,15 +759,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq7(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@ -783,8 +783,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -792,8 +792,8 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -805,15 +805,15 @@ define i32 @cmp_eq7(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq7(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@ -832,15 +832,15 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X32-LABEL: @cmp_eq8(
; X32-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i32*
; X32-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i32*
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]]
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]]
; X32-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 1
; X32-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 1
; X32-NEXT: [[TMP5:%.*]] = xor i32 [[TMP3]], [[TMP4]]
; X32-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 4
; X32-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 4
; X32-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X32-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X32-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X32-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X32-NEXT: [[TMP12:%.*]] = xor i32 [[TMP10]], [[TMP11]]
; X32-NEXT: [[TMP13:%.*]] = or i32 [[TMP5]], [[TMP12]]
; X32-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
@ -852,8 +852,8 @@ define i32 @cmp_eq8(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp_eq8(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0
@ -880,15 +880,15 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
; X64_1LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_1LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_1LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_1LD-NEXT: [[TMP10:%.*]] = icmp ne i8 [[TMP8]], [[TMP9]]
; X64_1LD-NEXT: br i1 [[TMP10]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -900,13 +900,13 @@ define i32 @cmp_eq9(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq9(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]]
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]]
; X64_2LD-NEXT: [[TMP8:%.*]] = load i8, i8* [[TMP6]], align 1
; X64_2LD-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP7]], align 1
; X64_2LD-NEXT: [[TMP10:%.*]] = zext i8 [[TMP8]] to i64
; X64_2LD-NEXT: [[TMP11:%.*]] = zext i8 [[TMP9]] to i64
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
@ -937,8 +937,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -946,8 +946,8 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i16 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -959,15 +959,15 @@ define i32 @cmp_eq10(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq10(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i16*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i16*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i16 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i16 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@ -998,8 +998,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -1007,8 +1007,8 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -1020,15 +1020,15 @@ define i32 @cmp_eq11(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq11(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 3
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 3
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@ -1057,8 +1057,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -1066,8 +1066,8 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -1079,15 +1079,15 @@ define i32 @cmp_eq12(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq12(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 8
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 8
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i32*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i32*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = zext i32 [[TMP10]] to i64
; X64_2LD-NEXT: [[TMP13:%.*]] = zext i32 [[TMP11]] to i64
; X64_2LD-NEXT: [[TMP14:%.*]] = xor i64 [[TMP12]], [[TMP13]]
@ -1118,8 +1118,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -1127,8 +1127,8 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -1140,15 +1140,15 @@ define i32 @cmp_eq13(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq13(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 5
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 5
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@ -1177,8 +1177,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -1186,8 +1186,8 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -1199,15 +1199,15 @@ define i32 @cmp_eq14(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq14(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 6
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 6
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@ -1236,8 +1236,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD: loadbb:
; X64_1LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_1LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_1LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_1LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_1LD-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]]
; X64_1LD-NEXT: br i1 [[TMP5]], label [[RES_BLOCK:%.*]], label [[LOADBB1:%.*]]
; X64_1LD: loadbb1:
@ -1245,8 +1245,8 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_1LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_1LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_1LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_1LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_1LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_1LD-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]]
; X64_1LD-NEXT: br i1 [[TMP12]], label [[RES_BLOCK]], label [[ENDBLOCK]]
; X64_1LD: endblock:
@ -1258,15 +1258,15 @@ define i32 @cmp_eq15(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64_2LD-LABEL: @cmp_eq15(
; X64_2LD-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i64*
; X64_2LD-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i64*
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]]
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]]
; X64_2LD-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP1]], align 1
; X64_2LD-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP2]], align 1
; X64_2LD-NEXT: [[TMP5:%.*]] = xor i64 [[TMP3]], [[TMP4]]
; X64_2LD-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[X]], i64 7
; X64_2LD-NEXT: [[TMP7:%.*]] = getelementptr i8, i8* [[Y]], i64 7
; X64_2LD-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP6]] to i64*
; X64_2LD-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to i64*
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]]
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]]
; X64_2LD-NEXT: [[TMP10:%.*]] = load i64, i64* [[TMP8]], align 1
; X64_2LD-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP9]], align 1
; X64_2LD-NEXT: [[TMP12:%.*]] = xor i64 [[TMP10]], [[TMP11]]
; X64_2LD-NEXT: [[TMP13:%.*]] = or i64 [[TMP5]], [[TMP12]]
; X64_2LD-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
@ -1291,8 +1291,8 @@ define i32 @cmp_eq16(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; X64-LABEL: @cmp_eq16(
; X64-NEXT: [[TMP1:%.*]] = bitcast i8* [[X:%.*]] to i128*
; X64-NEXT: [[TMP2:%.*]] = bitcast i8* [[Y:%.*]] to i128*
; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]]
; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]]
; X64-NEXT: [[TMP3:%.*]] = load i128, i128* [[TMP1]], align 1
; X64-NEXT: [[TMP4:%.*]] = load i128, i128* [[TMP2]], align 1
; X64-NEXT: [[TMP5:%.*]] = icmp ne i128 [[TMP3]], [[TMP4]]
; X64-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
; X64-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP6]], 0