forked from OSchip/llvm-project
Add SARX/SHRX/SHLX code generation support
llvm-svn: 164675
This commit is contained in:
parent
2de86af22d
commit
2b425e1e24
|
@ -565,6 +565,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
|||
// BMI/BMI2 foldable instructions
|
||||
{ X86::RORX32ri, X86::RORX32mi, 0 },
|
||||
{ X86::RORX64ri, X86::RORX64mi, 0 },
|
||||
{ X86::SARX32rr, X86::SARX32rm, 0 },
|
||||
{ X86::SARX64rr, X86::SARX64rm, 0 },
|
||||
{ X86::SHRX32rr, X86::SHRX32rm, 0 },
|
||||
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
|
||||
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
|
||||
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
|
||||
};
|
||||
|
||||
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
|
||||
|
|
|
@ -896,4 +896,59 @@ let Predicates = [HasBMI2] in {
|
|||
(RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
|
||||
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
|
||||
(RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
|
||||
|
||||
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
|
||||
// immediate shift, i.e. the following code is considered better
|
||||
//
|
||||
// mov %edi, %esi
|
||||
// shl $imm, %esi
|
||||
// ... %edi, ...
|
||||
//
|
||||
// than
|
||||
//
|
||||
// movb $imm, %sil
|
||||
// shlx %sil, %edi, %esi
|
||||
// ... %edi, ...
|
||||
//
|
||||
let AddedComplexity = 1 in {
|
||||
def : Pat<(sra GR32:$src1, GR8:$src2),
|
||||
(SARX32rr GR32:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
def : Pat<(sra GR64:$src1, GR8:$src2),
|
||||
(SARX64rr GR64:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
|
||||
def : Pat<(srl GR32:$src1, GR8:$src2),
|
||||
(SHRX32rr GR32:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
def : Pat<(srl GR64:$src1, GR8:$src2),
|
||||
(SHRX64rr GR64:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
|
||||
def : Pat<(shl GR32:$src1, GR8:$src2),
|
||||
(SHLX32rr GR32:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
def : Pat<(shl GR64:$src1, GR8:$src2),
|
||||
(SHLX64rr GR64:$src1,
|
||||
(INSERT_SUBREG
|
||||
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
|
||||
}
|
||||
|
||||
// Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
|
||||
//
|
||||
// mov (%ecx), %esi
|
||||
// shl $imm, %esi
|
||||
//
|
||||
// over
|
||||
//
|
||||
// movb $imm, %al
|
||||
// shlx %al, (%ecx), %esi
|
||||
//
|
||||
// As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
|
||||
// optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
|
||||
; rdar://5571034
|
||||
|
||||
; This requires physreg joining, %vreg13 is live everywhere:
|
||||
|
|
|
@ -0,0 +1,178 @@
|
|||
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
|
||||
|
||||
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = shl i32 %x, %shamt
|
||||
; BMI2: shl32
|
||||
; BMI2: shlxl
|
||||
; BMI2: ret
|
||||
; BMI264: shl32
|
||||
; BMI264: shlxl
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i32 @shl32i(i32 %x) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = shl i32 %x, 5
|
||||
; BMI2: shl32i
|
||||
; BMI2-NOT: shlxl
|
||||
; BMI2: ret
|
||||
; BMI264: shl32i
|
||||
; BMI264-NOT: shlxl
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i32* %p
|
||||
%shl = shl i32 %x, %shamt
|
||||
; BMI2: shl32p
|
||||
; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI2: ret
|
||||
; BMI264: shl32p
|
||||
; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i32 @shl32pi(i32* %p) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i32* %p
|
||||
%shl = shl i32 %x, 5
|
||||
; BMI2: shl32pi
|
||||
; BMI2-NOT: shlxl
|
||||
; BMI2: ret
|
||||
; BMI264: shl32pi
|
||||
; BMI264-NOT: shlxl
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = shl i64 %x, %shamt
|
||||
; BMI264: shl64
|
||||
; BMI264: shlxq
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shl64i(i64 %x) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = shl i64 %x, 7
|
||||
; BMI264: shl64i
|
||||
; BMI264-NOT: shlxq
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i64* %p
|
||||
%shl = shl i64 %x, %shamt
|
||||
; BMI264: shl64p
|
||||
; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i64* %p
|
||||
%shl = shl i64 %x, 7
|
||||
; BMI264: shl64pi
|
||||
; BMI264-NOT: shlxq
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = lshr i32 %x, %shamt
|
||||
; BMI2: lshr32
|
||||
; BMI2: shrxl
|
||||
; BMI2: ret
|
||||
; BMI264: lshr32
|
||||
; BMI264: shrxl
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i32* %p
|
||||
%shl = lshr i32 %x, %shamt
|
||||
; BMI2: lshr32p
|
||||
; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI2: ret
|
||||
; BMI264: lshr32p
|
||||
; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = lshr i64 %x, %shamt
|
||||
; BMI264: lshr64
|
||||
; BMI264: shrxq
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i64* %p
|
||||
%shl = lshr i64 %x, %shamt
|
||||
; BMI264: lshr64p
|
||||
; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = ashr i32 %x, %shamt
|
||||
; BMI2: ashr32
|
||||
; BMI2: sarxl
|
||||
; BMI2: ret
|
||||
; BMI264: ashr32
|
||||
; BMI264: sarxl
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i32* %p
|
||||
%shl = ashr i32 %x, %shamt
|
||||
; BMI2: ashr32p
|
||||
; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI2: ret
|
||||
; BMI264: ashr32p
|
||||
; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i32 %shl
|
||||
}
|
||||
|
||||
define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%shl = ashr i64 %x, %shamt
|
||||
; BMI264: ashr64
|
||||
; BMI264: sarxq
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
||||
|
||||
define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
|
||||
entry:
|
||||
%x = load i64* %p
|
||||
%shl = ashr i64 %x, %shamt
|
||||
; BMI264: ashr64p
|
||||
; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
|
||||
; BMI264: ret
|
||||
ret i64 %shl
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s
|
||||
|
||||
; Gather non-machine specific tests for the transformations in
|
||||
; CodeGen/SelectionDAG/TargetLowering. Currently, these
|
||||
|
|
Loading…
Reference in New Issue