[X86] Add patterns for using movss/movsd for atomic load/store of f32/f64. Remove atomic fadd pseudos and use isel patterns instead.
This patch adds patterns for turning a bitcasted atomic load/store into movss/movsd. It also removes the pseudo instructions for atomic RMW fadd; instead, it adds isel patterns that fold an atomic load into addss/addsd and relies on the new movss/movsd store pattern to handle the write part. This also makes the fadd patterns use VEX and EVEX instructions when AVX or AVX512F is enabled.

Differential Revision: https://reviews.llvm.org/D60394

llvm-svn: 358215
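For context, here is a minimal sketch of the IR shape this change targets, modeled on the fadd_32r test updated below; the function name and the trailing fadd/store/ret lines are illustrative reconstructions of the test's intent, not copied from the diff:

define void @fadd_32r_sketch(float* %loc, float %val) nounwind {
  ; Atomic load of the f32 bits, expressed as an i32 atomic load plus bitcast.
  %floc = bitcast float* %loc to i32*
  %old.bits = load atomic i32, i32* %floc seq_cst, align 4
  %old = bitcast i32 %old.bits to float
  ; The FP operation whose memory operand can now be folded (addss/addsd).
  %sum = fadd float %old, %val
  ; Atomic store of the result bits; now selected as movss/movsd (vmovss/vmovsd under AVX).
  %sum.bits = bitcast float %sum to i32
  store atomic i32 %sum.bits, i32* %floc release, align 4
  ret void
}

With the new patterns, the x86-64 SSE output in the updated test is simply addss (%rdi), %xmm0 followed by movss %xmm0, (%rdi), and the AVX/AVX-512 variants use vaddss/vmovss, instead of going through the old RELEASE_FADD pseudo expansion.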
parent f7e548c076
commit 586fad50ac
@@ -29090,53 +29090,6 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
  return SinkMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredAtomicFP(MachineInstr &MI,
                                       MachineBasicBlock *BB) const {
  // Combine the following atomic floating-point modification pattern:
  //   a.store(reg OP a.load(acquire), release)
  // Transform them into:
  //   OPss (%gpr), %xmm
  //   movss %xmm, (%gpr)
  // Or sd equivalent for 64-bit operations.
  unsigned MOp, FOp;
  switch (MI.getOpcode()) {
  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
  case X86::RELEASE_FADD32mr:
    FOp = X86::ADDSSrm;
    MOp = X86::MOVSSmr;
    break;
  case X86::RELEASE_FADD64mr:
    FOp = X86::ADDSDrm;
    MOp = X86::MOVSDmr;
    break;
  }
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  unsigned ValOpIdx = X86::AddrNumOperands;
  unsigned VSrc = MI.getOperand(ValOpIdx).getReg();
  MachineInstrBuilder MIB =
      BuildMI(*BB, MI, DL, TII->get(FOp),
              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
          .addReg(VSrc);
  for (int i = 0; i < X86::AddrNumOperands; ++i) {
    MachineOperand &Operand = MI.getOperand(i);
    // Clear any kill flags on register operands as we'll create a second
    // instruction using the same address operands.
    if (Operand.isReg())
      Operand.setIsKill(false);
    MIB.add(Operand);
  }
  MachineInstr *FOpMI = MIB;
  MIB = BuildMI(*BB, MI, DL, TII->get(MOp));
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(MI.getOperand(i));
  MIB.addReg(FOpMI->getOperand(0).getReg(), RegState::Kill);
  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
@@ -30372,10 +30325,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
    return BB;
  }

  case X86::RELEASE_FADD32mr:
  case X86::RELEASE_FADD64mr:
    return EmitLoweredAtomicFP(MI, BB);

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
@@ -996,28 +996,31 @@ defm : RELEASE_BINOP_MI<"OR", or>;
defm : RELEASE_BINOP_MI<"XOR", xor>;
defm : RELEASE_BINOP_MI<"SUB", sub>;

// Same as above, but for floating-point.
// FIXME: imm version.
// FIXME: Version that doesn't clobber $src, using AVX's VADDSS.
// Atomic load + floating point patterns.
// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
let usesCustomInserter = 1, SchedRW = [WriteMicrocoded] in {
multiclass RELEASE_FP_BINOP_MI<SDNode op> {
  def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src),
    "#BINOP "#NAME#"32mr PSEUDO!",
    [(atomic_store_32 addr:$dst,
      (i32 (bitconvert (op
        (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))),
        FR32:$src))))]>, Requires<[HasSSE1]>;
  def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src),
    "#BINOP "#NAME#"64mr PSEUDO!",
    [(atomic_store_64 addr:$dst,
      (i64 (bitconvert (op
        (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))),
        FR64:$src))))]>, Requires<[HasSSE2]>;
multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
            (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
            Requires<[UseSSE1]>;
  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
            Requires<[UseAVX]>;
  def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_32 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
            Requires<[HasAVX512]>;

  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
            (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
            Requires<[UseSSE1]>;
  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
            Requires<[UseAVX]>;
  def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_64 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
            Requires<[HasAVX512]>;
}
defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>;
defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
// FIXME: Add fsub, fmul, fdiv, ...
}

multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
                        dag dag64> {
@@ -1078,6 +1081,35 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;

// Floating point loads/stores.
def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
          (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
          (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(atomic_store_32 addr:$dst, (i32 (bitconvert (f32 FR32:$src)))),
          (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;

def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
          (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
def : Pat<(atomic_store_64 addr:$dst, (i64 (bitconvert (f64 FR64:$src)))),
          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;

def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
          (MOVSSrm addr:$src)>, Requires<[UseSSE1]>;
def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
          (VMOVSSrm addr:$src)>, Requires<[UseAVX]>;
def : Pat<(f32 (bitconvert (i32 (atomic_load_32 addr:$src)))),
          (VMOVSSZrm addr:$src)>, Requires<[HasAVX512]>;

def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
          (MOVSDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
          (VMOVSDrm addr:$src)>, Requires<[UseAVX]>;
def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
          (VMOVSDZrm addr:$src)>, Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
//===----------------------------------------------------------------------===//
@@ -41,25 +41,31 @@ define void @fadd_32r(float* %loc, float %val) nounwind {
;
; X86-SSE2-LABEL: fadd_32r:
; X86-SSE2: # %bb.0:
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: addss (%eax), %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: fadd_32r:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: addss (%eax), %xmm0
; X86-AVX-NEXT: movss %xmm0, (%eax)
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vaddss (%eax), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%eax)
; X86-AVX-NEXT: retl
;
; X64-LABEL: fadd_32r:
; X64: # %bb.0:
; X64-NEXT: addss (%rdi), %xmm0
; X64-NEXT: movss %xmm0, (%rdi)
; X64-NEXT: retq
; X64-SSE-LABEL: fadd_32r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addss (%rdi), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddss (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
%floc = bitcast float* %loc to i32*
%1 = load atomic i32, i32* %floc seq_cst, align 4
%2 = bitcast i32 %1 to float
@@ -194,11 +200,17 @@ define void @fadd_64r(double* %loc, double %val) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-LABEL: fadd_64r:
; X64: # %bb.0:
; X64-NEXT: addsd (%rdi), %xmm0
; X64-NEXT: movsd %xmm0, (%rdi)
; X64-NEXT: retq
; X64-SSE-LABEL: fadd_64r:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: addsd (%rdi), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64r:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vaddsd (%rdi), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
%floc = bitcast double* %loc to i64*
%1 = load atomic i64, i64* %floc seq_cst, align 8
%2 = bitcast i64 %1 to double
@@ -249,8 +261,8 @@ define void @fadd_32g() nounwind {
; X86-AVX-LABEL: fadd_32g:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: addss glob32, %xmm0
; X86-AVX-NEXT: movss %xmm0, glob32
; X86-AVX-NEXT: vaddss glob32, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, glob32
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32g:
@@ -263,8 +275,8 @@ define void @fadd_32g() nounwind {
; X64-AVX-LABEL: fadd_32g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: addss {{.*}}(%rip), %xmm0
; X64-AVX-NEXT: movss %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT: retq
%i = load atomic i32, i32* bitcast (float* @glob32 to i32*) monotonic, align 4
%f = bitcast i32 %i to float
@@ -397,8 +409,8 @@ define void @fadd_64g() nounwind {
; X64-AVX-LABEL: fadd_64g:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: addsd {{.*}}(%rip), %xmm0
; X64-AVX-NEXT: movsd %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip)
; X64-AVX-NEXT: retq
%i = load atomic i64, i64* bitcast (double* @glob64 to i64*) monotonic, align 8
%f = bitcast i64 %i to double
@@ -446,24 +458,24 @@ define void @fadd_32imm() nounwind {
; X86-AVX-LABEL: fadd_32imm:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: addss -559038737, %xmm0
; X86-AVX-NEXT: movss %xmm0, -559038737
; X86-AVX-NEXT: vaddss -559038737, %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, -559038737
; X86-AVX-NEXT: retl
;
; X64-SSE-LABEL: fadd_32imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: addss (%rax), %xmm0
; X64-SSE-NEXT: movss %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_32imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: addss (%rax), %xmm0
; X64-AVX-NEXT: movss %xmm0, (%rax)
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: vaddss (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, (%rax)
; X64-AVX-NEXT: retq
%i = load atomic i32, i32* inttoptr (i32 3735928559 to i32*) monotonic, align 4
%f = bitcast i32 %i to float
@@ -588,18 +600,18 @@ define void @fadd_64imm() nounwind {
;
; X64-SSE-LABEL: fadd_64imm:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: addsd (%rax), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rax)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_64imm:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: movl $3735928559, %eax # imm = 0xDEADBEEF
; X64-AVX-NEXT: addsd (%rax), %xmm0
; X64-AVX-NEXT: movsd %xmm0, (%rax)
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: vaddsd (%rax), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rax)
; X64-AVX-NEXT: retq
%i = load atomic i64, i64* inttoptr (i64 3735928559 to i64*) monotonic, align 8
%f = bitcast i64 %i to double
@@ -650,8 +662,8 @@ define void @fadd_32stack() nounwind {
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: addss (%esp), %xmm0
; X86-AVX-NEXT: movss %xmm0, (%esp)
; X86-AVX-NEXT: vaddss (%esp), %xmm0, %xmm0
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: retl
;
@@ -665,8 +677,8 @@ define void @fadd_32stack() nounwind {
; X64-AVX-LABEL: fadd_32stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: addss -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vaddss -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
%ptr = alloca i32, align 4
%bc3 = bitcast i32* %ptr to float*
@@ -801,8 +813,8 @@ define void @fadd_64stack() nounwind {
; X64-AVX-LABEL: fadd_64stack:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: addsd -{{[0-9]+}}(%rsp), %xmm0
; X64-AVX-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: vaddsd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; X64-AVX-NEXT: retq
%ptr = alloca i64, align 8
%bc3 = bitcast i64* %ptr to double*
@@ -951,11 +963,17 @@ define void @fadd_array(i64* %arg, double %arg1, i64 %arg2) nounwind {
; X86-AVX-NEXT: popl %ebp
; X86-AVX-NEXT: retl
;
; X64-LABEL: fadd_array:
; X64: # %bb.0: # %bb
; X64-NEXT: addsd (%rdi,%rsi,8), %xmm0
; X64-NEXT: movsd %xmm0, (%rdi,%rsi,8)
; X64-NEXT: retq
; X64-SSE-LABEL: fadd_array:
; X64-SSE: # %bb.0: # %bb
; X64-SSE-NEXT: addsd (%rdi,%rsi,8), %xmm0
; X64-SSE-NEXT: movsd %xmm0, (%rdi,%rsi,8)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: fadd_array:
; X64-AVX: # %bb.0: # %bb
; X64-AVX-NEXT: vaddsd (%rdi,%rsi,8), %xmm0, %xmm0
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi,%rsi,8)
; X64-AVX-NEXT: retq
bb:
%tmp4 = getelementptr inbounds i64, i64* %arg, i64 %arg2
%tmp6 = load atomic i64, i64* %tmp4 monotonic, align 8
@@ -123,14 +123,12 @@ define void @store_float(float* %fptr, float %v) {
;
; X64-SSE-LABEL: store_float:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd %xmm0, %eax
; X64-SSE-NEXT: movl %eax, (%rdi)
; X64-SSE-NEXT: movss %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_float:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovd %xmm0, %eax
; X64-AVX-NEXT: movl %eax, (%rdi)
; X64-AVX-NEXT: vmovss %xmm0, (%rdi)
; X64-AVX-NEXT: retq
store atomic float %v, float* %fptr unordered, align 4
ret void
@@ -164,14 +162,12 @@ define void @store_double(double* %fptr, double %v) {
;
; X64-SSE-LABEL: store_double:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq %xmm0, %rax
; X64-SSE-NEXT: movq %rax, (%rdi)
; X64-SSE-NEXT: movsd %xmm0, (%rdi)
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: store_double:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovq %xmm0, %rax
; X64-AVX-NEXT: movq %rax, (%rdi)
; X64-AVX-NEXT: vmovsd %xmm0, (%rdi)
; X64-AVX-NEXT: retq
store atomic double %v, double* %fptr unordered, align 8
ret void
@@ -350,8 +346,8 @@ define float @load_float(float* %fptr) {
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movd (%eax), %xmm0
; X86-SSE2-NEXT: movd %xmm0, (%esp)
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
@@ -362,8 +358,8 @@ define float @load_float(float* %fptr) {
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: vmovd (%eax), %xmm0
; X86-AVX-NEXT: vmovd %xmm0, (%esp)
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
@@ -383,12 +379,12 @@ define float @load_float(float* %fptr) {
;
; X64-SSE-LABEL: load_float:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movd (%rdi), %xmm0
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_float:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovd (%rdi), %xmm0
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: retq
%v = load atomic float, float* %fptr unordered, align 4
ret float %v
@@ -453,12 +449,12 @@ define double @load_double(double* %fptr) {
;
; X64-SSE-LABEL: load_double:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %xmm0
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_double:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovq (%rdi), %xmm0
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: retq
%v = load atomic double, double* %fptr unordered, align 8
ret double %v
@@ -703,9 +699,8 @@ define float @load_float_seq_cst(float* %fptr) {
; X86-SSE2-NEXT: pushl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT: movl (%eax), %eax
; X86-SSE2-NEXT: movd %eax, %xmm0
; X86-SSE2-NEXT: movd %xmm0, (%esp)
; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT: movss %xmm0, (%esp)
; X86-SSE2-NEXT: flds (%esp)
; X86-SSE2-NEXT: popl %eax
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
@@ -716,9 +711,8 @@ define float @load_float_seq_cst(float* %fptr) {
; X86-AVX-NEXT: pushl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT: movl (%eax), %eax
; X86-AVX-NEXT: vmovd %eax, %xmm0
; X86-AVX-NEXT: vmovd %xmm0, (%esp)
; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-AVX-NEXT: vmovss %xmm0, (%esp)
; X86-AVX-NEXT: flds (%esp)
; X86-AVX-NEXT: popl %eax
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
@@ -738,14 +732,12 @@ define float @load_float_seq_cst(float* %fptr) {
;
; X64-SSE-LABEL: load_float_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movl (%rdi), %eax
; X64-SSE-NEXT: movd %eax, %xmm0
; X64-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_float_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movl (%rdi), %eax
; X64-AVX-NEXT: vmovd %eax, %xmm0
; X64-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-AVX-NEXT: retq
%v = load atomic float, float* %fptr seq_cst, align 4
ret float %v
@@ -810,14 +802,12 @@ define double @load_double_seq_cst(double* %fptr) {
;
; X64-SSE-LABEL: load_double_seq_cst:
; X64-SSE: # %bb.0:
; X64-SSE-NEXT: movq (%rdi), %rax
; X64-SSE-NEXT: movq %rax, %xmm0
; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X64-SSE-NEXT: retq
;
; X64-AVX-LABEL: load_double_seq_cst:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: movq (%rdi), %rax
; X64-AVX-NEXT: vmovq %rax, %xmm0
; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X64-AVX-NEXT: retq
%v = load atomic double, double* %fptr seq_cst, align 8
ret double %v