Add AVX MOV{SS,SD}{rr,rm} instructions

llvm-svn: 106588
2010-06-22 22:38:56 +00:00 · 2010-06-22 22:38:56 +00:00 · 1a890f9dc0
parent adef9327fe
commit 1a890f9dc0
6 changed files with 100 additions and 34 deletions
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@ -83,7 +83,6 @@ def SSEPackedInt    : Domain<3>;
 class OpSize { bit hasOpSizePrefix = 1; }
 class AdSize { bit hasAdSizePrefix = 1; }
 class REX_W  { bit hasREX_WPrefix = 1; }
-class VEX_4V { bit hasVEX_4VPrefix = 1; }
 class LOCK   { bit hasLockPrefix = 1; }
 class SegFS  { bits<2> SegOvrBits = 1; }
 class SegGS  { bits<2> SegOvrBits = 2; }
@ -102,6 +101,8 @@ class XS     { bits<4> Prefix = 12; }
 class T8     { bits<4> Prefix = 13; }
 class TA     { bits<4> Prefix = 14; }
 class TF     { bits<4> Prefix = 15; }
+class VEX    { bit hasVEXPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }

 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
              string AsmStr, Domain d = GenericDomain>
@ -125,11 +126,12 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,

  bits<4> Prefix = 0;       // Which prefix byte does this inst have?
  bit hasREX_WPrefix  = 0;  // Does this inst requires the REX.W prefix?
-  bit hasVEX_4VPrefix  = 0;  // Does this inst requires the VEX.VVVV prefix?
  FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
  bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
  bits<2> SegOvrBits = 0;   // Segment override prefix.
  Domain ExeDomain = d;
+  bit hasVEXPrefix  = 0;    // Does this inst require a VEX prefix?
+  bit hasVEX_4VPrefix  = 0; // Does this inst use the VEX.VVVV field?

  // TSFlags layout should be kept in sync with X86InstrInfo.h.
  let TSFlags{5-0}   = FormBits;
@ -143,7 +145,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
  let TSFlags{21-20} = SegOvrBits;
  let TSFlags{23-22} = ExeDomain.Value;
  let TSFlags{31-24} = Opcode;
-  let TSFlags{32}    = hasVEX_4VPrefix;
+  let TSFlags{32}    = hasVEXPrefix;
+  let TSFlags{33}    = hasVEX_4VPrefix;
 }

 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@ -217,24 +220,24 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
 // SI - SSE 1 & 2 scalar instructions
 class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
      : I<o, F, outs, ins, asm, pattern> {
-  let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+  let Predicates = !if(hasVEXPrefix /* VEX */,
            !if(!eq(Prefix, 11 /* XD */), [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
            !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));

  // AVX instructions have a 'v' prefix in the mnemonic
-  let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+  let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
 }

 // PI - SSE 1 & 2 packed instructions
 class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
         Domain d>
      : I<o, F, outs, ins, asm, pattern, d> {
-  let Predicates = !if(hasVEX_4VPrefix /* VEX_4V */,
+  let Predicates = !if(hasVEXPrefix /* VEX */,
        !if(hasOpSizePrefix /* OpSize */, [HasAVX, HasSSE2], [HasAVX, HasSSE1]),
        !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));

  // AVX instructions have a 'v' prefix in the mnemonic
-  let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+  let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
 }

 // SSE1 Instruction Templates:
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@ -424,13 +424,14 @@ namespace X86II {
  // those enums below are used, TSFlags must be shifted right by 32 first.
  enum {
    //===------------------------------------------------------------------===//
-    // VEX_4V - VEX prefixes are instruction prefixes used in AVX.
+    // VEX - VEX prefixes are instruction prefixes used in AVX.
    // VEX_4V is used to specify an additional AVX/SSE register. Several 2
    // address instructions in SSE are represented as 3 address ones in AVX
    // and the additional register is encoded in VEX_VVVV prefix.
    //
    VEXShift    = 0,
-    VEX_4V      = 1 << VEXShift
+    VEX         = 1 << VEXShift,
+    VEX_4V      = 2 << VEXShift
  };

  // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@ -496,33 +496,48 @@ def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
 // SSE 1 & 2 - Move Instructions
 //===----------------------------------------------------------------------===//

+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm>
+    : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+      [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Scalar load from memory; loading automatically zeroes the upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+                    PatFrag mem_pat, string OpcodeStr>
+    : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+         !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+         [(set RC:$dst, (mem_pat addr:$src))]>;
+
 // Move Instructions. Register-to-register movss/movsd is not used for FR32/64
 // register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
 // is used instead. Register-to-register movss/movsd is not modeled as an
 // INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
 // in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
-let Constraints = "$src1 = $dst" in {
-def MOVSSrr : SSI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
-                  "movss\t{$src2, $dst|$dst, $src2}",
-                  [(set (v4f32 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
-def MOVSDrr : SDI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                  "movsd\t{$src2, $dst|$dst, $src2}",
-                  [(set (v2f64 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
+let isAsmParserOnly = 1 in { // AVX (VEX-encoded) variants
+  def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+      "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+  def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+      "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+  // Loads take a single memory source, so they use plain VEX.
+  let canFoldAsLoad = 1, isReMaterializable = 1 in {
+    def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+    // Same AddedComplexity as the non-AVX MOVSDrm.
+    let AddedComplexity = 20 in
+      def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+  }
+}
+
+let Constraints = "$src1 = $dst" in {
+  def MOVSSrr : sse12_move_rr<FR32, v4f32,
+                          "movss\t{$src2, $dst|$dst, $src2}">, XS;
+  def MOVSDrr : sse12_move_rr<FR64, v2f64,
+                          "movsd\t{$src2, $dst|$dst, $src2}">, XD;
 }

-// Loading from memory automatically zeroing upper bits.
 let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
-                  "movss\t{$src, $dst|$dst, $src}",
-                  [(set FR32:$dst, (loadf32 addr:$src))]>;
-let AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (loadf64 addr:$src))]>;
+  def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+
+  let AddedComplexity = 20 in
+    def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
 }

 let AddedComplexity = 15 in {
@ -574,6 +589,15 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
                  "movsd\t{$src, $dst|$dst, $src}",
                  [(store FR64:$src, addr:$dst)]>;

+let isAsmParserOnly = 1 in { // AVX stores: no VVVV source, so VEX not VEX_4V
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+                  "movss\t{$src, $dst|$dst, $src}",
+                  [(store FR32:$src, addr:$dst)]>, XS, VEX;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+                  "movsd\t{$src, $dst|$dst, $src}",
+                  [(store FR64:$src, addr:$dst)]>, XD, VEX;
+}
+
 // Extract and store.
 def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                 addr:$dst),
--- a/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@ -714,15 +714,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
  // Keep track of the current byte being emitted.
  unsigned CurByte = 0;
  
-  // Is this instruction encoded in AVX form?
-  bool IsAVXForm = false;
+  // Is this instruction encoded using the AVX VEX prefix?
+  bool HasVEXPrefix = false;
+
+  // Does it use the VEX.VVVV field?
+  bool HasVEX_4V = false;
+
+  if ((TSFlags >> 32) & X86II::VEX)
+    HasVEXPrefix = true;
  if ((TSFlags >> 32) & X86II::VEX_4V)
-    IsAVXForm = true;
+    HasVEX_4V = true;

  // FIXME: We should emit the prefixes in exactly the same order as GAS does,
  // in order to provide diffability.

-  if (!IsAVXForm)
+  if (!HasVEXPrefix)
    EmitOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
  else
    EmitVEXOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
@ -772,7 +778,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
    EmitByte(BaseOpcode, CurByte, OS);
    SrcRegNum = CurOp + 1;

-    if (IsAVXForm) // Skip 1st src (which is encoded in VEX_VVVV)
+    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
      SrcRegNum++;

    EmitRegModRMByte(MI.getOperand(SrcRegNum),
@ -783,7 +789,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
  case X86II::MRMSrcMem: {
    int AddrOperands = X86AddrNumOperands;
    unsigned FirstMemOp = CurOp+1;
-    if (IsAVXForm) {
+    if (HasVEX_4V) {
      ++AddrOperands;
      ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
    }
--- a/llvm/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/llvm/test/MC/AsmParser/X86/x86_32-encoding.s
@ -10302,3 +10302,19 @@
 // CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
          vandnpd  -4(%ebx,%ecx,8), %xmm2, %xmm5

+// CHECK: vmovss  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
+          vmovss  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovss  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x10,0xec]
+          vmovss  %xmm4, %xmm2, %xmm5
+
+// CHECK: vmovsd  -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
+          vmovsd  -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovsd  %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
+          vmovsd  %xmm4, %xmm2, %xmm5
+
--- a/llvm/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/llvm/test/MC/AsmParser/X86/x86_64-encoding.s
@ -360,6 +360,22 @@ vdivpd  -4(%rcx,%rbx,8), %xmm10, %xmm11
 // CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
          vandnpd  -4(%rbx,%rcx,8), %xmm12, %xmm10

+// CHECK: vmovss  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
+          vmovss  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovss  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
+          vmovss  %xmm14, %xmm10, %xmm15
+
+// CHECK: vmovsd  -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
+          vmovsd  -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovsd  %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
+          vmovsd  %xmm14, %xmm10, %xmm15
+
 // rdar://7840289
 // CHECK: pshufb	CPI1_0(%rip), %xmm1
 // CHECK:  encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]