llvm-project/llvm/test/CodeGen/X86/avx2-arith.ll

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s

; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; CHECK: mul-v4i64
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; CHECK: mul_const1
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; CHECK: mul_const2
; CHECK: vpsllq  $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; CHECK: mul_const3
; CHECK: vpsllw  $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; CHECK: mul_const4
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; CHECK: mul_const5
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const6
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; CHECK: mul_const7
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; CHECK: mul_const8
; CHECK: vpsllw  $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}

; CHECK: mul_const9
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}