; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
define <4 x double> @stack_fold_broadcastsd_ymm(<2 x double> %a0) {
; CHECK-LABEL: stack_fold_broadcastsd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vbroadcastsd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <4 x double> %2, <double 0x1, double 0x0, double 0x0, double 0x0>
  ret <4 x double> %3
}
define <4 x float> @stack_fold_broadcastss(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_broadcastss:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vbroadcastss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <4 x float> %2, <float 1.0, float 0x0, float 0x0, float 0x0>
  ret <4 x float> %3
}
define <8 x float> @stack_fold_broadcastss_ymm(<4 x float> %a0) {
; CHECK-LABEL: stack_fold_broadcastss_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vbroadcastss {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
  ; fadd forces execution domain
  %3 = fadd <8 x float> %2, <float 1.0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0>
  ret <8 x float> %3
}
define <4 x i32> @stack_fold_extracti128(<8 x i16> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_extracti128:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vextracti128 $1, %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Folded Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  ; zext forces execution domain
  %t1 = zext <8 x i16> %a0 to <8 x i32>
  %t2 = shufflevector <8 x i32> %t1, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %t3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  ret <4 x i32> %t2
}
define <8 x i32> @stack_fold_inserti128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_inserti128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}
define <16 x i16> @stack_fold_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_mpsadbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vmpsadbw $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
define <32 x i8> @stack_fold_pabsb(<32 x i8> %a0) {
; CHECK-LABEL: stack_fold_pabsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <32 x i8> %a0, zeroinitializer
  %3 = sub <32 x i8> zeroinitializer, %a0
  %4 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %3
  ret <32 x i8> %4
}
define <8 x i32> @stack_fold_pabsd(<8 x i32> %a0) {
; CHECK-LABEL: stack_fold_pabsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i32> %a0, zeroinitializer
  %3 = sub <8 x i32> zeroinitializer, %a0
  %4 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %3
  ret <8 x i32> %4
}
define <16 x i16> @stack_fold_pabsw(<16 x i16> %a0) {
; CHECK-LABEL: stack_fold_pabsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpabsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i16> %a0, zeroinitializer
  %3 = sub <16 x i16> zeroinitializer, %a0
  %4 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %3
  ret <16 x i16> %4
}
define <16 x i16> @stack_fold_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_packssdw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackssdw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
define <32 x i8> @stack_fold_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_packsswb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpacksswb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
define <16 x i16> @stack_fold_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_packusdw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackusdw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
define <32 x i8> @stack_fold_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_packuswb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpackuswb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
define <32 x i8> @stack_fold_paddb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <32 x i8> %a0, %a1
  ret <32 x i8> %2
}
define <8 x i32> @stack_fold_paddd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_paddd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <8 x i32> %a0, %a1
  ret <8 x i32> %2
}
define <4 x i64> @stack_fold_paddq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_paddq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <4 x i64> %a0, %a1
  ret <4 x i64> %2
}
define <32 x i8> @stack_fold_paddsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddsb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
define <16 x i16> @stack_fold_paddsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
define <32 x i8> @stack_fold_paddusb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddusb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddusb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
define <16 x i16> @stack_fold_paddusw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddusw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddusw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
define <16 x i16> @stack_fold_paddw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpaddw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <16 x i16> %a0, %a1
  ret <16 x i16> %2
}
define <32 x i8> @stack_fold_palignr(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_palignr:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpalignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],mem[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
  ret <32 x i8> %2
}
define <32 x i8> @stack_fold_pand(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pand:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpand {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = and <32 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <32 x i8> %2, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %3
}
define <32 x i8> @stack_fold_pandn(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pandn:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpandn {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <32 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <32 x i8> %2, %a1
  ; add forces execution domain
  %4 = add <32 x i8> %3, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %4
}
define <32 x i8> @stack_fold_pavgb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pavgb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpavgb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <32 x i8> %a0 to <32 x i16>
  %3 = zext <32 x i8> %a1 to <32 x i16>
  %4 = add <32 x i16> %2, %3
  %5 = add <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %6 = lshr <32 x i16> %5, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %7 = trunc <32 x i16> %6 to <32 x i8>
  ret <32 x i8> %7
}
define <16 x i16> @stack_fold_pavgw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pavgw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpavgw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <16 x i16> %a0 to <16 x i32>
  %3 = zext <16 x i16> %a1 to <16 x i32>
  %4 = add <16 x i32> %2, %3
  %5 = add <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = lshr <16 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %7 = trunc <16 x i32> %6 to <16 x i16>
  ret <16 x i16> %7
}
define <4 x i32> @stack_fold_pblendd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pblendd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendd $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[0,1,2],xmm0[3]
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}
define <8 x i32> @stack_fold_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pblendd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendd $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}
define <32 x i8> @stack_fold_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %c) {
; CHECK-LABEL: stack_fold_pblendvb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendvb %ymm0, {{[-0-9]+}}(%r{{[sb]}}p), %ymm1, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a1, <32 x i8> %c, <32 x i8> %a0)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
define <16 x i16> @stack_fold_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pblendw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpblendw $7, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0,1,2],ymm0[3,4,5,6,7],mem[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 16, i32 17, i32 18, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i16> %2
}
define <16 x i8> @stack_fold_pbroadcastb(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %2
}
define <32 x i8> @stack_fold_pbroadcastb_ymm(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastb_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <32 x i32> zeroinitializer
  ret <32 x i8> %2
}
define <4 x i32> @stack_fold_pbroadcastd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 2, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}
define <8 x i32> @stack_fold_pbroadcastd_ymm(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <8 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}
define <2 x i64> @stack_fold_pbroadcastq(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: vpaddq {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 2, i64 1>
  ret <2 x i64> %3
}
define <4 x i64> @stack_fold_pbroadcastq_ymm(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> undef, <4 x i32> zeroinitializer
  ; add forces execution domain
  %3 = add <4 x i64> %2, <i64 2, i64 1, i64 1, i64 1>
  ret <4 x i64> %3
}
define <8 x i16> @stack_fold_pbroadcastw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, forcing %a0 to spill; the 16-byte reload must fold into vpbroadcastw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %2
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pbroadcastw_ymm(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pbroadcastw_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpbroadcastw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, forcing %a0 to spill; the 16-byte reload must fold into the ymm vpbroadcastw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <16 x i32> zeroinitializer
ret <16 x i16> %2
}
|
||
|
|
||
|
define <32 x i8> @stack_fold_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpeqb's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp eq <32 x i8> %a0, %a1
%3 = sext <32 x i1> %2 to <32 x i8>
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpeqd's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp eq <8 x i32> %a0, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
ret <8 x i32> %3
}
|
||
|
|
||
|
define <4 x i64> @stack_fold_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpeqq's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp eq <4 x i64> %a0, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
ret <4 x i64> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpeqw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp eq <16 x i16> %a0, %a1
%3 = sext <16 x i1> %2 to <16 x i16>
ret <16 x i16> %3
}
|
||
|
|
||
|
define <32 x i8> @stack_fold_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpgtb's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <32 x i8> %a0, %a1
%3 = sext <32 x i1> %2 to <32 x i8>
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpgtd's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <8 x i32> %a0, %a1
%3 = sext <8 x i1> %2 to <8 x i32>
ret <8 x i32> %3
}
|
||
|
|
||
|
define <4 x i64> @stack_fold_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpgtq's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <4 x i64> %a0, %a1
%3 = sext <4 x i1> %2 to <4 x i64>
ret <4 x i64> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpcmpgtw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <16 x i16> %a0, %a1
%3 = sext <16 x i1> %2 to <16 x i16>
ret <16 x i16> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_perm2i128(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_perm2i128:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vperm2i128 $33, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[2,3],mem[0,1]
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vperm2i128's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; add forces execution domain
%3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_permd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1 (the data operand); the 32-byte reload must fold into vpermd.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
; add forces execution domain
%3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %3
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
|
||
|
|
||
|
define <4 x double> @stack_fold_permpd(<4 x double> %a0) {
; CHECK-LABEL: stack_fold_permpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermpd $235, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[3,2,2,3]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the 32-byte reload must fold into vpermpd.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; fadd forces execution domain
%3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
ret <4 x double> %3
}
|
||
|
|
||
|
define <8 x float> @stack_fold_permps(<8 x i32> %a0, <8 x float> %a1) {
; CHECK-LABEL: stack_fold_permps:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermps {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1 (the data operand); the 32-byte reload must fold into vpermps.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
|
||
|
|
||
|
define <4 x i64> @stack_fold_permq(<4 x i64> %a0) {
; CHECK-LABEL: stack_fold_permq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpermq $235, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[3,2,2,3]
; CHECK-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the 32-byte reload must fold into vpermq.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
; add forces execution domain
%3 = add <4 x i64> %2, <i64 2, i64 1, i64 1, i64 1>
ret <4 x i64> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_phaddd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_phaddd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphaddd's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||
|
|
||
|
define <16 x i16> @stack_fold_phaddsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphaddsw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
|
||
|
|
||
|
define <16 x i16> @stack_fold_phaddw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphaddw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphaddw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||
|
|
||
|
define <8 x i32> @stack_fold_phsubd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_phsubd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphsubd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphsubd's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||
|
|
||
|
define <16 x i16> @stack_fold_phsubsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphsubsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphsubsw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
|
||
|
|
||
|
define <16 x i16> @stack_fold_phsubw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vphsubw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vphsubw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||
|
|
||
|
define <16 x i16> @stack_fold_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaddubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpmaddubsw's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmaddwd(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaddwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the 32-byte reload must fold into vpmaddwd's memory operand.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
|
||
|
|
||
|
define <32 x i8> @stack_fold_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select max pattern must lower to a folded vpmaxsb.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select max pattern must lower to a folded vpmaxsd.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pmaxsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select max pattern must lower to a folded vpmaxsw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp sgt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
|
||
|
|
||
|
define <32 x i8> @stack_fold_pmaxub(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxub:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxub {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select max must lower to a folded vpmaxub.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxud:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxud {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select max must lower to a folded vpmaxud.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmaxuw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select max must lower to a folded vpmaxuw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ugt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
|
||
|
|
||
|
define <32 x i8> @stack_fold_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select min pattern must lower to a folded vpminsb.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminsd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select min pattern must lower to a folded vpminsd.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pminsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the icmp+select min pattern must lower to a folded vpminsw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp slt <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
|
||
|
|
||
|
define <32 x i8> @stack_fold_pminub(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminub:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminub {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select min must lower to a folded vpminub.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <32 x i8> %a0, %a1
%3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %a1
ret <32 x i8> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pminud(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminud:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminud {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select min must lower to a folded vpminud.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <8 x i32> %a0, %a1
%3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %a1
ret <8 x i32> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpminuw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm2-xmm15, spilling %a1; the unsigned icmp+select min must lower to a folded vpminuw.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = icmp ult <16 x i16> %a0, %a1
%3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %a1
ret <16 x i16> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxbd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the shuffle+sext must lower to vpmovsxbd with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = sext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}
|
||
|
|
||
|
define <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxbq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the shuffle+sext must lower to vpmovsxbq with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}
|
||
|
|
||
|
define <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxbw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the sext must lower to vpmovsxbw with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sext <16 x i8> %a0 to <16 x i16>
ret <16 x i16> %2
}
|
||
|
|
||
|
define <4 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pmovsxdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the sext must lower to vpmovsxdq with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sext <4 x i32> %a0 to <4 x i64>
ret <4 x i64> %2
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovsxwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxwd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the sext must lower to vpmovsxwd with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = sext <8 x i16> %a0 to <8 x i32>
ret <8 x i32> %2
}
|
||
|
|
||
|
define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovsxwq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovsxwq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
; The asm clobbers xmm1-xmm15, spilling %a0; the shuffle+sext must lower to vpmovsxwq with a folded reload.
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = sext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}
|
||
|
|
||
|
define <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxbd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = zext <8 x i8> %2 to <8 x i32>
ret <8 x i32> %3
}

define <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxbq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i8> %2 to <4 x i64>
ret <4 x i64> %3
}

define <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxbw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = zext <16 x i8> %a0 to <16 x i16>
ret <16 x i16> %2
}

define <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pmovzxdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = zext <4 x i32> %a0 to <4 x i64>
ret <4 x i64> %2
}

define <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovzxwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxwd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = zext <8 x i16> %a0 to <8 x i32>
ret <8 x i32> %2
}

define <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovzxwq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmovzxwq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%3 = zext <4 x i16> %2 to <4 x i64>
ret <4 x i64> %3
}

define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuldq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmuldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = shl <4 x i64> %2, <i64 32, i64 32, i64 32, i64 32>
%5 = ashr <4 x i64> %4, <i64 32, i64 32, i64 32, i64 32>
%6 = shl <4 x i64> %3, <i64 32, i64 32, i64 32, i64 32>
%7 = ashr <4 x i64> %6, <i64 32, i64 32, i64 32, i64 32>
%8 = mul <4 x i64> %5, %7
ret <4 x i64> %8
}

define <16 x i16> @stack_fold_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhrsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @stack_fold_pmulhuw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhuw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone

define <16 x i16> @stack_fold_pmulhw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulhw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @stack_fold_pmulld(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmulld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmulld {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = mul <8 x i32> %a0, %a1
ret <8 x i32> %2
}

define <16 x i16> @stack_fold_pmullw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmullw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmullw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = mul <16 x i16> %a0, %a1
ret <16 x i16> %2
}

define <4 x i64> @stack_fold_pmuludq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuludq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpmuludq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = bitcast <8 x i32> %a0 to <4 x i64>
%3 = bitcast <8 x i32> %a1 to <4 x i64>
%4 = and <4 x i64> %2, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%5 = and <4 x i64> %3, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%6 = mul <4 x i64> %4, %5
ret <4 x i64> %6
}

define <32 x i8> @stack_fold_por(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_por:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpor {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = or <32 x i8> %a0, %a1
; add forces execution domain
%3 = add <32 x i8> %2, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <32 x i8> %3
}

define <4 x i64> @stack_fold_psadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_psadbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsadbw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone

define <32 x i8> @stack_fold_pshufb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pshufb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_pshufd(<8 x i32> %a0) {
; CHECK-LABEL: stack_fold_pshufd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufd $27, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[3,2,1,0,7,6,5,4]
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; add forces execution domain
%3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %3
}

define <16 x i16> @stack_fold_vpshufhw(<16 x i16> %a0) {
; CHECK-LABEL: stack_fold_vpshufhw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshufhw $27, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
ret <16 x i16> %2
}

define <16 x i16> @stack_fold_vpshuflw(<16 x i16> %a0) {
; CHECK-LABEL: stack_fold_vpshuflw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpshuflw $27, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = mem[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15]
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %2
}

define <32 x i8> @stack_fold_psignb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_psignb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
ret <32 x i8> %2
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone

define <8 x i32> @stack_fold_psignd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_psignd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_psignw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_psignw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsignw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone

define <8 x i32> @stack_fold_pslld(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pslld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpslld {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @stack_fold_psllq(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psllq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone

define <4 x i32> @stack_fold_psllvd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psllvd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllvd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_psllvd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllvd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psllvq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psllvq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllvq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @stack_fold_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_psllvq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllvq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone

define <16 x i16> @stack_fold_psllw(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psllw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsllw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone

define <8 x i32> @stack_fold_psrad(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrad:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrad {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i32> @stack_fold_psravd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psravd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsravd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_psravd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsravd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <16 x i16> @stack_fold_psraw(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psraw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsraw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone

define <8 x i32> @stack_fold_psrld(<8 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrld:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrld {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone

define <4 x i64> @stack_fold_psrlq(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psrlq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone

define <4 x i32> @stack_fold_psrlvd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrlvd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlvd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @stack_fold_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrlvd_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlvd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone

define <2 x i64> @stack_fold_psrlvq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psrlvq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlvq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone

; Check that the reloaded shift counts fold into VPSRLVQ (256-bit).
define <4 x i64> @stack_fold_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_psrlvq_ymm:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlvq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
  ret <4 x i64> %2
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
; The shift amount of VPSRLW is a 128-bit operand even for the ymm form, so
; the folded reload is 16 bytes while the destination is a ymm register.
define <16 x i16> @stack_fold_psrlw(<16 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psrlw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsrlw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
; Check that the spilled RHS of a byte subtract folds into VPSUBB.
define <32 x i8> @stack_fold_psubb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <32 x i8> %a0, %a1
  ret <32 x i8> %2
}
; Check that the spilled RHS of a dword subtract folds into VPSUBD.
define <8 x i32> @stack_fold_psubd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_psubd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <8 x i32> %a0, %a1
  ret <8 x i32> %2
}
; Check that the spilled RHS of a qword subtract folds into VPSUBQ.
define <4 x i64> @stack_fold_psubq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_psubq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <4 x i64> %a0, %a1
  ret <4 x i64> %2
}
; Signed saturating byte subtract: folded reload into VPSUBSB.
define <32 x i8> @stack_fold_psubsb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubsb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubsb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; Signed saturating word subtract: folded reload into VPSUBSW.
define <16 x i16> @stack_fold_psubsw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubsw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubsw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; Unsigned saturating byte subtract: folded reload into VPSUBUSB.
define <32 x i8> @stack_fold_psubusb(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubusb:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubusb {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
  ret <32 x i8> %2
}
declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
; Unsigned saturating word subtract: folded reload into VPSUBUSW.
define <16 x i16> @stack_fold_psubusw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubusw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubusw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
  ret <16 x i16> %2
}
declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
; Check that the spilled RHS of a word subtract folds into VPSUBW.
define <16 x i16> @stack_fold_psubw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpsubw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <16 x i16> %a0, %a1
  ret <16 x i16> %2
}
; High-byte unpack: the shufflevector mask selects the high 8 bytes of each
; 128-bit lane, interleaved from both sources, matching VPUNPCKHBW.
define <32 x i8> @stack_fold_punpckhbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpckhbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
  ret <32 x i8> %2
}
; High-dword unpack folded from the stack; the trailing add pins the integer
; execution domain so the shuffle stays VPUNPCKHDQ rather than a float unpack.
define <8 x i32> @stack_fold_punpckhdq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckhdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}
; High-qword unpack folded from the stack; the add keeps the integer domain.
define <4 x i64> @stack_fold_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpckhqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ; add forces execution domain
  %3 = add <4 x i64> %2, <i64 2, i64 1, i64 1, i64 1>
  ret <4 x i64> %3
}
; High-word unpack folded from the stack into VPUNPCKHWD.
define <16 x i16> @stack_fold_punpckhwd(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpckhwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i16> %2
}
; Low-byte unpack folded from the stack into VPUNPCKLBW.
define <32 x i8> @stack_fold_punpcklbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpcklbw:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
  ret <32 x i8> %2
}
; Low-dword unpack folded from the stack; the add keeps the integer domain.
define <8 x i32> @stack_fold_punpckldq(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckldq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; CHECK-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
  ; add forces execution domain
  %3 = add <8 x i32> %2, <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i32> %3
}
; Low-qword unpack folded from the stack; the add keeps the integer domain.
define <4 x i64> @stack_fold_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpcklqdq:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-NEXT: vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ; add forces execution domain
  %3 = add <4 x i64> %2, <i64 2, i64 1, i64 1, i64 1>
  ret <4 x i64> %3
}
; Low-word unpack folded from the stack into VPUNPCKLWD.
define <16 x i16> @stack_fold_punpcklwd(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpcklwd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11]
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
  ret <16 x i16> %2
}
; XOR folded from the stack; the add keeps the integer execution domain so the
; compiler emits VPXOR rather than VXORPS.
define <32 x i8> @stack_fold_pxor(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: stack_fold_pxor:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vpxor {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload
; CHECK-NEXT: vpaddb {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT: retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <32 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <32 x i8> %2, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <32 x i8> %3
}