207 lines
7.0 KiB
LLVM
207 lines
7.0 KiB
LLVM
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||
|
; RUN: llc < %s -mattr=+sha -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
|
||
|
; RUN: llc < %s -mattr=+sha,+avx2 -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors plus an i8 operand and
; returns a <4 x i32>. Every call in this file passes the constant 3 for the i8.
declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha1rnds4: %a and %b are both vector arguments and
; the i8 operand is the constant 3. The expected code is a single
; `sha1rnds4 $3, %xmm1, %xmm0` followed directly by the return, for both runs.
define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha1rnds4: the second vector operand is loaded from
; %b. The expected code has no separate load; the instruction reads its source
; straight from `(%rdi)`.
define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1rnds4rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha1nexte: both operands are vector arguments and
; the expected code is a single `sha1nexte %xmm1, %xmm0` plus the return.
define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha1nexte: the second operand is loaded from %b and
; the expected code folds that load into the instruction as `(%rdi)`.
define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1nexterm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1nexte (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha1msg1: both operands are vector arguments and
; the expected code is a single `sha1msg1 %xmm1, %xmm0` plus the return.
define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha1msg1: the second operand is loaded from %b and
; the expected code folds that load into the instruction as `(%rdi)`.
define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha1msg2: both operands are vector arguments and
; the expected code is a single `sha1msg2 %xmm1, %xmm0` plus the return.
define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha1msg2: the second operand is loaded from %b and
; the expected code folds that load into the instruction as `(%rdi)`.
define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha1msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha1msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes three <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha256rnds2. The expected code copies %a out of
; %xmm0 into %xmm3, moves the third operand %c (%xmm2) into %xmm0, executes the
; instruction with %xmm3 as destination, and copies the result back to %xmm0.
; The two runs differ only in the move mnemonic (movaps without AVX, vmovaps
; with AVX); the sha256rnds2 instruction itself is spelled the same in both.
define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rr:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; SSE-NEXT:    movaps %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rr:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm3
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, %xmm1, %xmm3
; AVX-NEXT:    vmovaps %xmm3, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha256rnds2: the second operand is loaded from %b and
; is expected to be folded into the instruction as `(%rdi)`. As %c arrives in
; %xmm1 here, the expected code copies %a from %xmm0 to %xmm2, moves %c into
; %xmm0, executes the instruction into %xmm2, and copies the result back to
; %xmm0. The two runs differ only in the move mnemonic (movaps vs. vmovaps).
define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
; SSE-LABEL: test_sha256rnds2rm:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha256rnds2rm:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm2
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    sha256rnds2 %xmm0, (%rdi), %xmm2
; AVX-NEXT:    vmovaps %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha256msg1: both operands are vector arguments and
; the expected code is a single `sha256msg1 %xmm1, %xmm0` plus the return.
define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha256msg1: the second operand is loaded from %b and
; the expected code folds that load into the instruction as `(%rdi)`.
define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg1rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg1 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Intrinsic under test: takes two <4 x i32> vectors and returns a <4 x i32>.
declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
|
||
|
|
||
|
; Register-register form of sha256msg2: both operands are vector arguments and
; the expected code is a single `sha256msg2 %xmm1, %xmm0` plus the return.
define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 %xmm1, %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %0
}
|
||
|
|
||
|
; Register-memory form of sha256msg2: the second operand is loaded from %b and
; the expected code folds that load into the instruction as `(%rdi)`.
define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; CHECK-LABEL: test_sha256msg2rm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    sha256msg2 (%rdi), %xmm0
; CHECK-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
  ret <4 x i32> %1
}
|
||
|
|
||
|
; Make sure we don't forget that sha instructions have no VEX equivalents and
; thus don't zero YMM/ZMM.
;
; The sha1rnds4 result is widened to <8 x i32> by a shufflevector whose upper
; four lanes come from zeroinitializer. Without AVX the expected code zeroes
; %xmm1 with xorps after the instruction; with AVX it must emit an explicit
; `vmovaps %xmm0, %xmm0` after the instruction rather than returning directly.
define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
; SSE-LABEL: test_sha1rnds4_zero_extend:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_sha1rnds4_zero_extend:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    sha1rnds4 $3, (%rdi), %xmm0
; AVX-NEXT:    vmovaps %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %b
  %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
  %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %2
}
|