; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-linux -o - | FileCheck %s

; Builds a <9 x i32> value through a chain of shufflevector instructions that
; widen 1 -> 3 -> 6 -> 9 lanes, then extracts lanes 7 and 8 and stores them to
; %p1 and %p2. The expected assembly below stores the immediates 3 and 1, so
; the whole shuffle chain is expected to fold away to constants.
;
; NOTE(review): in this copy the leading constant operands of %tmp and %tmp2
; and the mask operand of every shufflevector are absent, so the IR does not
; parse as shown. They look like they were lost in extraction -- restore them
; from the upstream test; do not guess, the assertions depend on exact masks.
define void @test(i32* %p1, i32* %p2) {
; CHECK-LABEL: test:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #3
; CHECK-NEXT: mov w9, #1
; CHECK-NEXT: str w8, [x0]
; CHECK-NEXT: str w9, [x1]
; CHECK-NEXT: ret
  %tmp = shufflevector <1 x i32> , <1 x i32> undef, <3 x i32>
  %tmp2 = shufflevector <3 x i32> , <3 x i32> %tmp, <3 x i32>
  %tmp3 = shufflevector <3 x i32> %tmp2, <3 x i32> undef, <6 x i32>
  %tmp4 = shufflevector <6 x i32> undef, <6 x i32> %tmp3, <9 x i32>
  ; Only the last two lanes of the 9-lane vector are observed.
  %tmp6 = extractelement <9 x i32> %tmp4, i32 7
  %tmp8 = extractelement <9 x i32> %tmp4, i32 8
  store i32 %tmp6, i32* %p1, align 4
  store i32 %tmp8, i32* %p2, align 4
  ret void
}

; Test case from PR41951
; Shuffles two <3 x i32> arguments (%y as the first operand, %x as the second)
; into a single <4 x i32> result. The expected assembly is two zip1
; instructions followed by an ext.
;
; NOTE(review): the shuffle mask operand is absent in this copy; restore it
; from the upstream test before running.
define <4 x i32> @widen_shuffles_reduced(<3 x i32> %x, <3 x i32> %y) {
; CHECK-LABEL: widen_shuffles_reduced:
; CHECK: // %bb.0:
; CHECK-NEXT: zip1 v2.4s, v0.4s, v1.4s
; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s
; CHECK-NEXT: ext v0.16b, v0.16b, v2.16b, #8
; CHECK-NEXT: ret
  %s3 = shufflevector <3 x i32> %y, <3 x i32> %x, <4 x i32>
  ret <4 x i32> %s3
}

; Loads two <3 x float> vectors, combines them through three shufflevector
; steps, feeds the result through two llvm.fma.v4f32 calls whose remaining
; operands are undef, widens to 16 lanes, bitcasts to integers and stores
; lane 0 to %p3. The expected assembly combines the two loads with a single
; trn2.
;
; NOTE(review): judging by the name, this presumably guards the zip-mask
; matching in the backend (the expected instruction is trn2, not zip) --
; confirm against the commit that added the test.
; NOTE(review): every shufflevector mask operand is absent in this copy;
; restore them from the upstream test before running.
define void @zip_mask_check(<3 x float>* %p1, <3 x float>* %p2, i32* %p3) {
; CHECK-LABEL: zip_mask_check:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s
; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
; CHECK-NEXT: fmla v0.4s, v0.4s, v0.4s
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
  ; The two loads carry different alignments (16 vs 4); the expected assembly
  ; reads the first as a full q register and the second as a d register.
  %tmp3 = load <3 x float>, <3 x float>* %p1, align 16
  %tmp4 = load <3 x float>, <3 x float>* %p2, align 4
  %tmp5 = shufflevector <3 x float> %tmp3, <3 x float> %tmp4, <4 x i32>
  %tmp6 = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32>
  %tmp7 = shufflevector <4 x float> %tmp6, <4 x float> undef, <4 x i32>
  ; Two chained fma calls; every operand other than the running value is undef.
  %tmp8 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp7, <4 x float> undef, <4 x float> undef)
  %tmp9 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> %tmp8)
  ; Widen to 16 lanes, reinterpret as integers, and observe lane 0 only.
  %tmp10 = shufflevector <4 x float> %tmp9, <4 x float> undef, <16 x i32>
  %tmp11 = bitcast <16 x float> %tmp10 to <16 x i32>
  %tmp12 = extractelement <16 x i32> %tmp11, i32 0
  store i32 %tmp12, i32* %p3, align 4
  ret void
}

; NOTE(review): attribute group #1 is referenced here but its definition is
; not visible in this portion of the file -- verify it exists further down.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1