; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8-arm-none-eabi -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=armebv8-arm-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; This is the same as Thumb2/mve-vmovn.ll, testing the same patterns for neon
; under both LE and BE. The vmovn instruction is very different between
; mve and neon, so these tests are not necessarily expected to generate a (neon)
; vmovn.

define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.32 q0, q1
; CHECK-NEXT:    vmovn.i32 d17, q1
; CHECK-NEXT:    vmovn.i32 d16, q0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vzip.32 q9, q8
; CHECKBE-NEXT:    vmovn.i32 d17, q8
; CHECKBE-NEXT:    vmovn.i32 d16, q9
; CHECKBE-NEXT:    vrev64.16 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.32 q1, q0
; CHECK-NEXT:    vmovn.i32 d1, q0
; CHECK-NEXT:    vmovn.i32 d0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vzip.32 q9, q8
; CHECKBE-NEXT:    vmovn.i32 d17, q8
; CHECKBE-NEXT:    vmovn.i32 d16, q9
; CHECKBE-NEXT:    vrev64.16 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 q0, q1
; CHECK-NEXT:    vmovn.i16 d17, q1
; CHECK-NEXT:    vmovn.i16 d16, q0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vzip.16 q9, q8
; CHECKBE-NEXT:    vmovn.i16 d17, q8
; CHECKBE-NEXT:    vmovn.i16 d16, q9
; CHECKBE-NEXT:    vrev64.8 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 q1, q0
; CHECK-NEXT:    vmovn.i16 d1, q0
; CHECK-NEXT:    vmovn.i16 d0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q0
; CHECKBE-NEXT:    vrev64.16 q9, q1
; CHECKBE-NEXT:    vzip.16 q9, q8
; CHECKBE-NEXT:    vmovn.i16 d17, q8
; CHECKBE-NEXT:    vmovn.i16 d16, q9
; CHECKBE-NEXT:    vrev64.8 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d1, d2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d1, d2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vorr d3, d0, d0
; CHECKBE-NEXT:    vorr q0, q1, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d1, d3
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d1, d3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d16, d3
; CHECK-NEXT:    vorr d17, d0, d0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d16, d3
; CHECKBE-NEXT:    vorr d17, d0, d0
; CHECKBE-NEXT:    vorr q0, q8, q8
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    vmov.f64 d1, d2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d0, d1
; CHECKBE-NEXT:    vmov.f64 d1, d2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d3, d1, d1
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vorr d3, d1, d1
; CHECKBE-NEXT:    vorr q0, q1, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vtrn.32 q9, q8
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.32 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vtrn.32 q9, q8
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q8, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vrev64.32 q8, q8
; CHECKBE-NEXT:    vtrn.32 q8, q9
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vext.32 q8, q0, q0, #1
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vext.32 q0, q1, q1, #1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vext.32 q8, q8, q8, #1
; CHECKBE-NEXT:    vtrn.32 q8, q9
; CHECKBE-NEXT:    vext.32 q8, q9, q9, #1
; CHECKBE-NEXT:    vrev64.32 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q8, q0, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vtrn.32 q0, q8
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vorr q10, q9, q9
; CHECKBE-NEXT:    vtrn.32 q10, q8
; CHECKBE-NEXT:    vtrn.32 q9, q10
; CHECKBE-NEXT:    vrev64.32 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q8, q0, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vtrn.32 q8, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vorr q10, q9, q9
; CHECKBE-NEXT:    vtrn.32 q10, q8
; CHECKBE-NEXT:    vtrn.32 q10, q9
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vtrn.16 q9, q8
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q0
; CHECKBE-NEXT:    vrev64.16 q9, q1
; CHECKBE-NEXT:    vtrn.16 q9, q8
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 d16, d1
; CHECK-NEXT:    vrev32.16 d17, d0
; CHECK-NEXT:    vtrn.16 d16, d3
; CHECK-NEXT:    vtrn.16 d17, d2
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d16, d1
; CHECKBE-NEXT:    vrev64.16 d17, d0
; CHECKBE-NEXT:    vrev64.16 d19, d3
; CHECKBE-NEXT:    vrev32.16 d16, d16
; CHECKBE-NEXT:    vrev64.16 d18, d2
; CHECKBE-NEXT:    vrev32.16 d17, d17
; CHECKBE-NEXT:    vtrn.16 d16, d19
; CHECKBE-NEXT:    vtrn.16 d17, d18
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d17, d3, d3
; CHECK-NEXT:    vtrn.16 d17, d1
; CHECK-NEXT:    vorr d16, d2, d2
; CHECK-NEXT:    vtrn.16 d16, d0
; CHECK-NEXT:    vtrn.16 d3, d17
; CHECK-NEXT:    vtrn.16 d2, d16
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d17, d3
; CHECKBE-NEXT:    vorr d21, d17, d17
; CHECKBE-NEXT:    vrev64.16 d16, d1
; CHECKBE-NEXT:    vrev64.16 d19, d2
; CHECKBE-NEXT:    vrev64.16 d18, d0
; CHECKBE-NEXT:    vtrn.16 d21, d16
; CHECKBE-NEXT:    vorr d20, d19, d19
; CHECKBE-NEXT:    vtrn.16 d20, d18
; CHECKBE-NEXT:    vtrn.16 d17, d21
; CHECKBE-NEXT:    vtrn.16 d19, d20
; CHECKBE-NEXT:    vrev64.16 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d17, d1, d1
; CHECK-NEXT:    vtrn.16 d17, d3
; CHECK-NEXT:    vorr d16, d0, d0
; CHECK-NEXT:    vtrn.16 d16, d2
; CHECK-NEXT:    vtrn.16 d1, d17
; CHECK-NEXT:    vtrn.16 d0, d16
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d17, d1
; CHECKBE-NEXT:    vorr d21, d17, d17
; CHECKBE-NEXT:    vrev64.16 d16, d3
; CHECKBE-NEXT:    vrev64.16 d19, d0
; CHECKBE-NEXT:    vrev64.16 d18, d2
; CHECKBE-NEXT:    vtrn.16 d21, d16
; CHECKBE-NEXT:    vorr d20, d19, d19
; CHECKBE-NEXT:    vtrn.16 d20, d18
; CHECKBE-NEXT:    vtrn.16 d17, d21
; CHECKBE-NEXT:    vtrn.16 d19, d20
; CHECKBE-NEXT:    vrev64.16 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 d16, d3
; CHECK-NEXT:    vrev32.16 d17, d2
; CHECK-NEXT:    vtrn.16 d16, d1
; CHECK-NEXT:    vtrn.16 d17, d0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d16, d3
; CHECKBE-NEXT:    vrev64.16 d17, d2
; CHECKBE-NEXT:    vrev64.16 d19, d1
; CHECKBE-NEXT:    vrev32.16 d16, d16
; CHECKBE-NEXT:    vrev64.16 d18, d0
; CHECKBE-NEXT:    vrev32.16 d17, d17
; CHECKBE-NEXT:    vtrn.16 d16, d19
; CHECKBE-NEXT:    vtrn.16 d17, d18
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q8, q1
; CHECKBE-NEXT:    vrev64.8 q9, q0
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q8, q0
; CHECKBE-NEXT:    vrev64.8 q9, q1
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q0
; CHECK-NEXT:    vldr d16, .LCPI24_0
; CHECK-NEXT:    vorr d6, d3, d3
; CHECK-NEXT:    vtbl.8 d1, {d5, d6}, d16
; CHECK-NEXT:    vorr d5, d2, d2
; CHECK-NEXT:    vtbl.8 d0, {d4, d5}, d16
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI24_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 2 @ 0x2
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 4 @ 0x4
; CHECK-NEXT:    .byte 13 @ 0xd
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 15 @ 0xf
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI24_0
; CHECKBE-NEXT:    vrev64.8 d19, d3
; CHECKBE-NEXT:    vrev64.8 d21, d2
; CHECKBE-NEXT:    vrev64.8 d18, d1
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d0
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI24_0:
; CHECKBE-NEXT:    .byte 0 @ 0x0
; CHECKBE-NEXT:    .byte 9 @ 0x9
; CHECKBE-NEXT:    .byte 2 @ 0x2
; CHECKBE-NEXT:    .byte 11 @ 0xb
; CHECKBE-NEXT:    .byte 4 @ 0x4
; CHECKBE-NEXT:    .byte 13 @ 0xd
; CHECKBE-NEXT:    .byte 6 @ 0x6
; CHECKBE-NEXT:    .byte 15 @ 0xf
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 def $d2_d3_d4
; CHECK-NEXT:    vldr d18, .LCPI25_0
; CHECK-NEXT:    vorr d4, d1, d1
; CHECK-NEXT:    vtbl.8 d17, {d3, d4}, d18
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vtbl.8 d16, {d2, d3}, d18
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI25_0:
; CHECK-NEXT:    .byte 1 @ 0x1
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 10 @ 0xa
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 7 @ 0x7
; CHECK-NEXT:    .byte 14 @ 0xe
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI25_0
; CHECKBE-NEXT:    vrev64.8 d19, d1
; CHECKBE-NEXT:    vrev64.8 d21, d0
; CHECKBE-NEXT:    vrev64.8 d18, d3
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d2
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI25_0:
; CHECKBE-NEXT:    .byte 1 @ 0x1
; CHECKBE-NEXT:    .byte 8 @ 0x8
; CHECKBE-NEXT:    .byte 3 @ 0x3
; CHECKBE-NEXT:    .byte 10 @ 0xa
; CHECKBE-NEXT:    .byte 5 @ 0x5
; CHECKBE-NEXT:    .byte 12 @ 0xc
; CHECKBE-NEXT:    .byte 7 @ 0x7
; CHECKBE-NEXT:    .byte 14 @ 0xe
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q0
; CHECK-NEXT:    vldr d16, .LCPI26_0
; CHECK-NEXT:    vorr d6, d3, d3
; CHECK-NEXT:    vtbl.8 d1, {d5, d6}, d16
; CHECK-NEXT:    vorr d5, d2, d2
; CHECK-NEXT:    vtbl.8 d0, {d4, d5}, d16
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI26_0:
; CHECK-NEXT:    .byte 1 @ 0x1
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 10 @ 0xa
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 7 @ 0x7
; CHECK-NEXT:    .byte 14 @ 0xe
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI26_0
; CHECKBE-NEXT:    vrev64.8 d19, d3
; CHECKBE-NEXT:    vrev64.8 d21, d2
; CHECKBE-NEXT:    vrev64.8 d18, d1
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d0
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI26_0:
; CHECKBE-NEXT:    .byte 1 @ 0x1
; CHECKBE-NEXT:    .byte 8 @ 0x8
; CHECKBE-NEXT:    .byte 3 @ 0x3
; CHECKBE-NEXT:    .byte 10 @ 0xa
; CHECKBE-NEXT:    .byte 5 @ 0x5
; CHECKBE-NEXT:    .byte 12 @ 0xc
; CHECKBE-NEXT:    .byte 7 @ 0x7
; CHECKBE-NEXT:    .byte 14 @ 0xe
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 def $d2_d3_d4
; CHECK-NEXT:    vldr d18, .LCPI27_0
; CHECK-NEXT:    vorr d4, d1, d1
; CHECK-NEXT:    vtbl.8 d17, {d3, d4}, d18
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vtbl.8 d16, {d2, d3}, d18
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI27_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 2 @ 0x2
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 4 @ 0x4
; CHECK-NEXT:    .byte 13 @ 0xd
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 15 @ 0xf
;
; CHECKBE-LABEL: vmovn8_t4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI27_0
; CHECKBE-NEXT:    vrev64.8 d19, d1
; CHECKBE-NEXT:    vrev64.8 d21, d0
; CHECKBE-NEXT:    vrev64.8 d18, d3
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d2
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI27_0:
; CHECKBE-NEXT:    .byte 0 @ 0x0
; CHECKBE-NEXT:    .byte 9 @ 0x9
; CHECKBE-NEXT:    .byte 2 @ 0x2
; CHECKBE-NEXT:    .byte 11 @ 0xb
; CHECKBE-NEXT:    .byte 4 @ 0x4
; CHECKBE-NEXT:    .byte 13 @ 0xd
; CHECKBE-NEXT:    .byte 6 @ 0x6
; CHECKBE-NEXT:    .byte 15 @ 0xf
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @test(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: test:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: test:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %a0 = extractelement <8 x i16> %src1, i32 0
  %a1 = extractelement <8 x i16> %src1, i32 1
  %a2 = extractelement <8 x i16> %src1, i32 2
  %a3 = extractelement <8 x i16> %src1, i32 3
  %a4 = extractelement <8 x i16> %src1, i32 4
  %a5 = extractelement <8 x i16> %src1, i32 5
  %a6 = extractelement <8 x i16> %src1, i32 6
  %a7 = extractelement <8 x i16> %src1, i32 7
  %b0 = extractelement <8 x i16> %src2, i32 0
  %b1 = extractelement <8 x i16> %src2, i32 1
  %b2 = extractelement <8 x i16> %src2, i32 2
  %b3 = extractelement <8 x i16> %src2, i32 3
  %b4 = extractelement <8 x i16> %src2, i32 4
  %b5 = extractelement <8 x i16> %src2, i32 5
  %b6 = extractelement <8 x i16> %src2, i32 6
  %b7 = extractelement <8 x i16> %src2, i32 7
  %s0 = trunc i16 %a0 to i8
  %s1 = trunc i16 %a1 to i8
  %s2 = trunc i16 %a2 to i8
  %s3 = trunc i16 %a3 to i8
  %s4 = trunc i16 %a4 to i8
  %s5 = trunc i16 %a5 to i8
  %s6 = trunc i16 %a6 to i8
  %s7 = trunc i16 %a7 to i8
  %t0 = trunc i16 %b0 to i8
  %t1 = trunc i16 %b1 to i8
  %t2 = trunc i16 %b2 to i8
  %t3 = trunc i16 %b3 to i8
  %t4 = trunc i16 %b4 to i8
  %t5 = trunc i16 %b5 to i8
  %t6 = trunc i16 %b6 to i8
  %t7 = trunc i16 %b7 to i8
  %r0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %r1 = insertelement <16 x i8> %r0, i8 %s1, i32 2
  %r2 = insertelement <16 x i8> %r1, i8 %s2, i32 4
  %r3 = insertelement <16 x i8> %r2, i8 %s3, i32 6
  %r4 = insertelement <16 x i8> %r3, i8 %s4, i32 8
  %r5 = insertelement <16 x i8> %r4, i8 %s5, i32 10
  %r6 = insertelement <16 x i8> %r5, i8 %s6, i32 12
  %r7 = insertelement <16 x i8> %r6, i8 %s7, i32 14
  %r10 = insertelement <16 x i8> %r7, i8 %t0, i32 1
  %r11 = insertelement <16 x i8> %r10, i8 %t1, i32 3
  %r12 = insertelement <16 x i8> %r11, i8 %t2, i32 5
  %r13 = insertelement <16 x i8> %r12, i8 %t3, i32 7
  %r14 = insertelement <16 x i8> %r13, i8 %t4, i32 9
  %r15 = insertelement <16 x i8> %r14, i8 %t5, i32 11
  %r16 = insertelement <16 x i8> %r15, i8 %t6, i32 13
  %r17 = insertelement <16 x i8> %r16, i8 %t7, i32 15
  ret <16 x i8> %r17
}