; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Range checks: for all the instructions tested in this file, the immediate
; must be within the range [-8, 7] (4-bit immediate). Out-of-range values are
; tested only in one case (the first test below). Valid values are tested all
; through the rest of the file.

define void @imm_out_of_range(<vscale x 2 x i64>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: imm_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x8, x0, #8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT:    addvl x8, x0, #-9
; CHECK-NEXT:    st1d { z0.d }, p0, [x8]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 8
  %data = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  %base_store = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -9
  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

; 2-lane contiguous load/stores.

define void @test_masked_ldst_sv2i8(<vscale x 2 x i8>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1b { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base, i64 -8
  %data = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %base_store = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base, i64 -7
  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %data, <vscale x 2 x i8>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2i16(<vscale x 2 x i16>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base, i64 -8
  %data = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %base_store = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base, i64 -7
  call void @llvm.masked.store.nxv2i16(<vscale x 2 x i16> %data, <vscale x 2 x i16>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2i32(<vscale x 2 x i32>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 -8
  %data = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %base_store = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 -7
  call void @llvm.masked.store.nxv2i32(<vscale x 2 x i32> %data, <vscale x 2 x i32>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2i64(<vscale x 2 x i64>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -8
  %data = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  %base_store = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, i64 -7
  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i64>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2f16(<vscale x 2 x half>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x half>, <vscale x 2 x half>* %base, i64 -8
  %data = call <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  %base_store = getelementptr <vscale x 2 x half>, <vscale x 2 x half>* %base, i64 -7
  call void @llvm.masked.store.nxv2f16(<vscale x 2 x half> %data, <vscale x 2 x half>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2f32(<vscale x 2 x float>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x float>, <vscale x 2 x float>* %base, i64 -8
  %data = call <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  %base_store = getelementptr <vscale x 2 x float>, <vscale x 2 x float>* %base, i64 -7
  call void @llvm.masked.store.nxv2f32(<vscale x 2 x float> %data, <vscale x 2 x float>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv2f64(<vscale x 2 x double>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #-6, mul vl]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0, #-5, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base, i64 -6
  %data = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  %base_store = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base, i64 -5
  call void @llvm.masked.store.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x double>* %base_store, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

; 2-lane zero/sign extended contiguous loads.

define <vscale x 2 x i64> @masked_zload_sv2i8_to_sv2i64(<vscale x 2 x i8>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv2i8_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, #-4, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base, i64 -4
  %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %ext = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_sload_sv2i8_to_sv2i64(<vscale x 2 x i8>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv2i8_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, #-3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base, i64 -3
  %load = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8(<vscale x 2 x i8>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %ext = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_zload_sv2i16_to_sv2i64(<vscale x 2 x i16>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv2i16_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base, i64 1
  %load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %ext = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_sload_sv2i16_to_sv2i64(<vscale x 2 x i16>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv2i16_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base, i64 2
  %load = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %ext = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_zload_sv2i32_to_sv2i64(<vscale x 2 x i32>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv2i32_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, #-2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 -2
  %load = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %ext = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

define <vscale x 2 x i64> @masked_sload_sv2i32_to_sv2i64(<vscale x 2 x i32>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv2i32_to_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 -1
  %load = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>* %base_load, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %ext = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ext
}

; 2-lane truncating contiguous stores.
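; The stores below truncate each 64-bit lane to the narrower memory element
; type (st1b/st1h/st1w on .d lanes), using the same reg+imm [-8, 7]
; addressing form as the tests above.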

define void @masked_trunc_store_sv2i64_to_sv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i8>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base, i64 3
  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.store.nxv2i8(<vscale x 2 x i8> %trunc, <vscale x 2 x i8>* %base_load, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_trunc_store_sv2i64_to_sv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i16>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.d }, p0, [x0, #4, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base, i64 4
  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.store.nxv2i16(<vscale x 2 x i16> %trunc, <vscale x 2 x i16>* %base_load, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_trunc_store_sv2i64_to_sv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i32>* %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv2i64_to_sv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.d }, p0, [x0, #5, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base, i64 5
  %trunc = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.store.nxv2i32(<vscale x 2 x i32> %trunc, <vscale x 2 x i32>* %base_load, i32 1, <vscale x 2 x i1> %mask)
  ret void
}

; 4-lane contiguous load/stores.

define void @test_masked_ldst_sv4i8(<vscale x 4 x i8>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1b { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base, i64 -1
  %data = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %base_store = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base, i64 2
  call void @llvm.masked.store.nxv4i8(<vscale x 4 x i8> %data, <vscale x 4 x i8>* %base_store, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv4i16(<vscale x 4 x i16>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1h { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base, i64 -1
  %data = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %base_store = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base, i64 2
  call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %data, <vscale x 4 x i16>* %base_store, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv4i32(<vscale x 4 x i32>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 6
  %data = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  %base_store = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 7
  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i32>* %base_store, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv4f16(<vscale x 4 x half>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1h { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x half>, <vscale x 4 x half>* %base, i64 -1
  %data = call <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
  %base_store = getelementptr <vscale x 4 x half>, <vscale x 4 x half>* %base, i64 2
  call void @llvm.masked.store.nxv4f16(<vscale x 4 x half> %data, <vscale x 4 x half>* %base_store, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv4f32(<vscale x 4 x float>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1w { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base, i64 -1
  %data = call <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
  %base_store = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base, i64 2
  call void @llvm.masked.store.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x float>* %base_store, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

; 4-lane zero/sign extended contiguous loads.

define <vscale x 4 x i32> @masked_zload_sv4i8_to_sv4i32(<vscale x 4 x i8>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv4i8_to_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, #-4, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base, i64 -4
  %load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %ext = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

define <vscale x 4 x i32> @masked_sload_sv4i8_to_sv4i32(<vscale x 4 x i8>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv4i8_to_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, #-3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base, i64 -3
  %load = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8(<vscale x 4 x i8>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> undef)
  %ext = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

define <vscale x 4 x i32> @masked_zload_sv4i16_to_sv4i32(<vscale x 4 x i16>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv4i16_to_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base, i64 1
  %load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %ext = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

define <vscale x 4 x i32> @masked_sload_sv4i16_to_sv4i32(<vscale x 4 x i16>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv4i16_to_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base, i64 2
  %load = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>* %base_load, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %ext = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %ext
}

; 4-lane truncating contiguous stores.

define void @masked_trunc_store_sv4i32_to_sv4i8(<vscale x 4 x i32> %val, <vscale x 4 x i8>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv4i32_to_sv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.s }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base, i64 3
  %trunc = trunc <vscale x 4 x i32> %val to <vscale x 4 x i8>
  call void @llvm.masked.store.nxv4i8(<vscale x 4 x i8> %trunc, <vscale x 4 x i8>* %base_load, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

define void @masked_trunc_store_sv4i32_to_sv4i16(<vscale x 4 x i32> %val, <vscale x 4 x i16>* %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv4i32_to_sv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.s }, p0, [x0, #4, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base, i64 4
  %trunc = trunc <vscale x 4 x i32> %val to <vscale x 4 x i16>
  call void @llvm.masked.store.nxv4i16(<vscale x 4 x i16> %trunc, <vscale x 4 x i16>* %base_load, i32 1, <vscale x 4 x i1> %mask)
  ret void
}

; 8-lane contiguous load/stores.

define void @test_masked_ldst_sv8i8(<vscale x 8 x i8>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    st1b { z0.h }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base, i64 6
  %data = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> undef)
  %base_store = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base, i64 7
  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %data, <vscale x 8 x i8>* %base_store, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv8i16(<vscale x 8 x i16>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 6
  %data = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
  %base_store = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base, i64 7
  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i16>* %base_store, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv8f16(<vscale x 8 x half>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base, i64 -1
  %data = call <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x half> undef)
  %base_store = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base, i64 2
  call void @llvm.masked.store.nxv8f16(<vscale x 8 x half> %data, <vscale x 8 x half>* %base_store, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

define void @test_masked_ldst_sv8bf16(<vscale x 8 x bfloat>* %base, <vscale x 8 x i1> %mask) nounwind #0 {
; CHECK-LABEL: test_masked_ldst_sv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    st1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base, i64 -1
  %data = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
  %base_store = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base, i64 2
  call void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x bfloat>* %base_store, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

; 8-lane zero/sign extended contiguous loads.

define <vscale x 8 x i16> @masked_zload_sv8i8_to_sv8i16(<vscale x 8 x i8>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_zload_sv8i8_to_sv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0, #-4, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base, i64 -4
  %load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> undef)
  %ext = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}

define <vscale x 8 x i16> @masked_sload_sv8i8_to_sv8i16(<vscale x 8 x i8>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_sload_sv8i8_to_sv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0, #-3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base, i64 -3
  %load = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8(<vscale x 8 x i8>* %base_load, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> undef)
  %ext = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %ext
}

; 8-lane truncating contiguous stores.

define void @masked_trunc_store_sv8i16_to_sv8i8(<vscale x 8 x i16> %val, <vscale x 8 x i8>* %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_trunc_store_sv8i16_to_sv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.h }, p0, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base, i64 3
  %trunc = trunc <vscale x 8 x i16> %val to <vscale x 8 x i8>
  call void @llvm.masked.store.nxv8i8(<vscale x 8 x i8> %trunc, <vscale x 8 x i8>* %base_load, i32 1, <vscale x 8 x i1> %mask)
  ret void
}

; 16-lane contiguous load/stores.

define void @test_masked_ldst_sv16i8(<vscale x 16 x i8>* %base, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    st1b { z0.b }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 6
  %data = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>* %base_load, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  %base_store = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base, i64 7
  call void @llvm.masked.store.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i8>* %base_store, i32 1, <vscale x 16 x i1> %mask)
  ret void
}

; 2-element contiguous loads.
declare <vscale x 2 x i8>  @llvm.masked.load.nxv2i8 (<vscale x 2 x i8>*,  i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.load.nxv2i16(<vscale x 2 x i16>*, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(<vscale x 2 x i32>*, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half>   @llvm.masked.load.nxv2f16(<vscale x 2 x half>*,   i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x float>  @llvm.masked.load.nxv2f32(<vscale x 2 x float>*,  i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

; 4-element contiguous loads.
declare <vscale x 4 x i8>  @llvm.masked.load.nxv4i8 (<vscale x 4 x i8>*,  i32, <vscale x 4 x i1>, <vscale x 4 x i8>)
declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16(<vscale x 4 x i16>*, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 4 x half>  @llvm.masked.load.nxv4f16(<vscale x 4 x half>*,  i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)

; 8-element contiguous loads.
declare <vscale x 8 x i8>  @llvm.masked.load.nxv8i8 (<vscale x 8 x i8>*,  i32, <vscale x 8 x i1>, <vscale x 8 x i8>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 8 x half>   @llvm.masked.load.nxv8f16(<vscale x 8 x half>*,   i32, <vscale x 8 x i1>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16(<vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)

; 16-element contiguous loads.
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

; 2-element contiguous stores.
declare void @llvm.masked.store.nxv2i8 (<vscale x 2 x i8>,  <vscale x 2 x i8>*,  i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2f16(<vscale x 2 x half>,   <vscale x 2 x half>*,   i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2f32(<vscale x 2 x float>,  <vscale x 2 x float>*,  i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>*, i32, <vscale x 2 x i1>)

; 4-element contiguous stores.
declare void @llvm.masked.store.nxv4i8 (<vscale x 4 x i8>,  <vscale x 4 x i8>*,  i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i16>*, i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>,  <vscale x 4 x half>*,  i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)

; 8-element contiguous stores.
declare void @llvm.masked.store.nxv8i8 (<vscale x 8 x i8>,  <vscale x 8 x i8>*,  i32, <vscale x 8 x i1>)
declare void @llvm.masked.store.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>,   <vscale x 8 x half>*,   i32, <vscale x 8 x i1>)
declare void @llvm.masked.store.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>*, i32, <vscale x 8 x i1>)

; 16-element contiguous stores.
declare void @llvm.masked.store.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32, <vscale x 16 x i1>)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }