# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s

# Each document below feeds a short SMEM sequence through the
# post-RA-hazard-rec pass. Checks under the XNACK prefix are only active on
# the carrizo invocation; the fiji invocation checks the GCN prefix alone, so
# an expected S_NOP 0 is always spelled with the XNACK prefix.

---
# Trivial clause at beginning of program: a single load, expected unchanged.
name: trivial_smem_clause_load_smrd4_x1

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program: two loads whose destinations do not
# overlap either pointer pair, expected unchanged.
name: trivial_smem_clause_load_smrd4_x2

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program: three loads whose destinations do
# not overlap any pointer pair, expected unchanged.
name: trivial_smem_clause_load_smrd4_x3

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    S_ENDPGM 0
...
---
# Trivial clause at beginning of program: four loads whose destinations do
# not overlap any pointer pair, expected unchanged.
name: trivial_smem_clause_load_smrd4_x4

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
    S_ENDPGM 0
...
---
# Reuse of same input pointer is OK
name: trivial_smem_clause_load_smrd4_x2_sameptr

body: |
  bb.0:
    ; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
    ; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg (low half). It is the only
# SMEM instruction in the block and is expected unchanged.
name: smrd_load4_overwrite_ptr_lo

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg (high half). It is the only
# SMEM instruction in the block and is expected unchanged.
name: smrd_load4_overwrite_ptr_hi

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
    ; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    S_ENDPGM 0
...
---
# 64-bit load clobbers its own ptr reg. It is the only SMEM instruction in
# the block and is expected unchanged.
name: smrd_load8_overwrite_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: smrd_load8_overwrite_ptr
    ; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    S_ENDPGM 0
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
# NOTE(review): the checks below expect all 17 loads and the trailing
# S_MOV_B32 to come out unchanged, with no S_NOP or waitcnt between them on
# either invocation. Scalar loads are presumably counted by lgkmcnt rather
# than vmcnt -- confirm and reword the sentence above.
name: break_smem_clause_at_max_smem_clause_size_smrd_load4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
    ; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
    ; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
    $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
    S_ENDPGM 0
...
---
# Each of the two loads overwrites the low register of its own pointer pair.
# An S_NOP 0 is expected between them under the XNACK prefix only.
name: break_smem_clause_simple_load_smrd4_lo_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
    ; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# NOTE(review): despite the name, neither destination ($sgpr0, $sgpr3)
# overlaps a pointer pair here, and no S_NOP is expected on either
# invocation -- confirm the registers are as intended.
name: break_smem_clause_simple_load_smrd4_hi_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# The second load overwrites $sgpr10_sgpr11, which the first load reads as its
# pointer. An S_NOP 0 is expected between them under the XNACK prefix only.
name: break_smem_clause_simple_load_smrd8_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# NOTE(review): despite the name, the 128-bit destination
# ($sgpr12_sgpr13_sgpr14_sgpr15) overlaps neither pointer pair
# ($sgpr10_sgpr11, $sgpr6_sgpr7), and no S_NOP is expected on either
# invocation -- confirm the registers are as intended.
name: break_smem_clause_simple_load_smrd16_ptr

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
    S_ENDPGM 0
...
---
# Same register overlap as a pointer overwritten by the following load, but
# the two loads sit in different basic blocks. Under the XNACK prefix the
# S_NOP 0 is expected at the start of bb.1.
name: break_smem_clause_block_boundary_load_smrd8_ptr

body: |
  ; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
  ; GCN: bb.0:
  ; GCN:   successors: %bb.1(0x80000000)
  ; GCN:   $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
  ; GCN: bb.1:
  ; XNACK-NEXT:   S_NOP 0
  ; GCN-NEXT:   $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0

  bb.1:
    $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): as written the load defines $sgpr12, which is neither the
# store's pointer ($sgpr10_sgpr11) nor its data ($sgpr16), and no S_NOP is
# expected on either invocation -- confirm whether the registers or the
# sentence above should change.
name: break_smem_clause_store_load_into_ptr_smrd4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
    ; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
    $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
    S_ENDPGM 0
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
# NOTE(review): the load does redefine the stored $sgpr8, but the checks
# expect no S_NOP on either invocation -- confirm the wording above.
name: break_smem_clause_store_load_into_data_smrd4

body: |
  bb.0:
    ; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
    ; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
    $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# Regular VALU instruction breaks clause, no nop needed
name: valu_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: valu_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $vgpr8 = V_MOV_B32_e32 0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# Regular SALU instruction breaks clause, no nop needed
name: salu_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: salu_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $sgpr8 = S_MOV_B32 0
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# A DS_READ_B32 sits between the two scalar loads; the sequence is expected
# unchanged, with no S_NOP on either invocation.
name: ds_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: ds_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# A FLAT_LOAD_DWORD sits between the two scalar loads; the sequence is
# expected unchanged, with no S_NOP on either invocation.
name: flat_inst_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: flat_inst_breaks_smem_clause
    ; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    ; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    ; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
    $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
    S_ENDPGM 0
...
---
# The first load carries an implicit use of $sgpr12_sgpr13, which the second
# load then defines. An S_NOP 0 is expected between them under the XNACK
# prefix only.
# FIXME: Should this be handled?
name: implicit_use_breaks_smem_clause

body: |
  bb.0:
    ; GCN-LABEL: name: implicit_use_breaks_smem_clause
    ; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
    ; XNACK-NEXT: S_NOP 0
    ; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
    ; GCN-NEXT: S_ENDPGM 0
    $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
    $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
    S_ENDPGM 0
...