llvm-for-llvmta/test/CodeGen/AMDGPU/break-smem-soft-clauses.mir

352 lines
12 KiB
YAML

# RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck --check-prefix=GCN %s
---
# Trivial clause at beginning of program
# A single S_LOAD_DWORD_IMM followed by S_ENDPGM. The GCN checks (used by both
# RUN lines) expect the two instructions to come out unchanged, with nothing
# inserted between them.
name: trivial_smem_clause_load_smrd4_x1
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x1
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Two back-to-back scalar loads from different pointer pairs
# ($sgpr10_sgpr11, $sgpr12_sgpr13) into $sgpr0 and $sgpr1. Neither destination
# overlaps a pointer register read in this block, and the GCN checks expect
# the sequence to come out unchanged for both RUN lines.
name: trivial_smem_clause_load_smrd4_x2
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Three back-to-back scalar loads into $sgpr0..$sgpr2 from three distinct
# pointer pairs. No destination overlaps any pointer read in the block, and
# the GCN checks expect the sequence to come out unchanged for both RUN lines.
name: trivial_smem_clause_load_smrd4_x3
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x3
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr6_sgpr7, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_ENDPGM 0
...
---
# Trivial clause at beginning of program
# Four back-to-back scalar loads into $sgpr0..$sgpr3 from four distinct
# pointer pairs. No destination overlaps any pointer read in the block, and
# the GCN checks expect the sequence to come out unchanged for both RUN lines.
name: trivial_smem_clause_load_smrd4_x4
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x4
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
$sgpr1 = S_LOAD_DWORD_IMM $sgpr8_sgpr9, 0, 0, 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr16_sgpr17, 0, 0, 0
S_ENDPGM 0
...
---
# Reuse of same input pointer is OK
# Both loads read $sgpr10_sgpr11 and write $sgpr12 / $sgpr13, which do not
# overlap that pointer. The GCN checks expect the two loads to stay adjacent
# with nothing inserted, for both RUN lines.
name: trivial_smem_clause_load_smrd4_x2_sameptr
body: |
bb.0:
; GCN-LABEL: name: trivial_smem_clause_load_smrd4_x2_sameptr
; GCN: $sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg
# The only load writes $sgpr10, the first register of its own pointer pair
# $sgpr10_sgpr11. It is the sole memory instruction in the block, and the GCN
# checks expect it to be followed directly by S_ENDPGM for both RUN lines.
name: smrd_load4_overwrite_ptr_lo
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_lo
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
# 32-bit load partially clobbers its own ptr reg
# The only load writes $sgpr11, the second register of its own pointer pair
# $sgpr10_sgpr11. It is the sole memory instruction in the block, and the GCN
# checks expect it to be followed directly by S_ENDPGM for both RUN lines.
name: smrd_load4_overwrite_ptr_hi
body: |
bb.0:
; GCN-LABEL: name: smrd_load4_overwrite_ptr_hi
; GCN: $sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr11 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
# 64-bit load clobbers its own ptr reg
# The only load (S_LOAD_DWORDX2_IMM) writes $sgpr10_sgpr11, the same pair it
# uses as its pointer. It is the sole memory instruction in the block, and the
# GCN checks expect it to be followed directly by S_ENDPGM for both RUN lines.
name: smrd_load8_overwrite_ptr
body: |
bb.0:
; GCN-LABEL: name: smrd_load8_overwrite_ptr
; GCN: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
S_ENDPGM 0
...
---
# vmcnt has 4 bits, so maximum 16 outstanding loads. The waitcnt
# breaks the clause.
# NOTE(review): these are scalar (S_LOAD) loads, which are presumably counted
# by lgkmcnt rather than vmcnt, and the body below contains no s_waitcnt -
# confirm whether the comment above is stale.
# The body issues 16 loads from $sgpr10_sgpr11 into $sgpr13..$sgpr28, then a
# 17th load from $sgpr30_sgpr31 into $sgpr0, then an S_MOV_B32 that implicitly
# uses all 16 earlier results. The GCN checks expect the whole sequence to
# come out unchanged, with no S_NOP inserted, for both RUN lines.
name: break_smem_clause_at_max_smem_clause_size_smrd_load4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_at_max_smem_clause_size_smrd_load4
; GCN: $sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
; GCN-NEXT: S_ENDPGM 0
$sgpr13 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr14 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr15 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr16 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr17 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr18 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr19 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr20 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr21 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr22 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr23 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr24 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr25 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr26 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr27 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr28 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr30_sgpr31, 0, 0, 0
$sgpr0 = S_MOV_B32 $sgpr0, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28
S_ENDPGM 0
...
---
# Two adjacent scalar loads, each writing the first register of its own
# pointer pair ($sgpr10 from $sgpr10_sgpr11, $sgpr12 from $sgpr12_sgpr13).
# The carrizo RUN line (XNACK prefix) expects an S_NOP 0 between the loads;
# the fiji RUN line (GCN prefix only) expects them to stay adjacent.
name: break_smem_clause_simple_load_smrd4_lo_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_lo_ptr
; GCN: $sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr10 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# Two adjacent scalar loads writing $sgpr0 and $sgpr3; neither destination
# overlaps the pointer pairs read here ($sgpr10_sgpr11, $sgpr12_sgpr13).
# The checks contain no XNACK line, so both RUN lines expect the loads to
# stay adjacent with nothing inserted.
# NOTE(review): the name says "break ... hi_ptr" but no pointer register is
# written and no break is expected - confirm the registers are as intended.
name: break_smem_clause_simple_load_smrd4_hi_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd4_hi_ptr
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr3 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# Two adjacent 64-bit scalar loads where the second one writes
# $sgpr10_sgpr11, the pointer pair read by the first.
# The carrizo RUN line (XNACK prefix) expects an S_NOP 0 between the loads;
# the fiji RUN line (GCN prefix only) expects them to stay adjacent.
name: break_smem_clause_simple_load_smrd8_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd8_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# A 64-bit load from $sgpr10_sgpr11 followed by a 128-bit load
# (S_LOAD_DWORDX4_IMM) from $sgpr6_sgpr7 into $sgpr12..$sgpr15. The second
# load's destination does not overlap either pointer pair read in the block.
# The checks contain no XNACK line, so both RUN lines expect the loads to
# stay adjacent with nothing inserted.
# NOTE(review): the name says "break" but no break is expected - confirm the
# registers are as intended.
name: break_smem_clause_simple_load_smrd16_ptr
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_simple_load_smrd16_ptr
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr12_sgpr13_sgpr14_sgpr15 = S_LOAD_DWORDX4_IMM $sgpr6_sgpr7, 0, 0, 0
S_ENDPGM 0
...
---
# Same register pattern as a load that overwrites the previous load's pointer
# ($sgpr10_sgpr11), but with the two loads split across bb.0 and bb.1.
# The carrizo RUN line (XNACK prefix) expects an S_NOP 0 as the first
# instruction of bb.1, ahead of the second load; the fiji RUN line (GCN prefix
# only) expects bb.1 to start directly with the load.
name: break_smem_clause_block_boundary_load_smrd8_ptr
body: |
; GCN-LABEL: name: break_smem_clause_block_boundary_load_smrd8_ptr
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN: bb.1:
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
bb.0:
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
bb.1:
$sgpr10_sgpr11 = S_LOAD_DWORDX2_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# The load clobbers the pointer of the store, so it needs to break.
# NOTE(review): as written, the store uses pointer $sgpr10_sgpr11 and the load
# writes $sgpr12, which does not overlap it, and the checks expect no S_NOP
# between the store and the load for either RUN line. The comment above and
# the test name may not match the registers used - confirm.
name: break_smem_clause_store_load_into_ptr_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_ptr_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr16, $sgpr10_sgpr11, 0, 0, 0
$sgpr12 = S_LOAD_DWORD_IMM $sgpr14_sgpr15, 0, 0, 0
S_ENDPGM 0
...
---
# The load clobbers the data of the store, so it needs to break.
# FIXME: Would it be better to s_nop and wait later?
# The store's data operand is $sgpr8 and the following load writes $sgpr8.
# NOTE(review): despite the comment above, the checks currently expect the
# load to follow the store directly, with no S_NOP, for both RUN lines.
name: break_smem_clause_store_load_into_data_smrd4
body: |
bb.0:
; GCN-LABEL: name: break_smem_clause_store_load_into_data_smrd4
; GCN: S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
S_STORE_DWORD_IMM $sgpr8, $sgpr10_sgpr11, 0, 0, 0
$sgpr8 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# Regular VALU instruction breaks clause, no nop needed
# A V_MOV_B32_e32 sits between the two scalar loads. The GCN checks expect
# the three instructions to come out unchanged, with no S_NOP, for both RUN
# lines.
name: valu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: valu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr8 = V_MOV_B32_e32 0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr8 = V_MOV_B32_e32 0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# Regular SALU instruction breaks clause, no nop needed
# An S_MOV_B32 sits between the two scalar loads. The GCN checks expect the
# three instructions to come out unchanged, with no S_NOP, for both RUN lines.
name: salu_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: salu_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $sgpr8 = S_MOV_B32 0
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$sgpr8 = S_MOV_B32 0
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# A DS_READ_B32 sits between the two scalar loads. The GCN checks expect the
# three instructions to come out unchanged, with no S_NOP, for both RUN lines.
name: ds_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: ds_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr8 = DS_READ_B32 $vgpr9, 0, 0, implicit $m0, implicit $exec
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# A FLAT_LOAD_DWORD sits between the two scalar loads. The GCN checks expect
# the three instructions to come out unchanged, with no S_NOP, for both RUN
# lines.
name: flat_inst_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: flat_inst_breaks_smem_clause
; GCN: $sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
; GCN-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GCN-NEXT: $sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0 = S_LOAD_DWORD_IMM $sgpr10_sgpr11, 0, 0, 0
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
$sgpr2 = S_LOAD_DWORD_IMM $sgpr12_sgpr13, 0, 0, 0
S_ENDPGM 0
...
---
# FIXME: Should this be handled?
# The first load carries an extra implicit use of $sgpr12_sgpr13, and the
# second load writes $sgpr12_sgpr13. The carrizo RUN line (XNACK prefix)
# expects an S_NOP 0 between the loads; the fiji RUN line (GCN prefix only)
# expects them to stay adjacent.
name: implicit_use_breaks_smem_clause
body: |
bb.0:
; GCN-LABEL: name: implicit_use_breaks_smem_clause
; GCN: $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
; XNACK-NEXT: S_NOP 0
; GCN-NEXT: $sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
; GCN-NEXT: S_ENDPGM 0
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0, implicit $sgpr12_sgpr13
$sgpr12_sgpr13 = S_LOAD_DWORDX2_IMM $sgpr6_sgpr7, 0, 0, 0
S_ENDPGM 0
...