# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s

# Parses each function below and prints it back without running any pass.
# The first invocation is checked against the complete machineFunctionInfo
# dump; the second adds -simplify-mir and is checked against a shorter dump.

# kernel0 sets a handful of fields explicitly. Only five argumentInfo entries
# are given in the input; the checks expect the remaining entries to be
# printed as well.
---
# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: gdsSize: 256
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: bytesInStackArgArea: 0
# FULL-NEXT: returnsVoid: true
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: gdsSize: 256
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  gdsSize: 256
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0

...
# no_mfi has no machineFunctionInfo key at all in its input.
# FIXME: The simplified output should not need to print a
# machineFunctionInfo section for this function, but it currently does.
---
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: bytesInStackArgArea: 0
# FULL-NEXT: returnsVoid: true
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0

...

# empty_mfi has a machineFunctionInfo key with no fields under it; the
# expected output is the same as for no_mfi.
---
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: bytesInStackArgArea: 0
# FULL-NEXT: returnsVoid: true
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0

...
# empty_mfi_entry_func sets only isEntryFunction; every other field is
# expected to print with the same values as in the functions above.
---
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: bytesInStackArgArea: 0
# FULL-NEXT: returnsVoid: true
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: LDSKernelId: { reg: '$sgpr15' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: vgprForAGPRCopy: ''
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0

...

# default_regs_mfi spells out scratchRSrcReg with the same value the full
# dump prints for functions that do not set it. The simplified output must
# not print any of the three special registers in that case.
# The negative checks previously had a doubled colon on the last directive,
# which made its pattern ": stackPtrOffsetReg" and therefore never matchable;
# frameOffsetReg was not covered at all.
---
# ALL-LABEL: name: default_regs_mfi

# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'
body: |
  bb.0:
    S_ENDPGM 0

...

# fake_stack_arginfo gives flatScratchInit as an offset rather than a
# register and writes the workItemIDY mask in hexadecimal; the checks expect
# the mask to be printed in decimal.
---
# ALL-LABEL: name: fake_stack_arginfo

# FULL: argumentInfo:
# FULL: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL: flatScratchInit: { offset: 4 }
# FULL: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: LDSKernelId: { reg: '$sgpr15' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }
body: |
  bb.0:
    S_ENDPGM 0

...

# parse_mode sets every mode field to false; both invocations are expected
# to print them all.
---
# ALL-LABEL: name: parse_mode

# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false
name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false
body: |
  bb.0:
    S_ENDPGM 0

...

# parse_spilled_regs sets both spill flags to true.
---
# ALL-LABEL: name: parse_spilled_regs

# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true
name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true
body: |
  bb.0:
    S_ENDPGM 0

...

# dyn_lds_with_alignment sets dynLDSAlign to 8, which both invocations are
# expected to print.
---
# ALL-LABEL: name: dyn_lds_with_alignment

# FULL: ldsSize: 0
# FULL-NEXT: gdsSize: 0
# FULL-NEXT: dynLDSAlign: 8

# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8
body: |
  bb.0:
    S_ENDPGM 0

...

# occupancy_0 asks for an occupancy of 0; the expected printed value is 10.
---
# ALL-LABEL: name: occupancy_0

# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0
body: |
  bb.0:
    S_ENDPGM 0

...

# occupancy_3 asks for an occupancy of 3, which is expected to be kept.
---
# ALL-LABEL: name: occupancy_3

# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3
body: |
  bb.0:
    S_ENDPGM 0

...

# scavenge_fi points scavengeFI at the single stack object declared below.
---
# ALL-LABEL: name: scavenge_fi

# ALL: scavengeFI: '%stack.0'
name: scavenge_fi
stack:
  - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
  scavengeFI: '%stack.0'
body: |
  bb.0:
    S_ENDPGM 0

...

# bytes_in_stack_arg_area sets a non-zero bytesInStackArgArea.
---
# ALL-LABEL: name: bytes_in_stack_arg_area

# ALL: bytesInStackArgArea: 444
name: bytes_in_stack_arg_area
machineFunctionInfo:
  bytesInStackArgArea: 444
body: |
  bb.0:
    SI_RETURN

...

# returns_void_true sets returnsVoid to true; the simplified output is
# expected to leave the field out.
---
# ALL-LABEL: name: returns_void_true

# FULL: returnsVoid: true
# SIMPLE-NOT: returnsVoid
name: returns_void_true
machineFunctionInfo:
  returnsVoid: true
body: |
  bb.0:
    SI_RETURN

...
# returns_void_false sets returnsVoid to false, which both invocations are
# expected to print.
---
# ALL-LABEL: name: returns_void_false

# ALL: returnsVoid: false
name: returns_void_false
machineFunctionInfo:
  returnsVoid: false
body: |
  bb.0:
    SI_RETURN

...

# vgpr_for_agpr_copy names a concrete register for vgprForAGPRCopy.
---
# ALL-LABEL: name: vgpr_for_agpr_copy

# ALL: vgprForAGPRCopy: '$vgpr2'
name: vgpr_for_agpr_copy
machineFunctionInfo:
  vgprForAGPRCopy: '$vgpr2'
body: |
  bb.0:
    SI_RETURN

...

# vgpr_for_agpr_copy_noreg passes '$noreg'; the full dump is expected to
# print an empty string and the simplified dump to leave the field out.
---
# ALL-LABEL: name: vgpr_for_agpr_copy_noreg

# FULL: vgprForAGPRCopy: ''
# SIMPLE-NOT: vgprForAGPRCopy
name: vgpr_for_agpr_copy_noreg
machineFunctionInfo:
  vgprForAGPRCopy: '$noreg'
body: |
  bb.0:
    SI_RETURN

...