; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64I %s

; We can often fold an ADDI into the offset of load/store instructions:
;   (load (addi base, off1), off2)       -> (load base, off1+off2)
;   (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
; This is possible when off1+off2 still fits in the signed 12-bit immediate.
; Check that we perform the fold under various conditions. If off1 is (the low
; part of) an address, the fold's safety depends on the variable's alignment:
; rewriting %lo(g)+4 as %lo(g+4) is only valid if %hi(g+4) == %hi(g), which an
; alignment of at least 8 guarantees for offsets below 8.
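
; As an illustrative sketch (hypothetical instructions, not taken from the
; checks below): with a base address already in a0,
;   addi a0, a0, 512
;   lw   a1, 4(a0)
; folds to
;   lw   a1, 516(a0)
; since 512+4 = 516 still fits in the signed 12-bit range [-2048, 2047]. Note
; that on RV32 an i64 load is legalized into two i32 loads at offsets 0 and 4,
; which is why the RV32I checks contain two lw instructions per i64 access.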

@g_0 = dso_local global i64 0
@g_1 = dso_local global i64 0, align 1
@g_2 = dso_local global i64 0, align 2
@g_4 = dso_local global i64 0, align 4
@g_8 = dso_local global i64 0, align 8
@g_16 = dso_local global i64 0, align 16

define dso_local i64 @load_g_0() nounwind {
; RV32I-LABEL: load_g_0:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_0)
; RV32I-NEXT:    lw a0, %lo(g_0)(a1)
; RV32I-NEXT:    lw a1, %lo(g_0+4)(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_0:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_0)
; RV64I-NEXT:    ld a0, %lo(g_0)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_0
  ret i64 %0
}

define dso_local i64 @load_g_1() nounwind {
; RV32I-LABEL: load_g_1:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_1)
; RV32I-NEXT:    lw a0, %lo(g_1)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_1)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_1:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_1)
; RV64I-NEXT:    ld a0, %lo(g_1)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_1
  ret i64 %0
}

define dso_local i64 @load_g_2() nounwind {
; RV32I-LABEL: load_g_2:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_2)
; RV32I-NEXT:    lw a0, %lo(g_2)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_2)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_2:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_2)
; RV64I-NEXT:    ld a0, %lo(g_2)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_2
  ret i64 %0
}

define dso_local i64 @load_g_4() nounwind {
; RV32I-LABEL: load_g_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_4)
; RV32I-NEXT:    lw a0, %lo(g_4)(a1)
; RV32I-NEXT:    addi a1, a1, %lo(g_4)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4)
; RV64I-NEXT:    ld a0, %lo(g_4)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_4
  ret i64 %0
}

define dso_local i64 @load_g_8() nounwind {
; RV32I-LABEL: load_g_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_8)
; RV32I-NEXT:    lw a0, %lo(g_8)(a1)
; RV32I-NEXT:    lw a1, %lo(g_8+4)(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_8)
; RV64I-NEXT:    ld a0, %lo(g_8)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_8
  ret i64 %0
}

define dso_local i64 @load_g_16() nounwind {
; RV32I-LABEL: load_g_16:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(g_16)
; RV32I-NEXT:    lw a0, %lo(g_16)(a1)
; RV32I-NEXT:    lw a1, %lo(g_16+4)(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_g_16:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_16)
; RV64I-NEXT:    ld a0, %lo(g_16)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @g_16
  ret i64 %0
}

define dso_local void @store_g_4() nounwind {
; RV32I-LABEL: store_g_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_4)
; RV32I-NEXT:    sw zero, %lo(g_4)(a0)
; RV32I-NEXT:    addi a0, a0, %lo(g_4)
; RV32I-NEXT:    sw zero, 4(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: store_g_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4)
; RV64I-NEXT:    sd zero, %lo(g_4)(a0)
; RV64I-NEXT:    ret
entry:
  store i64 0, i64* @g_4
  ret void
}

define dso_local void @store_g_8() nounwind {
; RV32I-LABEL: store_g_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_8)
; RV32I-NEXT:    sw zero, %lo(g_8+4)(a0)
; RV32I-NEXT:    sw zero, %lo(g_8)(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: store_g_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_8)
; RV64I-NEXT:    sd zero, %lo(g_8)(a0)
; RV64I-NEXT:    ret
entry:
  store i64 0, i64* @g_8
  ret void
}

; Check that we can fold an ADDI into the offset of a store instruction when
; the store is the root node of the DAG.
@g_4_i32 = global i32 0, align 4

define dso_local void @inc_g_i32() nounwind {
; RV32I-LABEL: inc_g_i32:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(g_4_i32)
; RV32I-NEXT:    lw a1, %lo(g_4_i32)(a0)
; RV32I-NEXT:    addi a1, a1, 1
; RV32I-NEXT:    sw a1, %lo(g_4_i32)(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: inc_g_i32:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(g_4_i32)
; RV64I-NEXT:    lw a1, %lo(g_4_i32)(a0)
; RV64I-NEXT:    addiw a1, a1, 1
; RV64I-NEXT:    sw a1, %lo(g_4_i32)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i32, i32* @g_4_i32
  %inc = add i32 %0, 1
  store i32 %inc, i32* @g_4_i32
  br label %if.end

if.end:
  ret void
}

; Check for folds in accesses to the second element of an i64 array.
@ga_8 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 8
@ga_16 = dso_local local_unnamed_addr global [2 x i64] zeroinitializer, align 16

define dso_local i64 @load_ga_8() nounwind {
; RV32I-LABEL: load_ga_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(ga_8)
; RV32I-NEXT:    addi a1, a0, %lo(ga_8)
; RV32I-NEXT:    lw a0, 8(a1)
; RV32I-NEXT:    lw a1, 12(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_ga_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(ga_8+8)
; RV64I-NEXT:    ld a0, %lo(ga_8+8)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @ga_8, i32 0, i32 1)
  ret i64 %0
}

define dso_local i64 @load_ga_16() nounwind {
; RV32I-LABEL: load_ga_16:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a1, %hi(ga_16)
; RV32I-NEXT:    lw a0, %lo(ga_16+8)(a1)
; RV32I-NEXT:    lw a1, %lo(ga_16+12)(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_ga_16:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %hi(ga_16)
; RV64I-NEXT:    ld a0, %lo(ga_16+8)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @ga_16, i32 0, i32 1)
  ret i64 %0
}

; Check for folds in accesses to thread-local variables.
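; For reference, local-exec TLS addresses are materialized below as
;   lui   aX, %tprel_hi(sym)
;   add   aX, aX, tp, %tprel_add(sym)
;   lw/ld ..., %tprel_lo(sym)(aX)
; and the same alignment rule as above applies: the +4 offset is folded into
; %tprel_lo for tl_8 (align 8) but not for tl_4 (align 4).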

@tl_4 = dso_local thread_local global i64 0, align 4
@tl_8 = dso_local thread_local global i64 0, align 8

define dso_local i64 @load_tl_4() nounwind {
; RV32I-LABEL: load_tl_4:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %tprel_hi(tl_4)
; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_4)
; RV32I-NEXT:    lw a0, %tprel_lo(tl_4)(a1)
; RV32I-NEXT:    addi a1, a1, %tprel_lo(tl_4)
; RV32I-NEXT:    lw a1, 4(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_tl_4:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %tprel_hi(tl_4)
; RV64I-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
; RV64I-NEXT:    ld a0, %tprel_lo(tl_4)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @tl_4
  ret i64 %0
}

define dso_local i64 @load_tl_8() nounwind {
; RV32I-LABEL: load_tl_8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %tprel_hi(tl_8)
; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_8)
; RV32I-NEXT:    lw a0, %tprel_lo(tl_8)(a1)
; RV32I-NEXT:    lw a1, %tprel_lo(tl_8+4)(a1)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_tl_8:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, %tprel_hi(tl_8)
; RV64I-NEXT:    add a0, a0, tp, %tprel_add(tl_8)
; RV64I-NEXT:    ld a0, %tprel_lo(tl_8)(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* @tl_8
  ret i64 %0
}

; Check for folds in accesses to constant addresses.
define dso_local i64 @load_const_ok() nounwind {
; RV32I-LABEL: load_const_ok:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lw a0, 2040(zero)
; RV32I-NEXT:    lw a1, 2044(zero)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_const_ok:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 2040(zero)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* inttoptr (i32 2040 to i64*)
  ret i64 %0
}

define dso_local i64 @load_const_overflow() nounwind {
; RV32I-LABEL: load_const_overflow:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    lw a1, -2048(a0)
; RV32I-NEXT:    lw a0, 2044(zero)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_const_overflow:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    ld a0, 2044(zero)
; RV64I-NEXT:    ret
entry:
  %0 = load i64, i64* inttoptr (i64 2044 to i64*)
  ret i64 %0
}

define dso_local i32 @load_const_medium() nounwind {
; RV32I-LABEL: load_const_medium:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 1
; RV32I-NEXT:    lw a0, -16(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_const_medium:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, 1
; RV64I-NEXT:    lw a0, -16(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i32, i32* inttoptr (i64 4080 to i32*)
  ret i32 %0
}

; The constant here is 0x7ffff800. On RV64 this value must be materialized
; with LUI+ADDIW; LUI+ADDI would produce a different (sign-extended) constant,
; so we cannot fold the ADDIW into the load offset.
define dso_local i32 @load_const_large() nounwind {
; RV32I-LABEL: load_const_large:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, 524288
; RV32I-NEXT:    lw a0, -2048(a0)
; RV32I-NEXT:    ret
;
; RV64I-LABEL: load_const_large:
; RV64I:       # %bb.0: # %entry
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    addiw a0, a0, -2048
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
entry:
  %0 = load i32, i32* inttoptr (i64 2147481600 to i32*)
  ret i32 %0
}
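
; A hedged arithmetic sketch (not autogenerated) of why the RV64 sequence
; above cannot be folded:
;   lui a0, 524288        ; a0 = 0x80000000, sign-extended to 0xffffffff80000000
;   addiw a0, a0, -2048   ; 32-bit add, then sign-extend: a0 = 0x7ffff800
; Folding the -2048 into the load offset would instead address
; 0xffffffff80000000 - 2048 = 0xffffffff7ffff800, not 0x7ffff800.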