llvm-project/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck %s -check-prefix=RV32I

; Check that zero-extending i8/i16 loads are selected as lbu and lhu, so that
; no separate masking instruction is emitted after the load.

; Zero-initialized array of five i8 elements; the function below reads
; elements 0 and 1.
@bytes = global [5 x i8] zeroinitializer, align 1

; Loads @bytes[0] and @bytes[1] as i8 and returns 0 only when they are equal to
; -120 and 7 respectively; in every other case it returns 1.
;
; The expected assembly performs both loads with lbu and compares the first
; byte against 136, i.e. the 8-bit pattern of -120 (0x88) zero-extended to
; 32 bits, so the comparison needs no extra masking of the loaded value.
define i32 @test_zext_i8() {
; RV32I-LABEL: test_zext_i8:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(bytes)
; RV32I-NEXT:    lbu a1, %lo(bytes)(a0)
; RV32I-NEXT:    addi a2, zero, 136
; RV32I-NEXT:    bne a1, a2, .LBB0_3
; RV32I-NEXT:  # %bb.1: # %entry
; RV32I-NEXT:    addi a0, a0, %lo(bytes)
; RV32I-NEXT:    lbu a0, 1(a0)
; RV32I-NEXT:    addi a1, zero, 7
; RV32I-NEXT:    bne a0, a1, .LBB0_3
; RV32I-NEXT:  # %bb.2: # %if.end
; RV32I-NEXT:    mv a0, zero
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB0_3: # %if.then
; RV32I-NEXT:    addi a0, zero, 1
; RV32I-NEXT:    ret
entry:
  ; Element 0 must equal -120 (a value with the i8 sign bit set).
  %0 = load i8, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bytes, i32 0, i32 0), align 1
  %cmp = icmp eq i8 %0, -120
  ; Element 1 must equal 7.
  %1 = load i8, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bytes, i32 0, i32 1), align 1
  %cmp3 = icmp eq i8 %1, 7
  ; Both comparisons must hold to reach if.end; otherwise control goes to if.then.
  %or.cond = and i1 %cmp, %cmp3
  br i1 %or.cond, label %if.end, label %if.then

; At least one element did not match.
if.then:
  ret i32 1

; Both elements matched.
if.end:
  ret i32 0
}

; Zero-initialized array of five i16 elements; the function below reads
; elements 0 and 1.
@shorts = global [5 x i16] zeroinitializer, align 2

; Loads @shorts[0] and @shorts[1] as i16 and returns 0 only when they are equal
; to -120 and 7 respectively; in every other case it returns 1.
;
; The expected assembly performs both loads with lhu. The constant compared
; against the first element is built as lui 16 / addi -120, i.e.
; (16 << 12) - 120 = 65416 = 0xFF88, the 16-bit pattern of -120 zero-extended
; to 32 bits, so the comparison needs no extra masking of the loaded value.
define i32 @test_zext_i16() {
; RV32I-LABEL: test_zext_i16:
; RV32I:       # %bb.0: # %entry
; RV32I-NEXT:    lui a0, %hi(shorts)
; RV32I-NEXT:    lui a1, 16
; RV32I-NEXT:    addi a1, a1, -120
; RV32I-NEXT:    lhu a2, %lo(shorts)(a0)
; RV32I-NEXT:    bne a2, a1, .LBB1_3
; RV32I-NEXT:  # %bb.1: # %entry
; RV32I-NEXT:    addi a0, a0, %lo(shorts)
; RV32I-NEXT:    lhu a0, 2(a0)
; RV32I-NEXT:    addi a1, zero, 7
; RV32I-NEXT:    bne a0, a1, .LBB1_3
; RV32I-NEXT:  # %bb.2: # %if.end
; RV32I-NEXT:    mv a0, zero
; RV32I-NEXT:    ret
; RV32I-NEXT:  .LBB1_3: # %if.then
; RV32I-NEXT:    addi a0, zero, 1
; RV32I-NEXT:    ret
entry:
  ; Element 0 must equal -120 (a value with the i16 sign bit set).
  %0 = load i16, i16* getelementptr inbounds ([5 x i16], [5 x i16]* @shorts, i32 0, i32 0), align 2
  %cmp = icmp eq i16 %0, -120
  ; Element 1 must equal 7.
  %1 = load i16, i16* getelementptr inbounds ([5 x i16], [5 x i16]* @shorts, i32 0, i32 1), align 2
  %cmp3 = icmp eq i16 %1, 7
  ; Both comparisons must hold to reach if.end; otherwise control goes to if.then.
  %or.cond = and i1 %cmp, %cmp3
  br i1 %or.cond, label %if.end, label %if.then

; At least one element did not match.
if.then:
  ret i32 1

; Both elements matched.
if.end:
  ret i32 0
}
[RISCV] Add test case showing suboptimal codegen when loading unsigned char/short Implementing isZextFree will allow lbu or lhu to be selected rather than lb+mask and lh+mask. llvm-svn: 330942 2018-04-26 22:00:35 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \`
			`; RUN: \| FileCheck %s -check-prefix=RV32I`

			`; TODO: lbu and lhu should be selected to avoid the unnecessary masking.`

			`@bytes = global [5 x i8] zeroinitializer, align 1`

			`define i32 @test_zext_i8() {`
			`; RV32I-LABEL: test_zext_i8:`
			`; RV32I: # %bb.0: # %entry`
			`; RV32I-NEXT: lui a0, %hi(bytes)`
[RISCV] Separate base from offset in lowerGlobalAddress Summary: When lowering global address, lower the base as a TargetGlobal first then create an SDNode for the offset separately and chain it to the address calculation This optimization will create a DAG where the base address of a global access will be reused between different access. The offset can later be folded into the immediate part of the memory access instruction. With this optimization we generate: lui a0, %hi(s) addi a0, a0, %lo(s) ; shared base address. addi a1, zero, 20 ; 2 instructions per access. sw a1, 44(a0) addi a1, zero, 10 sw a1, 8(a0) addi a1, zero, 30 sw a1, 80(a0) Instead of: lui a0, %hi(s+44) ; 3 instructions per access. addi a1, zero, 20 sw a1, %lo(s+44)(a0) lui a0, %hi(s+8) addi a1, zero, 10 sw a1, %lo(s+8)(a0) lui a0, %hi(s+80) addi a1, zero, 30 sw a1, %lo(s+80)(a0) Which will save one instruction per access. Reviewers: asb, apazos Reviewed By: asb Subscribers: rbar, johnrusso, simoncook, jordy.potman.lists, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, mgrang, apazos, asb, llvm-commits Differential Revision: https://reviews.llvm.org/D46989 llvm-svn: 332641 2018-05-18 02:14:53 +08:00			`; RV32I-NEXT: lbu a1, %lo(bytes)(a0)`
			`; RV32I-NEXT: addi a2, zero, 136`
			`; RV32I-NEXT: bne a1, a2, .LBB0_3`
[RISCV] Add test case showing suboptimal codegen when loading unsigned char/short Implementing isZextFree will allow lbu or lhu to be selected rather than lb+mask and lh+mask. llvm-svn: 330942 2018-04-26 22:00:35 +08:00			`; RV32I-NEXT: # %bb.1: # %entry`
[RISCV] Separate base from offset in lowerGlobalAddress Summary: When lowering global address, lower the base as a TargetGlobal first then create an SDNode for the offset separately and chain it to the address calculation This optimization will create a DAG where the base address of a global access will be reused between different access. The offset can later be folded into the immediate part of the memory access instruction. With this optimization we generate: lui a0, %hi(s) addi a0, a0, %lo(s) ; shared base address. addi a1, zero, 20 ; 2 instructions per access. sw a1, 44(a0) addi a1, zero, 10 sw a1, 8(a0) addi a1, zero, 30 sw a1, 80(a0) Instead of: lui a0, %hi(s+44) ; 3 instructions per access. addi a1, zero, 20 sw a1, %lo(s+44)(a0) lui a0, %hi(s+8) addi a1, zero, 10 sw a1, %lo(s+8)(a0) lui a0, %hi(s+80) addi a1, zero, 30 sw a1, %lo(s+80)(a0) Which will save one instruction per access. Reviewers: asb, apazos Reviewed By: asb Subscribers: rbar, johnrusso, simoncook, jordy.potman.lists, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, mgrang, apazos, asb, llvm-commits Differential Revision: https://reviews.llvm.org/D46989 llvm-svn: 332641 2018-05-18 02:14:53 +08:00			`; RV32I-NEXT: addi a0, a0, %lo(bytes)`
			`; RV32I-NEXT: lbu a0, 1(a0)`
[RISCV] Add test case showing suboptimal codegen when loading unsigned char/short Implementing isZextFree will allow lbu or lhu to be selected rather than lb+mask and lh+mask. llvm-svn: 330942 2018-04-26 22:00:35 +08:00			`; RV32I-NEXT: addi a1, zero, 7`
			`; RV32I-NEXT: bne a0, a1, .LBB0_3`
			`; RV32I-NEXT: # %bb.2: # %if.end`
			`; RV32I-NEXT: mv a0, zero`
			`; RV32I-NEXT: ret`
			`; RV32I-NEXT: .LBB0_3: # %if.then`
			`; RV32I-NEXT: addi a0, zero, 1`
			`; RV32I-NEXT: ret`
			`entry:`
			`%0 = load i8, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bytes, i32 0, i32 0), align 1`
			`%cmp = icmp eq i8 %0, -120`
			`%1 = load i8, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bytes, i32 0, i32 1), align 1`
			`%cmp3 = icmp eq i8 %1, 7`
			`%or.cond = and i1 %cmp, %cmp3`
			`br i1 %or.cond, label %if.end, label %if.then`

			`if.then:`
			`ret i32 1`

			`if.end:`
			`ret i32 0`
			`}`

			`@shorts = global [5 x i16] zeroinitializer, align 2`

			`define i32 @test_zext_i16() {`
			`; RV32I-LABEL: test_zext_i16:`
			`; RV32I: # %bb.0: # %entry`
[RISCV] Separate base from offset in lowerGlobalAddress Summary: When lowering global address, lower the base as a TargetGlobal first then create an SDNode for the offset separately and chain it to the address calculation This optimization will create a DAG where the base address of a global access will be reused between different access. The offset can later be folded into the immediate part of the memory access instruction. With this optimization we generate: lui a0, %hi(s) addi a0, a0, %lo(s) ; shared base address. addi a1, zero, 20 ; 2 instructions per access. sw a1, 44(a0) addi a1, zero, 10 sw a1, 8(a0) addi a1, zero, 30 sw a1, 80(a0) Instead of: lui a0, %hi(s+44) ; 3 instructions per access. addi a1, zero, 20 sw a1, %lo(s+44)(a0) lui a0, %hi(s+8) addi a1, zero, 10 sw a1, %lo(s+8)(a0) lui a0, %hi(s+80) addi a1, zero, 30 sw a1, %lo(s+80)(a0) Which will save one instruction per access. Reviewers: asb, apazos Reviewed By: asb Subscribers: rbar, johnrusso, simoncook, jordy.potman.lists, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, mgrang, apazos, asb, llvm-commits Differential Revision: https://reviews.llvm.org/D46989 llvm-svn: 332641 2018-05-18 02:14:53 +08:00			`; RV32I-NEXT: lui a0, %hi(shorts)`
			`; RV32I-NEXT: lui a1, 16`
			`; RV32I-NEXT: addi a1, a1, -120`
			`; RV32I-NEXT: lhu a2, %lo(shorts)(a0)`
			`; RV32I-NEXT: bne a2, a1, .LBB1_3`
[RISCV] Add test case showing suboptimal codegen when loading unsigned char/short Implementing isZextFree will allow lbu or lhu to be selected rather than lb+mask and lh+mask. llvm-svn: 330942 2018-04-26 22:00:35 +08:00			`; RV32I-NEXT: # %bb.1: # %entry`
[RISCV] Separate base from offset in lowerGlobalAddress Summary: When lowering global address, lower the base as a TargetGlobal first then create an SDNode for the offset separately and chain it to the address calculation This optimization will create a DAG where the base address of a global access will be reused between different access. The offset can later be folded into the immediate part of the memory access instruction. With this optimization we generate: lui a0, %hi(s) addi a0, a0, %lo(s) ; shared base address. addi a1, zero, 20 ; 2 instructions per access. sw a1, 44(a0) addi a1, zero, 10 sw a1, 8(a0) addi a1, zero, 30 sw a1, 80(a0) Instead of: lui a0, %hi(s+44) ; 3 instructions per access. addi a1, zero, 20 sw a1, %lo(s+44)(a0) lui a0, %hi(s+8) addi a1, zero, 10 sw a1, %lo(s+8)(a0) lui a0, %hi(s+80) addi a1, zero, 30 sw a1, %lo(s+80)(a0) Which will save one instruction per access. Reviewers: asb, apazos Reviewed By: asb Subscribers: rbar, johnrusso, simoncook, jordy.potman.lists, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, mgrang, apazos, asb, llvm-commits Differential Revision: https://reviews.llvm.org/D46989 llvm-svn: 332641 2018-05-18 02:14:53 +08:00			`; RV32I-NEXT: addi a0, a0, %lo(shorts)`
			`; RV32I-NEXT: lhu a0, 2(a0)`
[RISCV] Add test case showing suboptimal codegen when loading unsigned char/short Implementing isZextFree will allow lbu or lhu to be selected rather than lb+mask and lh+mask. llvm-svn: 330942 2018-04-26 22:00:35 +08:00			`; RV32I-NEXT: addi a1, zero, 7`
			`; RV32I-NEXT: bne a0, a1, .LBB1_3`
			`; RV32I-NEXT: # %bb.2: # %if.end`
			`; RV32I-NEXT: mv a0, zero`
			`; RV32I-NEXT: ret`
			`; RV32I-NEXT: .LBB1_3: # %if.then`
			`; RV32I-NEXT: addi a0, zero, 1`
			`; RV32I-NEXT: ret`
			`entry:`
			`%0 = load i16, i16* getelementptr inbounds ([5 x i16], [5 x i16]* @shorts, i32 0, i32 0), align 2`
			`%cmp = icmp eq i16 %0, -120`
			`%1 = load i16, i16* getelementptr inbounds ([5 x i16], [5 x i16]* @shorts, i32 0, i32 1), align 2`
			`%cmp3 = icmp eq i16 %1, 7`
			`%or.cond = and i1 %cmp, %cmp3`
			`br i1 %or.cond, label %if.end, label %if.then`

			`if.then:`
			`ret i32 1`

			`if.end:`
			`ret i32 0`
			`}`