forked from OSchip/llvm-project
[DAG] Fix typo preventing some store merges into truncated stores.
Check the actual memory type stored, not the extended value size, when considering whether a truncated store merge is worthwhile.
Reviewers: efriedma, RKSimon, spatel, jyknight
Reviewed By: efriedma
Subscribers: llvm-commits, nhaehnle
Differential Revision: https://reviews.llvm.org/D35623
llvm-svn: 308833
This commit is contained in:
parent
6876680773
commit
4e6dcf73f9
|
@ -12866,8 +12866,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
|||
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
|
||||
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
|
||||
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
|
||||
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
|
||||
FirstStoreAS, FirstStoreAlign, &IsFast) &&
|
||||
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
|
||||
FirstStoreAlign, &IsFast) &&
|
||||
IsFast) {
|
||||
LastIntegerTrunc = true;
|
||||
LastLegalType = i + 1;
|
||||
|
@ -13098,8 +13098,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
|
|||
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
|
||||
StoreTy) &&
|
||||
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
|
||||
TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
|
||||
FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
|
||||
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
|
||||
FirstStoreAlign, &IsFastSt) &&
|
||||
IsFastSt &&
|
||||
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
|
||||
FirstLoadAlign, &IsFastLd) &&
|
||||
|
|
|
@ -10,8 +10,7 @@
|
|||
|
||||
|
||||
; GCN-LABEL: {{^}}merge_global_store_2_constants_i8:
|
||||
; GCN: buffer_store_byte
|
||||
; GCN: buffer_store_byte
|
||||
; GCN: buffer_store_short
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
|
||||
|
@ -489,8 +488,7 @@ define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addr
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_local_store_2_constants_i8:
|
||||
; GCN: ds_write_b8
|
||||
; GCN: ds_write_b8
|
||||
; GCN: ds_write_b16
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
|
||||
|
|
|
@ -23,10 +23,9 @@ define i32 @ebpf_filter(%struct.__sk_buff* nocapture readnone %ebpf_packet) #0 s
|
|||
; EL: r1 = 134678021
|
||||
; EB: r1 = 84281096
|
||||
; CHECK: *(u32 *)(r10 - 8) = r1
|
||||
; CHECK: r1 = 9
|
||||
; CHECK: *(u8 *)(r10 - 4) = r1
|
||||
; CHECK: r1 = 10
|
||||
; CHECK: *(u8 *)(r10 - 3) = r1
|
||||
; EL: r1 = 2569
|
||||
; EB: r1 = 2314
|
||||
; CHECK: *(u16 *)(r10 - 4) = r1
|
||||
; CHECK: *(u16 *)(r10 + 24) = r2
|
||||
; CHECK: *(u16 *)(r10 + 22) = r2
|
||||
; CHECK: *(u16 *)(r10 + 20) = r2
|
||||
|
|
Loading…
Reference in New Issue