2018-04-12 00:03:07 +08:00
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=ARM --check-prefix=ARM-MACHO
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -verify-machineinstrs | FileCheck %s --check-prefix=ARM --check-prefix=ARM-ELF
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=THUMB
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=+long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-MACHO
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=+long-calls -verify-machineinstrs | FileCheck %s --check-prefix=ARM-LONG --check-prefix=ARM-LONG-ELF
|
|
|
|
; RUN: llc -fast-isel-sink-local-values < %s -O0 -fast-isel-abort=1 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=+long-calls -verify-machineinstrs | FileCheck %s --check-prefix=THUMB-LONG
|
2011-11-12 07:31:03 +08:00
|
|
|
|
2013-05-15 00:26:38 +08:00
|
|
|
; Note that some of these tests assume that relocations are either
|
|
|
|
; movw/movt or constant pool loads. Different platforms will select
|
|
|
|
; different approaches.
|
|
|
|
|
2011-11-12 07:31:03 +08:00
|
|
|
@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
|
|
|
|
@temp = common global [60 x i8] zeroinitializer, align 1
|
|
|
|
|
|
|
|
define void @t1() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t1:
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM-DAG: add r0, r0, #5
|
|
|
|
; ARM-DAG: movw r1, #64
|
|
|
|
; ARM-DAG: movw r2, #10
|
|
|
|
; ARM-DAG: and r1, r1, #255
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: bl {{_?}}memset
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LONG-LABEL: t1:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-MACHO: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}}
|
|
|
|
; ARM-LONG-MACHO: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}}
|
|
|
|
; ARM-LONG-MACHO: ldr r3, [r3]
|
|
|
|
|
|
|
|
; ARM-LONG-ELF: movw r3, :lower16:memset
|
|
|
|
; ARM-LONG-ELF: movt r3, :upper16:memset
|
|
|
|
|
2012-06-13 03:25:13 +08:00
|
|
|
; ARM-LONG: blx r3
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t1:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
|
2011-11-12 07:31:03 +08:00
|
|
|
; THUMB: adds r0, #5
|
|
|
|
; THUMB: movs r1, #64
|
2013-06-08 04:10:37 +08:00
|
|
|
; THUMB: and r1, r1, #255
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB: movs r2, #10
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: bl {{_?}}memset
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LONG-LABEL: t1:
|
2012-06-13 03:25:13 +08:00
|
|
|
; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr
|
|
|
|
; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
|
|
|
|
; THUMB-LONG: ldr r3, [r3]
|
|
|
|
; THUMB-LONG: blx r3
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memset.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i1 false)
|
2011-11-12 07:31:03 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind
|
2011-11-12 07:31:03 +08:00
|
|
|
|
|
|
|
define void @t2() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t2:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
|
|
|
; ARM-MACHO: ldr r0, [r0]
|
|
|
|
|
|
|
|
; ARM-ELF: movw r0, :lower16:temp
|
|
|
|
; ARM-ELF: movt r0, :upper16:temp
|
|
|
|
|
2011-11-12 07:31:03 +08:00
|
|
|
; ARM: add r1, r0, #4
|
|
|
|
; ARM: add r0, r0, #16
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
|
2011-11-12 07:31:03 +08:00
|
|
|
; ARM: mov r0, r1
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM: movw r2, #17
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: bl {{_?}}memcpy
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LONG-LABEL: t2:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-MACHO: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}}
|
|
|
|
; ARM-LONG-MACHO: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}}
|
|
|
|
; ARM-LONG-MACHO: ldr r3, [r3]
|
|
|
|
|
|
|
|
; ARM-LONG-ELF: movw r3, :lower16:memcpy
|
|
|
|
; ARM-LONG-ELF: movt r3, :upper16:memcpy
|
|
|
|
|
2012-06-13 03:25:13 +08:00
|
|
|
; ARM-LONG: blx r3
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t2:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
2011-11-12 07:31:03 +08:00
|
|
|
; THUMB: ldr r0, [r0]
|
|
|
|
; THUMB: adds r1, r0, #4
|
|
|
|
; THUMB: adds r0, #16
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
|
2011-11-12 07:31:03 +08:00
|
|
|
; THUMB: mov r0, r1
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB: movs r2, #17
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: bl {{_?}}memcpy
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LONG-LABEL: t2:
|
2012-06-13 03:25:13 +08:00
|
|
|
; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr
|
|
|
|
; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
|
|
|
|
; THUMB-LONG: ldr r3, [r3]
|
|
|
|
; THUMB-LONG: blx r3
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 17, i1 false)
|
2011-11-12 07:31:03 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
|
2011-11-12 07:31:03 +08:00
|
|
|
|
|
|
|
define void @t3() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t3:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
|
|
|
; ARM-MACHO: ldr r0, [r0]
|
|
|
|
|
|
|
|
; ARM-ELF: movw r0, :lower16:temp
|
|
|
|
; ARM-ELF: movt r0, :upper16:temp
|
|
|
|
|
|
|
|
|
2011-11-12 07:31:03 +08:00
|
|
|
; ARM: add r1, r0, #4
|
|
|
|
; ARM: add r0, r0, #16
|
|
|
|
; ARM: mov r0, r1
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; ARM: movw r2, #10
|
2013-05-15 00:26:38 +08:00
|
|
|
; ARM: bl {{_?}}memmove
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LONG-LABEL: t3:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-LONG-MACHO: {{(movw r3, :lower16:L_memmove\$non_lazy_ptr)|(ldr r3, .LCPI)}}
|
|
|
|
; ARM-LONG-MACHO: {{(movt r3, :upper16:L_memmove\$non_lazy_ptr)?}}
|
|
|
|
; ARM-LONG-MACHO: ldr r3, [r3]
|
|
|
|
|
|
|
|
; ARM-LONG-ELF: movw r3, :lower16:memmove
|
|
|
|
; ARM-LONG-ELF: movt r3, :upper16:memmove
|
|
|
|
|
2012-06-13 03:25:13 +08:00
|
|
|
; ARM-LONG: blx r3
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t3:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
2011-11-12 07:31:03 +08:00
|
|
|
; THUMB: ldr r0, [r0]
|
|
|
|
; THUMB: adds r1, r0, #4
|
|
|
|
; THUMB: adds r0, #16
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
|
2011-11-12 07:31:03 +08:00
|
|
|
; THUMB: mov r0, r1
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload
|
[FastISel] Sink local value materializations to first use
Summary:
Local values are constants, global addresses, and stack addresses that
can't be folded into the instruction that uses them. For example, when
storing the address of a global variable into memory, we need to
materialize that address into a register.
FastISel doesn't want to materialize any given local value more than
once, so it generates all local value materialization code at
EmitStartPt, which always dominates the current insertion point. This
allows it to maintain a map of local value registers, and it knows that
the local value area will always dominate the current insertion point.
The downside is that local value instructions are always emitted without
a source location. This is done to prevent jumpy line tables, but it
means that the local value area will be considered part of the previous
statement. Consider this C code:
call1(); // line 1
++global; // line 2
++global; // line 3
call2(&global, &local); // line 4
Today we end up with assembly and line tables like this:
.loc 1 1
callq call1
leaq global(%rip), %rdi
leaq local(%rsp), %rsi
.loc 1 2
addq $1, global(%rip)
.loc 1 3
addq $1, global(%rip)
.loc 1 4
callq call2
The LEA instructions in the local value area have no source location and
are treated as being on line 1. Stepping through the code in a debugger
and correlating it with the assembly won't make much sense, because
these materializations are only required for line 4.
This is actually problematic for the VS debugger "set next statement"
feature, which effectively assumes that there are no registers live
across statement boundaries. By sinking the local value code into the
statement and fixing up the source location, we can make that feature
work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and
https://crbug.com/793819.
This change is obviously not enough to make this feature work reliably
in all cases, but I felt that it was worth doing anyway because it
usually generates smaller, more comprehensible -O0 code. I measured a
0.12% regression in code generation time with LLC on the sqlite3
amalgamation, so I think this is worth doing.
There are some special cases worth calling out in the commit message:
1. local values materialized for phis
2. local values used by no-op casts
3. dead local value code
Local values can be materialized for phis, and this does not show up as
a vreg use in MachineRegisterInfo. In this case, if there are no other
uses, this patch sinks the value to the first terminator, EH label, or
the end of the BB if nothing else exists.
Local values may also be used by no-op casts, which adds the register to
the RegFixups table. Without reversing the RegFixups map direction, we
don't have enough information to sink these instructions.
Lastly, if the local value register has no other uses, we can delete it.
This comes up when fastisel tries two instruction selection approaches
and the first materializes the value but fails and the second succeeds
without using the local value.
Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo
Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya
Differential Revision: https://reviews.llvm.org/D43093
llvm-svn: 327581
2018-03-15 05:54:21 +08:00
|
|
|
; THUMB: movs r2, #10
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: bl {{_?}}memmove
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LONG-LABEL: t3:
|
2012-06-13 03:25:13 +08:00
|
|
|
; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr
|
|
|
|
; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr
|
|
|
|
; THUMB-LONG: ldr r3, [r3]
|
|
|
|
; THUMB-LONG: blx r3
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memmove.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
|
2011-11-12 07:31:03 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2011-11-15 06:46:17 +08:00
|
|
|
define void @t4() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t4:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
|
|
|
; ARM-MACHO: ldr r0, [r0]
|
|
|
|
|
|
|
|
; ARM-ELF: movw r0, :lower16:temp
|
|
|
|
; ARM-ELF: movt r0, :upper16:temp
|
|
|
|
|
2012-01-07 12:07:22 +08:00
|
|
|
; ARM: ldr r1, [r0, #16]
|
|
|
|
; ARM: str r1, [r0, #4]
|
|
|
|
; ARM: ldr r1, [r0, #20]
|
|
|
|
; ARM: str r1, [r0, #8]
|
|
|
|
; ARM: ldrh r1, [r0, #24]
|
2011-11-15 06:46:17 +08:00
|
|
|
; ARM: strh r1, [r0, #12]
|
|
|
|
; ARM: bx lr
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t4:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
2011-11-15 06:46:17 +08:00
|
|
|
; THUMB: ldr r0, [r0]
|
2012-01-07 12:07:22 +08:00
|
|
|
; THUMB: ldr r1, [r0, #16]
|
|
|
|
; THUMB: str r1, [r0, #4]
|
|
|
|
; THUMB: ldr r1, [r0, #20]
|
|
|
|
; THUMB: str r1, [r0, #8]
|
|
|
|
; THUMB: ldrh r1, [r0, #24]
|
2011-11-15 06:46:17 +08:00
|
|
|
; THUMB: strh r1, [r0, #12]
|
|
|
|
; THUMB: bx lr
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
|
2011-11-15 06:46:17 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind
|
2012-12-06 09:34:31 +08:00
|
|
|
|
|
|
|
define void @t5() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t5:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
|
|
|
; ARM-MACHO: ldr r0, [r0]
|
|
|
|
|
|
|
|
; ARM-ELF: movw r0, :lower16:temp
|
|
|
|
; ARM-ELF: movt r0, :upper16:temp
|
|
|
|
|
2012-12-06 09:34:31 +08:00
|
|
|
; ARM: ldrh r1, [r0, #16]
|
|
|
|
; ARM: strh r1, [r0, #4]
|
|
|
|
; ARM: ldrh r1, [r0, #18]
|
|
|
|
; ARM: strh r1, [r0, #6]
|
|
|
|
; ARM: ldrh r1, [r0, #20]
|
|
|
|
; ARM: strh r1, [r0, #8]
|
|
|
|
; ARM: ldrh r1, [r0, #22]
|
|
|
|
; ARM: strh r1, [r0, #10]
|
|
|
|
; ARM: ldrh r1, [r0, #24]
|
|
|
|
; ARM: strh r1, [r0, #12]
|
|
|
|
; ARM: bx lr
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t5:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
2012-12-06 09:34:31 +08:00
|
|
|
; THUMB: ldr r0, [r0]
|
|
|
|
; THUMB: ldrh r1, [r0, #16]
|
|
|
|
; THUMB: strh r1, [r0, #4]
|
|
|
|
; THUMB: ldrh r1, [r0, #18]
|
|
|
|
; THUMB: strh r1, [r0, #6]
|
|
|
|
; THUMB: ldrh r1, [r0, #20]
|
|
|
|
; THUMB: strh r1, [r0, #8]
|
|
|
|
; THUMB: ldrh r1, [r0, #22]
|
|
|
|
; THUMB: strh r1, [r0, #10]
|
|
|
|
; THUMB: ldrh r1, [r0, #24]
|
|
|
|
; THUMB: strh r1, [r0, #12]
|
|
|
|
; THUMB: bx lr
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
|
2012-12-06 09:34:31 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
define void @t6() nounwind ssp {
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t6:
|
2016-05-28 12:47:13 +08:00
|
|
|
|
|
|
|
; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}}
|
|
|
|
; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
|
|
|
; ARM-MACHO: ldr r0, [r0]
|
|
|
|
|
|
|
|
; ARM-ELF: movw r0, :lower16:temp
|
|
|
|
; ARM-ELF: movt r0, :upper16:temp
|
|
|
|
|
2012-12-06 09:34:31 +08:00
|
|
|
; ARM: ldrb r1, [r0, #16]
|
|
|
|
; ARM: strb r1, [r0, #4]
|
|
|
|
; ARM: ldrb r1, [r0, #17]
|
|
|
|
; ARM: strb r1, [r0, #5]
|
|
|
|
; ARM: ldrb r1, [r0, #18]
|
|
|
|
; ARM: strb r1, [r0, #6]
|
|
|
|
; ARM: ldrb r1, [r0, #19]
|
|
|
|
; ARM: strb r1, [r0, #7]
|
|
|
|
; ARM: ldrb r1, [r0, #20]
|
|
|
|
; ARM: strb r1, [r0, #8]
|
|
|
|
; ARM: ldrb r1, [r0, #21]
|
|
|
|
; ARM: strb r1, [r0, #9]
|
|
|
|
; ARM: ldrb r1, [r0, #22]
|
|
|
|
; ARM: strb r1, [r0, #10]
|
|
|
|
; ARM: ldrb r1, [r0, #23]
|
|
|
|
; ARM: strb r1, [r0, #11]
|
|
|
|
; ARM: ldrb r1, [r0, #24]
|
|
|
|
; ARM: strb r1, [r0, #12]
|
|
|
|
; ARM: ldrb r1, [r0, #25]
|
|
|
|
; ARM: strb r1, [r0, #13]
|
|
|
|
; ARM: bx lr
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t6:
|
2013-05-15 00:26:38 +08:00
|
|
|
; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}}
|
|
|
|
; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}}
|
2012-12-06 09:34:31 +08:00
|
|
|
; THUMB: ldr r0, [r0]
|
|
|
|
; THUMB: ldrb r1, [r0, #16]
|
|
|
|
; THUMB: strb r1, [r0, #4]
|
|
|
|
; THUMB: ldrb r1, [r0, #17]
|
|
|
|
; THUMB: strb r1, [r0, #5]
|
|
|
|
; THUMB: ldrb r1, [r0, #18]
|
|
|
|
; THUMB: strb r1, [r0, #6]
|
|
|
|
; THUMB: ldrb r1, [r0, #19]
|
|
|
|
; THUMB: strb r1, [r0, #7]
|
|
|
|
; THUMB: ldrb r1, [r0, #20]
|
|
|
|
; THUMB: strb r1, [r0, #8]
|
|
|
|
; THUMB: ldrb r1, [r0, #21]
|
|
|
|
; THUMB: strb r1, [r0, #9]
|
|
|
|
; THUMB: ldrb r1, [r0, #22]
|
|
|
|
; THUMB: strb r1, [r0, #10]
|
|
|
|
; THUMB: ldrb r1, [r0, #23]
|
|
|
|
; THUMB: strb r1, [r0, #11]
|
|
|
|
; THUMB: ldrb r1, [r0, #24]
|
|
|
|
; THUMB: strb r1, [r0, #12]
|
|
|
|
; THUMB: ldrb r1, [r0, #25]
|
|
|
|
; THUMB: strb r1, [r0, #13]
|
|
|
|
; THUMB: bx lr
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false)
|
2012-12-06 09:34:31 +08:00
|
|
|
ret void
|
|
|
|
}
|
2013-02-19 05:46:28 +08:00
|
|
|
|
2013-02-19 05:59:15 +08:00
|
|
|
; rdar://13202135
|
2013-02-19 05:46:28 +08:00
|
|
|
define void @t7() nounwind ssp {
|
|
|
|
; Just make sure this doesn't assert when we have an odd length and an alignment of 2.
|
Remove alignment argument from memcpy/memmove/memset in favour of alignment attributes (Step 1)
Summary:
This is a resurrection of work first proposed and discussed in Aug 2015:
http://lists.llvm.org/pipermail/llvm-dev/2015-August/089384.html
and initially landed (but then backed out) in Nov 2015:
http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20151109/312083.html
The @llvm.memcpy/memmove/memset intrinsics currently have an explicit argument
which is required to be a constant integer. It represents the alignment of the
dest (and source), and so must be the minimum of the actual alignment of the
two.
This change is the first in a series that allows source and dest to each
have their own alignments by using the alignment attribute on their arguments.
In this change we:
1) Remove the alignment argument.
2) Add alignment attributes to the source & dest arguments. We, temporarily,
require that the alignments for source & dest be equal.
For example, code which used to read:
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 100, i32 4, i1 false)
will now read
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 100, i1 false)
Downstream users may have to update their lit tests that check for
@llvm.memcpy/memmove/memset call/declaration patterns. The following extended sed script
may help with updating the majority of your tests, but it does not catch all possible
patterns so some manual checking and updating will be required.
s~declare void @llvm\.mem(set|cpy|move)\.p([^(]*)\((.*), i32, i1\)~declare void @llvm.mem\1.p\2(\3, i1)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* \3, i8 \4, i8 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* \3, i8 \4, i16 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* \3, i8 \4, i32 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* \3, i8 \4, i64 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* \3, i8 \4, i128 \5, i1 \6)~g
s~call void @llvm\.memset\.p([^(]*)i8\(i8([^*]*)\* (.*), i8 (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i8(i8\2* align \6 \3, i8 \4, i8 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i16\(i8([^*]*)\* (.*), i8 (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i16(i8\2* align \6 \3, i8 \4, i16 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i32\(i8([^*]*)\* (.*), i8 (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i32(i8\2* align \6 \3, i8 \4, i32 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i64\(i8([^*]*)\* (.*), i8 (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i64(i8\2* align \6 \3, i8 \4, i64 \5, i1 \7)~g
s~call void @llvm\.memset\.p([^(]*)i128\(i8([^*]*)\* (.*), i8 (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.memset.p\1i128(i8\2* align \6 \3, i8 \4, i128 \5, i1 \7)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* \4, i8\5* \6, i8 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* \4, i8\5* \6, i16 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* \4, i8\5* \6, i32 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* \4, i8\5* \6, i64 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 [01], i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* \4, i8\5* \6, i128 \7, i1 \8)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i8\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i8 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i8(i8\3* align \8 \4, i8\5* align \8 \6, i8 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i16\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i16 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i16(i8\3* align \8 \4, i8\5* align \8 \6, i16 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i32\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i32 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i32(i8\3* align \8 \4, i8\5* align \8 \6, i32 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i64\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i64 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i64(i8\3* align \8 \4, i8\5* align \8 \6, i64 \7, i1 \9)~g
s~call void @llvm\.mem(cpy|move)\.p([^(]*)i128\(i8([^*]*)\* (.*), i8([^*]*)\* (.*), i128 (.*), i32 ([0-9]*), i1 ([^)]*)\)~call void @llvm.mem\1.p\2i128(i8\3* align \8 \4, i8\5* align \8 \6, i128 \7, i1 \9)~g
The remaining changes in the series will:
Step 2) Expand the IRBuilder API to allow creation of memcpy/memmove with differing
source and dest alignments.
Step 3) Update Clang to use the new IRBuilder API.
Step 4) Update Polly to use the new IRBuilder API.
Step 5) Update LLVM passes that create memcpy/memmove calls to use the new IRBuilder API,
and those that use use MemIntrinsicInst::[get|set]Alignment() to use
getDestAlignment() and getSourceAlignment() instead.
Step 6) Remove the single-alignment IRBuilder API for memcpy/memmove, and the
MemIntrinsicInst::[get|set]Alignment() methods.
Reviewers: pete, hfinkel, lhames, reames, bollu
Reviewed By: reames
Subscribers: niosHD, reames, jholewinski, qcolombet, jfb, sanjoy, arsenm, dschuff, dylanmckay, mehdi_amini, sdardis, nemanjai, david2050, nhaehnle, javed.absar, sbc100, jgravelle-google, eraman, aheejin, kbarton, JDevlieghere, asb, rbar, johnrusso, simoncook, jordy.potman.lists, apazos, sabuasal, llvm-commits
Differential Revision: https://reviews.llvm.org/D41675
llvm-svn: 322965
2018-01-20 01:13:12 +08:00
|
|
|
call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 3, i1 false)
|
2013-02-19 05:46:28 +08:00
|
|
|
ret void
|
|
|
|
}
|
2013-03-08 04:42:17 +08:00
|
|
|
|
|
|
|
define i32 @t8(i32 %x) nounwind {
|
|
|
|
entry:
|
2013-11-22 21:25:07 +08:00
|
|
|
; ARM-LABEL: t8:
|
2013-03-08 04:42:17 +08:00
|
|
|
; ARM-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
|
2013-11-22 21:25:07 +08:00
|
|
|
; THUMB-LABEL: t8:
|
2013-03-08 04:42:17 +08:00
|
|
|
; THUMB-NOT: FastISel missed call: %expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
|
|
|
|
%expval = call i32 @llvm.expect.i32(i32 %x, i32 1)
|
|
|
|
ret i32 %expval
|
|
|
|
}
|
|
|
|
|
|
|
|
declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
|