forked from OSchip/llvm-project
[OPENMP] Fix mixture of omp and clang pragmas
Fixes PR45753. When a program that contains a loop to which both `omp parallel for` pragma and `clang loop` pragma are associated is compiled with the -fopenmp option, the `clang loop` pragma did not take effect. The example below should not be vectorized because of the `clang loop vectorize(disable)` pragma, but it was actually vectorized. The cause is that `llvm.loop.vectorize.width` was not output to the IR when -fopenmp is specified. The fix attaches attributes if they exist for the loop. [example.c] ``` int a[100], b[100]; void foo() { #pragma omp parallel for #pragma clang loop vectorize(disable) for (int i = 0; i < 100; i++) a[i] += b[i] * i; } ``` [compile] ``` $ clang -O2 -fopenmp example.c -c -Rpass=vect example.c:3:11: remark: vectorized loop (vectorization width: 4, interleaved count: 2) [-Rpass=loop-vectorize] #pragma omp parallel for ^ ``` [IR with -fopenmp] ``` $ clang -O2 example.c -S -emit-llvm -mllvm -disable-llvm-optzns -o - -fopenmp | grep 'vectorize\.width' ``` [IR with -fno-openmp] ``` $ clang -O2 example.c -S -emit-llvm -mllvm -disable-llvm-optzns -o - -fno-openmp | grep 'vectorize\.width' !7 = !{!"llvm.loop.vectorize.width", i32 1} ``` Differential Revision: https://reviews.llvm.org/D79921
This commit is contained in:
parent
9b7fba1421
commit
ac2c5af67f
|
@ -1730,8 +1730,19 @@ void CodeGenFunction::EmitOMPInnerLoop(
|
|||
auto CondBlock = createBasicBlock("omp.inner.for.cond");
|
||||
EmitBlock(CondBlock);
|
||||
const SourceRange R = S.getSourceRange();
|
||||
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
|
||||
SourceLocToDebugLoc(R.getEnd()));
|
||||
|
||||
// If attributes are attached, push to the basic block with them.
|
||||
const auto &OMPED = cast<OMPExecutableDirective>(S);
|
||||
const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
|
||||
const Stmt *SS = ICS->getCapturedStmt();
|
||||
const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
|
||||
if (AS)
|
||||
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
|
||||
AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
|
||||
SourceLocToDebugLoc(R.getEnd()));
|
||||
else
|
||||
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
|
||||
SourceLocToDebugLoc(R.getEnd()));
|
||||
|
||||
// If there are any cleanups between here and the loop-exit scope,
|
||||
// create a block to stage a loop exit along.
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
// RUN: %clang_cc1 -verify -fopenmp -x c -emit-llvm %s -triple x86_64-unknown-linux -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
|
||||
// RUN: %clang_cc1 -verify -x c -emit-llvm %s -triple x86_64-unknown-linux -o - -femit-all-decls -disable-llvm-passes | FileCheck %s
|
||||
// expected-no-diagnostics
|
||||
|
||||
// CHECK: !{{[0-9]+}} = !{!"llvm.loop.vectorize.width", i32 1}
|
||||
// Test input: adds b[i] into a[i] for i in [0, n). The loop carries both
// `#pragma omp parallel for` and `#pragma clang loop vectorize(disable)`;
// the FileCheck pattern above expects `llvm.loop.vectorize.width` = 1 to be
// present in the emitted IR for both RUN configurations (with and without
// -fopenmp).
void sub(double *restrict a, double *restrict b, int n) {
|
||||
int i;
|
||||
|
||||
#pragma omp parallel for
|
||||
#pragma clang loop vectorize(disable)
|
||||
for (i = 0; i < n; i++) {
|
||||
a[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue