llvm-project/clang/test/OpenMP/tile_codegen_for_dependent.cpp

// Check code generation
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR

// Check same results after serialization round-trip
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
// expected-no-diagnostics

// The loop trip count used by #pragma omp for depends on code generated
// by #pragma omp tile. Check that these PreInits are emitted before
// the code generated by #pragma omp for.

#ifndef HEADER
#define HEADER

// Placeholder for loop body code: an empty variadic function that accepts
// any arguments and does nothing. It is declared extern "C" so the call in
// func() refers to the plain symbol name `body` in the generated output.
extern "C" void body(...) {}


// IR-LABEL: @func(
// IR-NEXT:  [[ENTRY:.*]]:
// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4
// IR-NEXT:    %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4
// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
// IR-NEXT:    store i32 %[[TMP1]], i32* %[[I]], align 4
// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -3
// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4
// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
// IR-NEXT:    store i32 0, i32* %[[DOTFLOOR_0_IV_I]], align 4
// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]]
// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_PRECOND_THEN]]:
// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
// IR-NEXT:    store i32 %[[TMP12]], i32* %[[DOTOMP_UB]], align 4
// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]]
// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
// IR-EMPTY:
// IR-NEXT:  [[COND_TRUE]]:
// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
// IR-NEXT:    br label %[[COND_END:.+]]
// IR-EMPTY:
// IR-NEXT:  [[COND_FALSE]]:
// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
// IR-NEXT:    br label %[[COND_END]]
// IR-EMPTY:
// IR-NEXT:  [[COND_END]]:
// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ]
// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
// IR-NEXT:    store i32 %[[TMP17]], i32* %[[DOTOMP_IV]], align 4
// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP19]], 1
// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]]
// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP20]], 4
// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
// IR-NEXT:    store i32 %[[ADD16]], i32* %[[DOTFLOOR_0_IV_I12]], align 4
// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
// IR-NEXT:    store i32 %[[TMP21]], i32* %[[DOTTILE_0_IV_I]], align 4
// IR-NEXT:    br label %[[FOR_COND:.+]]
// IR-EMPTY:
// IR-NEXT:  [[FOR_COND]]:
// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP23]], 1
// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
// IR-NEXT:    %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4
// IR-NEXT:    %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]]
// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]]
// IR-EMPTY:
// IR-NEXT:  [[COND_TRUE20]]:
// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
// IR-NEXT:    %[[ADD21:.+]] = add i32 %[[TMP25]], 1
// IR-NEXT:    br label %[[COND_END24:.+]]
// IR-EMPTY:
// IR-NEXT:  [[COND_FALSE22]]:
// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4
// IR-NEXT:    %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4
// IR-NEXT:    br label %[[COND_END24]]
// IR-EMPTY:
// IR-NEXT:  [[COND_END24]]:
// IR-NEXT:    %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ]
// IR-NEXT:    %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]]
// IR-NEXT:    br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
// IR-EMPTY:
// IR-NEXT:  [[FOR_BODY]]:
// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
// IR-NEXT:    %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]]
// IR-NEXT:    %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]]
// IR-NEXT:    store i32 %[[ADD28]], i32* %[[I]], align 4
// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[START_ADDR]], align 4
// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[END_ADDR]], align 4
// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[I]], align 4
// IR-NEXT:    call void (...) @body(i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]], i32 %[[TMP33]])
// IR-NEXT:    br label %[[FOR_INC:.+]]
// IR-EMPTY:
// IR-NEXT:  [[FOR_INC]]:
// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4
// IR-NEXT:    %[[INC:.+]] = add nsw i32 %[[TMP34]], 1
// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTTILE_0_IV_I]], align 4
// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
// IR-EMPTY:
// IR-NEXT:  [[FOR_END]]:
// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
// IR-NEXT:    %[[TMP35:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
// IR-NEXT:    %[[ADD29:.+]] = add i32 %[[TMP35]], 1
// IR-NEXT:    store i32 %[[ADD29]], i32* %[[DOTOMP_IV]], align 4
// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_INNER_FOR_END]]:
// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_LOOP_EXIT]]:
// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
// IR-NEXT:    br label %[[OMP_PRECOND_END]]
// IR-EMPTY:
// IR-NEXT:  [[OMP_PRECOND_END]]:
// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
// IR-NEXT:    ret void
// IR-NEXT:  }
// Function under test. A worksharing-loop directive is stacked on top of a
// tile directive, so the loop that `omp for` distributes is the one produced
// by `omp tile sizes(4)`. The loop bounds and stride come from the function
// parameters, and the checks above pin the order in which the resulting
// trip-count computations and the worksharing setup are emitted.
// NOTE(review): the exact statement shape below is matched instruction by
// instruction by the checks above; do not restructure it without
// regenerating them.
extern "C" void func(int start, int end, int step) {
  // Outer directive first, loop transformation second: `omp for` is applied
  // to the result of `omp tile`.
#pragma omp for
#pragma omp tile sizes(4)
  for (int i = start; i < end; i += step)
    body(start, end, step, i);
}

#endif /* HEADER */
[Clang][OpenMP] Emit dependent PreInits before directive. The PreInits of a loop transformation (atm moment only tile) include the computation of the trip count. The trip count is needed by any loop-associated directives that consumes the transformation-generated loop. Hence, we must ensure that the PreInits of consumed loop transformations are emitted with the consuming directive. This is done by addinging the inner loop transformation's PreInits to the outer loop-directive's PreInits. The outer loop-directive will consume the de-sugared AST such that the inner PreInits are not emitted twice. The PreInits of a loop transformation are still emitted directly if its generated loop(s) are not associated with another loop-associated directive. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D102180 2021-06-02 22:12:32 +08:00			`// Check code generation`
			`// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - \| FileCheck %s --check-prefix=IR`

			`// Check same results after serialization round-trip`
			`// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s`
			`// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - \| FileCheck %s --check-prefix=IR`
			`// expected-no-diagnostics`

			`// The loop trip count used by #pragma omp for depends on code generated`
			`// by #pragma omp tile. Check that these PreInits are emitted before`
			`// the code generated by #pragma omp for.`

			`#ifndef HEADER`
			`#define HEADER`

			`// placeholder for loop body code.`
			`extern "C" void body(...) {}`


			`// IR-LABEL: @func(`
			`// IR-NEXT: [[ENTRY:.*]]:`
			`// IR-NEXT: %[[START_ADDR:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[END_ADDR:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[STEP_ADDR:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTOMP_IV:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[TMP:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[I:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTOMP_LB:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTOMP_UB:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4`
			`// IR-NEXT: %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)`
			`// IR-NEXT: store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4`
			`// IR-NEXT: %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[TMP1]], i32* %[[I]], align 4`
			`// IR-NEXT: %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4`
			`// IR-NEXT: %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4`
			`// IR-NEXT: %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4`
			`// IR-NEXT: store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4`
			`// IR-NEXT: %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4`
			`// IR-NEXT: %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4`
			`// IR-NEXT: %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]`
			`// IR-NEXT: %[[SUB4:.+]] = sub i32 %[[SUB]], 1`
			`// IR-NEXT: %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4`
			`// IR-NEXT: %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]`
			`// IR-NEXT: %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4`
			`// IR-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]`
			`// IR-NEXT: %[[SUB5:.+]] = sub i32 %[[DIV]], 1`
			`// IR-NEXT: store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4`
			`// IR-NEXT: %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4`
			`// IR-NEXT: %[[ADD7:.+]] = add i32 %[[TMP9]], 1`
			`// IR-NEXT: store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4`
			`// IR-NEXT: %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4`
			`// IR-NEXT: %[[SUB9:.+]] = sub i32 %[[TMP10]], -3`
			`// IR-NEXT: %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4`
			`// IR-NEXT: %[[SUB11:.+]] = sub i32 %[[DIV10]], 1`
			`// IR-NEXT: store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4`
			`// IR-NEXT: store i32 0, i32* %[[DOTFLOOR_0_IV_I]], align 4`
			`// IR-NEXT: %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4`
			`// IR-NEXT: %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]]`
			`// IR-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_PRECOND_THEN]]:`
			`// IR-NEXT: store i32 0, i32* %[[DOTOMP_LB]], align 4`
			`// IR-NEXT: %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4`
			`// IR-NEXT: store i32 %[[TMP12]], i32* %[[DOTOMP_UB]], align 4`
			`// IR-NEXT: store i32 1, i32* %[[DOTOMP_STRIDE]], align 4`
			`// IR-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4`
			`// IR-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)`
			`// IR-NEXT: %[[TMP13:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4`
			`// IR-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4`
			`// IR-NEXT: %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]]`
			`// IR-NEXT: br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_TRUE]]:`
			`// IR-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4`
			`// IR-NEXT: br label %[[COND_END:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_FALSE]]:`
			`// IR-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4`
			`// IR-NEXT: br label %[[COND_END]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_END]]:`
			`// IR-NEXT: %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ]`
			`// IR-NEXT: store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4`
			`// IR-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4`
			`// IR-NEXT: store i32 %[[TMP17]], i32* %[[DOTOMP_IV]], align 4`
			`// IR-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_INNER_FOR_COND]]:`
			`// IR-NEXT: %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4`
			`// IR-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4`
			`// IR-NEXT: %[[ADD14:.+]] = add i32 %[[TMP19]], 1`
			`// IR-NEXT: %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]]`
			`// IR-NEXT: br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_INNER_FOR_BODY]]:`
			`// IR-NEXT: %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4`
			`// IR-NEXT: %[[MUL:.+]] = mul i32 %[[TMP20]], 4`
			`// IR-NEXT: %[[ADD16:.+]] = add i32 0, %[[MUL]]`
			`// IR-NEXT: store i32 %[[ADD16]], i32* %[[DOTFLOOR_0_IV_I12]], align 4`
			`// IR-NEXT: %[[TMP21:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4`
			`// IR-NEXT: store i32 %[[TMP21]], i32* %[[DOTTILE_0_IV_I]], align 4`
			`// IR-NEXT: br label %[[FOR_COND:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[FOR_COND]]:`
			`// IR-NEXT: %[[TMP22:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4`
			`// IR-NEXT: %[[TMP23:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4`
			`// IR-NEXT: %[[ADD17:.+]] = add i32 %[[TMP23]], 1`
			`// IR-NEXT: %[[TMP24:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4`
			`// IR-NEXT: %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4`
			`// IR-NEXT: %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]]`
			`// IR-NEXT: br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_TRUE20]]:`
			`// IR-NEXT: %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4`
			`// IR-NEXT: %[[ADD21:.+]] = add i32 %[[TMP25]], 1`
			`// IR-NEXT: br label %[[COND_END24:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_FALSE22]]:`
			`// IR-NEXT: %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV_I12]], align 4`
			`// IR-NEXT: %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4`
			`// IR-NEXT: br label %[[COND_END24]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[COND_END24]]:`
			`// IR-NEXT: %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ]`
			`// IR-NEXT: %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]]`
			`// IR-NEXT: br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[FOR_BODY]]:`
			`// IR-NEXT: %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4`
			`// IR-NEXT: %[[TMP28:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4`
			`// IR-NEXT: %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4`
			`// IR-NEXT: %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]]`
			`// IR-NEXT: %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]]`
			`// IR-NEXT: store i32 %[[ADD28]], i32* %[[I]], align 4`
			`// IR-NEXT: %[[TMP30:.+]] = load i32, i32* %[[START_ADDR]], align 4`
			`// IR-NEXT: %[[TMP31:.+]] = load i32, i32* %[[END_ADDR]], align 4`
			`// IR-NEXT: %[[TMP32:.+]] = load i32, i32* %[[STEP_ADDR]], align 4`
			`// IR-NEXT: %[[TMP33:.+]] = load i32, i32* %[[I]], align 4`
Revert "[Clang/Test]: Rename enable_noundef_analysis to disable-noundef-analysis and turn it off by default" This reverts commit aacfbb953eb705af2ecfeb95a6262818fa85dd92. Revert "Fix lit test failures in CodeGenCoroutines" This reverts commit 63fff0f5bffe20fa2c84a45a41161afa0043cb34. 2021-11-09 01:09:49 +08:00			`// IR-NEXT: call void (...) @body(i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]], i32 %[[TMP33]])`
[Clang][OpenMP] Emit dependent PreInits before directive. The PreInits of a loop transformation (atm moment only tile) include the computation of the trip count. The trip count is needed by any loop-associated directives that consumes the transformation-generated loop. Hence, we must ensure that the PreInits of consumed loop transformations are emitted with the consuming directive. This is done by addinging the inner loop transformation's PreInits to the outer loop-directive's PreInits. The outer loop-directive will consume the de-sugared AST such that the inner PreInits are not emitted twice. The PreInits of a loop transformation are still emitted directly if its generated loop(s) are not associated with another loop-associated directive. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D102180 2021-06-02 22:12:32 +08:00			`// IR-NEXT: br label %[[FOR_INC:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[FOR_INC]]:`
			`// IR-NEXT: %[[TMP34:.+]] = load i32, i32* %[[DOTTILE_0_IV_I]], align 4`
			`// IR-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP34]], 1`
			`// IR-NEXT: store i32 %[[INC]], i32* %[[DOTTILE_0_IV_I]], align 4`
			`// IR-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[FOR_END]]:`
			`// IR-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_BODY_CONTINUE]]:`
			`// IR-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_INNER_FOR_INC]]:`
			`// IR-NEXT: %[[TMP35:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4`
			`// IR-NEXT: %[[ADD29:.+]] = add i32 %[[TMP35]], 1`
			`// IR-NEXT: store i32 %[[ADD29]], i32* %[[DOTOMP_IV]], align 4`
			`// IR-NEXT: br label %[[OMP_INNER_FOR_COND]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_INNER_FOR_END]]:`
			`// IR-NEXT: br label %[[OMP_LOOP_EXIT:.+]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_LOOP_EXIT]]:`
			`// IR-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])`
			`// IR-NEXT: br label %[[OMP_PRECOND_END]]`
			`// IR-EMPTY:`
			`// IR-NEXT: [[OMP_PRECOND_END]]:`
			`// IR-NEXT: call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])`
			`// IR-NEXT: ret void`
			`// IR-NEXT: }`
			`extern "C" void func(int start, int end, int step) {`
			`#pragma omp for`
			`#pragma omp tile sizes(4)`
			`for (int i = start; i < end; i += step)`
			`body(start, end, step, i);`
			`}`

			`#endif /* HEADER */`