nuudlman/llvm: clang/test/OpenMP/align_clause

// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm -o - -fopenmp \
// RUN: -triple i386-unknown-unknown -fopenmp-version=51 %s | \
// RUN: FileCheck %s --check-prefix=CHECK-32
// RUN: %clang_cc1 -no-opaque-pointers -emit-llvm -o - -fopenmp \
// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 %s | FileCheck %s
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp \
// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
// RUN: -emit-pch %s -o %t
// RUN: %clang_cc1 -no-opaque-pointers -fopenmp \
// RUN: -triple x86_64-unknown-linux-gnu -fopenmp-version=51 \
// RUN: -include-pch %t -emit-llvm %s -o - | FileCheck %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// Local declaration of the allocator handle type named by the allocator()
// clauses in this test; no OpenMP header is included. The enumerator values
// 1..8 are the integers that show up as the allocator operand in the expected
// IR (for example omp_pteam_mem_alloc appears as `inttoptr (... 7 to i8*)`),
// while omp_null_allocator appears there as a null pointer.
typedef enum omp_allocator_handle_t {
  omp_null_allocator = 0,
  omp_default_mem_alloc = 1,
  omp_large_cap_mem_alloc = 2,
  omp_const_mem_alloc = 3,
  omp_high_bw_mem_alloc = 4,
  omp_low_lat_mem_alloc = 5,
  omp_cgroup_mem_alloc = 6,
  omp_pteam_mem_alloc = 7,
  omp_thread_mem_alloc = 8,
  // Enumerator equal to __UINTPTR_MAX__. With it present, the expected IR
  // stores a variable of this enum type as i32 on i386 and as i64 on x86_64.
  KMP_ALLOCATOR_MAX_HANDLE = __UINTPTR_MAX__
} omp_allocator_handle_t;

// Exercises `#pragma omp allocate` with the `align` clause on block-scope
// variables: align alone, align combined with allocator in either clause
// order, a run-time allocator value, and several variables per directive.
// The expected IR at the end of the file shows every listed variable obtained
// from __kmpc_aligned_alloc and released with __kmpc_free at the end of its
// scope, in reverse order of allocation.
int main() {
  // Each array has a different element count, so every allocation call in the
  // expected IR carries a different size argument.
  int foo0[5];
  int foo1[10];
  int foo2[20];
  int foo3[30];
  int foo4[40];
  int foo5[50];
  int foo6[60];
  int foo7[70];
  int foo8[80];
  // Allocator held in a variable instead of named directly; the expected IR
  // loads it from memory for each of bar1, bar2 and bar3 below.
  omp_allocator_handle_t MyAlloc = omp_large_cap_mem_alloc;

  // align without allocator: the expected IR passes a null allocator.
  // align(1) and align(2) are smaller than the alignment of int; the expected
  // IR passes 4 as the alignment argument for foo0 and foo1.
#pragma omp allocate(foo0) align(1)
  // allocator written before align; the remaining directives use the opposite
  // clause order.
#pragma omp allocate(foo1) allocator(omp_pteam_mem_alloc) align(2)
#pragma omp allocate(foo2) align(4) allocator(omp_cgroup_mem_alloc)
#pragma omp allocate(foo3) align(8) allocator(omp_low_lat_mem_alloc)
#pragma omp allocate(foo4) align(16) allocator(omp_high_bw_mem_alloc)
#pragma omp allocate(foo5) align(32) allocator(omp_const_mem_alloc)
#pragma omp allocate(foo6) align(64) allocator(omp_large_cap_mem_alloc)
#pragma omp allocate(foo7) align(32) allocator(omp_thread_mem_alloc)
  // Explicit omp_null_allocator: passed as a null pointer in the expected IR,
  // the same as when the allocator clause is omitted.
#pragma omp allocate(foo8) align(16) allocator(omp_null_allocator)
  {
    // Nested scope: the expected IR frees foo10 and then foo9 before the
    // allocations of the next block begin.
    double foo9[80];
    double foo10[90];
#pragma omp allocate(foo9) align(8) allocator(omp_thread_mem_alloc)
#pragma omp allocate(foo10) align(128)
  }
  {
    int bar1;
    int bar2[10];
    int bar3[20];
    int *bar4;
    float bar5;
    double bar6[30];
    // Several variables in one directive: the expected IR contains one
    // allocation call per listed variable. With allocator(MyAlloc) the handle
    // is reloaded from MyAlloc before each allocation and each free.
#pragma omp allocate(bar1, bar2, bar3) align(2) allocator(MyAlloc)
    // Mixed element types (pointer, float, array of double) sharing a single
    // align(16) and no allocator clause.
#pragma omp allocate(bar4, bar5, bar6) align(16)
  }
}

// Verify the align clause when its argument is a non-type template parameter,
// i.e. the alignment value is only known once the template is instantiated.
//
// Template parameters:
//   T     - element type of the local array and the return type.
//   size  - number of elements in the local array.
//   align - value passed to the align clause. In `align(align)` the outer
//           `align` is the clause name and the inner one is this parameter.
//
// Returns foo[0]. The array is never written before this read, so the value
// is indeterminate; the expected IR still contains the load of element 0.
template <typename T, unsigned size, unsigned align>
T run() {
  T foo[size];
#pragma omp allocate(foo) align(align) allocator(omp_cgroup_mem_alloc)
  return foo[0];
}

// Instantiates run<double, 1000, 16> so that code is generated for the
// template above. For this instantiation the expected IR requests 8000 bytes
// with alignment 16 from allocator handle 6 (omp_cgroup_mem_alloc).
// The result of the call is stored and otherwise unused; always returns 0.
int template_test() {
  double result;
  result = run<double, 1000, 16>();
  return 0;
}
#endif
// CHECK-32-LABEL: define {{[^@]+}}@main
// CHECK-32-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[MYALLOC:%.*]] = alloca i32, align 4
// CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-32-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 20, i8* null)
// CHECK-32-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
// CHECK-32-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* inttoptr (i32 7 to i8*))
// CHECK-32-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
// CHECK-32-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* inttoptr (i32 6 to i8*))
// CHECK-32-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
// CHECK-32-NEXT:    [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 120, i8* inttoptr (i32 5 to i8*))
// CHECK-32-NEXT:    [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
// CHECK-32-NEXT:    [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 160, i8* inttoptr (i32 4 to i8*))
// CHECK-32-NEXT:    [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
// CHECK-32-NEXT:    [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 200, i8* inttoptr (i32 3 to i8*))
// CHECK-32-NEXT:    [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
// CHECK-32-NEXT:    [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 64, i32 240, i8* inttoptr (i32 2 to i8*))
// CHECK-32-NEXT:    [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
// CHECK-32-NEXT:    [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 32, i32 280, i8* inttoptr (i32 8 to i8*))
// CHECK-32-NEXT:    [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
// CHECK-32-NEXT:    [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 320, i8* null)
// CHECK-32-NEXT:    [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
// CHECK-32-NEXT:    store i32 2, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 8, i32 640, i8* inttoptr (i32 8 to i8*))
// CHECK-32-NEXT:    [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
// CHECK-32-NEXT:    [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 128, i32 720, i8* null)
// CHECK-32-NEXT:    [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
// CHECK-32-NEXT:    [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
// CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 8 to i8*))
// CHECK-32-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV:%.*]] = inttoptr i32 [[TMP3]] to i8*
// CHECK-32-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, i8* [[CONV]])
// CHECK-32-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
// CHECK-32-NEXT:    [[TMP4:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to i8*
// CHECK-32-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, i8* [[CONV1]])
// CHECK-32-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
// CHECK-32-NEXT:    [[TMP5:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to i8*
// CHECK-32-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, i8* [[CONV2]])
// CHECK-32-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
// CHECK-32-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
// CHECK-32-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
// CHECK-32-NEXT:    [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, i8* null)
// CHECK-32-NEXT:    [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
// CHECK-32-NEXT:    [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 240, i8* null)
// CHECK-32-NEXT:    [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
// CHECK-32-NEXT:    [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
// CHECK-32-NEXT:    [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
// CHECK-32-NEXT:    [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
// CHECK-32-NEXT:    [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
// CHECK-32-NEXT:    [[TMP10:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV3:%.*]] = inttoptr i32 [[TMP10]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
// CHECK-32-NEXT:    [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
// CHECK-32-NEXT:    [[TMP12:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV4:%.*]] = inttoptr i32 [[TMP12]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
// CHECK-32-NEXT:    [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
// CHECK-32-NEXT:    [[TMP14:%.*]] = load i32, i32* [[MYALLOC]], align 4
// CHECK-32-NEXT:    [[CONV5:%.*]] = inttoptr i32 [[TMP14]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
// CHECK-32-NEXT:    [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
// CHECK-32-NEXT:    [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i32 8 to i8*))
// CHECK-32-NEXT:    [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i32 2 to i8*))
// CHECK-32-NEXT:    [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i32 3 to i8*))
// CHECK-32-NEXT:    [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i32 4 to i8*))
// CHECK-32-NEXT:    [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i32 5 to i8*))
// CHECK-32-NEXT:    [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i32 6 to i8*))
// CHECK-32-NEXT:    [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i32 7 to i8*))
// CHECK-32-NEXT:    [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
// CHECK-32-NEXT:    ret i32 0
//
//
// CHECK-32-LABEL: define {{[^@]+}}@_Z13template_testv
// CHECK-32-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[RESULT:%.*]] = alloca double, align 8
// CHECK-32-NEXT:    [[CALL:%.*]] = call noundef double @_Z3runIdLj1000ELj16EET_v()
// CHECK-32-NEXT:    store double [[CALL]], double* [[RESULT]], align 8
// CHECK-32-NEXT:    ret i32 0
//
//
// CHECK-32-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
// CHECK-32-SAME: () #[[ATTR2]] comdat {
// CHECK-32-NEXT:  entry:
// CHECK-32-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK-32-NEXT:    [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 8000, i8* inttoptr (i32 6 to i8*))
// CHECK-32-NEXT:    [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
// CHECK-32-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i32 0, i32 0
// CHECK-32-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 8
// CHECK-32-NEXT:    [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
// CHECK-32-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i32 6 to i8*))
// CHECK-32-NEXT:    ret double [[TMP1]]
//
//
// CHECK-LABEL: define {{[^@]+}}@main
// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[MYALLOC:%.*]] = alloca i64, align 8
// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
// CHECK-NEXT:    [[DOTFOO0__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 32, i8* null)
// CHECK-NEXT:    [[DOTFOO0__ADDR:%.*]] = bitcast i8* [[DOTFOO0__VOID_ADDR]] to [5 x i32]*
// CHECK-NEXT:    [[DOTFOO1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* inttoptr (i64 7 to i8*))
// CHECK-NEXT:    [[DOTFOO1__ADDR:%.*]] = bitcast i8* [[DOTFOO1__VOID_ADDR]] to [10 x i32]*
// CHECK-NEXT:    [[DOTFOO2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* inttoptr (i64 6 to i8*))
// CHECK-NEXT:    [[DOTFOO2__ADDR:%.*]] = bitcast i8* [[DOTFOO2__VOID_ADDR]] to [20 x i32]*
// CHECK-NEXT:    [[DOTFOO3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 128, i8* inttoptr (i64 5 to i8*))
// CHECK-NEXT:    [[DOTFOO3__ADDR:%.*]] = bitcast i8* [[DOTFOO3__VOID_ADDR]] to [30 x i32]*
// CHECK-NEXT:    [[DOTFOO4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 160, i8* inttoptr (i64 4 to i8*))
// CHECK-NEXT:    [[DOTFOO4__ADDR:%.*]] = bitcast i8* [[DOTFOO4__VOID_ADDR]] to [40 x i32]*
// CHECK-NEXT:    [[DOTFOO5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 208, i8* inttoptr (i64 3 to i8*))
// CHECK-NEXT:    [[DOTFOO5__ADDR:%.*]] = bitcast i8* [[DOTFOO5__VOID_ADDR]] to [50 x i32]*
// CHECK-NEXT:    [[DOTFOO6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 64, i64 240, i8* inttoptr (i64 2 to i8*))
// CHECK-NEXT:    [[DOTFOO6__ADDR:%.*]] = bitcast i8* [[DOTFOO6__VOID_ADDR]] to [60 x i32]*
// CHECK-NEXT:    [[DOTFOO7__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 32, i64 288, i8* inttoptr (i64 8 to i8*))
// CHECK-NEXT:    [[DOTFOO7__ADDR:%.*]] = bitcast i8* [[DOTFOO7__VOID_ADDR]] to [70 x i32]*
// CHECK-NEXT:    [[DOTFOO8__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 320, i8* null)
// CHECK-NEXT:    [[DOTFOO8__ADDR:%.*]] = bitcast i8* [[DOTFOO8__VOID_ADDR]] to [80 x i32]*
// CHECK-NEXT:    store i64 2, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[DOTFOO9__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 8, i64 640, i8* inttoptr (i64 8 to i8*))
// CHECK-NEXT:    [[DOTFOO9__ADDR:%.*]] = bitcast i8* [[DOTFOO9__VOID_ADDR]] to [80 x double]*
// CHECK-NEXT:    [[DOTFOO10__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 128, i64 720, i8* null)
// CHECK-NEXT:    [[DOTFOO10__ADDR:%.*]] = bitcast i8* [[DOTFOO10__VOID_ADDR]] to [90 x double]*
// CHECK-NEXT:    [[TMP1:%.*]] = bitcast [90 x double]* [[DOTFOO10__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP1]], i8* null)
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast [80 x double]* [[DOTFOO9__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 8 to i8*))
// CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV:%.*]] = inttoptr i64 [[TMP3]] to i8*
// CHECK-NEXT:    [[DOTBAR1__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, i8* [[CONV]])
// CHECK-NEXT:    [[DOTBAR1__ADDR:%.*]] = bitcast i8* [[DOTBAR1__VOID_ADDR]] to i32*
// CHECK-NEXT:    [[TMP4:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to i8*
// CHECK-NEXT:    [[DOTBAR2__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, i8* [[CONV1]])
// CHECK-NEXT:    [[DOTBAR2__ADDR:%.*]] = bitcast i8* [[DOTBAR2__VOID_ADDR]] to [10 x i32]*
// CHECK-NEXT:    [[TMP5:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to i8*
// CHECK-NEXT:    [[DOTBAR3__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, i8* [[CONV2]])
// CHECK-NEXT:    [[DOTBAR3__ADDR:%.*]] = bitcast i8* [[DOTBAR3__VOID_ADDR]] to [20 x i32]*
// CHECK-NEXT:    [[DOTBAR4__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, i8* null)
// CHECK-NEXT:    [[DOTBAR4__ADDR:%.*]] = bitcast i8* [[DOTBAR4__VOID_ADDR]] to i32**
// CHECK-NEXT:    [[DOTBAR5__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 4, i8* null)
// CHECK-NEXT:    [[DOTBAR5__ADDR:%.*]] = bitcast i8* [[DOTBAR5__VOID_ADDR]] to float*
// CHECK-NEXT:    [[DOTBAR6__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 240, i8* null)
// CHECK-NEXT:    [[DOTBAR6__ADDR:%.*]] = bitcast i8* [[DOTBAR6__VOID_ADDR]] to [30 x double]*
// CHECK-NEXT:    [[TMP6:%.*]] = bitcast [30 x double]* [[DOTBAR6__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP6]], i8* null)
// CHECK-NEXT:    [[TMP7:%.*]] = bitcast float* [[DOTBAR5__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP7]], i8* null)
// CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32** [[DOTBAR4__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP8]], i8* null)
// CHECK-NEXT:    [[TMP9:%.*]] = bitcast [20 x i32]* [[DOTBAR3__ADDR]] to i8*
// CHECK-NEXT:    [[TMP10:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV3:%.*]] = inttoptr i64 [[TMP10]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP9]], i8* [[CONV3]])
// CHECK-NEXT:    [[TMP11:%.*]] = bitcast [10 x i32]* [[DOTBAR2__ADDR]] to i8*
// CHECK-NEXT:    [[TMP12:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV4:%.*]] = inttoptr i64 [[TMP12]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP11]], i8* [[CONV4]])
// CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[DOTBAR1__ADDR]] to i8*
// CHECK-NEXT:    [[TMP14:%.*]] = load i64, i64* [[MYALLOC]], align 8
// CHECK-NEXT:    [[CONV5:%.*]] = inttoptr i64 [[TMP14]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP13]], i8* [[CONV5]])
// CHECK-NEXT:    [[TMP15:%.*]] = bitcast [80 x i32]* [[DOTFOO8__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP15]], i8* null)
// CHECK-NEXT:    [[TMP16:%.*]] = bitcast [70 x i32]* [[DOTFOO7__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP16]], i8* inttoptr (i64 8 to i8*))
// CHECK-NEXT:    [[TMP17:%.*]] = bitcast [60 x i32]* [[DOTFOO6__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP17]], i8* inttoptr (i64 2 to i8*))
// CHECK-NEXT:    [[TMP18:%.*]] = bitcast [50 x i32]* [[DOTFOO5__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP18]], i8* inttoptr (i64 3 to i8*))
// CHECK-NEXT:    [[TMP19:%.*]] = bitcast [40 x i32]* [[DOTFOO4__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP19]], i8* inttoptr (i64 4 to i8*))
// CHECK-NEXT:    [[TMP20:%.*]] = bitcast [30 x i32]* [[DOTFOO3__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP20]], i8* inttoptr (i64 5 to i8*))
// CHECK-NEXT:    [[TMP21:%.*]] = bitcast [20 x i32]* [[DOTFOO2__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP21]], i8* inttoptr (i64 6 to i8*))
// CHECK-NEXT:    [[TMP22:%.*]] = bitcast [10 x i32]* [[DOTFOO1__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP22]], i8* inttoptr (i64 7 to i8*))
// CHECK-NEXT:    [[TMP23:%.*]] = bitcast [5 x i32]* [[DOTFOO0__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP23]], i8* null)
// CHECK-NEXT:    ret i32 0
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z13template_testv
// CHECK-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[RESULT:%.*]] = alloca double, align 8
// CHECK-NEXT:    [[CALL:%.*]] = call noundef double @_Z3runIdLj1000ELj16EET_v()
// CHECK-NEXT:    store double [[CALL]], double* [[RESULT]], align 8
// CHECK-NEXT:    ret i32 0
//
//
// CHECK-LABEL: define {{[^@]+}}@_Z3runIdLj1000ELj16EET_v
// CHECK-SAME: () #[[ATTR2]] comdat {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK-NEXT:    [[DOTFOO__VOID_ADDR:%.*]] = call i8* @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8000, i8* inttoptr (i64 6 to i8*))
// CHECK-NEXT:    [[DOTFOO__ADDR:%.*]] = bitcast i8* [[DOTFOO__VOID_ADDR]] to [1000 x double]*
// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x double], [1000 x double]* [[DOTFOO__ADDR]], i64 0, i64 0
// CHECK-NEXT:    [[TMP1:%.*]] = load double, double* [[ARRAYIDX]], align 16
// CHECK-NEXT:    [[TMP2:%.*]] = bitcast [1000 x double]* [[DOTFOO__ADDR]] to i8*
// CHECK-NEXT:    call void @__kmpc_free(i32 [[TMP0]], i8* [[TMP2]], i8* inttoptr (i64 6 to i8*))
// CHECK-NEXT:    ret double [[TMP1]]
//