// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -DNO_HEADER -cl-ext=-cl_intel_subgroups
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_intel_subgroups
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -DNO_HEADER
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++2021 -fdeclare-opencl-builtins -finclude-default-header
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_khr_fp64 -DNO_FP64
// RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header -DNO_ATOMSCOPE
// Test the -fdeclare-opencl-builtins option. This is not a completeness
// test, so it should not test for all builtins defined by OpenCL. Instead
// this test should cover different functional aspects of the TableGen builtin
// function machinery.
// Extension setup shared by every RUN configuration of this test.
// cl_khr_fp16 is enabled unconditionally.
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// cl_khr_fp64 is enabled explicitly only when the language version is below
// OpenCL C 1.2.
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#endif
// cl_khr_3d_image_writes is enabled explicitly for OpenCL C 1.2 and earlier.
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
#endif
// First, test that Clang gracefully handles missing types.
// Only compiled for the NO_HEADER runs: barrier() is called before any of the
// typedefs that the base header would normally supply have been declared, and
// the diagnostics listed after the call describe what Clang may report.
#ifdef NO_HEADER
void test_without_header(void) {
  barrier(0);
  // expected-note@-1 0+{{candidate function not viable}}
  // expected-error@-2 0+{{argument type 'void' is incomplete}}
  // expected-error@-3 0+{{no matching function for call to 'barrier'}}
  // expected-error@* {{typedef type cl_mem_fence_flags not found; include the base header with -finclude-default-header}}
}
#endif
// Provide typedefs when invoking clang without -finclude-default-header.
// Each declaration sits on its own line so that the preprocessor directives
// that follow start at the beginning of a line and are recognised as such.
#ifdef NO_HEADER
// Scalar aliases.
typedef unsigned char uchar;
typedef unsigned int uint;
typedef unsigned long ulong;
typedef unsigned short ushort;
typedef __SIZE_TYPE__ size_t;
typedef __PTRDIFF_TYPE__ ptrdiff_t;
typedef __INTPTR_TYPE__ intptr_t;
typedef __UINTPTR_TYPE__ uintptr_t;

// Vector types used by the tests in this file.
typedef char char2 __attribute__((ext_vector_type(2)));
typedef char char4 __attribute__((ext_vector_type(4)));
typedef uchar uchar4 __attribute__((ext_vector_type(4)));
typedef uchar uchar16 __attribute__((ext_vector_type(16)));
typedef float float4 __attribute__((ext_vector_type(4)));
typedef float float16 __attribute__((ext_vector_type(16)));
typedef half half4 __attribute__((ext_vector_type(4)));
typedef int int2 __attribute__((ext_vector_type(2)));
typedef int int4 __attribute__((ext_vector_type(4)));
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint4 __attribute__((ext_vector_type(4)));
typedef long long2 __attribute__((ext_vector_type(2)));
typedef long long8 __attribute__((ext_vector_type(8)));
typedef ulong ulong4 __attribute__((ext_vector_type(4)));
typedef short short16 __attribute__((ext_vector_type(16)));
typedef ushort ushort3 __attribute__((ext_vector_type(3)));

typedef int clk_profiling_info;
#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1

typedef uint cl_mem_fence_flags;
#define CLK_GLOBAL_MEM_FENCE 0x02

// Minimal stand-in so that ndrange_t can be named by test_typedef_args.
typedef struct {int a;} ndrange_t;

// Enable extensions that are enabled in opencl-c-base.h.
#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
#define __opencl_c_device_enqueue 1
#define __opencl_c_generic_address_space 1
#define cl_khr_subgroup_extended_types 1
#define cl_khr_subgroup_ballot 1
#define cl_khr_subgroup_non_uniform_arithmetic 1
#define cl_khr_subgroup_clustered_reduce 1
#define __opencl_c_read_write_images 1
#define __opencl_subgroup_builtins 1
#endif
#if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200)
#define __opencl_c_atomic_order_seq_cst 1
#define __opencl_c_atomic_scope_device 1
#endif
#define __opencl_c_named_address_space_builtins 1
#endif
// Calls builtins that take pointer arguments: prefetch() on a global const
// int4 pointer, and atom_add()/atom_cmpxchg() on global_p cast to volatile
// __global int and unsigned int pointers.
kernel void test_pointers(volatile global void *global_p, global const int4 *a) {
  int i;
  unsigned int ui;

  prefetch(a, 2);

  atom_add((volatile __global int *)global_p, i);
  atom_cmpxchg((volatile __global unsigned int *)global_p, ui, ui);
}
// Only test enum arguments when the base header is included, because we need
// the enum declarations.
#if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
// Passes memory_order_* and memory_scope_* enumerators to builtins.
kernel void test_enum_args(volatile global atomic_int *global_p, global int *expected) {
  int desired;
  atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acq_rel, memory_scope_device);
  atomic_compare_exchange_strong_explicit(global_p, expected, desired,
                                          memory_order_acq_rel,
                                          memory_order_relaxed,
                                          memory_scope_work_group);
}
#endif
#if defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200
// Calls builtins whose parameters or results use typedef'd types:
// clk_event_t, atomic_flag, size_t and ndrange_t.
void test_typedef_args(clk_event_t evt, volatile atomic_flag *flg, global unsigned long long *values) {
  capture_event_profiling_info(evt, CLK_PROFILING_COMMAND_EXEC_TIME, values);

  atomic_flag_clear(flg);
  bool result = atomic_flag_test_and_set(flg);

  size_t ws[2] = {2, 8};
  ndrange_t r = ndrange_2D(ws);
}
// Check that atomic_fetch_ functions can be called with (u)intptr_t arguments,
// despite OpenCLBuiltins.td not providing explicit overloads for those types.
void test_atomic_fetch(volatile __generic atomic_int *a_int,
                       volatile __generic atomic_intptr_t *a_intptr,
                       volatile __generic atomic_uintptr_t *a_uintptr) {
  int i;
  intptr_t ip;
  uintptr_t uip;
  ptrdiff_t ptrdiff;

  // The add variants on the pointer-sized atomics take a ptrdiff_t operand.
  i = atomic_fetch_add(a_int, i);
  ip = atomic_fetch_add(a_intptr, ptrdiff);
  uip = atomic_fetch_add(a_uintptr, ptrdiff);

  // The or variants take an operand of the matching (u)intptr_t type.
  ip = atomic_fetch_or(a_intptr, ip);
  uip = atomic_fetch_or(a_uintptr, uip);
}
#endif
#if !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200
// Check added atomic_fetch_ functions by cl_ext_float_atomics
// extension can be called
// Each of min, max_explicit and add_explicit is called once for float and
// once for double, on generic, local and global pointers respectively.
void test_atomic_fetch_with_address_space(volatile __generic atomic_float *a_float,
                                          volatile __generic atomic_double *a_double,
                                          volatile __local atomic_float *a_float_local,
                                          volatile __local atomic_double *a_double_local,
                                          volatile __global atomic_float *a_float_global,
                                          volatile __global atomic_double *a_double_global) {
  float f1, resf1;
  double d1, resd1;
  resf1 = atomic_fetch_min(a_float, f1);
  resf1 = atomic_fetch_max_explicit(a_float_local, f1, memory_order_seq_cst);
  resf1 = atomic_fetch_add_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group);

  resd1 = atomic_fetch_min(a_double, d1);
  resd1 = atomic_fetch_max_explicit(a_double_local, d1, memory_order_seq_cst);
  resd1 = atomic_fetch_add_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group);
}
#endif // !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200
#if !defined(NO_HEADER) && __OPENCL_C_VERSION__ == 200 && defined(__opencl_c_generic_address_space)
// Test that overloads that use atomic_double are not available when the fp64
// extension is disabled.  Test this by counting the number of notes about
// candidate functions.
// The call must stay on its own line: the relative offsets in the directives
// below are counted from their own line back to the atomic_init() call.
void test_atomic_double_reporting(volatile __generic atomic_int *a) {
  atomic_init(a, a);
  // expected-error@-1{{no matching function for call to 'atomic_init'}}
#if defined(NO_FP64)
  // Expecting 5 candidates: int, uint, long, ulong, float
  // expected-note@-4 5 {{candidate function not viable: no known conversion}}
#else
  // Expecting 6 candidates: int, uint, long, ulong, float, double
  // expected-note@-7 6 {{candidate function not viable: no known conversion}}
#endif
}
#endif
#if defined(NO_ATOMSCOPE) && __OPENCL_C_VERSION__ >= 300
// Disable the feature by undefining the feature macro.
#undef __opencl_c_atomic_scope_device

// Test that only the overload with explicit order and scope arguments is
// available when the __opencl_c_atomic_scope_device feature is disabled.
// Each diagnostic directive sits on the line directly after the call it
// refers to.
void test_atomics_without_scope_device(volatile __generic atomic_int *a_int) {
  int d;

  atomic_exchange(a_int, d);
  // expected-error@-1{{use of undeclared identifier 'atomic_exchange'}}

  atomic_exchange_explicit(a_int, d, memory_order_seq_cst);
  // expected-error@-1{{no matching function for call to 'atomic_exchange_explicit'}}

  atomic_exchange_explicit(a_int, d, memory_order_seq_cst, memory_scope_work_group);
}
#endif
// Test old atomic overloaded with generic address space in C++ for OpenCL.
// When __cplusplus is not defined the call is expected to be rejected, with
// four candidate notes.
#if __OPENCL_C_VERSION__ >= 200
void test_legacy_atomics_cpp(__generic volatile unsigned int *a) {
  atomic_add(a, 1);
#if !defined(__cplusplus)
  // expected-error@-2{{no matching function for call to 'atomic_add'}}
  // expected-note@-3 4 {{candidate function not viable}}
#endif
}
#endif
// Calls convert_* builtins on scalar and vector types.  With NO_FP64 the
// double conversion is expected to be undeclared; otherwise a
// double-to-float conversion is called.
kernel void basic_conversion(void) {
  float f;
  char2 c2;
  long2 l2;
  float4 f4;
  int4 i4;

#ifdef NO_FP64
  (void)convert_double_rtp(f);
  // expected-error@-1{{use of undeclared identifier 'convert_double_rtp'}}
#else
  double d;
  f = convert_float(d);
#endif
  l2 = convert_long2_rtz(c2);
  i4 = convert_int4_sat(f4);
}
// Negative test: convert_float_sat is expected to be undeclared.  When
// __OPENCL_CPP_VERSION__ is defined the diagnostic additionally suggests
// convert_float and emits a note for it.
kernel void basic_conversion_neg(void) {
  int i;
  float f;

  f = convert_float_sat(i);
#if !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{use of undeclared identifier 'convert_float_sat'}}
#else
  // expected-error@-4{{use of undeclared identifier 'convert_float_sat'; did you mean 'convert_float'?}}
  // expected-note@-5{{'convert_float' declared here}}
#endif
}
// Calls max() and abs() on char and char4 operands, including the mixed
// vector/scalar form max(char4, char), whose result is returned.
char4 test_int(char c, char4 c4) {
  char m = max(c, c);
  char4 m4 = max(c4, c4);
  uchar4 abs1 = abs(c4);
  uchar4 abs2 = abs(abs1);
  return max(c4, c);
}
// Calls shuffle() with a float4 source and a uint4 mask.
kernel void basic_vector_misc(float4 a) {
  float4 res;
  uint4 mask = (uint4)(1, 2, 3, 4);

  res = shuffle(a, mask);
}
// Calls read-only image builtins.  The two sampler-less reads are expected to
// be rejected for OpenCL C versions below 1.2; the two calls must stay on
// consecutive lines directly above the #if so the relative offsets resolve.
kernel void basic_image_readonly(read_only image2d_t image_read_only_image2d) {
  int2 i2;
  sampler_t sampler;
  half4 res;
  float4 resf;

  resf = read_imagef(image_read_only_image2d, i2);
  res = read_imageh(image_read_only_image2d, i2);
#if __OPENCL_C_VERSION__ < CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-3{{no matching function for call to 'read_imagef'}}
  // expected-error@-3{{no matching function for call to 'read_imageh'}}
#endif
  res = read_imageh(image_read_only_image2d, sampler, i2);

  int imgWidth = get_image_width(image_read_only_image2d);
}
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Calls write_imageh() and get_image_depth() on a read_write image3d_t.
kernel void basic_image_readwrite(read_write image3d_t image_read_write_image3d) {
  half4 h4;
  int4 i4;

  write_imageh(image_read_write_image3d, i4, h4);

  int imgDepth = get_image_depth(image_read_write_image3d);
}
#endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Calls write-only image builtins on a 1D buffer image and a 3D image.  The
// four-argument write_imagef() call on the 3D image is expected to be
// rejected for OpenCL C 1.2 and earlier.
kernel void basic_image_writeonly(write_only image1d_buffer_t image_write_only_image1d_buffer, write_only image3d_t image3dwo) {
  half4 h4;
  float4 f4;
  int i;

  write_imagef(image_write_only_image1d_buffer, i, f4);
  write_imageh(image_write_only_image1d_buffer, i, h4);

  int4 i4;
  write_imagef(image3dwo, i4, i, f4);
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{no matching function for call to 'write_imagef'}}
#endif
}
// Calls subgroup builtins.  get_sub_group_size() is expected to be undeclared
// for OpenCL C 1.2 and earlier.
kernel void basic_subgroup(global uint *out) {
  out[0] = get_sub_group_size();
#if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-2{{use of undeclared identifier 'get_sub_group_size'}}
#endif

// Only test when the base header is included, because we need the enum declarations.
#if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
  sub_group_barrier(CLK_GLOBAL_MEM_FENCE, memory_scope_device);
#endif
}
// Calls extended subgroup builtins.  All four are expected to be undeclared
// for OpenCL C versions below 2.0.  The four calls must stay on four
// consecutive lines directly above the #if: each directive below points five
// lines up to its own call.
kernel void extended_subgroup(global uint4 *out, global int *scalar, global char2 *c2) {
  out[0] = get_sub_group_eq_mask();
  scalar[0] = sub_group_non_uniform_scan_inclusive_or(3);
  scalar[1] = sub_group_clustered_reduce_logical_xor(2, 4);
  *c2 = sub_group_broadcast(*c2, 2);
#if __OPENCL_C_VERSION__ < CL_VERSION_2_0 && !defined(__OPENCL_CPP_VERSION__)
  // expected-error@-5{{use of undeclared identifier 'get_sub_group_eq_mask'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_non_uniform_scan_inclusive_or'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_clustered_reduce_logical_xor'}}
  // expected-error@-5{{use of undeclared identifier 'sub_group_broadcast'}}
#endif
}
// Calls vloadN() with source pointers in the constant, global, local and
// private address spaces, plus the generic address space for OpenCL C 2.0
// and later.
kernel void basic_vector_data(void) {
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  generic void *generic_p;
#endif
  constant void *constant_p;
  local void *local_p;
  global void *global_p;
  private void *private_p;
  size_t s;

  ulong4 ul4;
  short16 s16;
#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  ushort3 us3;
  uchar16 uc16;
#endif
  long8 l8;
  uint2 ui2;
  float16 f16;

  ul4 = vload4(s, (const __constant ulong *) constant_p);
  s16 = vload16(s, (const __constant short *) constant_p);

#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
  us3 = vload3(s, (const __generic ushort *) generic_p);
  uc16 = vload16(s, (const __generic uchar *) generic_p);
#endif

  l8 = vload8(s, (const __global long *) global_p);
  ui2 = vload2(s, (const __local uint *) local_p);
  f16 = vload16(s, (const __private float *) private_p);
}
// Calls work-item builtins.  get_enqueued_local_size() is expected to be
// undeclared for OpenCL C versions below 2.0.
kernel void basic_work_item(void) {
  uint ui;

  barrier(CLK_GLOBAL_MEM_FENCE);

  get_enqueued_local_size(ui);
#if !defined(__OPENCL_CPP_VERSION__) && __OPENCL_C_VERSION__ < CL_VERSION_2_0
  // expected-error@-2{{use of undeclared identifier 'get_enqueued_local_size'}}
#endif
}
#ifdef NO_FP64
// Counts the candidate notes for an isnan() call that cannot match.  The
// directives must directly follow the call, because their offsets are
// relative to their own lines.
void test_extension_types(char2 c2) {
  // We should see 6 candidates for float and half types, and none for double types.
  int i = isnan(c2);
  // expected-error@-1{{no matching function for call to 'isnan'}}
  // expected-note@-2 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'float}}
  // expected-note@-3 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'half}}
}
#endif