// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -verify -S -o - %s
// Enable the cl_khr_fp64 extension for this translation unit.
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Short aliases for the unsigned integer types used by the tests in this file.
typedef unsigned long ulong;
typedef unsigned int uint;
// To get all errors for feature checking we need to put them in one function,
// since Clang will stop codegen for the next function if it finds an error
// during codegen of the previous function.
// Calls two AMDGCN builtins back to back inside a single function body; the
// result of the second call is stored through `out`.
// NOTE(review): presumably neither builtin is available on the tahiti CPU
// selected by the RUN line, so both calls should be diagnosed -- confirm. The
// RUN line uses -verify, yet no diagnostic directive accompanies these calls.
void test_target_builtin(global int* out, int a)
{
  __builtin_amdgcn_s_memrealtime();
  *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, false);
}
// Passes the runtime parameter `x` to __builtin_amdgcn_s_sleep.
// NOTE(review): presumably the builtin demands a constant integer argument,
// making this a negative test -- confirm; no -verify directive is present.
void test_s_sleep(int x)
{
  __builtin_amdgcn_s_sleep(x);
}
// Passes the runtime parameter `x` to __builtin_amdgcn_s_waitcnt.
// NOTE(review): presumably the builtin demands a constant integer argument,
// making this a negative test -- confirm; no -verify directive is present.
void test_s_waitcnt(int x)
{
  __builtin_amdgcn_s_waitcnt(x);
}
// Calls __builtin_amdgcn_s_sendmsg with a runtime value (`in`) as the first
// argument and the literal 1 as the second.
// NOTE(review): presumably the first argument must be a constant integer, so
// this call should be diagnosed -- confirm; no -verify directive is present.
void test_s_sendmsg(int in)
{
  __builtin_amdgcn_s_sendmsg(in, 1);
}
// Calls __builtin_amdgcn_s_sendmsg with runtime values in both argument
// positions.
// NOTE(review): presumably at least the first argument must be a constant
// integer -- confirm; no -verify directive is present.
void test_s_sendmsg_var(int in1, int in2)
{
  __builtin_amdgcn_s_sendmsg(in1, in2);
}
// Calls __builtin_amdgcn_s_sendmsghalt with a runtime value (`in`) as the
// first argument and the literal 1 as the second.
// NOTE(review): presumably the first argument must be a constant integer, so
// this call should be diagnosed -- confirm; no -verify directive is present.
void test_s_sendmsghalt(int in)
{
  __builtin_amdgcn_s_sendmsghalt(in, 1);
}
// Calls __builtin_amdgcn_s_sendmsghalt with runtime values in both argument
// positions.
// NOTE(review): presumably at least the first argument must be a constant
// integer -- confirm; no -verify directive is present.
void test_s_sendmsghalt_var(int in1, int in2)
{
  __builtin_amdgcn_s_sendmsghalt(in1, in2);
}
// Passes the runtime parameter `x` to __builtin_amdgcn_s_incperflevel.
// NOTE(review): presumably the builtin demands a constant integer argument,
// making this a negative test -- confirm; no -verify directive is present.
void test_s_incperflevel(int x)
{
  __builtin_amdgcn_s_incperflevel(x);
}
// Passes the runtime parameter `x` to __builtin_amdgcn_s_decperflevel.
// NOTE(review): presumably the builtin demands a constant integer argument,
// making this a negative test -- confirm; no -verify directive is present.
void test_s_decperflevel(int x)
{
  __builtin_amdgcn_s_decperflevel(x);
}
// Calls __builtin_amdgcn_s_setprio twice: once with the runtime parameter `x`
// and once with the literal 65536.
// NOTE(review): presumably the first call checks the constant-argument
// requirement and the second checks an out-of-range constant -- confirm both;
// no -verify directives are present.
void test_s_setprio(int x)
{
  __builtin_amdgcn_s_setprio(x);
  __builtin_amdgcn_s_setprio(65536);
}
// Passes the runtime parameter `x` to __builtin_amdgcn_sched_barrier.
// NOTE(review): presumably the builtin demands a constant integer argument,
// making this a negative test -- confirm; no -verify directive is present.
void test_sched_barrier(int x)
{
  __builtin_amdgcn_sched_barrier(x);
}
// Stores the result of __builtin_amdgcn_sicmp(a, b, c) through `out`, with
// the runtime parameter `c` in the third argument position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_sicmp_i32(global ulong* out, int a, int b, uint c)
{
  *out = __builtin_amdgcn_sicmp(a, b, c);
}
// Stores the result of __builtin_amdgcn_uicmp(a, b, c) through `out`, with
// the runtime parameter `c` in the third argument position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_uicmp_i32(global ulong* out, uint a, uint b, uint c)
{
  *out = __builtin_amdgcn_uicmp(a, b, c);
}
// Stores the result of __builtin_amdgcn_sicmpl(a, b, c) through `out`, with
// the runtime parameter `c` in the third argument position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_sicmp_i64(global ulong* out, long a, long b, uint c)
{
  *out = __builtin_amdgcn_sicmpl(a, b, c);
}
// Stores the result of __builtin_amdgcn_uicmpl(a, b, c) through `out`, with
// the runtime parameter `c` in the third argument position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_uicmp_i64(global ulong* out, ulong a, ulong b, uint c)
{
  *out = __builtin_amdgcn_uicmpl(a, b, c);
}
// Stores the result of __builtin_amdgcn_fcmpf(a, b, c) through `out`, with
// the runtime parameter `c` in the third argument position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_fcmp_f32(global ulong* out, float a, float b, uint c)
{
  *out = __builtin_amdgcn_fcmpf(a, b, c);
}
// Stores the result of __builtin_amdgcn_fcmp(a, b, c) through `out`, with
// double operands and the runtime parameter `c` in the third argument
// position.
// NOTE(review): presumably the third argument must be a constant integer --
// confirm; no -verify directive is present.
void test_fcmp_f64(global ulong* out, double a, double b, uint c)
{
  *out = __builtin_amdgcn_fcmp(a, b, c);
}
// Stores the result of __builtin_amdgcn_ds_swizzle(a, b) through `out`, with
// the runtime parameter `b` in the second argument position.
// NOTE(review): presumably the second argument must be a constant integer --
// confirm; no -verify directive is present.
void test_ds_swizzle(global int* out, int a, int b)
{
  *out = __builtin_amdgcn_ds_swizzle(a, b);
}
// Stores the result of __builtin_amdgcn_s_getreg(a) through `out`, passing
// the runtime parameter `a` as the only argument.
// NOTE(review): presumably the argument must be a constant integer --
// confirm; no -verify directive is present.
void test_s_getreg(global int* out, int a)
{
  *out = __builtin_amdgcn_s_getreg(a);
}
// Calls __builtin_amdgcn_mov_dpp four times; each call places a runtime value
// (b, c, d, e in turn) in a different one of the four trailing argument
// positions while the others stay literal.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (no -verify directives are present) -- confirm against the intended test.
void test_mov_dpp2(global int* out, int a, int b, int c, int d, bool e)
{
  *out = __builtin_amdgcn_mov_dpp(a, b, 0, 0, false);
  *out = __builtin_amdgcn_mov_dpp(a, 0, c, 0, false);
  *out = __builtin_amdgcn_mov_dpp(a, 0, 0, d, false);
  *out = __builtin_amdgcn_mov_dpp(a, 0, 0, 0, e);
}
// Calls __builtin_amdgcn_update_dpp five times; each call places a runtime
// value (b, c, d, e, f in turn) in a different one of the five trailing
// argument positions while the others stay literal.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (no -verify directives are present) -- confirm against the intended test.
void test_update_dpp2(global int* out, int a, int b, int c, int d, int e, bool f)
{
  *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, false);
  *out = __builtin_amdgcn_update_dpp(a, 0, c, 0, 0, false);
  *out = __builtin_amdgcn_update_dpp(a, 0, 0, d, 0, false);
  *out = __builtin_amdgcn_update_dpp(a, 0, 0, 0, e, false);
  *out = __builtin_amdgcn_update_dpp(a, 0, 0, 0, 0, f);
}
// Calls __builtin_amdgcn_ds_faddf three times on the local pointer `out`,
// moving the runtime value `a` through each of the last three argument
// positions in turn; each result is written back through `out`.
// NOTE(review): presumably those three positions must be constants -- confirm;
// no -verify directives are present.
void test_ds_faddf(local float *out, float src, int a) {
  *out = __builtin_amdgcn_ds_faddf(out, src, a, 0, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, 0, a, false);
  *out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, a);
}
// Calls __builtin_amdgcn_ds_fminf three times on the local pointer `out`,
// moving the runtime value `a` through each of the last three argument
// positions in turn; each result is written back through `out`.
// NOTE(review): presumably those three positions must be constants -- confirm;
// no -verify directives are present.
void test_ds_fminf(local float *out, float src, int a) {
  *out = __builtin_amdgcn_ds_fminf(out, src, a, 0, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, a, false);
  *out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, a);
}
// Calls __builtin_amdgcn_ds_fmaxf three times on the local pointer `out`,
// moving the runtime value `a` through each of the last three argument
// positions in turn; each result is written back through `out`.
// NOTE(review): presumably those three positions must be constants -- confirm;
// no -verify directives are present.
void test_ds_fmaxf(local float *out, float src, int a) {
  *out = __builtin_amdgcn_ds_fmaxf(out, src, a, 0, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, a, false);
  *out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, a);
}
// Calls __builtin_amdgcn_fence with a series of unusual argument lists; the
// per-line comments describe what each call passes.
// NOTE(review): presumably every call here is meant to be rejected -- confirm;
// the RUN line uses -verify but no diagnostic directives are present.
void test_fence() {
  __builtin_amdgcn_fence(__ATOMIC_SEQ_CST + 1, "workgroup"); // ordering one above __ATOMIC_SEQ_CST
  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE - 1, "workgroup"); // ordering one below __ATOMIC_ACQUIRE
  __builtin_amdgcn_fence(4);                                 // single argument
  __builtin_amdgcn_fence(4, 4, 4);                           // three integer arguments
  __builtin_amdgcn_fence(3.14, "");                          // floating-point ordering, empty scope string
  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, 5);               // integer where the other calls pass a string
  const char ptr[] = "workgroup";
  __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, ptr);             // scope passed via a char array variable
}
// Calls __builtin_amdgcn_s_setreg twice: with a runtime first argument and a
// literal second argument, then with runtime values in both positions.
// NOTE(review): presumably the first argument must be a constant integer --
// confirm; no -verify directives are present.
void test_s_setreg(int x, int y) {
  __builtin_amdgcn_s_setreg(x, 0);
  __builtin_amdgcn_s_setreg(x, y);
}
// Calls __builtin_amdgcn_atomic_inc32 with a series of argument lists on a
// local `uint`, then once on a local signed `int`; the per-line comments
// describe what each call passes.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (the RUN line uses -verify but no directives are present) -- confirm.
void test_atomic_inc32() {
  uint val = 17;
  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // ordering one above __ATOMIC_SEQ_CST
  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_RELAXED, "workgroup");
  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_CONSUME, "workgroup");
  val = __builtin_amdgcn_atomic_inc32(4);                                // single argument
  val = __builtin_amdgcn_atomic_inc32(&val, val, 4, 4, 4, 4);            // six arguments
  val = __builtin_amdgcn_atomic_inc32(&val, val, 3.14, "");              // floating-point ordering, empty scope string
  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, 5);   // integer where the other calls pass a string
  const char ptr[] = "workgroup";
  val = __builtin_amdgcn_atomic_inc32(&val, val, __ATOMIC_ACQUIRE, ptr); // scope passed via a char array variable
  int signedVal = 15;
  signedVal = __builtin_amdgcn_atomic_inc32(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // signed operands
}
// Calls __builtin_amdgcn_atomic_inc64 with a series of argument lists on a
// local __UINT64_TYPE__, then once on a local __INT64_TYPE__; the per-line
// comments describe what each call passes.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (the RUN line uses -verify but no directives are present) -- confirm.
void test_atomic_inc64() {
  __UINT64_TYPE__ val = 17;
  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // ordering one above __ATOMIC_SEQ_CST
  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_RELAXED, "workgroup");
  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_CONSUME, "workgroup");
  val = __builtin_amdgcn_atomic_inc64(4);                                // single argument
  val = __builtin_amdgcn_atomic_inc64(&val, val, 4, 4, 4, 4);            // six arguments
  val = __builtin_amdgcn_atomic_inc64(&val, val, 3.14, "");              // floating-point ordering, empty scope string
  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, 5);   // integer where the other calls pass a string
  const char ptr[] = "workgroup";
  val = __builtin_amdgcn_atomic_inc64(&val, val, __ATOMIC_ACQUIRE, ptr); // scope passed via a char array variable
  __INT64_TYPE__ signedVal = 15;
  signedVal = __builtin_amdgcn_atomic_inc64(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // signed operands
}
// Calls __builtin_amdgcn_atomic_dec32 with a series of argument lists on a
// local `uint`, then once on a local signed `int`; the per-line comments
// describe what each call passes.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (the RUN line uses -verify but no directives are present) -- confirm.
void test_atomic_dec32() {
  uint val = 17;
  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // ordering one above __ATOMIC_SEQ_CST
  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_RELAXED, "workgroup");
  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_CONSUME, "workgroup");
  val = __builtin_amdgcn_atomic_dec32(4);                                // single argument
  val = __builtin_amdgcn_atomic_dec32(&val, val, 4, 4, 4, 4);            // six arguments
  val = __builtin_amdgcn_atomic_dec32(&val, val, 3.14, "");              // floating-point ordering, empty scope string
  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, 5);   // integer where the other calls pass a string
  const char ptr[] = "workgroup";
  val = __builtin_amdgcn_atomic_dec32(&val, val, __ATOMIC_ACQUIRE, ptr); // scope passed via a char array variable
  int signedVal = 15;
  signedVal = __builtin_amdgcn_atomic_dec32(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // signed operands
}
// Calls __builtin_amdgcn_atomic_dec64 with a series of argument lists on a
// local __UINT64_TYPE__, then once on a local __INT64_TYPE__; the per-line
// comments describe what each call passes.
// NOTE(review): which of these calls should be diagnosed is not visible here
// (the RUN line uses -verify but no directives are present) -- confirm.
void test_atomic_dec64() {
  __UINT64_TYPE__ val = 17;
  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_SEQ_CST + 1, "workgroup"); // ordering one above __ATOMIC_SEQ_CST
  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_RELAXED, "workgroup");
  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_CONSUME, "workgroup");
  val = __builtin_amdgcn_atomic_dec64(4);                                // single argument
  val = __builtin_amdgcn_atomic_dec64(&val, val, 4, 4, 4, 4);            // six arguments
  val = __builtin_amdgcn_atomic_dec64(&val, val, 3.14, "");              // floating-point ordering, empty scope string
  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, 5);   // integer where the other calls pass a string
  const char ptr[] = "workgroup";
  val = __builtin_amdgcn_atomic_dec64(&val, val, __ATOMIC_ACQUIRE, ptr); // scope passed via a char array variable
  __INT64_TYPE__ signedVal = 15;
  signedVal = __builtin_amdgcn_atomic_dec64(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // signed operands
}