RZOODNAYNAGAXHNLGMDDPYVIBNFJBDYMWO6I3C3J5LEZCMAQUZRAC
GKKJ75HX2ERLVBZVE2CUB6T3J2SUT7R3UKEKTEYNOG43ZKX6X5MQC
NGD6QWRDHWBMQXL2YXZVF3BFQALSII3LH6BMIOFL7VFMGPLHCZEQC
ULVYXPG2IOJ724MTFVKJLQMPMVL5VAJUOFH7BIXPXVJ2SUGOFRFAC
BJDGFYBMECTJG7BHLNHLSCUCBVYHAY6OGY37FIJP6JDGNDXQNQVAC
WJFK2JGGVKQN4LVHZNB4SOKH2GAE5BODBU2D6Y4CO76EIHSW6EVAC
GSGI6HIWST43XFEORCMKH6VPEGXHEZG5EK4JUUNWP3XFYUKGNA3AC
E3MBKFT4GEFDAGZQQW4OROY5F6FWC46G6MRH54GDYTGO7O5YSRIAC
B2MBQPT3B3N2EVCTHX34PKGIQSLUU622HEEVZNHQFL2PAJFFX3SQC
REQUIRES NSIMD Vectors
#include <loop.hxx>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <nsimd/nsimd-all.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
extern "C" void HydroToyCarpetX_Evolve(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Evolve;
DECLARE_CCTK_PARAMETERS;
const CCTK_REAL dt = CCTK_DELTA_TIME;
const CCTK_REAL dx = CCTK_DELTA_SPACE(0);
const CCTK_REAL dy = CCTK_DELTA_SPACE(1);
const CCTK_REAL dz = CCTK_DELTA_SPACE(2);
const CCTK_REAL dt_dx = dt / dx;
const CCTK_REAL dt_dy = dt / dy;
const CCTK_REAL dt_dz = dt / dz;
const Loop::vect<int, dim> di{{1, 0, 0}};
const Loop::vect<int, dim> dj{{0, 1, 0}};
const Loop::vect<int, dim> dk{{0, 0, 1}};
const Loop::vect<int, dim> lsh{{cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]}};
const Loop::vect<int, dim> nghostzones{
{cctk_nghostzones[0], cctk_nghostzones[1], cctk_nghostzones[2]}};
array<CCTK_INT, dim> tmin_, tmax_;
GetTileExtent(cctkGH, tmin_.data(), tmax_.data());
const Loop::vect<int, dim> tmin(tmin_);
const Loop::vect<int, dim> tmax(tmax_);
const Loop::vect<int, dim> imin = max(tmin, nghostzones);
const Loop::vect<int, dim> imax = min(tmax, lsh - nghostzones);
// regular, cell centred variables
const Loop::vect<int, dim> ash{{cctk_ash[0], cctk_ash[1], cctk_ash[2]}};
// variables without ghosts
const Loop::vect<int, dim> ash_ng = ash - 2 * nghostzones;
// fluxes :face-centred without ghosts
const Loop::vect<int, dim> ash_fx = ash_ng + di;
const Loop::vect<int, dim> ash_fy = ash_ng + dj;
const Loop::vect<int, dim> ash_fz = ash_ng + dk;
const auto mkstr{[](const Loop::vect<int, dim> &ash) {
Loop::vect<ptrdiff_t, dim> str;
ptrdiff_t s = 1;
for (int d = 0; d < dim; ++d) {
str[d] = s;
s *= ash[d];
}
return str;
}};
const Loop::vect<ptrdiff_t, dim> str = mkstr(ash);
const Loop::vect<ptrdiff_t, dim> str_fx = mkstr(ash_fx);
const Loop::vect<ptrdiff_t, dim> str_fy = mkstr(ash_fy);
const Loop::vect<ptrdiff_t, dim> str_fz = mkstr(ash_fz);
constexpr ptrdiff_t off = 0;
const ptrdiff_t off_fx = -str_fx[0] - str_fx[1] - str_fx[2];
const ptrdiff_t off_fy = -str_fy[0] - str_fy[1] - str_fy[2];
const ptrdiff_t off_fz = -str_fz[0] - str_fz[1] - str_fz[2];
typedef CCTK_INT8 CCTK_INTEGER;
static_assert(sizeof(CCTK_INTEGER) == sizeof(CCTK_REAL), "");
typedef nsimd::pack<CCTK_REAL> CCTK_VEC_REAL;
typedef nsimd::pack<CCTK_INTEGER> CCTK_VEC_INTEGER;
typedef nsimd::packl<CCTK_REAL> CCTK_VEC_BOOLEAN;
constexpr int VS = sizeof(CCTK_VEC_REAL) / sizeof(CCTK_REAL);
auto vec_dt_dx = nsimd::set1<CCTK_VEC_REAL>(dt_dx);
auto vec_dt_dy = nsimd::set1<CCTK_VEC_REAL>(dt_dy);
auto vec_dt_dz = nsimd::set1<CCTK_VEC_REAL>(dt_dz);
// Transport
// dt rho + d_i (rho vel^i) = 0
// dt mom_j + d_i (mom_j vel^i) = 0
// dt etot + d_i (etot vel^i) = 0
const auto calcupdate{[&](auto &u_, const auto &u_p_, const auto &fx_,
const auto &fy_, const auto &fz_) {
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
ptrdiff_t idx_fx_i0 = off_fx + str_fx[1] * j + str_fx[2] * k;
ptrdiff_t idx_fy_i0 = off_fy + str_fy[1] * j + str_fy[2] * k;
ptrdiff_t idx_fz_i0 = off_fz + str_fz[1] * j + str_fz[2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
ptrdiff_t idx_fx = idx_fx_i0 + i;
ptrdiff_t idx_fy = idx_fy_i0 + i;
ptrdiff_t idx_fz = idx_fz_i0 + i;
auto u_p = nsimd::loadu<CCTK_VEC_REAL>(&u_p_[idx]);
auto fx1 = nsimd::loadu<CCTK_VEC_REAL>(&fx_[idx_fx + str_fx[0]]);
auto fx0 = nsimd::loadu<CCTK_VEC_REAL>(&fx_[idx_fx]);
auto fy1 = nsimd::loadu<CCTK_VEC_REAL>(&fy_[idx_fy + str_fy[1]]);
auto fy0 = nsimd::loadu<CCTK_VEC_REAL>(&fy_[idx_fy]);
auto fz1 = nsimd::loadu<CCTK_VEC_REAL>(&fz_[idx_fz + str_fz[2]]);
auto fz0 = nsimd::loadu<CCTK_VEC_REAL>(&fz_[idx_fz]);
auto u = u_p - (vec_dt_dx * (fx1 - fx0) + vec_dt_dy * (fy1 - fy0) +
vec_dt_dz * (fz1 - fz0));
if (i + VS >= imax[0]) {
auto iota = nsimd::iota<CCTK_VEC_INTEGER>();
auto imask = iota < nsimd::set1<CCTK_VEC_INTEGER>(
CCTK_INTEGER(imax[0] - (i + VS)));
auto mask = reinterpretl(CCTK_VEC_BOOLEAN(), imask);
nsimd::storeu_masked(&u_[idx], u, mask);
break;
}
nsimd::storeu(&u_[idx], u);
}
}
}
}};
calcupdate(rho, rho_p, fxrho, fyrho, fzrho);
calcupdate(momx, momx_p, fxmomx, fymomx, fzmomx);
calcupdate(momy, momy_p, fxmomy, fymomy, fzmomy);
calcupdate(momz, momz_p, fxmomz, fymomz, fzmomz);
calcupdate(etot, etot_p, fxetot, fyetot, fzetot);
}
} // namespace HydroToyCarpetX
#include <loop.hxx>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <experimental/simd>
#include <cassert>
#include <cmath>
#include <iostream>
namespace HydroToyCarpetX {
using namespace std;
using namespace std::experimental;
constexpr int dim = 3;
extern "C" void HydroToyCarpetX_Evolve(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Evolve;
DECLARE_CCTK_PARAMETERS;
const CCTK_REAL dt = CCTK_DELTA_TIME;
const CCTK_REAL dx = CCTK_DELTA_SPACE(0);
const CCTK_REAL dy = CCTK_DELTA_SPACE(1);
const CCTK_REAL dz = CCTK_DELTA_SPACE(2);
const Loop::GF3D1<const CCTK_REAL> rho_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
rho_p);
const Loop::GF3D1<const CCTK_REAL> momx_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momx_p);
const Loop::GF3D1<const CCTK_REAL> momy_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momy_p);
const Loop::GF3D1<const CCTK_REAL> momz_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momz_p);
const Loop::GF3D1<const CCTK_REAL> etot_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
etot_p);
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomx);
const Loop::GF3D1<const CCTK_REAL> fxmomy_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomy);
const Loop::GF3D1<const CCTK_REAL> fxmomz_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomz);
const Loop::GF3D1<const CCTK_REAL> fxetot_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxetot);
const Loop::GF3D1<const CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, {0, 0, 0},
fyrho);
const Loop::GF3D1<const CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomx);
const Loop::GF3D1<const CCTK_REAL> fymomy_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomy);
const Loop::GF3D1<const CCTK_REAL> fymomz_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomz);
const Loop::GF3D1<const CCTK_REAL> fyetot_(cctkGH, {1, 0, 1}, {0, 0, 0},
fyetot);
const Loop::GF3D1<const CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzrho);
const Loop::GF3D1<const CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomx);
const Loop::GF3D1<const CCTK_REAL> fzmomy_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomy);
const Loop::GF3D1<const CCTK_REAL> fzmomz_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomz);
const Loop::GF3D1<const CCTK_REAL> fzetot_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzetot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
// Transport
// dt rho + d_i (rho vel^i) = 0
// dt mom_j + d_i (mom_j vel^i) = 0
// dt etot + d_i (etot vel^i) = 0
const CCTK_REAL dt_dx = dt / dx;
const CCTK_REAL dt_dy = dt / dy;
const CCTK_REAL dt_dz = dt / dz;
if (false) {
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
auto calcupdate{[&](auto &fx, auto &fy, auto &fz) {
return dt_dx * (fx(p.I + p.DI(0)) - fx(p.I)) +
dt_dy * (fy(p.I + p.DI(1)) - fy(p.I)) +
dt_dz * (fz(p.I + p.DI(2)) - fz(p.I));
}};
rho_(p.I) = rho_p_(p.I) - calcupdate(fxrho_, fyrho_, fzrho_);
// momx_(p.I) = momx_p_(p.I) - calcupdate(fxmomx_, fymomx_, fzmomx_);
// momy_(p.I) = momy_p_(p.I) - calcupdate(fxmomy_, fymomy_, fzmomy_);
// momz_(p.I) = momz_p_(p.I) - calcupdate(fxmomz_, fymomz_, fzmomz_);
// etot_(p.I) = etot_p_(p.I) - calcupdate(fxetot_, fyetot_, fzetot_);
});
}
if (false) {
const auto calcupdate{[&](auto I, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
return dt_dx * (fx(I + DI) - fx(I)) + dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I));
}};
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
rho_(p.I) = rho_p_(p.I) - calcupdate(p.I, fxrho_, fyrho_, fzrho_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momx_(p.I) = momx_p_(p.I) - calcupdate(p.I, fxmomx_, fymomx_, fzmomx_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momy_(p.I) = momy_p_(p.I) - calcupdate(p.I, fxmomy_, fymomy_, fzmomy_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momz_(p.I) = momz_p_(p.I) - calcupdate(p.I, fxmomz_, fymomz_, fzmomz_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
etot_(p.I) = etot_p_(p.I) - calcupdate(p.I, fxetot_, fyetot_, fzetot_);
});
}
if (false) {
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
auto calcupdate{[&](auto I, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
return dt_dx * (fx(I + DI) - fx(I)) + dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I));
}};
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
rho_(I) = rho_p_(I) - calcupdate(I, fxrho_, fyrho_, fzrho_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momx_(I) = momx_p_(I) - calcupdate(I, fxmomx_, fymomx_, fzmomx_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momy_(I) = momy_p_(I) - calcupdate(I, fxmomy_, fymomy_, fzmomy_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momz_(I) = momz_p_(I) - calcupdate(I, fxmomz_, fymomz_, fzmomz_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
etot_(I) = etot_p_(I) - calcupdate(I, fxetot_, fyetot_, fzetot_);
}
}
}
}
if (false) {
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
const auto calcupdate{
[&](auto &u, auto &u_p, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
u(I) = u_p(I) - (dt_dx * (fx(I + DI) - fx(I)) +
dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I)));
}
}
}
}};
calcupdate(rho_, rho_p_, fxrho_, fyrho_, fzrho_);
calcupdate(momx_, momx_p_, fxmomx_, fymomx_, fzmomx_);
calcupdate(momy_, momy_p_, fxmomy_, fymomy_, fzmomy_);
calcupdate(momz_, momz_p_, fxmomz_, fymomz_, fzmomz_);
calcupdate(etot_, etot_p_, fxetot_, fyetot_, fzetot_);
}
if (true) {
Loop::vect<int, dim> ash;
for (int d = 0; d < dim; ++d)
ash[d] = cctk_ash[d];
Loop::vect<int, dim> str[8];
for (int itype = 0b000; itype <= 0b111; ++itype) {
str[itype][0] = 1;
str[itype][1] = str[itype][0] * (ash[0] + ((itype & 0b100) == 0));
str[itype][2] = str[itype][1] * (ash[1] + ((itype & 0b010) == 0));
}
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
typedef native_simd<CCTK_REAL> CCTK_REAL_VEC;
constexpr int VS = CCTK_REAL_VEC::size();
assert((imax[0] - imin[0]) % VS == 0);
auto vec_dt_dx = CCTK_REAL_VEC(dt_dx);
auto vec_dt_dy = CCTK_REAL_VEC(dt_dy);
auto vec_dt_dz = CCTK_REAL_VEC(dt_dz);
const auto calcupdate{[&](auto &u_, auto &u_p_, auto &fx_, auto &fy_,
auto &fz_) {
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
int idx111_i0 = str[0b111][1] * j + str[0b111][2] * k;
int idx011_i0 = str[0b011][1] * j + str[0b011][2] * k;
int idx101_i0 = str[0b101][1] * j + str[0b101][2] * k;
int idx110_i0 = str[0b110][1] * j + str[0b110][2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
int idx111 = idx111_i0 + i;
int idx011 = idx011_i0 + i;
int idx101 = idx101_i0 + i;
int idx110 = idx110_i0 + i;
CCTK_REAL_VEC u_p{}, fx1{}, fx0{}, fy1{}, fy0{}, fz1{}, fz0{};
u_p.copy_from(&u_p_[idx111], element_aligned);
fx1.copy_from(&fx_[idx011 + str[0b011][0]], element_aligned);
fx0.copy_from(&fx_[idx011], element_aligned);
fy1.copy_from(&fy_[idx101 + str[0b101][1]], element_aligned);
fy0.copy_from(&fy_[idx101], element_aligned);
fz1.copy_from(&fz_[idx110 + str[0b110][2]], element_aligned);
fz0.copy_from(&fz_[idx110], element_aligned);
auto u = u_p - (vec_dt_dx * (fx1 - fx0) + vec_dt_dy * (fy1 - fy0) +
vec_dt_dz * (fz1 - fz0));
u.copy_to(&u_[idx111], element_aligned);
}
}
}
}};
calcupdate(rho, rho_p, fxrho, fyrho, fzrho);
calcupdate(momx, momx_p, fxmomx, fymomx, fzmomx);
calcupdate(momy, momy_p, fxmomy, fymomy, fzmomy);
calcupdate(momz, momz_p, fxmomz, fymomz, fzmomz);
calcupdate(etot, etot_p, fxetot, fyetot, fzetot);
}
}
} // namespace HydroToyCarpetX
#include <loop.hxx>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <Vc/Vc>
#include <cassert>
#include <cmath>
#include <iostream>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
extern "C" void HydroToyCarpetX_Evolve(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Evolve;
DECLARE_CCTK_PARAMETERS;
const CCTK_REAL dt = CCTK_DELTA_TIME;
const CCTK_REAL dx = CCTK_DELTA_SPACE(0);
const CCTK_REAL dy = CCTK_DELTA_SPACE(1);
const CCTK_REAL dz = CCTK_DELTA_SPACE(2);
const Loop::GF3D1<const CCTK_REAL> rho_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
rho_p);
const Loop::GF3D1<const CCTK_REAL> momx_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momx_p);
const Loop::GF3D1<const CCTK_REAL> momy_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momy_p);
const Loop::GF3D1<const CCTK_REAL> momz_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momz_p);
const Loop::GF3D1<const CCTK_REAL> etot_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
etot_p);
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomx);
const Loop::GF3D1<const CCTK_REAL> fxmomy_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomy);
const Loop::GF3D1<const CCTK_REAL> fxmomz_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxmomz);
const Loop::GF3D1<const CCTK_REAL> fxetot_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxetot);
const Loop::GF3D1<const CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, {0, 0, 0},
fyrho);
const Loop::GF3D1<const CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomx);
const Loop::GF3D1<const CCTK_REAL> fymomy_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomy);
const Loop::GF3D1<const CCTK_REAL> fymomz_(cctkGH, {1, 0, 1}, {0, 0, 0},
fymomz);
const Loop::GF3D1<const CCTK_REAL> fyetot_(cctkGH, {1, 0, 1}, {0, 0, 0},
fyetot);
const Loop::GF3D1<const CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzrho);
const Loop::GF3D1<const CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomx);
const Loop::GF3D1<const CCTK_REAL> fzmomy_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomy);
const Loop::GF3D1<const CCTK_REAL> fzmomz_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzmomz);
const Loop::GF3D1<const CCTK_REAL> fzetot_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzetot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
// Transport
// dt rho + d_i (rho vel^i) = 0
// dt mom_j + d_i (mom_j vel^i) = 0
// dt etot + d_i (etot vel^i) = 0
const CCTK_REAL dt_dx = dt / dx;
const CCTK_REAL dt_dy = dt / dy;
const CCTK_REAL dt_dz = dt / dz;
if (false) {
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
auto calcupdate{[&](auto &fx, auto &fy, auto &fz) {
return dt_dx * (fx(p.I + p.DI(0)) - fx(p.I)) +
dt_dy * (fy(p.I + p.DI(1)) - fy(p.I)) +
dt_dz * (fz(p.I + p.DI(2)) - fz(p.I));
}};
rho_(p.I) = rho_p_(p.I) - calcupdate(fxrho_, fyrho_, fzrho_);
// momx_(p.I) = momx_p_(p.I) - calcupdate(fxmomx_, fymomx_, fzmomx_);
// momy_(p.I) = momy_p_(p.I) - calcupdate(fxmomy_, fymomy_, fzmomy_);
// momz_(p.I) = momz_p_(p.I) - calcupdate(fxmomz_, fymomz_, fzmomz_);
// etot_(p.I) = etot_p_(p.I) - calcupdate(fxetot_, fyetot_, fzetot_);
});
}
if (false) {
const auto calcupdate{[&](auto I, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
return dt_dx * (fx(I + DI) - fx(I)) + dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I));
}};
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
rho_(p.I) = rho_p_(p.I) - calcupdate(p.I, fxrho_, fyrho_, fzrho_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momx_(p.I) = momx_p_(p.I) - calcupdate(p.I, fxmomx_, fymomx_, fzmomx_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momy_(p.I) = momy_p_(p.I) - calcupdate(p.I, fxmomy_, fymomy_, fzmomy_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
momz_(p.I) = momz_p_(p.I) - calcupdate(p.I, fxmomz_, fymomz_, fzmomz_);
});
Loop::loop_int<1, 1, 1>(cctkGH, [&](const Loop::PointDesc &p) {
etot_(p.I) = etot_p_(p.I) - calcupdate(p.I, fxetot_, fyetot_, fzetot_);
});
}
if (false) {
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
auto calcupdate{[&](auto I, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
return dt_dx * (fx(I + DI) - fx(I)) + dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I));
}};
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
rho_(I) = rho_p_(I) - calcupdate(I, fxrho_, fyrho_, fzrho_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momx_(I) = momx_p_(I) - calcupdate(I, fxmomx_, fymomx_, fzmomx_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momy_(I) = momy_p_(I) - calcupdate(I, fxmomy_, fymomy_, fzmomy_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
momz_(I) = momz_p_(I) - calcupdate(I, fxmomz_, fymomz_, fzmomz_);
}
}
}
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
etot_(I) = etot_p_(I) - calcupdate(I, fxetot_, fyetot_, fzetot_);
}
}
}
}
if (false) {
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
const auto calcupdate{
[&](auto &u, auto &u_p, auto &fx, auto &fy, auto &fz) {
const Loop::vect<int, dim> DI{{1, 0, 0}};
const Loop::vect<int, dim> DJ{{0, 1, 0}};
const Loop::vect<int, dim> DK{{0, 0, 1}};
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
for (int i = imin[0]; i < imax[0]; ++i) {
const Loop::vect<int, dim> I{{i, j, k}};
u(I) = u_p(I) - (dt_dx * (fx(I + DI) - fx(I)) +
dt_dy * (fy(I + DJ) - fy(I)) +
dt_dz * (fz(I + DK) - fz(I)));
}
}
}
}};
calcupdate(rho_, rho_p_, fxrho_, fyrho_, fzrho_);
calcupdate(momx_, momx_p_, fxmomx_, fymomx_, fzmomx_);
calcupdate(momy_, momy_p_, fxmomy_, fymomy_, fzmomy_);
calcupdate(momz_, momz_p_, fxmomz_, fymomz_, fzmomz_);
calcupdate(etot_, etot_p_, fxetot_, fyetot_, fzetot_);
}
if (true) {
Loop::vect<int, dim> ash;
for (int d = 0; d < dim; ++d)
ash[d] = cctk_ash[d];
Loop::vect<int, dim> str[8];
for (int itype = 0b000; itype <= 0b111; ++itype) {
str[itype][0] = 1;
str[itype][1] = str[itype][0] * (ash[0] + ((itype & 0b100) == 0));
str[itype][2] = str[itype][1] * (ash[1] + ((itype & 0b010) == 0));
}
Loop::vect<int, dim> imin, imax;
for (int d = 0; d < dim; ++d) {
imin[d] = 1;
imax[d] = cctk_lsh[d] - 1;
}
typedef Vc::native_simd<CCTK_REAL> CCTK_REAL_VEC;
constexpr int VS = CCTK_REAL_VEC::size();
assert((imax[0] - imin[0]) % VS == 0);
auto vec_dt_dx = CCTK_REAL_VEC(dt_dx);
auto vec_dt_dy = CCTK_REAL_VEC(dt_dy);
auto vec_dt_dz = CCTK_REAL_VEC(dt_dz);
const auto calcupdate{[&](auto &u_, auto &u_p_, auto &fx_, auto &fy_,
auto &fz_) {
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
int idx111_i0 = str[0b111][1] * j + str[0b111][2] * k;
int idx011_i0 = str[0b011][1] * j + str[0b011][2] * k;
int idx101_i0 = str[0b101][1] * j + str[0b101][2] * k;
int idx110_i0 = str[0b110][1] * j + str[0b110][2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
int idx111 = idx111_i0 + i;
int idx011 = idx011_i0 + i;
int idx101 = idx101_i0 + i;
int idx110 = idx110_i0 + i;
CCTK_REAL_VEC u_p(&u_p_[idx111], Vc::element_aligned);
CCTK_REAL_VEC fx1(&fx_[idx011 + str[0b011][0]],
Vc::element_aligned);
CCTK_REAL_VEC fx0(&fx_[idx011], Vc::element_aligned);
CCTK_REAL_VEC fy1(&fy_[idx101 + str[0b101][1]],
Vc::element_aligned);
CCTK_REAL_VEC fy0(&fy_[idx101], Vc::element_aligned);
CCTK_REAL_VEC fz1(&fz_[idx110 + str[0b110][2]],
Vc::element_aligned);
CCTK_REAL_VEC fz0(&fz_[idx110], Vc::element_aligned);
auto u = u_p - (vec_dt_dx * (fx1 - fx0) + vec_dt_dy * (fy1 - fy0) +
vec_dt_dz * (fz1 - fz0));
u.copy_to(&u_[idx111], Vc::element_aligned);
}
}
}
}};
calcupdate(rho, rho_p, fxrho, fyrho, fzrho);
calcupdate(momx, momx_p, fxmomx, fymomx, fzmomx);
calcupdate(momy, momy_p, fxmomy, fymomy, fzmomy);
calcupdate(momz, momz_p, fxmomz, fymomz, fzmomz);
calcupdate(etot, etot_p, fxetot, fyetot, fzetot);
}
}
} // namespace HydroToyCarpetX
#include <loop.hxx>
#include <vectors.h>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
extern "C" void HydroToyCarpetX_Evolve(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Evolve;
DECLARE_CCTK_PARAMETERS;
const CCTK_REAL dt = CCTK_DELTA_TIME;
const CCTK_REAL dx = CCTK_DELTA_SPACE(0);
const CCTK_REAL dy = CCTK_DELTA_SPACE(1);
const CCTK_REAL dz = CCTK_DELTA_SPACE(2);
const CCTK_REAL dt_dx = dt / dx;
const CCTK_REAL dt_dy = dt / dy;
const CCTK_REAL dt_dz = dt / dz;
const Loop::vect<int, dim> di{{1, 0, 0}};
const Loop::vect<int, dim> dj{{0, 1, 0}};
const Loop::vect<int, dim> dk{{0, 0, 1}};
const Loop::vect<int, dim> lsh{{cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]}};
const Loop::vect<int, dim> nghostzones{
{cctk_nghostzones[0], cctk_nghostzones[1], cctk_nghostzones[2]}};
array<CCTK_INT, dim> tmin_, tmax_;
GetTileExtent(cctkGH, tmin_.data(), tmax_.data());
const Loop::vect<int, dim> tmin(tmin_);
const Loop::vect<int, dim> tmax(tmax_);
const Loop::vect<int, dim> imin = max(tmin, nghostzones);
const Loop::vect<int, dim> imax = min(tmax, lsh - nghostzones);
// regular, cell centred variables
const Loop::vect<int, dim> ash{{cctk_ash[0], cctk_ash[1], cctk_ash[2]}};
// variables without ghosts
const Loop::vect<int, dim> ash_ng = ash - 2 * nghostzones;
// fluxes :face-centred without ghosts
const Loop::vect<int, dim> ash_fx = ash_ng + di;
const Loop::vect<int, dim> ash_fy = ash_ng + dj;
const Loop::vect<int, dim> ash_fz = ash_ng + dk;
const auto mkstr{[](const Loop::vect<int, dim> &ash) {
Loop::vect<ptrdiff_t, dim> str;
ptrdiff_t s = 1;
for (int d = 0; d < dim; ++d) {
str[d] = s;
s *= ash[d];
}
return str;
}};
const Loop::vect<ptrdiff_t, dim> str = mkstr(ash);
const Loop::vect<ptrdiff_t, dim> str_fx = mkstr(ash_fx);
const Loop::vect<ptrdiff_t, dim> str_fy = mkstr(ash_fy);
const Loop::vect<ptrdiff_t, dim> str_fz = mkstr(ash_fz);
constexpr ptrdiff_t off = 0;
const ptrdiff_t off_fx = -str_fx[0] - str_fx[1] - str_fx[2];
const ptrdiff_t off_fy = -str_fy[0] - str_fy[1] - str_fy[2];
const ptrdiff_t off_fz = -str_fz[0] - str_fz[1] - str_fz[2];
typedef vectype<CCTK_REAL> CCTK_VEC_REAL;
constexpr int VS = vecprops<CCTK_REAL>::size();
// assert((imax[0] - imin[0]) % VS == 0);
auto vec_dt_dx = CCTK_VEC_REAL::set1(dt_dx);
auto vec_dt_dy = CCTK_VEC_REAL::set1(dt_dy);
auto vec_dt_dz = CCTK_VEC_REAL::set1(dt_dz);
// Transport
// dt rho + d_i (rho vel^i) = 0
// dt mom_j + d_i (mom_j vel^i) = 0
// dt etot + d_i (etot vel^i) = 0
const auto calcupdate{[&](auto &u_, const auto &u_p_, const auto &fx_,
const auto &fy_, const auto &fz_) {
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
ptrdiff_t idx_fx_i0 = off_fx + str_fx[1] * j + str_fx[2] * k;
ptrdiff_t idx_fy_i0 = off_fy + str_fy[1] * j + str_fy[2] * k;
ptrdiff_t idx_fz_i0 = off_fz + str_fz[1] * j + str_fz[2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
ptrdiff_t idx_fx = idx_fx_i0 + i;
ptrdiff_t idx_fy = idx_fy_i0 + i;
ptrdiff_t idx_fz = idx_fz_i0 + i;
auto u_p = CCTK_VEC_REAL::loadu(u_p_[idx]);
auto fx1 = CCTK_VEC_REAL::loadu(fx_[idx_fx + str_fx[0]]);
auto fx0 = CCTK_VEC_REAL::loadu(fx_[idx_fx]);
auto fy1 = CCTK_VEC_REAL::loadu(fy_[idx_fy + str_fy[1]]);
auto fy0 = CCTK_VEC_REAL::loadu(fy_[idx_fy]);
auto fz1 = CCTK_VEC_REAL::loadu(fz_[idx_fz + str_fz[2]]);
auto fz0 = CCTK_VEC_REAL::loadu(fz_[idx_fz]);
auto u = u_p - (vec_dt_dx * (fx1 - fx0) + vec_dt_dy * (fy1 - fy0) +
vec_dt_dz * (fz1 - fz0));
u.storeu_partial(u_[idx], i, i, imax[0]);
}
}
}
}};
calcupdate(rho, rho_p, fxrho, fyrho, fzrho);
calcupdate(momx, momx_p, fxmomx, fymomx, fzmomx);
calcupdate(momy, momy_p, fxmomy, fymomy, fzmomy);
calcupdate(momz, momz_p, fxmomz, fymomz, fzmomz);
calcupdate(etot, etot_p, fxetot, fyetot, fzetot);
}
} // namespace HydroToyCarpetX
#include <loop.hxx>
#include <vectors.h>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
extern "C" void HydroToyCarpetX_Fluxes(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Fluxes;
DECLARE_CCTK_PARAMETERS;
#warning "TODO: scale fluxes"
const CCTK_REAL dt = CCTK_DELTA_TIME;
const CCTK_REAL dx = CCTK_DELTA_SPACE(0);
const CCTK_REAL dy = CCTK_DELTA_SPACE(1);
const CCTK_REAL dz = CCTK_DELTA_SPACE(2);
const Loop::vect<int, dim> zero{{0, 0, 0}};
const Loop::vect<int, dim> di{{1, 0, 0}};
const Loop::vect<int, dim> dj{{0, 1, 0}};
const Loop::vect<int, dim> dk{{0, 0, 1}};
const Loop::vect<int, dim> lsh{{cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]}};
const Loop::vect<int, dim> nghostzones{
{cctk_nghostzones[0], cctk_nghostzones[1], cctk_nghostzones[2]}};
array<CCTK_INT, dim> tmin_, tmax_;
GetTileExtent(cctkGH, tmin_.data(), tmax_.data());
const Loop::vect<int, dim> tmin(tmin_);
const Loop::vect<int, dim> tmax(tmax_);
// const Loop::vect<int, dim> imin = max(tmin, nghostzones);
// const Loop::vect<int, dim> imax = min(tmax, lsh - nghostzones);
const Loop::vect<int, dim> imin_fx = max(tmin, nghostzones);
const Loop::vect<int, dim> imax_fx =
min(tmax + (tmax >= lsh).ifelse(di, zero), lsh + di - nghostzones);
const Loop::vect<int, dim> imin_fy = max(tmin, nghostzones);
const Loop::vect<int, dim> imax_fy =
min(tmax + (tmax >= lsh).ifelse(di, zero), lsh + dj - nghostzones);
const Loop::vect<int, dim> imin_fz = max(tmin, nghostzones);
const Loop::vect<int, dim> imax_fz =
min(tmax + (tmax >= lsh).ifelse(di, zero), lsh + dk - nghostzones);
// regular, cell centred variables
const Loop::vect<int, dim> ash{{cctk_ash[0], cctk_ash[1], cctk_ash[2]}};
// variables without ghosts
const Loop::vect<int, dim> ash_ng = ash - 2 * nghostzones;
// fluxes :face-centred without ghosts
const Loop::vect<int, dim> ash_fx = ash_ng + di;
const Loop::vect<int, dim> ash_fy = ash_ng + dj;
const Loop::vect<int, dim> ash_fz = ash_ng + dk;
const auto mkstr{[](const Loop::vect<int, dim> &ash) {
Loop::vect<ptrdiff_t, dim> str;
ptrdiff_t s = 1;
for (int d = 0; d < dim; ++d) {
str[d] = s;
s *= ash[d];
}
return str;
}};
const Loop::vect<ptrdiff_t, dim> str = mkstr(ash);
const Loop::vect<ptrdiff_t, dim> str_fx = mkstr(ash_fx);
const Loop::vect<ptrdiff_t, dim> str_fy = mkstr(ash_fy);
const Loop::vect<ptrdiff_t, dim> str_fz = mkstr(ash_fz);
constexpr ptrdiff_t off = 0;
const ptrdiff_t off_fx = -str_fx[0] - str_fx[1] - str_fx[2];
const ptrdiff_t off_fy = -str_fy[0] - str_fy[1] - str_fy[2];
const ptrdiff_t off_fz = -str_fz[0] - str_fz[1] - str_fz[2];
typedef vectype<CCTK_REAL> CCTK_VEC_REAL;
constexpr int VS = vecprops<CCTK_REAL>::size();
// assert((imax_fx[0] - imin_fx[0]) % VS == 0);
// assert((imax_fy[0] - imin_fy[0]) % VS == 0);
// assert((imax_fz[0] - imin_fz[0]) % VS == 0);
// LLF (local Lax-Friedrichs) Riemann solver
const auto llf{[](auto lambda_m, auto lambda_p, auto var_m, auto var_p,
auto flux_m, auto flux_p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::set1(0.5) *
((flux_m + flux_p) -
fmax(fabs(lambda_m), fabs(lambda_p)) * (var_p - var_m));
}};
// Fluxes
// frho^i = rho vel^i
// fmom^i_j = mom_j vel^i + delta^i_j press
// fetot^i = (etot + press) vel^i
const auto calcflux_x{[&](auto &fx_, const auto &u_, const auto &flux_x) {
for (int k = imin_fx[2]; k < imax_fx[2]; ++k) {
for (int j = imin_fx[1]; j < imax_fx[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
ptrdiff_t idx_fx_i0 = off_fx + str_fx[1] * j + str_fx[2] * k;
for (int i = imin_fx[0]; i < imax_fx[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
ptrdiff_t idx_fx = idx_fx_i0 + i;
CCTK_VEC_REAL lambda_m(1.0);
CCTK_VEC_REAL lambda_p(-1.0);
auto var_m = CCTK_VEC_REAL::loadu(u_[idx - str[0]]);
auto var_p = CCTK_VEC_REAL::loadu(u_[idx]);
auto flux_m = flux_x(idx - str[0]);
auto flux_p = flux_x(idx);
auto fx = llf(lambda_m, lambda_p, var_m, var_p, flux_m, flux_p);
fx.storeu_partial(fx_[idx_fx], i, i, imax_fx[0]);
}
}
}
}};
calcflux_x(fxrho, rho, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(rho[idx]) * CCTK_VEC_REAL::loadu(velx[idx]);
});
calcflux_x(fxmomx, momx, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momx[idx]) * CCTK_VEC_REAL::loadu(velx[idx]) +
CCTK_VEC_REAL::loadu(press[idx]);
});
calcflux_x(fxmomy, momy, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momy[idx]) * CCTK_VEC_REAL::loadu(velx[idx]);
});
calcflux_x(fxmomz, momz, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momz[idx]) * CCTK_VEC_REAL::loadu(velx[idx]);
});
calcflux_x(fxetot, etot, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return (CCTK_VEC_REAL::loadu(etot[idx]) +
CCTK_VEC_REAL::loadu(press[idx])) *
CCTK_VEC_REAL::loadu(velx[idx]);
});
const auto calcflux_y{[&](auto &fy_, const auto &u_, const auto &flux_y) {
for (int k = imin_fy[2]; k < imax_fy[2]; ++k) {
for (int j = imin_fy[1]; j < imax_fy[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
ptrdiff_t idx_fy_i0 = off_fy + str_fy[1] * j + str_fy[2] * k;
for (int i = imin_fy[0]; i < imax_fy[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
ptrdiff_t idx_fy = idx_fy_i0 + i;
CCTK_VEC_REAL lambda_m(1.0);
CCTK_VEC_REAL lambda_p(-1.0);
auto var_m = CCTK_VEC_REAL::loadu(u_[idx - str[1]]);
auto var_p = CCTK_VEC_REAL::loadu(u_[idx]);
auto flux_m = flux_y(idx - str[1]);
auto flux_p = flux_y(idx);
auto fy = llf(lambda_m, lambda_p, var_m, var_p, flux_m, flux_p);
fy.storeu_partial(fy_[idx_fy], i, i, imax_fy[0]);
}
}
}
}};
calcflux_y(fyrho, rho, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(rho[idx]) * CCTK_VEC_REAL::loadu(vely[idx]);
});
calcflux_y(fymomx, momx, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momx[idx]) * CCTK_VEC_REAL::loadu(vely[idx]);
});
calcflux_y(fymomy, momy, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momy[idx]) * CCTK_VEC_REAL::loadu(vely[idx]) +
CCTK_VEC_REAL::loadu(press[idx]);
});
calcflux_y(fymomz, momz, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momz[idx]) * CCTK_VEC_REAL::loadu(vely[idx]);
});
calcflux_y(fyetot, etot, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return (CCTK_VEC_REAL::loadu(etot[idx]) +
CCTK_VEC_REAL::loadu(press[idx])) *
CCTK_VEC_REAL::loadu(vely[idx]);
});
const auto calcflux_z{[&](auto &fz_, const auto &u_, const auto &flux_z) {
for (int k = imin_fz[2]; k < imax_fz[2]; ++k) {
for (int j = imin_fz[1]; j < imax_fz[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
ptrdiff_t idx_fz_i0 = off_fz + str_fz[1] * j + str_fz[2] * k;
for (int i = imin_fz[0]; i < imax_fz[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
ptrdiff_t idx_fz = idx_fz_i0 + i;
CCTK_VEC_REAL lambda_m(1.0);
CCTK_VEC_REAL lambda_p(-1.0);
auto var_m = CCTK_VEC_REAL::loadu(u_[idx - str[2]]);
auto var_p = CCTK_VEC_REAL::loadu(u_[idx]);
auto flux_m = flux_z(idx - str[2]);
auto flux_p = flux_z(idx);
auto fz = llf(lambda_m, lambda_p, var_m, var_p, flux_m, flux_p);
fz.storeu_partial(fz_[idx_fz], i, i, imax_fz[0]);
}
}
}
}};
calcflux_z(fzrho, rho, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(rho[idx]) * CCTK_VEC_REAL::loadu(velz[idx]);
});
calcflux_z(fzmomx, momx, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momx[idx]) * CCTK_VEC_REAL::loadu(velz[idx]);
});
calcflux_z(fzmomy, momy, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momy[idx]) * CCTK_VEC_REAL::loadu(velz[idx]);
});
calcflux_z(fzmomz, momz, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return CCTK_VEC_REAL::loadu(momz[idx]) * CCTK_VEC_REAL::loadu(velz[idx]) +
CCTK_VEC_REAL::loadu(press[idx]);
});
calcflux_z(fzetot, etot, [&](ptrdiff_t idx) CCTK_ATTRIBUTE_ALWAYS_INLINE {
return (CCTK_VEC_REAL::loadu(etot[idx]) +
CCTK_VEC_REAL::loadu(press[idx])) *
CCTK_VEC_REAL::loadu(velz[idx]);
});
}
} // namespace HydroToyCarpetX
const Loop::GF3D<CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_p_(cctkGH, rho_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_p_(cctkGH, momx_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_p_(cctkGH, momy_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_p_(cctkGH, momz_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_p_(cctkGH, etot_p);
const Loop::GF3D1<const CCTK_REAL> rho_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
rho_p);
const Loop::GF3D1<const CCTK_REAL> momx_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momx_p);
const Loop::GF3D1<const CCTK_REAL> momy_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momy_p);
const Loop::GF3D1<const CCTK_REAL> momz_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momz_p);
const Loop::GF3D1<const CCTK_REAL> etot_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
etot_p);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<const CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<const CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<const CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<const CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<const CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> press_(cctkGH, press);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> velx_(cctkGH, velx);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> vely_(cctkGH, vely);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> velz_(cctkGH, velz);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> eint_(cctkGH, eint);
const Loop::GF3D1<CCTK_REAL> press_(cctkGH, {1, 1, 1}, {1, 1, 1}, press);
const Loop::GF3D1<CCTK_REAL> velx_(cctkGH, {1, 1, 1}, {1, 1, 1}, velx);
const Loop::GF3D1<CCTK_REAL> vely_(cctkGH, {1, 1, 1}, {1, 1, 1}, vely);
const Loop::GF3D1<CCTK_REAL> velz_(cctkGH, {1, 1, 1}, {1, 1, 1}, velz);
const Loop::GF3D1<CCTK_REAL> eint_(cctkGH, {1, 1, 1}, {1, 1, 1}, eint);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> press_(cctkGH, press);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> velx_(cctkGH, velx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> vely_(cctkGH, vely);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> velz_(cctkGH, velz);
#if 0
const Loop::GF3D<CCTK_REAL, 0, 1, 1> fxrho_(cctkGH, fxrho);
const Loop::GF3D<CCTK_REAL, 0, 1, 1> fxmomx_(cctkGH, fxmomx);
const Loop::GF3D<CCTK_REAL, 0, 1, 1> fxmomy_(cctkGH, fxmomy);
const Loop::GF3D<CCTK_REAL, 0, 1, 1> fxmomz_(cctkGH, fxmomz);
const Loop::GF3D<CCTK_REAL, 0, 1, 1> fxetot_(cctkGH, fxetot);
const Loop::GF3D1<const CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<const CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<const CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<const CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<const CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<CCTK_REAL, 1, 0, 1> fyrho_(cctkGH, fyrho);
const Loop::GF3D<CCTK_REAL, 1, 0, 1> fymomx_(cctkGH, fymomx);
const Loop::GF3D<CCTK_REAL, 1, 0, 1> fymomy_(cctkGH, fymomy);
const Loop::GF3D<CCTK_REAL, 1, 0, 1> fymomz_(cctkGH, fymomz);
const Loop::GF3D<CCTK_REAL, 1, 0, 1> fyetot_(cctkGH, fyetot);
const Loop::GF3D1<const CCTK_REAL> press_(cctkGH, {1, 1, 1}, {1, 1, 1},
press);
const Loop::GF3D1<const CCTK_REAL> velx_(cctkGH, {1, 1, 1}, {1, 1, 1}, velx);
const Loop::GF3D1<const CCTK_REAL> vely_(cctkGH, {1, 1, 1}, {1, 1, 1}, vely);
const Loop::GF3D1<const CCTK_REAL> velz_(cctkGH, {1, 1, 1}, {1, 1, 1}, velz);
const Loop::GF3D<CCTK_REAL, 1, 1, 0> fzrho_(cctkGH, fzrho);
const Loop::GF3D<CCTK_REAL, 1, 1, 0> fzmomx_(cctkGH, fzmomx);
const Loop::GF3D<CCTK_REAL, 1, 1, 0> fzmomy_(cctkGH, fzmomy);
const Loop::GF3D<CCTK_REAL, 1, 1, 0> fzmomz_(cctkGH, fzmomz);
const Loop::GF3D<CCTK_REAL, 1, 1, 0> fzetot_(cctkGH, fzetot);
#else
const array<int, dim> nghosts{0, 0, 0};
const Loop::GF3D1<CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, nghosts, fxrho);
const Loop::GF3D1<CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, nghosts, fxmomx);
const Loop::GF3D1<CCTK_REAL> fxmomy_(cctkGH, {0, 1, 1}, nghosts, fxmomy);
const Loop::GF3D1<CCTK_REAL> fxmomz_(cctkGH, {0, 1, 1}, nghosts, fxmomz);
const Loop::GF3D1<CCTK_REAL> fxetot_(cctkGH, {0, 1, 1}, nghosts, fxetot);
const Loop::GF3D1<CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, {0, 0, 0}, fxrho);
const Loop::GF3D1<CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, {0, 0, 0}, fxmomx);
const Loop::GF3D1<CCTK_REAL> fxmomy_(cctkGH, {0, 1, 1}, {0, 0, 0}, fxmomy);
const Loop::GF3D1<CCTK_REAL> fxmomz_(cctkGH, {0, 1, 1}, {0, 0, 0}, fxmomz);
const Loop::GF3D1<CCTK_REAL> fxetot_(cctkGH, {0, 1, 1}, {0, 0, 0}, fxetot);
const Loop::GF3D1<CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, nghosts, fyrho);
const Loop::GF3D1<CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, nghosts, fymomx);
const Loop::GF3D1<CCTK_REAL> fymomy_(cctkGH, {1, 0, 1}, nghosts, fymomy);
const Loop::GF3D1<CCTK_REAL> fymomz_(cctkGH, {1, 0, 1}, nghosts, fymomz);
const Loop::GF3D1<CCTK_REAL> fyetot_(cctkGH, {1, 0, 1}, nghosts, fyetot);
const Loop::GF3D1<CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, {0, 0, 0}, fyrho);
const Loop::GF3D1<CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, {0, 0, 0}, fymomx);
const Loop::GF3D1<CCTK_REAL> fymomy_(cctkGH, {1, 0, 1}, {0, 0, 0}, fymomy);
const Loop::GF3D1<CCTK_REAL> fymomz_(cctkGH, {1, 0, 1}, {0, 0, 0}, fymomz);
const Loop::GF3D1<CCTK_REAL> fyetot_(cctkGH, {1, 0, 1}, {0, 0, 0}, fyetot);
const Loop::GF3D1<CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, nghosts, fzrho);
const Loop::GF3D1<CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, nghosts, fzmomx);
const Loop::GF3D1<CCTK_REAL> fzmomy_(cctkGH, {1, 1, 0}, nghosts, fzmomy);
const Loop::GF3D1<CCTK_REAL> fzmomz_(cctkGH, {1, 1, 0}, nghosts, fzmomz);
const Loop::GF3D1<CCTK_REAL> fzetot_(cctkGH, {1, 1, 0}, nghosts, fzetot);
#endif
const Loop::GF3D1<CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, {0, 0, 0}, fzrho);
const Loop::GF3D1<CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, {0, 0, 0}, fzmomx);
const Loop::GF3D1<CCTK_REAL> fzmomy_(cctkGH, {1, 1, 0}, {0, 0, 0}, fzmomy);
const Loop::GF3D1<CCTK_REAL> fzmomz_(cctkGH, {1, 1, 0}, {0, 0, 0}, fzmomz);
const Loop::GF3D1<CCTK_REAL> fzetot_(cctkGH, {1, 1, 0}, {0, 0, 0}, fzetot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_p_(cctkGH, rho_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_p_(cctkGH, momx_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_p_(cctkGH, momy_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_p_(cctkGH, momz_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_p_(cctkGH, etot_p);
#if 0
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxrho_(cctkGH, fxrho);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomx_(cctkGH, fxmomx);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomy_(cctkGH, fxmomy);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomz_(cctkGH, fxmomz);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxetot_(cctkGH, fxetot);
const Loop::GF3D<const CCTK_REAL, 1, 0, 1> fyrho_(cctkGH, fyrho);
const Loop::GF3D<const CCTK_REAL, 1, 0, 1> fymomx_(cctkGH, fymomx);
const Loop::GF3D<const CCTK_REAL, 1, 0, 1> fymomy_(cctkGH, fymomy);
const Loop::GF3D<const CCTK_REAL, 1, 0, 1> fymomz_(cctkGH, fymomz);
const Loop::GF3D<const CCTK_REAL, 1, 0, 1> fyetot_(cctkGH, fyetot);
const Loop::GF3D1<const CCTK_REAL> rho_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
rho_p);
const Loop::GF3D1<const CCTK_REAL> momx_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momx_p);
const Loop::GF3D1<const CCTK_REAL> momy_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momy_p);
const Loop::GF3D1<const CCTK_REAL> momz_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
momz_p);
const Loop::GF3D1<const CCTK_REAL> etot_p_(cctkGH, {1, 1, 1}, {1, 1, 1},
etot_p);
const Loop::GF3D<const CCTK_REAL, 1, 1, 0> fzrho_(cctkGH, fzrho);
const Loop::GF3D<const CCTK_REAL, 1, 1, 0> fzmomx_(cctkGH, fzmomx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 0> fzmomy_(cctkGH, fzmomy);
const Loop::GF3D<const CCTK_REAL, 1, 1, 0> fzmomz_(cctkGH, fzmomz);
const Loop::GF3D<const CCTK_REAL, 1, 1, 0> fzetot_(cctkGH, fzetot);
#else
const array<int, dim> nghosts{0, 0, 0};
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, nghosts, fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, nghosts,
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, {0, 0, 0},
const Loop::GF3D1<const CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, nghosts, fyrho);
const Loop::GF3D1<const CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, nghosts,
const Loop::GF3D1<const CCTK_REAL> fyrho_(cctkGH, {1, 0, 1}, {0, 0, 0},
fyrho);
const Loop::GF3D1<const CCTK_REAL> fymomx_(cctkGH, {1, 0, 1}, {0, 0, 0},
const Loop::GF3D1<const CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, nghosts, fzrho);
const Loop::GF3D1<const CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, nghosts,
const Loop::GF3D1<const CCTK_REAL> fzrho_(cctkGH, {1, 1, 0}, {0, 0, 0},
fzrho);
const Loop::GF3D1<const CCTK_REAL> fzmomx_(cctkGH, {1, 1, 0}, {0, 0, 0},
const Loop::GF3D<CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> rho_(cctkGH, rho);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momx_(cctkGH, momx);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momy_(cctkGH, momy);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> momz_(cctkGH, momz);
const Loop::GF3D<const CCTK_REAL, 1, 1, 1> etot_(cctkGH, etot);
const Loop::GF3D1<const CCTK_REAL> rho_(cctkGH, {1, 1, 1}, {1, 1, 1}, rho);
const Loop::GF3D1<const CCTK_REAL> momx_(cctkGH, {1, 1, 1}, {1, 1, 1}, momx);
const Loop::GF3D1<const CCTK_REAL> momy_(cctkGH, {1, 1, 1}, {1, 1, 1}, momy);
const Loop::GF3D1<const CCTK_REAL> momz_(cctkGH, {1, 1, 1}, {1, 1, 1}, momz);
const Loop::GF3D1<const CCTK_REAL> etot_(cctkGH, {1, 1, 1}, {1, 1, 1}, etot);
#if 0
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxrho_(cctkGH, fxrho);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomx_(cctkGH, fxmomx);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomy_(cctkGH, fxmomy);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxmomz_(cctkGH, fxmomz);
const Loop::GF3D<const CCTK_REAL, 0, 1, 1> fxetot_(cctkGH, fxetot);
#else
const array<int, dim> nghosts{0, 0, 0};
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, nghosts, fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, nghosts,
const Loop::GF3D1<const CCTK_REAL> fxrho_(cctkGH, {0, 1, 1}, {0, 0, 0},
fxrho);
const Loop::GF3D1<const CCTK_REAL> fxmomx_(cctkGH, {0, 1, 1}, {0, 0, 0},
#include <loop.hxx>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <nsimd/nsimd-all.hpp>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
namespace {
template <typename T> T pow2(T x) { return x * x; }
} // namespace
extern "C" void HydroToyCarpetX_Pressure(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Pressure;
DECLARE_CCTK_PARAMETERS;
const Loop::vect<int, dim> zero{{0, 0, 0}};
const Loop::vect<int, dim> lsh{{cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]}};
array<CCTK_INT, dim> tmin_, tmax_;
GetTileExtent(cctkGH, tmin_.data(), tmax_.data());
const Loop::vect<int, dim> tmin(tmin_);
const Loop::vect<int, dim> tmax(tmax_);
const Loop::vect<int, dim> imin = max(tmin, zero);
const Loop::vect<int, dim> imax = min(tmax, lsh);
// regular, cell centred variables
const Loop::vect<int, dim> ash{{cctk_ash[0], cctk_ash[1], cctk_ash[2]}};
const auto mkstr{[](const Loop::vect<int, dim> &ash) {
Loop::vect<ptrdiff_t, dim> str;
ptrdiff_t s = 1;
for (int d = 0; d < dim; ++d) {
str[d] = s;
s *= ash[d];
}
return str;
}};
const Loop::vect<ptrdiff_t, dim> str = mkstr(ash);
constexpr ptrdiff_t off = 0;
typedef CCTK_INT8 CCTK_INTEGER;
static_assert(sizeof(CCTK_INTEGER) == sizeof(CCTK_REAL), "");
typedef nsimd::pack<CCTK_REAL> CCTK_VEC_REAL;
typedef nsimd::pack<CCTK_INTEGER> CCTK_VEC_INTEGER;
typedef nsimd::packl<CCTK_REAL> CCTK_VEC_BOOLEAN;
constexpr int VS = sizeof(CCTK_VEC_REAL) / sizeof(CCTK_REAL);
// Equation of state: p = (gamma - 1) e
// vel^j = delta^j_i mom_i / rho
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
auto rho_ = nsimd::loadu<CCTK_VEC_REAL>(&rho[idx]);
auto momx_ = nsimd::loadu<CCTK_VEC_REAL>(&momx[idx]);
auto momy_ = nsimd::loadu<CCTK_VEC_REAL>(&momy[idx]);
auto momz_ = nsimd::loadu<CCTK_VEC_REAL>(&momz[idx]);
auto etot_ = nsimd::loadu<CCTK_VEC_REAL>(&etot[idx]);
auto rho_inv = nsimd::set1<CCTK_VEC_REAL>(1.0) / rho_;
auto ekin = nsimd::set1<CCTK_VEC_REAL>(0.5) * rho_inv *
sqrt(pow2(momx_) + pow2(momy_) + pow2(momz_));
auto eint_ = etot_ - ekin;
auto press_ = nsimd::set1<CCTK_VEC_REAL>(gamma - 1) * eint_;
auto velx_ = rho_inv * momx_;
auto vely_ = rho_inv * momy_;
auto velz_ = rho_inv * momz_;
if (i + VS >= imax[0]) {
auto iota = nsimd::iota<CCTK_VEC_INTEGER>();
auto imask = iota < nsimd::set1<CCTK_VEC_INTEGER>(
CCTK_INTEGER(imax[0] - (i + VS)));
auto mask = reinterpretl(CCTK_VEC_BOOLEAN(), imask);
nsimd::storeu_masked(&press[idx], press_, mask);
nsimd::storeu_masked(&velx[idx], velx_, mask);
nsimd::storeu_masked(&vely[idx], vely_, mask);
nsimd::storeu_masked(&velz[idx], velz_, mask);
nsimd::storeu_masked(&eint[idx], eint_, mask);
break;
}
nsimd::storeu(&press[idx], press_);
nsimd::storeu(&velx[idx], velx_);
nsimd::storeu(&vely[idx], vely_);
nsimd::storeu(&velz[idx], velz_);
nsimd::storeu(&eint[idx], eint_);
}
}
}
}
} // namespace HydroToyCarpetX
#include <loop.hxx>
#include <vectors.h>
#include <cctk.h>
#include <cctk_Arguments_Checked.h>
#include <cctk_Parameters.h>
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
namespace HydroToyCarpetX {
using namespace std;
constexpr int dim = 3;
namespace {
template <typename T> T pow2(T x) { return x * x; }
} // namespace
extern "C" void HydroToyCarpetX_Pressure(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_HydroToyCarpetX_Pressure;
DECLARE_CCTK_PARAMETERS;
const Loop::vect<int, dim> zero{{0, 0, 0}};
const Loop::vect<int, dim> lsh{{cctk_lsh[0], cctk_lsh[1], cctk_lsh[2]}};
array<CCTK_INT, dim> tmin_, tmax_;
GetTileExtent(cctkGH, tmin_.data(), tmax_.data());
const Loop::vect<int, dim> tmin(tmin_);
const Loop::vect<int, dim> tmax(tmax_);
const Loop::vect<int, dim> imin = max(tmin, zero);
const Loop::vect<int, dim> imax = min(tmax, lsh);
// regular, cell centred variables
const Loop::vect<int, dim> ash{{cctk_ash[0], cctk_ash[1], cctk_ash[2]}};
const auto mkstr{[](const Loop::vect<int, dim> &ash) {
Loop::vect<ptrdiff_t, dim> str;
ptrdiff_t s = 1;
for (int d = 0; d < dim; ++d) {
str[d] = s;
s *= ash[d];
}
return str;
}};
const Loop::vect<ptrdiff_t, dim> str = mkstr(ash);
constexpr ptrdiff_t off = 0;
typedef vectype<CCTK_REAL> CCTK_VEC_REAL;
constexpr int VS = vecprops<CCTK_REAL>::size();
// assert((imax[0] - imin[0]) % VS == 0);
// Equation of state: p = (gamma - 1) e
// vel^j = delta^j_i mom_i / rho
for (int k = imin[2]; k < imax[2]; ++k) {
for (int j = imin[1]; j < imax[1]; ++j) {
ptrdiff_t idx_i0 = off + str[1] * j + str[2] * k;
for (int i = imin[0]; i < imax[0]; i += VS) {
ptrdiff_t idx = idx_i0 + i;
auto rho_ = CCTK_VEC_REAL::loadu(rho[idx]);
auto momx_ = CCTK_VEC_REAL::loadu(momx[idx]);
auto momy_ = CCTK_VEC_REAL::loadu(momy[idx]);
auto momz_ = CCTK_VEC_REAL::loadu(momz[idx]);
auto etot_ = CCTK_VEC_REAL::loadu(etot[idx]);
auto rho_inv = CCTK_VEC_REAL::set1(1.0) / rho_;
auto ekin = CCTK_VEC_REAL::set1(0.5) * rho_inv *
sqrt(pow2(momx_) + pow2(momy_) + pow2(momz_));
auto eint_ = etot_ - ekin;
auto press_ = CCTK_VEC_REAL::set1(gamma - 1) * eint_;
auto velx_ = rho_inv * momx_;
auto vely_ = rho_inv * momy_;
auto velz_ = rho_inv * momz_;
press_.storeu_partial(press[idx], i, i, imax[0]);
velx_.storeu_partial(velx[idx], i, i, imax[0]);
vely_.storeu_partial(vely[idx], i, i, imax[0]);
velz_.storeu_partial(velz[idx], i, i, imax[0]);
eint_.storeu_partial(eint[idx], i, i, imax[0]);
}
}
}
}
} // namespace HydroToyCarpetX