8 #ifndef CHARGEDEPOSITION_H_
9 #define CHARGEDEPOSITION_H_
40 template <
int depos_order>
42 const amrex::ParticleReal *
const wp,
43 const int *
const ion_lev,
45 const long np_to_depose,
46 const std::array<amrex::Real,3>&
dx,
47 const std::array<amrex::Real, 3> xyzmin,
50 const int n_rz_azimuthal_modes,
52 const long load_balance_costs_update_algo)
54 using namespace amrex;
56 #if !defined(AMREX_USE_GPU)
62 const bool do_ionization = ion_lev;
63 const amrex::Real dzi = 1.0_rt/
dx[2];
64 #if defined(WARPX_DIM_1D_Z)
65 const amrex::Real invvol = dzi;
67 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
68 const amrex::Real dxi = 1.0_rt/
dx[0];
69 const amrex::Real invvol = dxi*dzi;
70 #elif defined(WARPX_DIM_3D)
71 const amrex::Real dxi = 1.0_rt/
dx[0];
72 const amrex::Real dyi = 1.0_rt/
dx[1];
73 const amrex::Real invvol = dxi*dyi*dzi;
76 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
77 const amrex::Real xmin = xyzmin[0];
79 #if defined(WARPX_DIM_3D)
80 const amrex::Real ymin = xyzmin[1];
82 const amrex::Real zmin = xyzmin[2];
91 #if defined(WARPX_USE_GPUCLOCK)
92 amrex::Real* cost_real =
nullptr;
101 #if defined(WARPX_USE_GPUCLOCK)
107 amrex::Real wq = q*wp[ip]*invvol;
112 amrex::ParticleReal xp, yp, zp;
113 GetPosition(ip, xp, yp, zp);
117 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
120 #if defined(WARPX_DIM_RZ)
121 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
122 amrex::Real costheta;
123 amrex::Real sintheta;
132 const amrex::Real x = (rp - xmin)*dxi;
134 const amrex::Real x = (xp - xmin)*dxi;
139 amrex::Real sx[depos_order + 1] = {0._rt};
141 if (rho_type[0] ==
NODE) {
142 i = compute_shape_factor(sx, x);
143 }
else if (rho_type[0] == CELL) {
144 i = compute_shape_factor(sx, x - 0.5_rt);
147 #if defined(WARPX_DIM_3D)
149 const amrex::Real
y = (yp - ymin)*dyi;
150 amrex::Real sy[depos_order + 1] = {0._rt};
152 if (rho_type[1] ==
NODE) {
153 j = compute_shape_factor(sy,
y);
154 }
else if (rho_type[1] == CELL) {
155 j = compute_shape_factor(sy,
y - 0.5_rt);
159 const amrex::Real
z = (zp - zmin)*dzi;
160 amrex::Real sz[depos_order + 1] = {0._rt};
162 if (rho_type[WARPX_ZINDEX] ==
NODE) {
163 k = compute_shape_factor(sz,
z);
164 }
else if (rho_type[WARPX_ZINDEX] == CELL) {
165 k = compute_shape_factor(sz,
z - 0.5_rt);
169 #if defined(WARPX_DIM_1D_Z)
170 for (
int iz=0; iz<=depos_order; iz++){
172 &rho_arr(lo.
x+k+iz, 0, 0, 0),
176 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
177 for (
int iz=0; iz<=depos_order; iz++){
178 for (
int ix=0; ix<=depos_order; ix++){
180 &rho_arr(lo.
x+
i+ix, lo.
y+k+iz, 0, 0),
182 #if defined(WARPX_DIM_RZ)
184 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
193 #elif defined(WARPX_DIM_3D)
194 for (
int iz=0; iz<=depos_order; iz++){
195 for (
int iy=0; iy<=depos_order; iy++){
196 for (
int ix=0; ix<=depos_order; ix++){
198 &rho_arr(lo.
x+
i+ix, lo.
y+j+iy, lo.
z+k+iz),
199 sx[ix]*sy[iy]*sz[iz]*wq);
206 #if defined(WARPX_USE_GPUCLOCK)
236 template <
int depos_order>
238 const amrex::ParticleReal *
const wp,
239 const int *
const ion_lev,
242 const long np_to_deposit,
243 const std::array<amrex::Real,3>&
dx,
244 const std::array<amrex::Real, 3> xyzmin,
247 const int n_rz_azimuthal_modes,
249 const long load_balance_costs_update_algo,
255 using namespace amrex;
259 #if !defined(AMREX_USE_GPU)
260 amrex::ignore_unused(ix_type, cost, load_balance_costs_update_algo, a_bins, box, geom, a_tbox_max_size);
265 const bool do_ionization = ion_lev;
266 const amrex::Real dzi = 1.0_rt/
dx[2];
267 #if defined(WARPX_DIM_1D_Z)
268 const amrex::Real invvol = dzi;
270 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
271 const amrex::Real dxi = 1.0_rt/
dx[0];
272 const amrex::Real invvol = dxi*dzi;
273 #elif defined(WARPX_DIM_3D)
274 const amrex::Real dxi = 1.0_rt/
dx[0];
275 const amrex::Real dyi = 1.0_rt/
dx[1];
276 const amrex::Real invvol = dxi*dyi*dzi;
279 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
280 const amrex::Real xmin = xyzmin[0];
282 #if defined(WARPX_DIM_3D)
283 const amrex::Real ymin = xyzmin[1];
285 const amrex::Real zmin = xyzmin[2];
288 auto rho_box = rho_fab.
box();
295 #if defined(WARPX_USE_GPUCLOCK)
296 amrex::Real* cost_real =
nullptr;
303 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
306 const auto domain = geom.
Domain();
311 sample_tbox_x.
grow(depos_order);
313 const auto npts = sample_tbox_x.
numPts();
315 const int nblocks = a_bins.
numBins();
317 const int threads_per_block = 256;
319 std::size_t shared_mem_bytes = npts*
sizeof(amrex::Real);
325 "Tile size too big for GPU shared memory charge deposition");
328 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
340 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
341 const int bin_id = blockIdx.x;
342 const unsigned int bin_start = offsets_ptr[bin_id];
343 const unsigned int bin_stop = offsets_ptr[bin_id+1];
345 if (bin_start == bin_stop) {
return; }
349 ParticleReal xp, yp, zp;
350 GetPosition(permutation[bin_start], xp, yp, zp);
351 #if defined(WARPX_DIM_3D)
352 IntVect iv =
IntVect(
int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
353 int(amrex::Math::floor((yp-plo[1])*dxiarr[1])),
354 int(amrex::Math::floor((zp-plo[2])*dxiarr[2])));
355 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
357 int(amrex::Math::floor((xp-plo[0])*dxiarr[0])),
358 int(amrex::Math::floor((zp-plo[1])*dxiarr[1])));
359 #elif defined(WARPX_DIM_1D_Z)
360 IntVect iv =
IntVect(
int(amrex::Math::floor((zp-plo[0])*dxiarr[0])));
362 iv += domain.smallEnd();
367 tbx.
grow(depos_order);
370 amrex::Real*
const shared = gsm.
dataPtr();
375 volatile amrex::Real* vs = shared;
376 for (
int i = threadIdx.x;
i < tbx.
numPts();
i += blockDim.x) {
384 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
386 for (
unsigned int ip_orig = bin_start + threadIdx.x; ip_orig < bin_stop; ip_orig += blockDim.x)
389 unsigned int ip = permutation[ip_orig];
391 #if defined(WARPX_USE_GPUCLOCK)
397 amrex::Real wq = q*wp[ip]*invvol;
402 amrex::ParticleReal xp, yp, zp;
403 GetPosition(ip, xp, yp, zp);
407 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ) || defined(WARPX_DIM_3D)
410 #if defined(WARPX_DIM_RZ)
411 const amrex::Real rp = std::sqrt(xp*xp + yp*yp);
412 amrex::Real costheta;
413 amrex::Real sintheta;
422 const amrex::Real x = (rp - xmin)*dxi;
424 const amrex::Real x = (xp - xmin)*dxi;
429 amrex::Real sx[depos_order + 1] = {0._rt};
431 if (rho_type[0] ==
NODE) {
432 i = compute_shape_factor(sx, x);
433 }
else if (rho_type[0] == CELL) {
434 i = compute_shape_factor(sx, x - 0.5_rt);
437 #if defined(WARPX_DIM_3D)
439 const amrex::Real
y = (yp - ymin)*dyi;
440 amrex::Real sy[depos_order + 1] = {0._rt};
442 if (rho_type[1] ==
NODE) {
443 j = compute_shape_factor(sy,
y);
444 }
else if (rho_type[1] == CELL) {
445 j = compute_shape_factor(sy,
y - 0.5_rt);
449 const amrex::Real
z = (zp - zmin)*dzi;
450 amrex::Real sz[depos_order + 1] = {0._rt};
452 if (rho_type[WARPX_ZINDEX] ==
NODE) {
453 k = compute_shape_factor(sz,
z);
454 }
else if (rho_type[WARPX_ZINDEX] == CELL) {
455 k = compute_shape_factor(sz,
z - 0.5_rt);
459 #if defined(WARPX_DIM_1D_Z)
460 for (
int iz=0; iz<=depos_order; iz++){
462 &buf(lo.
x+k+iz, 0, 0, 0),
466 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
467 for (
int iz=0; iz<=depos_order; iz++){
468 for (
int ix=0; ix<=depos_order; ix++){
470 &buf(lo.
x+
i+ix, lo.
y+k+iz, 0, 0),
472 #if defined(WARPX_DIM_RZ)
474 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
483 #elif defined(WARPX_DIM_3D)
484 for (
int iz=0; iz<=depos_order; iz++){
485 for (
int iy=0; iy<=depos_order; iy++){
486 for (
int ix=0; ix<=depos_order; ix++){
488 &buf(lo.
x+
i+ix, lo.
y+j+iy, lo.
z+k+iz),
489 sx[ix]*sy[iy]*sz[iz]*wq);
496 #if defined(AMREX_USE_CUDA) || defined(AMREX_USE_HIP)
499 addLocalToGlobal(tbx, rho_arr, buf);
503 #if defined(WARPX_USE_GPUCLOCK)
#define AMREX_D_DECL(a, b, c)
void doChargeDepositionSharedShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const int *const ion_lev, amrex::FArrayBox &rho_fab, const amrex::IntVect &ix_type, const long np_to_deposit, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo, const amrex::DenseBins< WarpXParticleContainer::ParticleType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Definition: ChargeDeposition.H:237
void doChargeDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const int *const ion_lev, amrex::FArrayBox &rho_fab, const long np_to_depose, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Definition: ChargeDeposition.H:41
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:370
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:27
virtual void free(void *pt)=0
virtual void * alloc(std::size_t sz)=0
const Box & box() const noexcept
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
AMREX_GPU_HOST_DEVICE BoxND & grow(int i) noexcept
AMREX_GPU_HOST_DEVICE IntVectND< dim > type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
const Box & Domain() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
static std::size_t sharedMemPerBlock() noexcept
def y
Definition: Excitation_Flag_Generator.py:76
def z
Definition: Excitation_Flag_Generator.py:77
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Arena * The_Managed_Arena()
IntVectND< AMREX_SPACEDIM > IntVect
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
int dx
Definition: stencil.py:436
Definition: ShapeFactors.H:27
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel.
Definition: GetAndSetPosition.H:53
@ GpuClock
Definition: WarpXAlgorithmSelection.H:138
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T real() const noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept