8 #ifndef CURRENTDEPOSITION_H_
9 #define CURRENTDEPOSITION_H_
26 using namespace amrex::literals;
52 template <
int depos_order>
54 const amrex::ParticleReal *
const wp,
55 const amrex::ParticleReal *
const uxp,
56 const amrex::ParticleReal *
const uyp,
57 const amrex::ParticleReal *
const uzp,
58 const int *
const ion_lev,
62 const long np_to_depose,
63 const amrex::Real relative_time,
64 const std::array<amrex::Real,3>&
dx,
65 const std::array<amrex::Real,3>& xyzmin,
68 const int n_rz_azimuthal_modes,
70 const long load_balance_costs_update_algo)
72 #if !defined(WARPX_DIM_RZ)
76 #if !defined(AMREX_USE_GPU)
82 const bool do_ionization = ion_lev;
83 const amrex::Real dzi = 1.0_rt/
dx[2];
84 #if defined(WARPX_DIM_1D_Z)
85 const amrex::Real invvol = dzi;
87 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
88 const amrex::Real dxi = 1.0_rt/
dx[0];
89 const amrex::Real invvol = dxi*dzi;
90 #elif defined(WARPX_DIM_3D)
91 const amrex::Real dxi = 1.0_rt/
dx[0];
92 const amrex::Real dyi = 1.0_rt/
dx[1];
93 const amrex::Real invvol = dxi*dyi*dzi;
96 #if (AMREX_SPACEDIM >= 2)
97 const amrex::Real xmin = xyzmin[0];
99 #if defined(WARPX_DIM_3D)
100 const amrex::Real ymin = xyzmin[1];
102 const amrex::Real zmin = xyzmin[2];
113 constexpr
int zdir = WARPX_ZINDEX;
118 #if defined(WARPX_USE_GPUCLOCK)
119 amrex::Real* cost_real =
nullptr;
128 #if defined(WARPX_USE_GPUCLOCK)
135 const amrex::Real gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
136 + uyp[ip]*uyp[ip]*clightsq
137 + uzp[ip]*uzp[ip]*clightsq);
138 amrex::Real wq = q*wp[ip];
143 amrex::ParticleReal xp, yp, zp;
144 GetPosition(ip, xp, yp, zp);
146 const amrex::Real vx = uxp[ip]*gaminv;
147 const amrex::Real vy = uyp[ip]*gaminv;
148 const amrex::Real vz = uzp[ip]*gaminv;
150 #if defined(WARPX_DIM_RZ)
153 const amrex::Real xpmid = xp + relative_time*vx;
154 const amrex::Real ypmid = yp + relative_time*vy;
155 const amrex::Real rpmid = std::sqrt(xpmid*xpmid + ypmid*ypmid);
156 amrex::Real costheta;
157 amrex::Real sintheta;
159 costheta = xpmid/rpmid;
160 sintheta = ypmid/rpmid;
166 const amrex::Real wqx = wq*invvol*(+vx*costheta + vy*sintheta);
167 const amrex::Real wqy = wq*invvol*(-vx*sintheta + vy*costheta);
169 const amrex::Real wqx = wq*invvol*vx;
170 const amrex::Real wqy = wq*invvol*vy;
172 const amrex::Real wqz = wq*invvol*vz;
176 #if (AMREX_SPACEDIM >= 2)
179 #if defined(WARPX_DIM_RZ)
181 const double xmid = (rpmid - xmin)*dxi;
183 const double xmid = ((xp - xmin) + relative_time*vx)*dxi;
190 double sx_node[depos_order + 1] = {0.};
191 double sx_cell[depos_order + 1] = {0.};
194 if (jx_type[0] ==
NODE || jy_type[0] ==
NODE || jz_type[0] ==
NODE) {
195 j_node = compute_shape_factor(sx_node, xmid);
197 if (jx_type[0] == CELL || jy_type[0] == CELL || jz_type[0] == CELL) {
198 j_cell = compute_shape_factor(sx_cell, xmid - 0.5);
201 amrex::Real sx_jx[depos_order + 1] = {0._rt};
202 amrex::Real sx_jy[depos_order + 1] = {0._rt};
203 amrex::Real sx_jz[depos_order + 1] = {0._rt};
204 for (
int ix=0; ix<=depos_order; ix++)
206 sx_jx[ix] = ((jx_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
207 sx_jy[ix] = ((jy_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
208 sx_jz[ix] = ((jz_type[0] ==
NODE) ? amrex::Real(sx_node[ix]) : amrex::Real(sx_cell[ix]));
211 int const j_jx = ((jx_type[0] ==
NODE) ? j_node : j_cell);
212 int const j_jy = ((jy_type[0] ==
NODE) ? j_node : j_cell);
213 int const j_jz = ((jz_type[0] ==
NODE) ? j_node : j_cell);
216 #if defined(WARPX_DIM_3D)
219 const double ymid = ((yp - ymin) + relative_time*vy)*dyi;
220 double sy_node[depos_order + 1] = {0.};
221 double sy_cell[depos_order + 1] = {0.};
224 if (jx_type[1] ==
NODE || jy_type[1] ==
NODE || jz_type[1] ==
NODE) {
225 k_node = compute_shape_factor(sy_node, ymid);
227 if (jx_type[1] == CELL || jy_type[1] == CELL || jz_type[1] == CELL) {
228 k_cell = compute_shape_factor(sy_cell, ymid - 0.5);
230 amrex::Real sy_jx[depos_order + 1] = {0._rt};
231 amrex::Real sy_jy[depos_order + 1] = {0._rt};
232 amrex::Real sy_jz[depos_order + 1] = {0._rt};
233 for (
int iy=0; iy<=depos_order; iy++)
235 sy_jx[iy] = ((jx_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
236 sy_jy[iy] = ((jy_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
237 sy_jz[iy] = ((jz_type[1] ==
NODE) ? amrex::Real(sy_node[iy]) : amrex::Real(sy_cell[iy]));
239 int const k_jx = ((jx_type[1] ==
NODE) ? k_node : k_cell);
240 int const k_jy = ((jy_type[1] ==
NODE) ? k_node : k_cell);
241 int const k_jz = ((jz_type[1] ==
NODE) ? k_node : k_cell);
246 const double zmid = ((zp - zmin) + relative_time*vz)*dzi;
247 double sz_node[depos_order + 1] = {0.};
248 double sz_cell[depos_order + 1] = {0.};
251 if (jx_type[zdir] ==
NODE || jy_type[zdir] ==
NODE || jz_type[zdir] ==
NODE) {
252 l_node = compute_shape_factor(sz_node, zmid);
254 if (jx_type[zdir] == CELL || jy_type[zdir] == CELL || jz_type[zdir] == CELL) {
255 l_cell = compute_shape_factor(sz_cell, zmid - 0.5);
257 amrex::Real sz_jx[depos_order + 1] = {0._rt};
258 amrex::Real sz_jy[depos_order + 1] = {0._rt};
259 amrex::Real sz_jz[depos_order + 1] = {0._rt};
260 for (
int iz=0; iz<=depos_order; iz++)
262 sz_jx[iz] = ((jx_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
263 sz_jy[iz] = ((jy_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
264 sz_jz[iz] = ((jz_type[zdir] ==
NODE) ? amrex::Real(sz_node[iz]) : amrex::Real(sz_cell[iz]));
266 int const l_jx = ((jx_type[zdir] ==
NODE) ? l_node : l_cell);
267 int const l_jy = ((jy_type[zdir] ==
NODE) ? l_node : l_cell);
268 int const l_jz = ((jz_type[zdir] ==
NODE) ? l_node : l_cell);
271 #if defined(WARPX_DIM_1D_Z)
272 for (
int iz=0; iz<=depos_order; iz++){
274 &jx_arr(lo.
x+l_jx+iz, 0, 0, 0),
277 &jy_arr(lo.
x+l_jy+iz, 0, 0, 0),
280 &jz_arr(lo.
x+l_jz+iz, 0, 0, 0),
284 #if defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
285 for (
int iz=0; iz<=depos_order; iz++){
286 for (
int ix=0; ix<=depos_order; ix++){
288 &jx_arr(lo.
x+j_jx+ix, lo.
y+l_jx+iz, 0, 0),
289 sx_jx[ix]*sz_jx[iz]*wqx);
291 &jy_arr(lo.
x+j_jy+ix, lo.
y+l_jy+iz, 0, 0),
292 sx_jy[ix]*sz_jy[iz]*wqy);
294 &jz_arr(lo.
x+j_jz+ix, lo.
y+l_jz+iz, 0, 0),
295 sx_jz[ix]*sz_jz[iz]*wqz);
296 #if defined(WARPX_DIM_RZ)
298 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
311 #elif defined(WARPX_DIM_3D)
312 for (
int iz=0; iz<=depos_order; iz++){
313 for (
int iy=0; iy<=depos_order; iy++){
314 for (
int ix=0; ix<=depos_order; ix++){
316 &jx_arr(lo.
x+j_jx+ix, lo.
y+k_jx+iy, lo.
z+l_jx+iz),
317 sx_jx[ix]*sy_jx[iy]*sz_jx[iz]*wqx);
319 &jy_arr(lo.
x+j_jy+ix, lo.
y+k_jy+iy, lo.
z+l_jy+iz),
320 sx_jy[ix]*sy_jy[iy]*sz_jy[iz]*wqy);
322 &jz_arr(lo.
x+j_jz+ix, lo.
y+k_jz+iy, lo.
z+l_jz+iz),
323 sx_jz[ix]*sy_jz[iy]*sz_jz[iz]*wqz);
330 #if defined(WARPX_USE_GPUCLOCK)
364 template <
int depos_order>
366 const amrex::ParticleReal *
const wp,
367 const amrex::ParticleReal *
const uxp,
368 const amrex::ParticleReal *
const uyp,
369 const amrex::ParticleReal *
const uzp,
370 const int *
const ion_lev,
374 const long np_to_depose,
375 const amrex::Real relative_time,
376 const std::array<amrex::Real,3>&
dx,
377 const std::array<amrex::Real,3>& xyzmin,
380 const int n_rz_azimuthal_modes,
382 const long load_balance_costs_update_algo,
388 #if defined(AMREX_USE_HIP) || defined(AMREX_USE_CUDA)
389 using namespace amrex;
400 constexpr
int zdir = WARPX_ZINDEX;
405 #if defined(WARPX_USE_GPUCLOCK)
406 amrex::Real* cost_real =
nullptr;
414 const auto domain = geom.
Domain();
417 sample_tbox.
grow(depos_order);
425 const int nblocks = a_bins.
numBins();
429 const std::size_t shared_mem_bytes = npts*
sizeof(amrex::Real);
433 "Tile size too big for GPU shared memory current deposition");
440 #if defined(WARPX_USE_GPUCLOCK)
445 const int bin_id = blockIdx.x;
446 const unsigned int bin_start = offsets_ptr[bin_id];
447 const unsigned int bin_stop = offsets_ptr[bin_id+1];
449 if (bin_start == bin_stop) {
return; }
454 ParticleReal xp, yp, zp;
455 GetPosition(permutation[bin_start], xp, yp, zp);
456 #if defined(WARPX_DIM_3D)
457 IntVect iv =
IntVect(
int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
458 int( amrex::Math::floor((yp-plo[1]) * dxiarr[1]) ),
459 int( amrex::Math::floor((zp-plo[2]) * dxiarr[2]) ));
460 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
461 IntVect iv =
IntVect(
int( amrex::Math::floor((xp-plo[0]) * dxiarr[0]) ),
462 int( amrex::Math::floor((zp-plo[1]) * dxiarr[1]) ));
463 #elif defined(WARPX_DIM_1D_Z)
464 IntVect iv =
IntVect(
int( amrex::Math::floor((zp-plo[0]) * dxiarr[0]) ));
466 iv += domain.smallEnd();
470 buffer_box.
grow(depos_order);
476 amrex::Real*
const shared = gsm.
dataPtr();
486 volatile amrex::Real* vs = shared;
487 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
491 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
493 unsigned int ip = permutation[ip_orig];
494 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_buff, jx_type,
495 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
496 ip, zdir,
NODE, CELL, 0);
500 addLocalToGlobal(tbox_x, jx_arr, jx_buff);
501 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
506 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
508 unsigned int ip = permutation[ip_orig];
509 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jy_buff, jy_type,
510 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
511 ip, zdir,
NODE, CELL, 1);
515 addLocalToGlobal(tbox_y, jy_arr, jy_buff);
516 for (
int i = threadIdx.x;
i < npts;
i += blockDim.x){
521 for (
unsigned int ip_orig = bin_start+threadIdx.x; ip_orig<bin_stop; ip_orig += blockDim.x)
523 unsigned int ip = permutation[ip_orig];
524 depositComponent<depos_order>(GetPosition, wp, uxp, uyp, uzp, ion_lev, jz_buff, jz_type,
525 relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes,
526 ip, zdir,
NODE, CELL, 2);
530 addLocalToGlobal(tbox_z, jz_arr, jz_buff);
532 #if defined(WARPX_USE_GPUCLOCK)
543 ignore_unused( GetPosition, wp, uxp, uyp, uzp, ion_lev, jx_fab, jy_fab, jz_fab, np_to_depose, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes, cost, load_balance_costs_update_algo, a_bins, box, geom, a_tbox_max_size);
544 amrex::Abort(
"Shared memory only implemented for HIP/CUDA");
574 template <
int depos_order>
576 const amrex::ParticleReal *
const wp,
577 const amrex::ParticleReal *
const uxp,
578 const amrex::ParticleReal *
const uyp,
579 const amrex::ParticleReal *
const uzp,
580 const int *
const ion_lev,
584 const long np_to_depose,
585 const amrex::Real
dt,
586 const amrex::Real relative_time,
587 const std::array<amrex::Real,3>&
dx,
588 const std::array<amrex::Real, 3> xyzmin,
591 const int n_rz_azimuthal_modes,
592 amrex::Real *
const cost,
593 const long load_balance_costs_update_algo)
595 using namespace amrex;
596 #if !defined(WARPX_DIM_RZ)
600 #if !defined(AMREX_USE_GPU)
606 bool const do_ionization = ion_lev;
607 #if !defined(WARPX_DIM_1D_Z)
608 Real
const dxi = 1.0_rt /
dx[0];
610 #if !defined(WARPX_DIM_1D_Z)
611 Real
const xmin = xyzmin[0];
613 #if defined(WARPX_DIM_3D)
614 Real
const dyi = 1.0_rt /
dx[1];
615 Real
const ymin = xyzmin[1];
617 Real
const dzi = 1.0_rt /
dx[2];
618 Real
const zmin = xyzmin[2];
620 #if defined(WARPX_DIM_3D)
621 Real
const invdtdx = 1.0_rt / (
dt*
dx[1]*
dx[2]);
622 Real
const invdtdy = 1.0_rt / (
dt*
dx[0]*
dx[2]);
623 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]*
dx[1]);
624 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
625 Real
const invdtdx = 1.0_rt / (
dt*
dx[2]);
626 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
627 Real
const invvol = 1.0_rt / (
dx[0]*
dx[2]);
628 #elif defined(WARPX_DIM_1D_Z)
629 Real
const invdtdz = 1.0_rt / (
dt*
dx[0]);
630 Real
const invvol = 1.0_rt / (
dx[2]);
633 #if defined(WARPX_DIM_RZ)
638 #if !defined(WARPX_DIM_1D_Z)
639 Real constexpr one_third = 1.0_rt / 3.0_rt;
640 Real constexpr one_sixth = 1.0_rt / 6.0_rt;
644 #if defined(WARPX_USE_GPUCLOCK)
645 amrex::Real* cost_real =
nullptr;
654 #if defined(WARPX_USE_GPUCLOCK)
661 Real
const gaminv = 1.0_rt/std::sqrt(1.0_rt + uxp[ip]*uxp[ip]*clightsq
662 + uyp[ip]*uyp[ip]*clightsq
663 + uzp[ip]*uzp[ip]*clightsq);
671 ParticleReal xp, yp, zp;
672 GetPosition(ip, xp, yp, zp);
674 #if !defined(WARPX_DIM_1D_Z)
675 Real
const wqx = wq*invdtdx;
677 #if defined(WARPX_DIM_3D)
678 Real
const wqy = wq*invdtdy;
680 Real
const wqz = wq*invdtdz;
683 #if defined(WARPX_DIM_RZ)
684 Real
const xp_new = xp + (relative_time + 0.5_rt*
dt)*uxp[ip]*gaminv;
685 Real
const yp_new = yp + (relative_time + 0.5_rt*
dt)*uyp[ip]*gaminv;
686 Real
const xp_mid = xp_new - 0.5_rt*
dt*uxp[ip]*gaminv;
687 Real
const yp_mid = yp_new - 0.5_rt*
dt*uyp[ip]*gaminv;
688 Real
const xp_old = xp_new -
dt*uxp[ip]*gaminv;
689 Real
const yp_old = yp_new -
dt*uyp[ip]*gaminv;
690 Real
const rp_new = std::sqrt(xp_new*xp_new + yp_new*yp_new);
691 Real
const rp_mid = std::sqrt(xp_mid*xp_mid + yp_mid*yp_mid);
692 Real
const rp_old = std::sqrt(xp_old*xp_old + yp_old*yp_old);
693 Real costheta_new, sintheta_new;
694 if (rp_new > 0._rt) {
695 costheta_new = xp_new/rp_new;
696 sintheta_new = yp_new/rp_new;
698 costheta_new = 1._rt;
699 sintheta_new = 0._rt;
701 amrex::Real costheta_mid, sintheta_mid;
702 if (rp_mid > 0._rt) {
703 costheta_mid = xp_mid/rp_mid;
704 sintheta_mid = yp_mid/rp_mid;
706 costheta_mid = 1._rt;
707 sintheta_mid = 0._rt;
709 amrex::Real costheta_old, sintheta_old;
710 if (rp_old > 0._rt) {
711 costheta_old = xp_old/rp_old;
712 sintheta_old = yp_old/rp_old;
714 costheta_old = 1._rt;
715 sintheta_old = 0._rt;
721 double const x_new = (rp_new - xmin)*dxi;
722 double const x_old = (rp_old - xmin)*dxi;
724 #if !defined(WARPX_DIM_1D_Z)
726 double const x_new = (xp - xmin + (relative_time + 0.5_rt*
dt)*uxp[ip]*gaminv)*dxi;
727 double const x_old = x_new -
dt*dxi*uxp[ip]*gaminv;
730 #if defined(WARPX_DIM_3D)
732 double const y_new = (yp - ymin + (relative_time + 0.5_rt*
dt)*uyp[ip]*gaminv)*dyi;
733 double const y_old = y_new -
dt*dyi*uyp[ip]*gaminv;
736 double const z_new = (zp - zmin + (relative_time + 0.5_rt*
dt)*uzp[ip]*gaminv)*dzi;
737 double const z_old = z_new -
dt*dzi*uzp[ip]*gaminv;
739 #if defined(WARPX_DIM_RZ)
740 Real
const vy = (-uxp[ip]*sintheta_mid + uyp[ip]*costheta_mid)*gaminv;
741 #elif defined(WARPX_DIM_XZ)
742 Real
const vy = uyp[ip]*gaminv;
743 #elif defined(WARPX_DIM_1D_Z)
744 Real
const vx = uxp[ip]*gaminv;
745 Real
const vy = uyp[ip]*gaminv;
753 #if !defined(WARPX_DIM_1D_Z)
754 double sx_new[depos_order + 3] = {0.};
755 double sx_old[depos_order + 3] = {0.};
757 #if defined(WARPX_DIM_3D)
759 double sy_new[depos_order + 3] = {0.};
760 double sy_old[depos_order + 3] = {0.};
763 double sz_new[depos_order + 3] = {0.};
764 double sz_old[depos_order + 3] = {0.};
772 #if !defined(WARPX_DIM_1D_Z)
773 const int i_new = compute_shape_factor(sx_new+1, x_new);
774 const int i_old = compute_shifted_shape_factor(sx_old, x_old, i_new);
776 #if defined(WARPX_DIM_3D)
777 const int j_new = compute_shape_factor(sy_new+1, y_new);
778 const int j_old = compute_shifted_shape_factor(sy_old, y_old, j_new);
780 const int k_new = compute_shape_factor(sz_new+1, z_new);
781 const int k_old = compute_shifted_shape_factor(sz_old, z_old, k_new);
784 #if !defined(WARPX_DIM_1D_Z)
785 int dil = 1, diu = 1;
786 if (i_old < i_new) dil = 0;
787 if (i_old > i_new) diu = 0;
789 #if defined(WARPX_DIM_3D)
790 int djl = 1, dju = 1;
791 if (j_old < j_new) djl = 0;
792 if (j_old > j_new) dju = 0;
794 int dkl = 1, dku = 1;
795 if (k_old < k_new) dkl = 0;
796 if (k_old > k_new) dku = 0;
798 #if defined(WARPX_DIM_3D)
800 for (
int k=dkl; k<=depos_order+2-dku; k++) {
801 for (
int j=djl; j<=depos_order+2-dju; j++) {
802 amrex::Real sdxi = 0._rt;
803 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
804 sdxi += wqx*(sx_old[
i] - sx_new[
i])*(
805 one_third*(sy_new[j]*sz_new[k] + sy_old[j]*sz_old[k])
806 +one_sixth*(sy_new[j]*sz_old[k] + sy_old[j]*sz_new[k]));
811 for (
int k=dkl; k<=depos_order+2-dku; k++) {
812 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
813 amrex::Real sdyj = 0._rt;
814 for (
int j=djl; j<=depos_order+1-dju; j++) {
815 sdyj += wqy*(sy_old[j] - sy_new[j])*(
816 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
817 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
822 for (
int j=djl; j<=depos_order+2-dju; j++) {
823 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
824 amrex::Real sdzk = 0._rt;
825 for (
int k=dkl; k<=depos_order+1-dku; k++) {
826 sdzk += wqz*(sz_old[k] - sz_new[k])*(
827 one_third*(sx_new[
i]*sy_new[j] + sx_old[
i]*sy_old[j])
828 +one_sixth*(sx_new[
i]*sy_old[j] + sx_old[
i]*sy_new[j]));
834 #elif defined(WARPX_DIM_XZ) || defined(WARPX_DIM_RZ)
836 for (
int k=dkl; k<=depos_order+2-dku; k++) {
837 amrex::Real sdxi = 0._rt;
838 for (
int i=dil;
i<=depos_order+1-diu;
i++) {
839 sdxi += wqx*(sx_old[
i] - sx_new[
i])*0.5_rt*(sz_new[k] + sz_old[k]);
841 #if defined(WARPX_DIM_RZ)
843 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
845 const Complex djr_cmplx = 2._rt *sdxi*xy_mid;
848 xy_mid = xy_mid*xy_mid0;
853 for (
int k=dkl; k<=depos_order+2-dku; k++) {
854 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
855 Real
const sdyj = wq*vy*invvol*(
856 one_third*(sx_new[
i]*sz_new[k] + sx_old[
i]*sz_old[k])
857 +one_sixth*(sx_new[
i]*sz_old[k] + sx_old[
i]*sz_new[k]));
859 #if defined(WARPX_DIM_RZ)
864 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
867 const Complex djt_cmplx = -2._rt * I*(i_new-1 +
i + xmin*dxi)*wq*invdtdx/(amrex::Real)imode
868 *(
Complex(sx_new[
i]*sz_new[k], 0._rt)*(xy_new - xy_mid)
869 +
Complex(sx_old[
i]*sz_old[k], 0._rt)*(xy_mid - xy_old));
872 xy_new = xy_new*xy_new0;
873 xy_mid = xy_mid*xy_mid0;
874 xy_old = xy_old*xy_old0;
879 for (
int i=dil;
i<=depos_order+2-diu;
i++) {
881 for (
int k=dkl; k<=depos_order+1-dku; k++) {
882 sdzk += wqz*(sz_old[k] - sz_new[k])*0.5_rt*(sx_new[
i] + sx_old[
i]);
884 #if defined(WARPX_DIM_RZ)
886 for (
int imode=1 ; imode < n_rz_azimuthal_modes ; imode++) {
888 const Complex djz_cmplx = 2._rt * sdzk * xy_mid;
891 xy_mid = xy_mid*xy_mid0;
896 #elif defined(WARPX_DIM_1D_Z)
898 for (
int k=dkl; k<=depos_order+2-dku; k++) {
899 amrex::Real
const sdxi = wq*vx*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
902 for (
int k=dkl; k<=depos_order+2-dku; k++) {
903 amrex::Real
const sdyj = wq*vy*invvol*0.5_rt*(sz_old[k] + sz_new[k]);
906 amrex::Real sdzk = 0._rt;
907 for (
int k=dkl; k<=depos_order+1-dku; k++) {
908 sdzk += wqz*(sz_old[k] - sz_new[k]);
914 #if defined(WARPX_USE_GPUCLOCK)
952 template <
int depos_order>
954 const amrex::ParticleReal*
const wp,
955 const amrex::ParticleReal*
const uxp,
956 const amrex::ParticleReal*
const uyp,
957 const amrex::ParticleReal*
const uzp,
958 const int*
const ion_lev,
962 const long np_to_depose,
963 const amrex::Real
dt,
964 const amrex::Real relative_time,
965 const std::array<amrex::Real,3>&
dx,
966 const std::array<amrex::Real,3>& xyzmin,
969 const int n_rz_azimuthal_modes,
971 const long load_balance_costs_update_algo)
973 #if defined(WARPX_DIM_RZ)
975 wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
976 np_to_depose,
dt, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes);
977 amrex::Abort(
"Vay deposition not implemented in RZ geometry");
980 #if defined(WARPX_DIM_1D_Z)
982 wp, uxp, uyp, uzp, ion_lev, Dx_fab, Dy_fab, Dz_fab,
983 np_to_depose,
dt, relative_time,
dx, xyzmin, lo, q, n_rz_azimuthal_modes);
984 amrex::Abort(
"Vay deposition not implemented in cartesian 1D geometry");
987 #if !defined(AMREX_USE_GPU)
991 #if !(defined WARPX_DIM_RZ || defined WARPX_DIM_1D_Z)
995 const bool do_ionization = ion_lev;
998 const amrex::Real dxi = 1._rt /
dx[0];
999 const amrex::Real dzi = 1._rt /
dx[2];
1000 #if defined(WARPX_DIM_3D)
1001 const amrex::Real dyi = 1._rt /
dx[1];
1005 const amrex::Real invdt = 1._rt /
dt;
1008 #if defined(WARPX_DIM_XZ)
1009 const amrex::Real invvol = dxi * dzi;
1010 #elif defined(WARPX_DIM_3D)
1011 const amrex::Real invvol = dxi * dyi * dzi;
1015 const amrex::Real xmin = xyzmin[0];
1016 const amrex::Real zmin = xyzmin[2];
1017 #if defined(WARPX_DIM_3D)
1018 const amrex::Real ymin = xyzmin[1];
1022 #if defined(WARPX_DIM_3D)
1025 #elif defined(WARPX_DIM_XZ)
1041 #if defined(WARPX_USE_GPUCLOCK)
1042 amrex::Real* cost_real =
nullptr;
1050 #if defined(WARPX_USE_GPUCLOCK)
1057 const amrex::Real invgam = 1._rt / std::sqrt(1._rt + uxp[ip] * uxp[ip] * invcsq
1058 + uyp[ip] * uyp[ip] * invcsq
1059 + uzp[ip] * uzp[ip] * invcsq);
1061 amrex::Real wq = q * wp[ip];
1062 if (do_ionization) wq *= ion_lev[ip];
1065 amrex::ParticleReal xp, yp, zp;
1066 GetPosition(ip, xp, yp, zp);
1069 const amrex::Real vx = uxp[ip] * invgam;
1070 const amrex::Real vy = uyp[ip] * invgam;
1071 const amrex::Real vz = uzp[ip] * invgam;
1074 xp += relative_time * vx;
1075 yp += relative_time * vy;
1076 zp += relative_time * vz;
1079 #if defined(WARPX_DIM_XZ)
1080 const amrex::Real wqy = wq * vy * invvol;
1085 double const x_new = (xp - xmin + 0.5_rt*
dt*vx) * dxi;
1086 double const x_old = (xp - xmin - 0.5_rt*
dt*vx) * dxi;
1087 #if defined(WARPX_DIM_3D)
1089 double const y_new = (yp - ymin + 0.5_rt*
dt*vy) * dyi;
1090 double const y_old = (yp - ymin - 0.5_rt*
dt*vy) * dyi;
1093 double const z_new = (zp - zmin + 0.5_rt*
dt*vz) * dzi;
1094 double const z_old = (zp - zmin - 0.5_rt*
dt*vz) * dzi;
1098 double sx_new[depos_order+1] = {0.};
1099 double sx_old[depos_order+1] = {0.};
1100 #if defined(WARPX_DIM_3D)
1102 double sy_new[depos_order+1] = {0.};
1103 double sy_old[depos_order+1] = {0.};
1106 double sz_new[depos_order+1] = {0.};
1107 double sz_old[depos_order+1] = {0.};
1114 const int i_new = compute_shape_factor(sx_new, x_new);
1115 #if defined(WARPX_DIM_3D)
1118 const int j_new = compute_shape_factor(sy_new, y_new);
1122 const int k_new = compute_shape_factor(sz_new, z_new);
1128 const int i_old = compute_shape_factor(sx_old, x_old);
1129 #if defined(WARPX_DIM_3D)
1132 const int j_old = compute_shape_factor(sy_old, y_old);
1136 const int k_old = compute_shape_factor(sz_old, z_old);
1139 #if defined(WARPX_DIM_XZ)
1141 for (
int k=0; k<=depos_order; k++) {
1142 for (
int i=0;
i<=depos_order;
i++) {
1146 auto const sxn_szn =
static_cast<amrex::Real
>(sx_new[
i] * sz_new[k]);
1147 auto const sxo_szn =
static_cast<amrex::Real
>(sx_old[
i] * sz_new[k]);
1148 auto const sxn_szo =
static_cast<amrex::Real
>(sx_new[
i] * sz_old[k]);
1149 auto const sxo_szo =
static_cast<amrex::Real
>(sx_old[
i] * sz_old[k]);
1151 if (i_new == i_old && k_new == k_old) {
1154 wq * invvol * invdt * (sxn_szn - sxo_szo));
1157 wq * invvol * invdt * (sxn_szo - sxo_szn));
1161 wqy * 0.25_rt * (sxn_szn + sxn_szo + sxo_szn + sxo_szo));
1165 wq * invvol * invdt * sxn_szn);
1168 - wq * invvol * invdt * sxo_szo);
1171 wq * invvol * invdt * sxn_szo);
1174 - wq * invvol * invdt * sxo_szn);
1178 wqy * 0.25_rt * sxn_szn);
1181 wqy * 0.25_rt * sxn_szo);
1184 wqy * 0.25_rt * sxo_szn);
1187 wqy * 0.25_rt * sxo_szo);
1193 #elif defined(WARPX_DIM_3D)
1195 for (
int k=0; k<=depos_order; k++) {
1196 for (
int j=0; j<=depos_order; j++) {
1198 auto const syn_szn =
static_cast<amrex::Real
>(sy_new[j] * sz_new[k]);
1199 auto const syo_szn =
static_cast<amrex::Real
>(sy_old[j] * sz_new[k]);
1200 auto const syn_szo =
static_cast<amrex::Real
>(sy_new[j] * sz_old[k]);
1201 auto const syo_szo =
static_cast<amrex::Real
>(sy_old[j] * sz_old[k]);
1203 for (
int i=0;
i<=depos_order;
i++) {
1205 auto const sxn_syn_szn =
static_cast<amrex::Real
>(sx_new[
i]) * syn_szn;
1206 auto const sxo_syn_szn =
static_cast<amrex::Real
>(sx_old[
i]) * syn_szn;
1207 auto const sxn_syo_szn =
static_cast<amrex::Real
>(sx_new[
i]) * syo_szn;
1208 auto const sxo_syo_szn =
static_cast<amrex::Real
>(sx_old[
i]) * syo_szn;
1209 auto const sxn_syn_szo =
static_cast<amrex::Real
>(sx_new[
i]) * syn_szo;
1210 auto const sxo_syn_szo =
static_cast<amrex::Real
>(sx_old[
i]) * syn_szo;
1211 auto const sxn_syo_szo =
static_cast<amrex::Real
>(sx_new[
i]) * syo_szo;
1212 auto const sxo_syo_szo =
static_cast<amrex::Real
>(sx_old[
i]) * syo_szo;
1214 if (i_new == i_old && j_new == j_old && k_new == k_old) {
1217 wq * invvol * invdt * (sxn_syn_szn - sxo_syo_szo));
1220 wq * invvol * invdt * (sxn_syn_szo - sxo_syo_szn));
1223 wq * invvol * invdt * (sxn_syo_szn - sxo_syn_szo));
1226 wq * invvol * invdt * (sxo_syn_szn - sxn_syo_szo));
1230 wq * invvol * invdt * sxn_syn_szn);
1233 - wq * invvol * invdt * sxo_syo_szo);
1236 wq * invvol * invdt * sxn_syn_szo);
1239 - wq * invvol * invdt * sxo_syo_szn);
1242 wq * invvol * invdt * sxn_syo_szn);
1245 - wq * invvol * invdt * sxo_syn_szo);
1248 wq * invvol * invdt * sxo_syn_szn);
1251 - wq * invvol * invdt * sxn_syo_szo);
1259 #if defined(WARPX_DIM_3D)
1262 const amrex::Real t_a = temp_arr(
i,j,k,0);
1263 const amrex::Real t_b = temp_arr(
i,j,k,1);
1264 const amrex::Real t_c = temp_arr(
i,j,k,2);
1265 const amrex::Real t_d = temp_arr(
i,j,k,3);
1266 Dx_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b + t_c - 2._rt*t_d);
1267 Dy_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a + t_b - 2._rt*t_c + t_d);
1268 Dz_arr(
i,j,k) += (1._rt/6._rt)*(2_rt*t_a - 2._rt*t_b + t_c + t_d);
1270 #elif defined(WARPX_DIM_XZ)
1273 const amrex::Real t_a = temp_arr(
i,j,0,0);
1274 const amrex::Real t_b = temp_arr(
i,j,0,1);
1275 Dx_arr(
i,j,0) += (0.5_rt)*(t_a + t_b);
1276 Dz_arr(
i,j,0) += (0.5_rt)*(t_a - t_b);
1283 # if defined(WARPX_USE_GPUCLOCK)
1286 *cost += *cost_real;
#define AMREX_ALWAYS_ASSERT(EX)
#define AMREX_D_DECL(a, b, c)
void doDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_depose, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:53
void doEsirkepovDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, const amrex::Array4< amrex::Real > &Jx_arr, const amrex::Array4< amrex::Real > &Jy_arr, const amrex::Array4< amrex::Real > &Jz_arr, const long np_to_depose, const amrex::Real dt, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *const cost, const long load_balance_costs_update_algo)
Esirkepov Current Deposition for thread thread_num.
Definition: CurrentDeposition.H:575
void doVayDepositionShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &Dx_fab, amrex::FArrayBox &Dy_fab, amrex::FArrayBox &Dz_fab, const long np_to_depose, const amrex::Real dt, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo)
Vay current deposition (Vay et al, 2013) for thread thread_num: deposit D in real space and store the...
Definition: CurrentDeposition.H:953
void doDepositionSharedShapeN(const GetParticlePosition &GetPosition, const amrex::ParticleReal *const wp, const amrex::ParticleReal *const uxp, const amrex::ParticleReal *const uyp, const amrex::ParticleReal *const uzp, const int *const ion_lev, amrex::FArrayBox &jx_fab, amrex::FArrayBox &jy_fab, amrex::FArrayBox &jz_fab, const long np_to_depose, const amrex::Real relative_time, const std::array< amrex::Real, 3 > &dx, const std::array< amrex::Real, 3 > &xyzmin, const amrex::Dim3 lo, const amrex::Real q, const int n_rz_azimuthal_modes, amrex::Real *cost, const long load_balance_costs_update_algo, const amrex::DenseBins< WarpXParticleContainer::ParticleType > &a_bins, const amrex::Box &box, const amrex::Geometry &geom, const amrex::IntVect &a_tbox_max_size)
Current Deposition for thread thread_num using shared memory.
Definition: CurrentDeposition.H:365
#define WARPX_ALWAYS_ASSERT_WITH_MESSAGE(EX, MSG)
Definition: TextMsg.H:13
amrex::GpuComplex< amrex::Real > Complex
Definition: WarpX_Complex.H:22
static amrex::IntVect shared_tilesize
tileSize to use for shared current deposition operations
Definition: WarpX.H:370
static int shared_mem_current_tpb
number of threads to use per block in shared deposition
Definition: WarpX.H:367
Defines a timer object to be used on GPU; measures summed thread cycles.
Definition: KernelTimer.H:27
virtual void free(void *pt)=0
virtual void * alloc(std::size_t sz)=0
const Box & box() const noexcept
AMREX_FORCE_INLINE Array4< T const > array() const noexcept
AMREX_GPU_HOST_DEVICE BoxND & grow(int i) noexcept
AMREX_GPU_HOST_DEVICE IntVectND< dim > type() const noexcept
AMREX_GPU_HOST_DEVICE Long numPts() const noexcept
GpuArray< Real, AMREX_SPACEDIM > InvCellSizeArray() const noexcept
index_type * offsetsPtr() noexcept
index_type * permutationPtr() noexcept
Long numBins() const noexcept
const Box & Domain() const noexcept
GpuArray< Real, AMREX_SPACEDIM > ProbLoArray() const noexcept
static std::size_t sharedMemPerBlock() noexcept
static constexpr auto c
vacuum speed of light [m/s]
Definition: constant.H:44
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void AddNoRet(T *sum, T value) noexcept
void streamSynchronize() noexcept
gpuStream_t gpuStream() noexcept
std::enable_if_t< std::is_integral_v< T > > ParallelFor(TypeList< CTOs... > ctos, std::array< int, sizeof...(CTOs)> const &runtime_options, T N, F &&f)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE BoxND< dim > convert(const BoxND< dim > &b, const IntVectND< dim > &typ) noexcept
void launch(T const &n, L &&f) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 end(BoxND< dim > const &box) noexcept
Arena * The_Managed_Arena()
IntVectND< AMREX_SPACEDIM > IntVect
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void ignore_unused(const Ts &...)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Dim3 begin(BoxND< dim > const &box) noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr const T & max(const T &a, const T &b) noexcept
void Abort(const std::string &msg)
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int getTileIndex(const IntVect &iv, const Box &box, const bool a_do_tiling, const IntVect &a_tile_size, Box &tbx)
i
Definition: check_interp_points_and_weights.py:174
int dx
Definition: stencil.py:436
int dt
Definition: stencil.py:440
Definition: ShapeFactors.H:27
Definition: ShapeFactors.H:82
Functor that can be used to extract the positions of the macroparticles inside a ParallelFor kernel.
Definition: GetAndSetPosition.H:53
@ GpuClock
Definition: WarpXAlgorithmSelection.H:138
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T real() const noexcept
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE constexpr T imag() const noexcept
AMREX_GPU_DEVICE T * dataPtr() noexcept