@@ -33,18 +33,15 @@ namespace tensorium_RG::bssn {
3333 */
3434template <typename T>
3535inline void compute_rhs_A_tilde (const BSSNGridSoA<T> &G, Field3D<T> rhs[6 ], size_t padding = 4 ,
36- const GaugeParameters<T> ¶ms = {}) {
36+ const GaugeParameters<T> ¶ms = {},
37+ Field3D<T> *z4_conformal_trace_cache = nullptr ) {
3738 BSSN_PROFILE_KERNEL (ATilde);
3839 using namespace tensorium_RG ::fd;
3940 const T ko_sigma = scaled_ko_sigma (params.ko_sigma );
4041
4142 size_t I0, I1, J0, J1, K0, K1;
4243 G.domain_bounds (I0, I1, J0, J1, K0, K1);
4344
44- const size_t total = G.A_tilde [0 ].st .nx_tot * G.A_tilde [0 ].st .ny_tot * G.A_tilde [0 ].st .nz_tot ;
45- for (int s = 0 ; s < 6 ; ++s)
46- std::fill (rhs[s].ptr (), rhs[s].ptr () + total, T (0 ));
47-
4845 const size_t i0 = clamped_lower (I0, padding, I1);
4946 const size_t j0 = clamped_lower (J0, padding, J1);
5047 const size_t k0 = clamped_lower (K0, padding, K1);
@@ -77,7 +74,10 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
7774 const ptrdiff_t sy = G.A_tilde [0 ].st .sy ;
7875
7976 // Map (row, col) -> symmetric index 0..5
80- const int map_s[3 ][3 ] = {{0 , 1 , 2 }, {1 , 3 , 4 }, {2 , 4 , 5 }};
77+ constexpr int map_s[3 ][3 ] = {{XX, XY, XZ}, {XY, YY, YZ}, {XZ, YZ, ZZ}};
78+ constexpr int sym_row[6 ] = {0 , 0 , 0 , 1 , 1 , 2 };
79+ constexpr int sym_col[6 ] = {0 , 1 , 2 , 1 , 2 , 2 };
80+ const T ko_scale = T (ko_sigma / G.dx );
8181
8282#pragma omp parallel for collapse(2)
8383 for (size_t i = i0; i < i1; ++i) {
@@ -106,6 +106,8 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
106106 p_Ricci[s] = G.Ricci [s].ptr () + idx_start;
107107 p_rhs[s] = rhs[s].ptr () + idx_start;
108108 }
109+ T *p_z4_trace =
110+ z4_conformal_trace_cache ? (z4_conformal_trace_cache->ptr () + idx_start) : nullptr ;
109111
110112 for (size_t k = k0; k < k1; ++k) {
111113 const T alpha = *p_alpha;
@@ -114,7 +116,19 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
114116 const T inv_chi = T (1 ) / chi_guarded;
115117 const T K = *p_K;
116118 T RicciZ4[6 ];
117- compute_RicciZ4 (G, i, j, k, RicciZ4, params.chi_div_floor );
119+ compute_RicciZ4_core (G, i, j, k, RicciZ4, params.chi_div_floor , inv_12dx,
120+ inv_12dy, inv_12dz, sx, sy);
121+ if (p_z4_trace) {
122+ const T g_xx = *p_gam_inv[0 ];
123+ const T g_xy = *p_gam_inv[1 ];
124+ const T g_xz = *p_gam_inv[2 ];
125+ const T g_yy = *p_gam_inv[3 ];
126+ const T g_yz = *p_gam_inv[4 ];
127+ const T g_zz = *p_gam_inv[5 ];
128+ *p_z4_trace = g_xx * RicciZ4[0 ] + g_yy * RicciZ4[3 ] + g_zz * RicciZ4[5 ] +
129+ T (2 ) * (g_xy * RicciZ4[1 ] + g_xz * RicciZ4[2 ] +
130+ g_yz * RicciZ4[4 ]);
131+ }
118132
119133 // --- 1. Load Local Tensors ---
120134 T gamma_tilde_inv[3 ][3 ];
@@ -124,26 +138,8 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
124138 T Ricci_mat[3 ][3 ];
125139
126140 for (int s = 0 ; s < 6 ; ++s) {
127- int a, b;
128- if (s == 0 ) {
129- a = 0 ;
130- b = 0 ;
131- } else if (s == 1 ) {
132- a = 0 ;
133- b = 1 ;
134- } else if (s == 2 ) {
135- a = 0 ;
136- b = 2 ;
137- } else if (s == 3 ) {
138- a = 1 ;
139- b = 1 ;
140- } else if (s == 4 ) {
141- a = 1 ;
142- b = 2 ;
143- } else {
144- a = 2 ;
145- b = 2 ;
146- }
141+ const int a = sym_row[s];
142+ const int b = sym_col[s];
147143
148144 const T gt = *p_gam[s];
149145 const T gt_inv = *p_gam_inv[s];
@@ -185,26 +181,8 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
185181 T d_g_phys[3 ][3 ][3 ]; // dir, row, col
186182
187183 for (int s = 0 ; s < 6 ; ++s) {
188- int row, col;
189- if (s == 0 ) {
190- row = 0 ;
191- col = 0 ;
192- } else if (s == 1 ) {
193- row = 0 ;
194- col = 1 ;
195- } else if (s == 2 ) {
196- row = 0 ;
197- col = 2 ;
198- } else if (s == 3 ) {
199- row = 1 ;
200- col = 1 ;
201- } else if (s == 4 ) {
202- row = 1 ;
203- col = 2 ;
204- } else {
205- row = 2 ;
206- col = 2 ;
207- }
184+ const int row = sym_row[s];
185+ const int col = sym_col[s];
208186
209187 const T *p_g = p_gam[s];
210188 const T d_gt_x = Dx_ptr (p_g, sx, inv_12dx);
@@ -304,13 +282,9 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
304282 const int s = map_s[a][b];
305283 const T *p_field = p_A[s];
306284
307- const T bx = beta[0 ];
308- const T by = beta[1 ];
309- const T bz = beta[2 ];
310-
311- const T adv = bx * Dx_upwind_ptr (p_field, sx, inv_2dx, bx) +
312- by * Dy_upwind_ptr (p_field, sy, inv_2dy, by) +
313- bz * Dz_upwind_ptr (p_field, inv_2dz, bz);
285+ const T adv = beta[0 ] * Dx_upwind_ptr (p_field, sx, inv_2dx, beta[0 ]) +
286+ beta[1 ] * Dy_upwind_ptr (p_field, sy, inv_2dy, beta[1 ]) +
287+ beta[2 ] * Dz_upwind_ptr (p_field, inv_2dz, beta[2 ]);
314288
315289 T lie = T (0 );
316290 for (int m = 0 ; m < 3 ; ++m) {
@@ -324,7 +298,7 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
324298 alpha * (K * A_mat[a][b] - T (2 ) * A_contracted[a][b]);
325299 const T diss = KO6_axis_ptr (p_field, sx) + KO6_axis_ptr (p_field, sy) +
326300 KO6_axis_ptr (p_field, 1 );
327- const T diss_scaled = (ko_sigma / G. dx ) * diss;
301+ const T diss_scaled = ko_scale * diss;
328302
329303 *p_rhs[s] = adv + lie + term_geom + term_quad + diss_scaled;
330304 }
@@ -342,6 +316,8 @@ inline void compute_rhs_A_tilde(const BSSNGridSoA<T> &G, Field3D<T> rhs[6], size
342316 ++p_Ricci[s];
343317 ++p_rhs[s];
344318 }
319+ if (p_z4_trace)
320+ ++p_z4_trace;
345321 }
346322 }
347323 }
0 commit comments