Skip to content

Commit 375eccf

Browse files
committed
refactor and new Gemm ARM kernel (no AMX or SVE atm)
1 parent 3dbc0e7 commit 375eccf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+1160
-876
lines changed

Tests/Derivatives/Derivatives.cpp

Lines changed: 143 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@ int deriv_test_spectral_fft() {
66
using Tensor3D = tensorium::Tensor<C, 3>;
77

88
const size_t N = 128;
9-
const T L = 1.0;
10-
const T dx = L / N;
11-
const T pi = static_cast<T>(3.14159265358979323846);
12-
const T TWO_PI = 2 * pi;
9+
const T L = 1.0;
10+
const T dx = L / N;
11+
const T pi = static_cast<T>(3.14159265358979323846);
12+
const T TWO_PI = 2 * pi;
1313

1414
Tensor3D f({N, N, N});
1515
Tensor3D df_dx_exact({N, N, N});
@@ -22,7 +22,8 @@ int deriv_test_spectral_fft() {
2222
T y = j * dx;
2323
T z = k * dx;
2424
f(i, j, k) = std::sin(TWO_PI * x) * std::sin(TWO_PI * y) * std::sin(TWO_PI * z);
25-
df_dx_exact(i, j, k) = TWO_PI * std::cos(TWO_PI * x) * std::sin(TWO_PI * y) * std::sin(TWO_PI * z);
25+
df_dx_exact(i, j, k) =
26+
TWO_PI * std::cos(TWO_PI * x) * std::sin(TWO_PI * y) * std::sin(TWO_PI * z);
2627
}
2728
}
2829
}
@@ -32,10 +33,10 @@ int deriv_test_spectral_fft() {
3233

3334
for (size_t i = 0; i < N; ++i) {
3435
int ki = (i <= N / 2) ? i : i - N;
35-
T kx = TWO_PI * ki / L;
36+
T kx = TWO_PI * ki / L;
3637
for (size_t j = 0; j < N; ++j) {
3738
for (size_t k = 0; k < N; ++k) {
38-
F(i,j,k) *= C(0, kx);
39+
F(i, j, k) *= C(0, kx);
3940
}
4041
}
4142
}
@@ -47,7 +48,8 @@ int deriv_test_spectral_fft() {
4748
for (size_t i = 0; i < N; ++i)
4849
for (size_t j = 0; j < N; ++j)
4950
for (size_t k = 0; k < N; ++k)
50-
max_err = std::max(max_err, std::abs(df_dx_exact(i,j,k).real() - df_dx_numeric(i,j,k).real()));
51+
max_err = std::max(
52+
max_err, std::abs(df_dx_exact(i, j, k).real() - df_dx_numeric(i, j, k).real()));
5153

5254
std::cout << "\n=== Spectral Derivative Test (3D FFT) ===\n";
5355
std::cout << "Erreur max sur d/dx (FFT vs exact): " << max_err << "\n";
@@ -56,22 +58,20 @@ int deriv_test_spectral_fft() {
5658
for (size_t i = 0; i < 4; ++i) {
5759
for (size_t j = 0; j < 1; ++j) {
5860
for (size_t k = 0; k < 1; ++k) {
59-
std::cout << "x=" << i * dx << "\tf=" << f(i,j,k).real()
60-
<< "\texact=" << df_dx_exact(i,j,k).real()
61-
<< "\tnumeric=" << df_dx_numeric(i,j,k).real() << "\n";
62-
T err = std::abs(df_dx_numeric(i,j,k).real() - df_dx_exact(i,j,k).real());
63-
std::cout << "\terreur=" << err << "\n";
64-
61+
std::cout << "x=" << i * dx << "\tf=" << f(i, j, k).real()
62+
<< "\texact=" << df_dx_exact(i, j, k).real()
63+
<< "\tnumeric=" << df_dx_numeric(i, j, k).real() << "\n";
64+
T err = std::abs(df_dx_numeric(i, j, k).real() - df_dx_exact(i, j, k).real());
65+
std::cout << "\terreur=" << err << "\n";
6566
}
6667
}
6768
}
6869

69-
70-
std::cout << "\n=== Fonction test cos*sin*cos ===\n";
70+
std::cout << "\n=== Fonction test cos*sin*cos ===\n";
7171

7272
const T A = 2 * pi; // freq_x
7373
const T B = 4 * pi; // freq_y
74-
const T Ce = 6 * pi; // freq_z
74+
const T Ce = 6 * pi; // freq_z
7575

7676
for (size_t i = 0; i < N; ++i) {
7777
for (size_t j = 0; j < N; ++j) {
@@ -85,139 +85,133 @@ int deriv_test_spectral_fft() {
8585
}
8686
}
8787

88-
F = f;
89-
tensorium::SpectralFFT<T>::forward_3D(F);
90-
91-
for (size_t i = 0; i < N; ++i) {
92-
int ki = (i <= N / 2) ? i : i - N;
93-
T kx = TWO_PI * ki / L;
94-
for (size_t j = 0; j < N; ++j) {
95-
for (size_t k = 0; k < N; ++k) {
96-
F(i,j,k) *= C(0, kx);
97-
}
98-
}
99-
}
100-
101-
tensorium::SpectralFFT<T>::backward_3D(F);
102-
df_dx_numeric = F;
103-
104-
max_err = 0;
105-
for (size_t i = 0; i < N; ++i)
106-
for (size_t j = 0; j < N; ++j)
107-
for (size_t k = 0; k < N; ++k)
108-
max_err = std::max(max_err, std::abs(df_dx_exact(i,j,k).real() - df_dx_numeric(i,j,k).real()));
109-
110-
std::cout << "Erreur max (cos*sin*cos): " << max_err << "\n";
111-
for (size_t i = 0; i < 4; ++i) {
112-
size_t j = 1, k = 1;
113-
T x = i * dx;
114-
std::cout << "x=" << x
115-
<< "\tf=" << f(i,j,k).real()
116-
<< "\texact=" << df_dx_exact(i,j,k).real()
117-
<< "\tnumeric=" << df_dx_numeric(i,j,k).real()
118-
<< "\terreur=" << std::abs(df_dx_exact(i,j,k).real() - df_dx_numeric(i,j,k).real()) << "\n";
119-
}
120-
121-
std::ofstream file("spectral_deriv_output.csv");
122-
file << "x,y,z,f,dfdx_exact,dfdx_numeric\n";
123-
for (size_t i = 0; i < N; ++i) {
124-
for (size_t j = 0; j < N; ++j) {
125-
for (size_t k = 0; k < N; ++k) {
126-
T x = i * dx;
127-
T y = j * dx;
128-
T z = k * dx;
129-
file << x << "," << y << "," << z << ","
130-
<< f(i,j,k).real() << ","
131-
<< df_dx_exact(i,j,k).real() << ","
132-
<< df_dx_numeric(i,j,k).real() << "\n";
133-
}
134-
}
135-
}
136-
file.close();
137-
138-
return 0;
88+
F = f;
89+
tensorium::SpectralFFT<T>::forward_3D(F);
90+
91+
for (size_t i = 0; i < N; ++i) {
92+
int ki = (i <= N / 2) ? i : i - N;
93+
T kx = TWO_PI * ki / L;
94+
for (size_t j = 0; j < N; ++j) {
95+
for (size_t k = 0; k < N; ++k) {
96+
F(i, j, k) *= C(0, kx);
97+
}
98+
}
99+
}
100+
101+
tensorium::SpectralFFT<T>::backward_3D(F);
102+
df_dx_numeric = F;
103+
104+
max_err = 0;
105+
for (size_t i = 0; i < N; ++i)
106+
for (size_t j = 0; j < N; ++j)
107+
for (size_t k = 0; k < N; ++k)
108+
max_err = std::max(
109+
max_err, std::abs(df_dx_exact(i, j, k).real() - df_dx_numeric(i, j, k).real()));
110+
111+
std::cout << "Erreur max (cos*sin*cos): " << max_err << "\n";
112+
for (size_t i = 0; i < 4; ++i) {
113+
size_t j = 1, k = 1;
114+
T x = i * dx;
115+
std::cout << "x=" << x << "\tf=" << f(i, j, k).real()
116+
<< "\texact=" << df_dx_exact(i, j, k).real()
117+
<< "\tnumeric=" << df_dx_numeric(i, j, k).real() << "\terreur="
118+
<< std::abs(df_dx_exact(i, j, k).real() - df_dx_numeric(i, j, k).real()) << "\n";
119+
}
120+
121+
std::ofstream file("spectral_deriv_output.csv");
122+
file << "x,y,z,f,dfdx_exact,dfdx_numeric\n";
123+
for (size_t i = 0; i < N; ++i) {
124+
for (size_t j = 0; j < N; ++j) {
125+
for (size_t k = 0; k < N; ++k) {
126+
T x = i * dx;
127+
T y = j * dx;
128+
T z = k * dx;
129+
file << x << "," << y << "," << z << "," << f(i, j, k).real() << ","
130+
<< df_dx_exact(i, j, k).real() << "," << df_dx_numeric(i, j, k).real() << "\n";
131+
}
132+
}
133+
}
134+
file.close();
135+
136+
return 0;
139137
}
140138

141139
int deriv_test() {
142-
std::cout << "\n=== Derivate 2D Test (\u2202/\u2202x) ===\n";
143-
tensorium::Derivate<float> f2d(4, 4);
144-
tensorium::Derivate<float> dfdx2d(4, 4);
145-
146-
for (size_t i = 0; i < 4; ++i)
147-
for (size_t j = 0; j < 4; ++j)
148-
f2d(i, j) = static_cast<float>(i * 10 + j);
149-
150-
tensorium::centered_derivative(f2d, dfdx2d, 0, 1.0f);
151-
152-
std::cout << "\n=== Derivate 2D Test (\u2202/\u2202y) ===\n";
153-
tensorium::Derivate<float> f2d_y(4, 4);
154-
tensorium::Derivate<float> dfdx2d_y(4, 4);
155-
for (size_t i = 0; i < 4; ++i)
156-
for (size_t j = 0; j < 4; ++j)
157-
f2d_y(i, j) = static_cast<float>(i * 10 + j);
158-
tensorium::centered_derivative(f2d_y, dfdx2d_y, 1, 1.0f);
159-
160-
161-
std::cout << "\n=== DerivateND 3D Test (\u2202/\u2202x) ===\n";
162-
std::array<size_t, 3> dims = {4, 4, 4};
163-
tensorium::DerivateND<float, 3> fnd_x(dims), dfdxnd(dims);
164-
for (size_t i = 0; i < 4; ++i)
165-
for (size_t j = 0; j < 4; ++j)
166-
for (size_t k = 0; k < 4; ++k)
167-
fnd_x({i, j, k}) = float(i + j + k);
168-
tensorium::centered_derivative(fnd_x, dfdxnd, 0, 1.0f);
169-
170-
171-
std::cout << "\n=== DerivateND 3D Test (\u2202/\u2202z) ===\n";
172-
tensorium::DerivateND<float, 3> fnd(dims), dfdznd(dims);
173-
for (size_t i = 0; i < 4; ++i)
174-
for (size_t j = 0; j < 4; ++j)
175-
for (size_t k = 0; k < 4; ++k)
176-
fnd({i, j, k}) = float(i + j + k);
177-
tensorium::centered_derivative(fnd, dfdznd, 2, 1.0f);
178-
179-
180-
tensorium::centered_derivative_order4(fnd, dfdznd, 2, 1.0f);
181-
std::cout << "\u2202f/\u2202z (ordre 4) slice at k=2:\n";
182-
183-
184-
const size_t N = 16384;
185-
const float dx = 0.01f;
186-
const float pi = 3.14159265358979323846f;
187-
188-
tensorium::Derivate<float> f(N, 1), df_order2(N, 1), df_order4(N, 1), df_exact(N, 1);
189-
for (size_t i = 0; i < N; ++i) {
190-
float x = i * dx;
191-
f(i, 0) = std::sin(x) + 0.1f * std::sin(10*x);
192-
df_exact(i, 0) = std::cos(x) + 1.0f * std::cos(10*x);
193-
}
194-
f.centered_derivative(f, df_order2, 0, dx);
195-
f.centered_derivative_order4(f, df_order4, 0, dx);
196-
197-
float max_err_order2 = 0.0f, max_err_order4 = 0.0f;
198-
float avg_err_order2 = 0.0f, avg_err_order4 = 0.0f;
199-
size_t valid = 0;
200-
for (size_t i = 2; i < N - 2; ++i) {
201-
float exact = df_exact(i, 0);
202-
float e2 = std::abs(df_order2(i, 0) - exact);
203-
float e4 = std::abs(df_order4(i, 0) - exact);
204-
max_err_order2 = std::max(max_err_order2, e2);
205-
max_err_order4 = std::max(max_err_order4, e4);
206-
avg_err_order2 += e2;
207-
avg_err_order4 += e4;
208-
++valid;
209-
}
210-
avg_err_order2 /= valid;
211-
avg_err_order4 /= valid;
212-
213-
std::cout << "Erreur moyenne (ordre 2): " << avg_err_order2 << "\n";
214-
std::cout << "Erreur moyenne (ordre 4): " << avg_err_order4 << "\n";
215-
216-
std::cout << "\nV\u00e9rification des bords:\n";
217-
std::cout << "df_order4(0,0) = " << df_order4(0, 0) << ", attendu: 0\n";
218-
std::cout << "df_order4(1,0) = " << df_order4(1, 0) << ", attendu: 0\n";
219-
std::cout << "df_order4(N-2,0) = " << df_order4(N-2, 0) << ", attendu: 0\n";
220-
std::cout << "df_order4(N-1,0) = " << df_order4(N-1, 0) << ", attendu: 0\n";
221-
222-
return 0;
140+
std::cout << "\n=== Derivate 2D Test (\u2202/\u2202x) ===\n";
141+
tensorium::Derivate<float> f2d(4, 4);
142+
tensorium::Derivate<float> dfdx2d(4, 4);
143+
144+
for (size_t i = 0; i < 4; ++i)
145+
for (size_t j = 0; j < 4; ++j)
146+
f2d(i, j) = static_cast<float>(i * 10 + j);
147+
148+
tensorium::centered_derivative(f2d, dfdx2d, 0, 1.0f);
149+
150+
std::cout << "\n=== Derivate 2D Test (\u2202/\u2202y) ===\n";
151+
tensorium::Derivate<float> f2d_y(4, 4);
152+
tensorium::Derivate<float> dfdx2d_y(4, 4);
153+
for (size_t i = 0; i < 4; ++i)
154+
for (size_t j = 0; j < 4; ++j)
155+
f2d_y(i, j) = static_cast<float>(i * 10 + j);
156+
tensorium::centered_derivative(f2d_y, dfdx2d_y, 1, 1.0f);
157+
158+
std::cout << "\n=== DerivateND 3D Test (\u2202/\u2202x) ===\n";
159+
std::array<size_t, 3> dims = {4, 4, 4};
160+
tensorium::DerivateND<float, 3> fnd_x(dims), dfdxnd(dims);
161+
for (size_t i = 0; i < 4; ++i)
162+
for (size_t j = 0; j < 4; ++j)
163+
for (size_t k = 0; k < 4; ++k)
164+
fnd_x({i, j, k}) = float(i + j + k);
165+
tensorium::centered_derivative(fnd_x, dfdxnd, 0, 1.0f);
166+
167+
std::cout << "\n=== DerivateND 3D Test (\u2202/\u2202z) ===\n";
168+
tensorium::DerivateND<float, 3> fnd(dims), dfdznd(dims);
169+
for (size_t i = 0; i < 4; ++i)
170+
for (size_t j = 0; j < 4; ++j)
171+
for (size_t k = 0; k < 4; ++k)
172+
fnd({i, j, k}) = float(i + j + k);
173+
tensorium::centered_derivative(fnd, dfdznd, 2, 1.0f);
174+
175+
tensorium::centered_derivative_order4(fnd, dfdznd, 2, 1.0f);
176+
std::cout << "\u2202f/\u2202z (ordre 4) slice at k=2:\n";
177+
178+
const size_t N = 16384;
179+
const float dx = 0.01f;
180+
const float pi = 3.14159265358979323846f;
181+
182+
tensorium::Derivate<float> f(N, 1), df_order2(N, 1), df_order4(N, 1), df_exact(N, 1);
183+
for (size_t i = 0; i < N; ++i) {
184+
float x = i * dx;
185+
f(i, 0) = std::sin(x) + 0.1f * std::sin(10 * x);
186+
df_exact(i, 0) = std::cos(x) + 1.0f * std::cos(10 * x);
187+
}
188+
f.centered_derivative(f, df_order2, 0, dx);
189+
f.centered_derivative_order4(f, df_order4, 0, dx);
190+
191+
float max_err_order2 = 0.0f, max_err_order4 = 0.0f;
192+
float avg_err_order2 = 0.0f, avg_err_order4 = 0.0f;
193+
size_t valid = 0;
194+
for (size_t i = 2; i < N - 2; ++i) {
195+
float exact = df_exact(i, 0);
196+
float e2 = std::abs(df_order2(i, 0) - exact);
197+
float e4 = std::abs(df_order4(i, 0) - exact);
198+
max_err_order2 = std::max(max_err_order2, e2);
199+
max_err_order4 = std::max(max_err_order4, e4);
200+
avg_err_order2 += e2;
201+
avg_err_order4 += e4;
202+
++valid;
203+
}
204+
avg_err_order2 /= valid;
205+
avg_err_order4 /= valid;
206+
207+
std::cout << "Erreur moyenne (ordre 2): " << avg_err_order2 << "\n";
208+
std::cout << "Erreur moyenne (ordre 4): " << avg_err_order4 << "\n";
209+
210+
std::cout << "\nV\u00e9rification des bords:\n";
211+
std::cout << "df_order4(0,0) = " << df_order4(0, 0) << ", attendu: 0\n";
212+
std::cout << "df_order4(1,0) = " << df_order4(1, 0) << ", attendu: 0\n";
213+
std::cout << "df_order4(N-2,0) = " << df_order4(N - 2, 0) << ", attendu: 0\n";
214+
std::cout << "df_order4(N-1,0) = " << df_order4(N - 1, 0) << ", attendu: 0\n";
215+
216+
return 0;
223217
}

0 commit comments

Comments
 (0)