Skip to content

Commit 974ac99

Browse files
sfnSFN-eu
authored andcommitted
Correctness fixes. The port now produces equivalent results to the original CUDA implementation, and further changes should be checked against regressions.
1 parent 36c49fc commit 974ac99

File tree

15 files changed

+242
-539
lines changed

15 files changed

+242
-539
lines changed

src/aliceVision/depthMap_sycl/NormalMapEstimator.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ void NormalMapEstimator::compute(sycl::queue& queue, const std::vector<int>& cam
4242
// get R camera parameters
4343
CameraParams camParams = getCameraParameters(rc, 1 /*downscale*/, _mp);
4444

45+
// init prerequisite
46+
sycl::event event{};
47+
4548
// read input depth map
4649
image::Image<float> in_depthMap;
4750
mvsUtils::readMap(rc, _mp, mvsUtils::EFileType::depthMapFiltered, in_depthMap);
@@ -65,22 +68,23 @@ void NormalMapEstimator::compute(sycl::queue& queue, const std::vector<int>& cam
6568
// copy input depth map into depth/sim map in device memory
6669
// note: we don't need similarity for normal map computation
6770
// we use depth/sim map in order to avoid code duplication
68-
SyclDeviceMemoryPitched<sycl::float2, 2> in_depthSimMap_dmp(out_normalMap_dmp.getSize(), allocSuccess, queue);
71+
SyclDeviceMemoryPitched<sycl::float2, 2> in_depthMap_dmp(out_normalMap_dmp.getSize(), allocSuccess, queue);
6972

7073
if (!allocSuccess) ALICEVISION_THROW_ERROR("Not enough device memory to compute normal map!")
7174

7275
{
73-
SyclHostMemoryHeap<sycl::float2, 2> in_depthSimMap_hmh(in_depthSimMap_dmp.getSize(), queue);
76+
SyclHostMemoryHeap<sycl::float2, 2> in_depthMap_hmh(in_depthMap_dmp.getSize(), queue);
7477

75-
for (int x = 0; x < width; ++x)
76-
for (int y = 0; y < height; ++y)
77-
in_depthSimMap_hmh(size_t(x), size_t(y)) = sycl::float2(in_depthMap(y, x), 1.f);
78+
for (int y = 0; y < height; ++y)
79+
for (int x = 0; x < width; ++x)
80+
in_depthMap_hmh(size_t(x), size_t(y)).x() = in_depthMap(y, x);
7881

79-
in_depthSimMap_dmp.copyFrom(in_depthSimMap_hmh, sycl::event()).wait();
82+
event = in_depthMap_dmp.copyFrom(in_depthMap_hmh, event);
8083
}
8184

8285
// compute normal map synchronosly
83-
sycl_depthSimMapComputeNormal(out_normalMap_dmp, in_depthSimMap_dmp, camParams, 1 /*step*/, roi, queue, sycl::event()).wait();
86+
event = sycl_depthMapComputeNormal(out_normalMap_dmp, in_depthMap_dmp, camParams, 1 /*step*/, roi, queue, event);
87+
event.wait();
8488

8589
// write output normal map
8690
writeNormalMapFiltered(rc, _mp, tileParams, roi, out_normalMap_dmp, queue);

src/aliceVision/depthMap_sycl/Refine.cpp

Lines changed: 16 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ namespace depthMap {
3030
_sgmNormalMap_dmp(queue),
3131
_normalMap_dmp(queue),
3232
_volumeRefineSim_dmp(queue),
33-
_optTmpDepthMap_dmp(queue),
3433
_optImgVariance_dmp(queue),
3534
_queue(queue)
3635
{
@@ -64,42 +63,7 @@ namespace depthMap {
6463

6564
// allocate depth/sim map optimization buffers
6665
if (_refineParams.useColorOptimization)
67-
{
68-
allocationSuccess &= _optTmpDepthMap_dmp.allocate(depthSimMapDim);
6966
allocationSuccess &= _optImgVariance_dmp.allocate(depthSimMapDim);
70-
}
71-
}
72-
73-
double Refine::getDeviceMemoryConsumption() const
74-
{
75-
size_t bytes = 0;
76-
77-
bytes += _sgmDepthPixSizeMap_dmp.getBytesPadded();
78-
bytes += _refinedDepthSimMap_dmp.getBytesPadded();
79-
bytes += _optimizedDepthSimMap_dmp.getBytesPadded();
80-
bytes += _sgmNormalMap_dmp.getBytesPadded();
81-
bytes += _normalMap_dmp.getBytesPadded();
82-
bytes += _volumeRefineSim_dmp.getBytesPadded();
83-
bytes += _optTmpDepthMap_dmp.getBytesPadded();
84-
bytes += _optImgVariance_dmp.getBytesPadded();
85-
86-
return (double(bytes) / (1024.0 * 1024.0));
87-
}
88-
89-
double Refine::getDeviceMemoryConsumptionUnpadded() const
90-
{
91-
size_t bytes = 0;
92-
93-
bytes += _sgmDepthPixSizeMap_dmp.getBytesUnpadded();
94-
bytes += _refinedDepthSimMap_dmp.getBytesUnpadded();
95-
bytes += _optimizedDepthSimMap_dmp.getBytesUnpadded();
96-
bytes += _sgmNormalMap_dmp.getBytesUnpadded();
97-
bytes += _normalMap_dmp.getBytesUnpadded();
98-
bytes += _volumeRefineSim_dmp.getBytesUnpadded();
99-
bytes += _optTmpDepthMap_dmp.getBytesUnpadded();
100-
bytes += _optImgVariance_dmp.getBytesUnpadded();
101-
102-
return (double(bytes) / (1024.0 * 1024.0));
10367
}
10468

10569
sycl::event Refine::refineRc(const Tile& tile,
@@ -206,7 +170,7 @@ sycl::event Refine::refineAndFuseDepthSimMap(const Tile& tile, sycl::event prere
206170

207171
// initialize the similarity volume at 0
208172
// each tc filtered and inverted similarity value will be summed in this volume
209-
prerequisite = sycl_volumeInitialize(_volumeRefineSim_dmp, TSimRefine(0.f), _queue, prerequisite);
173+
prerequisite = sycl_volumeInitialize(_volumeRefineSim_dmp, TSimRefine(0.f), prerequisite);
210174

211175
// get device cache instance
212176
DeviceCache& deviceCache = DeviceCache::getInstance();
@@ -234,37 +198,20 @@ sycl::event Refine::refineAndFuseDepthSimMap(const Tile& tile, sycl::event prere
234198
<< "\t- tc: " << tc << " (" << (tci + 1) << "/" << tile.refineTCams.size() << ")" << std::endl
235199
<< "\t- tile range x: [" << downscaledRoi.x.begin << " - " << downscaledRoi.x.end << "]" << std::endl
236200
<< "\t- tile range y: [" << downscaledRoi.y.begin << " - " << downscaledRoi.y.end << "]" << std::endl);
237-
if (_refineParams.useSgmNormalMap)
238-
{
239-
prerequisite = sycl_volumeRefineSimilarityNormal(
240-
_volumeRefineSim_dmp,
241-
_sgmDepthPixSizeMap_dmp,
242-
_sgmNormalMap_dmp,
243-
rcParams,
244-
tcParams,
245-
rcDeviceMipmapImage,
246-
tcDeviceMipmapImage,
247-
_refineParams,
248-
depthRange,
249-
downscaledRoi,
250-
_queue,
251-
prerequisite);
252-
}
253-
else
254-
{
255-
prerequisite = sycl_volumeRefineSimilarityNoNormal(
256-
_volumeRefineSim_dmp,
257-
_sgmDepthPixSizeMap_dmp,
258-
rcParams,
259-
tcParams,
260-
rcDeviceMipmapImage,
261-
tcDeviceMipmapImage,
262-
_refineParams,
263-
depthRange,
264-
downscaledRoi,
265-
_queue,
266-
prerequisite);
267-
}
201+
202+
prerequisite = sycl_volumeRefineSimilarity(
203+
_volumeRefineSim_dmp,
204+
_sgmDepthPixSizeMap_dmp,
205+
_refineParams.useSgmNormalMap ? &_sgmNormalMap_dmp : nullptr,
206+
rcParams,
207+
tcParams,
208+
rcDeviceMipmapImage,
209+
tcDeviceMipmapImage,
210+
_refineParams,
211+
depthRange,
212+
downscaledRoi,
213+
_queue,
214+
prerequisite);
268215
}
269216

270217
// export intermediate volume information (if requested by user)
@@ -302,7 +249,6 @@ sycl::event Refine::optimizeDepthSimMap(const Tile& tile, sycl::event prerequisi
302249
sycl::event gradientDescent = sycl_depthSimMapOptimizeGradientDescent(
303250
_optimizedDepthSimMap_dmp, // output depth/sim map optimized
304251
_optImgVariance_dmp, // image variance buffer pre-allocate
305-
_optTmpDepthMap_dmp, // temporary depth map buffer pre-allocate
306252
_sgmDepthPixSizeMap_dmp, // input SGM upscaled depth/pixSize map
307253
_refinedDepthSimMap_dmp, // input refined and fused depth/sim map
308254
camParams,
@@ -328,7 +274,7 @@ void Refine::computeAndWriteNormalMap(const Tile& tile, const SyclDeviceMemoryPi
328274
ALICEVISION_LOG_INFO(tile << "Refine compute normal map of view id: " << _mp.getViewId(tile.rc) << ", rc: " << tile.rc << " (" << (tile.rc + 1)
329275
<< " / " << _mp.ncams << ").");
330276

331-
sycl_depthSimMapComputeNormal(_normalMap_dmp, in_depthSimMap_dmp, camParams, _refineParams.stepXY, downscaledRoi, _queue, prerequisite).wait();
277+
sycl_depthMapComputeNormal(_normalMap_dmp, in_depthSimMap_dmp, camParams, _refineParams.stepXY, downscaledRoi, _queue, prerequisite).wait();
332278

333279
writeNormalMap(tile.rc, _mp, _tileParams, tile.roi, _normalMap_dmp, _queue, _refineParams.scale, _refineParams.stepXY, name);
334280
}

src/aliceVision/depthMap_sycl/Refine.hpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,18 +46,6 @@ class Refine
4646
// final depth/similarity map getter
4747
inline const SyclDeviceMemoryPitched<sycl::float2, 2>& getDeviceDepthSimMap() const { return _optimizedDepthSimMap_dmp; }
4848

49-
/**
50-
* @brief Get memory consumpyion in device memory.
51-
* @return device memory consumpyion (in MB)
52-
*/
53-
double getDeviceMemoryConsumption() const;
54-
55-
/**
56-
* @brief Get unpadded memory consumpyion in device memory.
57-
* @return unpadded device memory consumpyion (in MB)
58-
*/
59-
double getDeviceMemoryConsumptionUnpadded() const;
60-
6149
/**
6250
* @brief Refine for a single R camera the Semi-Global Matching depth/sim map.
6351
* @param[in] tile The given tile for Refine computation
@@ -113,7 +101,6 @@ class Refine
113101
SyclDeviceMemoryPitched<sycl::float3, 2> _sgmNormalMap_dmp; //< rc upscaled SGM normal map (for experimentation purposes)
114102
SyclDeviceMemoryPitched<sycl::float3, 2> _normalMap_dmp; //< rc normal map (for debug / intermediate results purposes)
115103
SyclDeviceMemoryPitched<TSimRefine, 3> _volumeRefineSim_dmp; //< rc refine similarity volume
116-
SyclDeviceMemoryPitched<float, 2> _optTmpDepthMap_dmp; //< for color optimization: temporary depth map buffer
117104
SyclDeviceMemoryPitched<float, 2> _optImgVariance_dmp; //< for color optimization: image variance buffer
118105
sycl::queue _queue; //< queue for device execution
119106
};

src/aliceVision/depthMap_sycl/Sgm.cpp

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -90,40 +90,6 @@ Sgm::Sgm(const mvsUtils::MultiViewParams& mp,
9090
}
9191
}
9292

93-
double Sgm::getDeviceMemoryConsumption() const
94-
{
95-
size_t bytes = 0;
96-
97-
bytes += _depths_dmp.getBytesPadded();
98-
bytes += _depthThicknessMap_dmp.getBytesPadded();
99-
bytes += _depthSimMap_dmp.getBytesPadded();
100-
bytes += _normalMap_dmp.getBytesPadded();
101-
bytes += _volumeBestSim_dmp.getBytesPadded();
102-
bytes += _volumeSecBestSim_dmp.getBytesPadded();
103-
bytes += _volumeSliceAccA_dmp.getBytesPadded();
104-
bytes += _volumeSliceAccB_dmp.getBytesPadded();
105-
bytes += _volumeAxisAcc_dmp.getBytesPadded();
106-
107-
return (double(bytes) / (1024.0 * 1024.0));
108-
}
109-
110-
double Sgm::getDeviceMemoryConsumptionUnpadded() const
111-
{
112-
size_t bytes = 0;
113-
114-
bytes += _depths_dmp.getBytesUnpadded();
115-
bytes += _depthThicknessMap_dmp.getBytesUnpadded();
116-
bytes += _depthSimMap_dmp.getBytesUnpadded();
117-
bytes += _normalMap_dmp.getBytesUnpadded();
118-
bytes += _volumeBestSim_dmp.getBytesUnpadded();
119-
bytes += _volumeSecBestSim_dmp.getBytesUnpadded();
120-
bytes += _volumeSliceAccA_dmp.getBytesUnpadded();
121-
bytes += _volumeSliceAccB_dmp.getBytesUnpadded();
122-
bytes += _volumeAxisAcc_dmp.getBytesUnpadded();
123-
124-
return (double(bytes) / (1024.0 * 1024.0));
125-
}
126-
12793
sycl::event Sgm::sgmRc(const Tile& tile, const SgmDepthList& tileDepthList, sycl::event prerequisite)
12894
{
12995
const IndexT viewId = _mp.getViewId(tile.rc);
@@ -184,7 +150,7 @@ sycl::event Sgm::sgmRc(const Tile& tile, const SgmDepthList& tileDepthList, sycl
184150

185151
ALICEVISION_LOG_INFO(tile << "SGM compute normal map of view id: " << viewId << ", rc: " << tile.rc << " (" << (tile.rc + 1) << " / "
186152
<< _mp.ncams << ").");
187-
finished = sycl_depthSimMapComputeNormal(_normalMap_dmp, _depthSimMap_dmp, getCameraParameters(tile.rc, _sgmParams.scale, _mp), _sgmParams.stepXY, downscaledRoi, _queue, retrieveDepth);
153+
finished = sycl_depthMapComputeNormal(_normalMap_dmp, _depthSimMap_dmp, getCameraParameters(tile.rc, _sgmParams.scale, _mp), _sgmParams.stepXY, downscaledRoi, _queue, retrieveDepth);
188154

189155
// export intermediate normal map (if requested by user)
190156
if (_sgmParams.exportIntermediateNormalMaps)
@@ -222,8 +188,8 @@ sycl::event Sgm::computeSimilarityVolumes(const Tile& tile, const SgmDepthList&
222188
const ROI downscaledRoi = downscaleROI(tile.roi, _sgmParams.scale * _sgmParams.stepXY);
223189

224190
// initialize the two similarity volumes at 255
225-
prerequisite = sycl_volumeInitialize<TSim>(_volumeBestSim_dmp, 255., _queue, prerequisite);
226-
prerequisite = sycl_volumeInitialize<TSim>(_volumeSecBestSim_dmp, 255., _queue, prerequisite);
191+
prerequisite = sycl_volumeInitialize<TSim>(_volumeBestSim_dmp, 255., prerequisite);
192+
prerequisite = sycl_volumeInitialize<TSim>(_volumeSecBestSim_dmp, 255., prerequisite);
227193

228194
// get device cache instance
229195
DeviceCache& deviceCache = DeviceCache::getInstance();

src/aliceVision/depthMap_sycl/Sgm.hpp

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,6 @@ class Sgm
6262
// final normal map getter (optional: could be empty)
6363
inline const SyclDeviceMemoryPitched<sycl::float3, 2>& getDeviceNormalMap() const { return _normalMap_dmp; }
6464

65-
/**
66-
* @brief Get memory consumpyion in device memory.
67-
* @return device memory consumpyion (in MB)
68-
*/
69-
double getDeviceMemoryConsumption() const;
70-
71-
/**
72-
* @brief Get unpadded memory consumpyion in device memory.
73-
* @return unpadded device memory consumpyion (in MB)
74-
*/
75-
double getDeviceMemoryConsumptionUnpadded() const;
76-
7765
/**
7866
* @brief Compute for a single R camera the Semi-Global Matching.
7967
* @param[in] tile The given tile for SGM computation

src/aliceVision/depthMap_sycl/sycl/Patch.hpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,14 @@ struct Patch
2525
float d; //< pixel size
2626
};
2727

28-
inline void rotPointAroundVect(sycl::float3& out, const sycl::float3& X, const sycl::float3& vect, const int& angle)
28+
inline void rotPointAroundVect(sycl::float3& point, const sycl::float3& vect, const int& angle)
2929
{
3030
double ux, uy, uz, vx, vy, vz, wx, wy, wz, sa, ca, x, y, z, u, v, w;
3131

32-
double sizeX = sycl::length(X);
33-
x = X.x() / sizeX;
34-
y = X.y() / sizeX;
35-
z = X.z() / sizeX;
32+
const double size = sycl::length(point);
33+
x = point.x() / size;
34+
y = point.y() / size;
35+
z = point.z() / size;
3636
u = vect.x();
3737
v = vect.y();
3838
w = vect.z();
@@ -53,23 +53,21 @@ inline void rotPointAroundVect(sycl::float3& out, const sycl::float3& X, const s
5353
y = v * (ux + vy + wz) + (y * (u * u + w * w) - v * (ux + wz)) * ca + (wx - uz) * sa;
5454
z = w * (ux + vy + wz) + (z * (u * u + v * v) - w * (ux + vy)) * ca + (-vx + uy) * sa;
5555

56-
u = sycl::sqrt(x * x + y * y + z * z);
57-
58-
out[0] = x;
59-
out[1] = y;
60-
out[2] = z;
61-
out *= sizeX/u;
56+
point[0] = x;
57+
point[1] = y;
58+
point[2] = z;
59+
point = sycl::normalize(point) * size;
6260
}
6361

6462
inline static constexpr void rotatePatch(Patch& ptch, const int& rx, const int& ry)
6563
{
6664
// rotate patch around x axis by angle rx
67-
rotPointAroundVect(ptch.n, ptch.n, ptch.x, rx);
68-
rotPointAroundVect(ptch.y, ptch.y, ptch.x, rx);
65+
rotPointAroundVect(ptch.n, ptch.x, rx);
66+
rotPointAroundVect(ptch.y, ptch.x, rx);
6967

7068
// rotate new patch around y axis by angle ry
71-
rotPointAroundVect(ptch.n, ptch.n, ptch.y, ry);
72-
rotPointAroundVect(ptch.x, ptch.x, ptch.y, ry);
69+
rotPointAroundVect(ptch.n, ptch.y, ry);
70+
rotPointAroundVect(ptch.x, ptch.y, ry);
7371
}
7472

7573
inline void movePatch(Patch& ptch, const int& pt)

src/aliceVision/depthMap_sycl/sycl/color.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ inline sycl::float3 rgb2hsl(const sycl::float3& c)
100100
inline sycl::float3 xyz2lab(const sycl::float3& c)
101101
{
102102
// assuming whitepoint D65, XYZ=(0.95047, 1.00000, 1.08883)
103-
sycl::float3 r = sycl::float3(c.x() / 0.95047f, c.y(), c.z() / 1.08883f);
103+
const sycl::float3 r = c * sycl::float3(1.f / 0.95047f, 1.f, 1.f / 1.08883f);
104104

105-
sycl::float3 f = sycl::float3((r.x() > 216.0f / 24389.0f ? sycl::cbrt(r.x()) : (24389.0f / 27.0f * r.x() + 16.0f) / 116.0f),
106-
(r.y() > 216.0f / 24389.0f ? sycl::cbrt(r.y()) : (24389.0f / 27.0f * r.y() + 16.0f) / 116.0f),
107-
(r.z() > 216.0f / 24389.0f ? sycl::cbrt(r.z()) : (24389.0f / 27.0f * r.z() + 16.0f) / 116.0f));
105+
sycl::float3 f = sycl::float3(r.x() > 216.0f / 24389.0f ? sycl::cbrt(r.x()) : (24389.0f / 27.0f * r.x() + 16.0f) / 116.0f,
106+
r.y() > 216.0f / 24389.0f ? sycl::cbrt(r.y()) : (24389.0f / 27.0f * r.y() + 16.0f) / 116.0f,
107+
r.z() > 216.0f / 24389.0f ? sycl::cbrt(r.z()) : (24389.0f / 27.0f * r.z() + 16.0f) / 116.0f);
108108

109109
sycl::float3 out = sycl::float3(116.0f * f.y() - 16.0f, 500.0f * (f.x() - f.y()), 200.0f * (f.y() - f.z()));
110110

@@ -122,7 +122,7 @@ inline sycl::float3 xyz2lab(const sycl::float3& c)
122122
inline float rgb2gray(const sycl::uchar4& c_char)
123123
{
124124
const sycl::float4 c = c_char.convert<float>();
125-
return sycl::dot(sycl::float3(c.x(), c.y(), c.z()), sycl::float3(0.2989f, 0.5870f, 0.1140f));
125+
return c.x() * 0.2989f + c.y() * 0.5870f + c.z() * 0.1140f;
126126
}
127127

128128
/**

0 commit comments

Comments
 (0)