Skip to content

Commit 98e53de

Browse files
All/some skeletons not properly displayed or missing when many people
1 parent 5002ebd commit 98e53de

4 files changed

Lines changed: 134 additions & 152 deletions

File tree

doc/release_notes.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ OpenPose Library - Release Notes
407407
4. Natural sort now works properly with filenames containing numbers longer than the limit of an int.
408408
5. Optionally auto-generated bin folder only contains the required DLLs (depending on the CMake configuration), instead of all of them.
409409
6. When WrapperStructFace and WrapperStructHand are not called and configured for Wrapper, setting body to CPU rendering was not working.
410+
7. Skeleton rendering: All or some skeletons were not properly displayed or were completely missing on images with many people.
410411
4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:
411412

412413

include/openpose_private/utilities/render.hu

Lines changed: 44 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,14 @@ namespace op
88
const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
99
{
1010
const auto globalIdx = threadIdx.x;
11-
// const auto xIndex = 2*globalIdx;
12-
// const auto yIndex = xIndex+1;
13-
const auto xIndex = globalIdx;
14-
const auto yIndex = numberPeople+globalIdx;
1511

1612
// Fill shared parameters
17-
// if (globalIdx < numberPeople)
13+
if (globalIdx < numberPeople)
1814
{
19-
auto minValueX = (float)targetWidth;
20-
auto minValueY = (float)targetHeight;
21-
auto maxValueX = 0.f;
22-
auto maxValueY = 0.f;
15+
float minValueX = (float)targetWidth;
16+
float minValueY = (float)targetHeight;
17+
float maxValueX = 0.f;
18+
float maxValueY = 0.f;
2319
for (auto part = 0 ; part < numberParts ; part++)
2420
{
2521
const auto index = 3 * (globalIdx*numberParts + part);
@@ -51,13 +47,18 @@ namespace op
5147
minValueY -= constantToAdd;
5248
}
5349

50+
// const auto xIndex = 2*globalIdx;
51+
// const auto yIndex = xIndex+1;
52+
const auto xIndex = globalIdx;
53+
const auto yIndex = numberPeople+globalIdx;
5454
minPtr[xIndex] = minValueX;
5555
minPtr[yIndex] = minValueY;
5656
maxPtr[xIndex] = maxValueX;
5757
maxPtr[yIndex] = maxValueY;
5858
}
5959
}
6060

61+
// Note: renderKeypoints does not work for videos with many people; the speed of renderKeypointsOld was slightly improved instead.
6162
__inline__ __device__ void renderKeypoints(
6263
float* targetPtr, float* sharedMaxs, float* sharedMins, float* sharedScaleF, const float* const maxPtr,
6364
const float* const minPtr, const float* const scalePtr, const int globalIdx, const int x, const int y,
@@ -82,9 +83,9 @@ namespace op
8283
if (x < targetWidth && y < targetHeight)
8384
{
8485
const auto baseIndex = 3*(y * targetWidth + x);
85-
auto b = targetPtr[baseIndex];
86-
auto g = targetPtr[baseIndex+1];
87-
auto r = targetPtr[baseIndex+2];
86+
float b = targetPtr[baseIndex];
87+
float g = targetPtr[baseIndex+1];
88+
float r = targetPtr[baseIndex+2];
8889
if (!blendOriginalFrame)
8990
{
9091
b = 0.f;
@@ -104,7 +105,7 @@ namespace op
104105
const auto xIndex = person;
105106
const auto yIndex = numberPeople+person;
106107
if (x <= sharedMaxs[xIndex] && x >= sharedMins[xIndex]
107-
&& y <= sharedMaxs[yIndex] && y >= sharedMins[yIndex])
108+
&& y <= sharedMaxs[yIndex] && y >= sharedMins[yIndex])
108109
{
109110
// Part pair connections
110111
for (auto partPair = 0; partPair < numberPartPairs; partPair++)
@@ -195,7 +196,6 @@ namespace op
195196
if (minr2 <= dist2 && dist2 <= maxr2)
196197
addColorWeighted(r, g, b, &rgbColorsPtr[(part%numberColors)*3], alphaColorToAdd);
197198
}
198-
199199
}
200200
}
201201
}
@@ -218,10 +218,10 @@ namespace op
218218
// Fill shared parameters
219219
if (globalIdx < numberPeople)
220220
{
221-
sharedMins[globalIdx].x = targetWidth;
222-
sharedMins[globalIdx].y = targetHeight;
223-
sharedMaxs[globalIdx].x = 0.f;
224-
sharedMaxs[globalIdx].y = 0.f;
221+
float minValueX = (float)targetWidth;
222+
float minValueY = (float)targetHeight;
223+
float maxValueX = 0.f;
224+
float maxValueY = 0.f;
225225
for (auto part = 0 ; part < numberParts ; part++)
226226
{
227227
const auto index = 3 * (globalIdx*numberParts + part);
@@ -230,38 +230,43 @@ namespace op
230230
const auto score = keypointsPtr[index+2];
231231
if (score > threshold)
232232
{
233-
if (x < sharedMins[globalIdx].x)
234-
sharedMins[globalIdx].x = x;
235-
if (x > sharedMaxs[globalIdx].x)
236-
sharedMaxs[globalIdx].x = x;
237-
if (y < sharedMins[globalIdx].y)
238-
sharedMins[globalIdx].y = y;
239-
if (y > sharedMaxs[globalIdx].y)
240-
sharedMaxs[globalIdx].y = y;
233+
if (x < minValueX)
234+
minValueX = x;
235+
if (x > maxValueX)
236+
maxValueX = x;
237+
if (y < minValueY)
238+
minValueY = y;
239+
if (y > maxValueY)
240+
maxValueY = y;
241241
}
242242
}
243-
if (sharedMaxs[globalIdx].x != 0.f && sharedMaxs[globalIdx].y != 0.f)
243+
if (maxValueX != 0.f && maxValueY != 0.f)
244244
{
245-
const auto averageX = sharedMaxs[globalIdx].x - sharedMins[globalIdx].x;
246-
const auto averageY = sharedMaxs[globalIdx].y - sharedMins[globalIdx].y;
245+
const auto averageX = maxValueX - minValueX;
246+
const auto averageY = maxValueY - minValueY;
247247
// (averageX + averageY) / 2.f / 400.f
248248
sharedScaleF[globalIdx] = fastTruncateCuda((averageX + averageY) / 400.f, 0.33f, 1.f);
249249
const auto constantToAdd = 50.f;
250-
sharedMaxs[globalIdx].x += constantToAdd;
251-
sharedMaxs[globalIdx].y += constantToAdd;
252-
sharedMins[globalIdx].x -= constantToAdd;
253-
sharedMins[globalIdx].y -= constantToAdd;
250+
maxValueX += constantToAdd;
251+
maxValueY += constantToAdd;
252+
minValueX -= constantToAdd;
253+
minValueY -= constantToAdd;
254254
}
255+
256+
sharedMins[globalIdx].x = minValueX;
257+
sharedMins[globalIdx].y = minValueY;
258+
sharedMaxs[globalIdx].x = maxValueX;
259+
sharedMaxs[globalIdx].y = maxValueY;
255260
}
256261
__syncthreads();
257262

258263
// Fill each (x,y) target pixel
259264
if (x < targetWidth && y < targetHeight)
260265
{
261266
const auto baseIndex = 3*(y * targetWidth + x);
262-
auto& b = targetPtr[baseIndex];
263-
auto& g = targetPtr[baseIndex+1];
264-
auto& r = targetPtr[baseIndex+2];
267+
float b = targetPtr[baseIndex];
268+
float g = targetPtr[baseIndex+1];
269+
float r = targetPtr[baseIndex+2];
265270
if (!blendOriginalFrame)
266271
{
267272
b = 0.f;
@@ -372,6 +377,9 @@ namespace op
372377
}
373378
}
374379
}
380+
targetPtr[baseIndex] = b;
381+
targetPtr[baseIndex+1] = g;
382+
targetPtr[baseIndex+2] = r;
375383
}
376384
}
377385
}

src/openpose/pose/poseGpuRenderer.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,8 @@ namespace op
7474
opLog("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
7575
// GPU memory allocation for rendering
7676
#ifdef USE_CUDA
77-
cudaMalloc((void**)(&pGpuPose),
78-
POSE_MAX_PEOPLE * getPoseNumberBodyParts(mPoseModel) * 3 * sizeof(float));
77+
const auto gpuPoseVolume = POSE_MAX_PEOPLE * getPoseNumberBodyParts(mPoseModel) * 3 * sizeof(float);
78+
cudaMalloc((void**)(&pGpuPose), gpuPoseVolume);
7979
cudaMalloc((void**)&pMaxPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
8080
cudaMalloc((void**)&pMinPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
8181
cudaMalloc((void**)&pScalePtr, sizeof(float) * POSE_MAX_PEOPLE);
@@ -119,9 +119,11 @@ namespace op
119119
scaleKeypoints(poseKeypointsRescaled, scaleInputToOutput);
120120
// Render keypoints
121121
if (!poseKeypoints.empty())
122+
{
123+
const auto gpuPoseVolume = numberPeople * numberBodyParts * 3 * sizeof(float);
122124
cudaMemcpy(
123-
pGpuPose, poseKeypointsRescaled.getConstPtr(),
124-
numberPeople * numberBodyParts * 3 * sizeof(float), cudaMemcpyHostToDevice);
125+
pGpuPose, poseKeypointsRescaled.getConstPtr(), gpuPoseVolume, cudaMemcpyHostToDevice);
126+
}
125127
renderPoseKeypointsGpu(
126128
*spGpuMemory, pMaxPtr, pMinPtr, pScalePtr, mPoseModel, numberPeople, frameSize, pGpuPose,
127129
mRenderThreshold, mShowGooglyEyes, mBlendOriginalFrame, getAlphaKeypoint());

0 commit comments

Comments
 (0)