Skip to content

Commit 4f99aa3

Browse files
authored
Merge pull request #953 from ravikumarrvit/master
feat: Add configurable pgRejectThreshold to Coscheduling plugin
2 parents 6f3bf34 + 7b485c7 commit 4f99aa3

File tree

13 files changed

+337
-20
lines changed

13 files changed

+337
-20
lines changed

apis/config/scheme/scheme_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ profiles:
9797
Name: coscheduling.Name,
9898
Args: &config.CoschedulingArgs{
9999
PermitWaitingTimeSeconds: 60,
100+
PodGroupRejectPercentage: 10,
100101
},
101102
},
102103
{
@@ -380,6 +381,7 @@ profiles:
380381
kind: CoschedulingArgs
381382
permitWaitingTimeSeconds: 10
382383
podGroupBackoffSeconds: 0
384+
podGroupRejectPercentage: 0
383385
name: Coscheduling
384386
- args:
385387
apiVersion: kubescheduler.config.k8s.io/v1

apis/config/types.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,13 @@ type CoschedulingArgs struct {
3232
PermitWaitingTimeSeconds int64
3333
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
3434
PodGroupBackoffSeconds int64
35+
// PodGroupRejectPercentage is the percentage (0-100) of unassigned pods relative to
36+
// minMember at or below which PostFilter will not reject the PodGroup.
37+
// When the percentage of unassigned pods is at or below this value, PostFilter
38+
// optimistically allows remaining pods to retry scheduling instead of rejecting the group.
39+
// Default: 10 (10%). Set to 0 to always reject on any failure.
40+
// Set to 100 to never reject (disable PostFilter group rejection).
41+
PodGroupRejectPercentage int32
3542
}
3643

3744
// ModeType is a "string" type.

apis/config/v1/defaults.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
var (
3030
defaultPermitWaitingTimeSeconds int64 = 60
3131
defaultPodGroupBackoffSeconds int64 = 0
32+
defaultPodGroupRejectPercentage int32 = 10
3233

3334
defaultNodeResourcesAllocatableMode = Least
3435

@@ -112,6 +113,9 @@ func SetDefaults_CoschedulingArgs(obj *CoschedulingArgs) {
112113
if obj.PodGroupBackoffSeconds == nil {
113114
obj.PodGroupBackoffSeconds = &defaultPodGroupBackoffSeconds
114115
}
116+
if obj.PodGroupRejectPercentage == nil {
117+
obj.PodGroupRejectPercentage = &defaultPodGroupRejectPercentage
118+
}
115119
}
116120

117121
// SetDefaults_NodeResourcesAllocatableArgs sets the default parameters for NodeResourcesAllocatable.

apis/config/v1/defaults_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,20 @@ func TestSchedulingDefaults(t *testing.T) {
4141
expect: &CoschedulingArgs{
4242
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
4343
PodGroupBackoffSeconds: pointer.Int64Ptr(0),
44+
PodGroupRejectPercentage: pointer.Int32Ptr(10),
4445
},
4546
},
4647
{
4748
name: "set non default CoschedulingArgs",
4849
config: &CoschedulingArgs{
4950
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
5051
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
52+
PodGroupRejectPercentage: pointer.Int32Ptr(50),
5153
},
5254
expect: &CoschedulingArgs{
5355
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
5456
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
57+
PodGroupRejectPercentage: pointer.Int32Ptr(50),
5558
},
5659
},
5760
{

apis/config/v1/types.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ type CoschedulingArgs struct {
3232
PermitWaitingTimeSeconds *int64 `json:"permitWaitingTimeSeconds,omitempty"`
3333
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
3434
PodGroupBackoffSeconds *int64 `json:"podGroupBackoffSeconds,omitempty"`
35+
// PodGroupRejectPercentage is the percentage (0-100) of unassigned pods relative to
36+
// minMember at or below which PostFilter will not reject the PodGroup.
37+
// Default: 10 (10%). Set to 0 to always reject on any failure.
38+
// Set to 100 to never reject (disable PostFilter group rejection).
39+
PodGroupRejectPercentage *int32 `json:"podGroupRejectPercentage,omitempty"`
3540
}
3641

3742
// ModeType is a type "string".

apis/config/v1/zz_generated.conversion.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/config/v1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apis/config/validation/validation_pluginargs.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,10 @@ func ValidateCoschedulingArgs(args *config.CoschedulingArgs, _ *field.Path) erro
104104
allErrs = append(allErrs, field.Invalid(field.NewPath("podGroupBackoffSeconds"),
105105
args.PodGroupBackoffSeconds, "must be greater than 0"))
106106
}
107+
if args.PodGroupRejectPercentage < 0 || args.PodGroupRejectPercentage > 100 {
108+
allErrs = append(allErrs, field.Invalid(field.NewPath("podGroupRejectPercentage"),
109+
args.PodGroupRejectPercentage, "must be between 0 and 100"))
110+
}
107111
if len(allErrs) == 0 {
108112
return nil
109113
}

apis/config/validation/validation_plugingargs_test.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,25 @@ func TestValidateCoschedulingArgs(t *testing.T) {
8383
args: &config.CoschedulingArgs{
8484
PermitWaitingTimeSeconds: 30,
8585
PodGroupBackoffSeconds: 60,
86+
PodGroupRejectPercentage: 10,
87+
},
88+
expectedErr: nil,
89+
},
90+
{
91+
description: "valid PodGroupRejectPercentage at boundaries",
92+
args: &config.CoschedulingArgs{
93+
PermitWaitingTimeSeconds: 30,
94+
PodGroupBackoffSeconds: 0,
95+
PodGroupRejectPercentage: 0,
96+
},
97+
expectedErr: nil,
98+
},
99+
{
100+
description: "valid PodGroupRejectPercentage at upper boundary",
101+
args: &config.CoschedulingArgs{
102+
PermitWaitingTimeSeconds: 30,
103+
PodGroupBackoffSeconds: 0,
104+
PodGroupRejectPercentage: 100,
86105
},
87106
expectedErr: nil,
88107
},
@@ -91,6 +110,7 @@ func TestValidateCoschedulingArgs(t *testing.T) {
91110
args: &config.CoschedulingArgs{
92111
PermitWaitingTimeSeconds: -10,
93112
PodGroupBackoffSeconds: 60,
113+
PodGroupRejectPercentage: 10,
94114
},
95115
expectedErr: fmt.Errorf("permitWaitingTimeSeconds: Invalid value: %v: must be greater than 0", -10),
96116
},
@@ -99,14 +119,34 @@ func TestValidateCoschedulingArgs(t *testing.T) {
99119
args: &config.CoschedulingArgs{
100120
PermitWaitingTimeSeconds: 30,
101121
PodGroupBackoffSeconds: -20,
122+
PodGroupRejectPercentage: 10,
102123
},
103124
expectedErr: fmt.Errorf("podGroupBackoffSeconds: Invalid value: %v: must be greater than 0", -20),
104125
},
126+
{
127+
description: "invalid PodGroupRejectPercentage (negative value)",
128+
args: &config.CoschedulingArgs{
129+
PermitWaitingTimeSeconds: 30,
130+
PodGroupBackoffSeconds: 0,
131+
PodGroupRejectPercentage: -1,
132+
},
133+
expectedErr: fmt.Errorf("podGroupRejectPercentage: Invalid value: %v: must be between 0 and 100", -1),
134+
},
135+
{
136+
description: "invalid PodGroupRejectPercentage (greater than 100)",
137+
args: &config.CoschedulingArgs{
138+
PermitWaitingTimeSeconds: 30,
139+
PodGroupBackoffSeconds: 0,
140+
PodGroupRejectPercentage: 150,
141+
},
142+
expectedErr: fmt.Errorf("podGroupRejectPercentage: Invalid value: %v: must be between 0 and 100", 150),
143+
},
105144
{
106145
description: "both PermitWaitingTimeSeconds and PodGroupBackoffSeconds are negative",
107146
args: &config.CoschedulingArgs{
108147
PermitWaitingTimeSeconds: -30,
109148
PodGroupBackoffSeconds: -20,
149+
PodGroupRejectPercentage: 10,
110150
},
111151
expectedErr: fmt.Errorf(
112152
"[permitWaitingTimeSeconds: Invalid value: %v: must be greater than 0, podGroupBackoffSeconds: Invalid value: %v: must be greater than 0]",

kep/42-podgroup-coscheduling/README.md

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
- [QueueSort](#queuesort)
1616
- [PreFilter](#prefilter)
1717
- [PostFilter](#postfilter)
18+
- [Backoff](#backoff)
1819
- [Permit](#permit)
1920
- [Known Limitations](#known-limitations)
2021
<!-- /toc -->
@@ -158,7 +159,26 @@ For any pod that gets rejected, their pod group would be added to a backoff list
158159

159160
#### PostFilter
160161

161-
If the gap to reach the quorum of a PodGroup is greater than 10%, we reject the whole PodGroup. Note that this plugin should be configured as the last one among PostFilter plugins.
162+
PostFilter handles scheduling failures for pods that belong to a PodGroup. When a pod fails Filter, PostFilter evaluates whether the PodGroup should be rejected based on how far it is from meeting its quorum:
163+
164+
1. If the number of assigned pods already meets `minMember`, no action is taken.
165+
2. If the percentage of unassigned pods (relative to `minMember`) is at or below `podGroupRejectPercentage` (default: 10%), PostFilter returns `Unschedulable` without rejecting the group — the remaining pods get another scheduling attempt.
166+
3. If the percentage of unassigned pods exceeds `podGroupRejectPercentage`, PostFilter rejects all waiting pods in the group and optionally triggers backoff (see below).
167+
168+
The `podGroupRejectPercentage` parameter (default: `10`) is configurable in the scheduler's `CoschedulingArgs`. Set it to `0` to always reject on any failure, or `100` to never reject.
169+
170+
Note that this plugin should be configured as the last one among PostFilter plugins.
171+
172+
#### Backoff
173+
174+
When `podGroupBackoffSeconds` is set to a positive value in `CoschedulingArgs`, PostFilter places a failed PodGroup into a time-based backoff cache after rejection. During the backoff window, PreFilter immediately rejects all pods from the PodGroup with `UnschedulableAndUnresolvable`, preventing wasteful scheduling cycles.
175+
176+
Backoff is triggered only when all of the following conditions are met:
177+
- `podGroupBackoffSeconds > 0`
178+
- The percentage of unassigned pods exceeds `podGroupRejectPercentage`
179+
- The total number of pods with the PodGroup label is at least `minMember`
180+
181+
The backoff state is stored in a TTL-based in-memory cache that auto-evicts entries after the configured duration.
162182

163183
#### Permit
164184

0 commit comments

Comments
 (0)