Skip to content

Commit 3a712d4

Browse files
committed
[YUNIKORN-2837] Log & Send Events, Improve logging (#957)
Closes: #957 Signed-off-by: Manikandan R <[email protected]>
1 parent 7c51e82 commit 3a712d4

File tree

6 files changed

+117
-95
lines changed

6 files changed

+117
-95
lines changed

pkg/common/errors.go

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,10 @@ package common
2020

2121
import "errors"
2222

23-
// Common errors
24-
var (
25-
// InvalidQueueName returned when queue name is invalid
26-
InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed")
27-
)
23+
// InvalidQueueName returned when queue name is invalid
24+
var InvalidQueueName = errors.New("invalid queue name, max 64 characters consisting of alphanumeric characters and '-', '_', '#', '@', '/', ':' allowed")
25+
26+
const PreemptionPreconditionsFailed = "Preemption preconditions failed"
27+
const PreemptionDoesNotGuarantee = "Preemption queue guarantees check failed"
28+
const PreemptionShortfall = "Preemption helped but short of resources"
29+
const PreemptionDoesNotHelp = "Preemption does not help"

pkg/scheduler/objects/application.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,6 +1056,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
10561056
// preemption occurred, and possibly reservation
10571057
return result
10581058
}
1059+
request.LogAllocationFailure(common.PreemptionDoesNotHelp, true)
10591060
}
10601061
}
10611062
request.LogAllocationFailure(NotEnoughQueueQuota, true) // error message MUST be constant!
@@ -1122,6 +1123,7 @@ func (sa *Application) tryAllocate(headRoom *resources.Resource, allowPreemption
11221123
return result
11231124
}
11241125
}
1126+
request.LogAllocationFailure(common.PreemptionDoesNotHelp, true)
11251127
}
11261128
}
11271129
}
@@ -1394,6 +1396,7 @@ func (sa *Application) tryPreemption(headRoom *resources.Resource, preemptionDel
13941396

13951397
// validate prerequisites for preemption of an ask and mark ask for preemption if successful
13961398
if !preemptor.CheckPreconditions() {
1399+
ask.LogAllocationFailure(common.PreemptionPreconditionsFailed, true)
13971400
return nil, false
13981401
}
13991402

pkg/scheduler/objects/application_test.go

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2009,6 +2009,7 @@ func TestTryAllocatePreemptQueue(t *testing.T) {
20092009
result3 := app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 0}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
20102010
assert.Assert(t, result3 == nil, "result3 not expected")
20112011
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
2012+
assertAllocationLog(t, ask3)
20122013

20132014
// pass the time and try again
20142015
ask3.createTime = ask3.createTime.Add(-30 * time.Second)
@@ -2068,28 +2069,25 @@ func TestTryAllocatePreemptNode(t *testing.T) {
20682069
preemptionAttemptsRemaining := 10
20692070

20702071
// consume capacity with 'unlimited' app
2071-
result00 := app0.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 40}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2072-
assert.Assert(t, result00 != nil, "result00 expected")
2073-
alloc00 := result00.Request
2074-
assert.Assert(t, alloc00 != nil, "alloc00 expected")
2075-
alloc00.SetNodeID(result00.NodeID)
2076-
result01 := app0.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 39}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2077-
assert.Assert(t, result01 != nil, "result01 expected")
2078-
alloc01 := result01.Request
2079-
assert.Assert(t, alloc01 != nil, "alloc01 expected")
2080-
alloc01.SetNodeID(result01.NodeID)
2072+
for _, r := range []*resources.Resource{resources.NewResourceFromMap(map[string]resources.Quantity{"first": 40}), resources.NewResourceFromMap(map[string]resources.Quantity{"first": 39})} {
2073+
result0 := app0.tryAllocate(r, true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2074+
assert.Assert(t, result0 != nil, "result0 expected")
2075+
alloc0 := result0.Request
2076+
assert.Assert(t, alloc0 != nil, "alloc0 expected")
2077+
alloc0.SetNodeID(result0.NodeID)
2078+
}
20812079

20822080
// consume remainder of space but not quota
2083-
result1 := app1.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 28}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2084-
assert.Assert(t, result1 != nil, "result1 expected")
2085-
alloc1 := result1.Request
2086-
assert.Assert(t, alloc1 != nil, "alloc1 expected")
2087-
alloc1.SetNodeID(result1.NodeID)
2088-
result2 := app1.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 23}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2089-
assert.Assert(t, result2 != nil, "result2 expected")
2090-
alloc2 := result2.Request
2091-
assert.Assert(t, alloc2 != nil, "alloc2 expected")
2092-
alloc2.SetNodeID(result2.NodeID)
2081+
allocs := make([]*Allocation, 0)
2082+
for _, r := range []*resources.Resource{resources.NewResourceFromMap(map[string]resources.Quantity{"first": 28}), resources.NewResourceFromMap(map[string]resources.Quantity{"first": 23})} {
2083+
var alloc1 *Allocation
2084+
result1 := app1.tryAllocate(r, true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2085+
assert.Assert(t, result1 != nil, "result1 expected")
2086+
alloc1 = result1.Request
2087+
assert.Assert(t, result1.Request != nil, "alloc1 expected")
2088+
alloc1.SetNodeID(result1.NodeID)
2089+
allocs = append(allocs, alloc1)
2090+
}
20932091

20942092
// on first attempt, should see a reservation since we're after the reservation timeout
20952093
ask3.createTime = ask3.createTime.Add(-10 * time.Second)
@@ -2099,18 +2097,24 @@ func TestTryAllocatePreemptNode(t *testing.T) {
20992097
assert.Assert(t, alloc3 != nil, "alloc3 not expected")
21002098
assert.Equal(t, "node1", result3.NodeID, "wrong node assignment")
21012099
assert.Equal(t, Reserved, result3.ResultType, "expected reservation")
2102-
assert.Assert(t, !alloc2.IsPreempted(), "alloc2 should not have been preempted")
2100+
assert.Assert(t, !allocs[1].IsPreempted(), "alloc2 should not have been preempted")
21032101
err = node1.Reserve(app2, ask3)
21042102
assert.NilError(t, err)
21052103

2104+
// preemption delay not yet passed, so preemption should fail
2105+
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 18}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
2106+
assert.Assert(t, result3 == nil, "result3 expected")
2107+
assert.Assert(t, !allocs[1].IsPreempted(), "alloc1 should have been preempted")
2108+
assertAllocationLog(t, ask3)
2109+
21062110
// pass the time and try again
21072111
ask3.createTime = ask3.createTime.Add(-30 * time.Second)
21082112
result3 = app2.tryAllocate(resources.NewResourceFromMap(map[string]resources.Quantity{"first": 18}), true, 30*time.Second, &preemptionAttemptsRemaining, iterator, iterator, getNode)
21092113
assert.Assert(t, result3 != nil, "result3 expected")
21102114
assert.Equal(t, Reserved, result3.ResultType, "expected reservation")
21112115
alloc3 = result3.Request
21122116
assert.Assert(t, alloc3 != nil, "alloc3 expected")
2113-
assert.Assert(t, alloc1.IsPreempted(), "alloc1 should have been preempted")
2117+
assert.Assert(t, allocs[0].IsPreempted(), "alloc1 should have been preempted")
21142118
}
21152119

21162120
func TestMaxAskPriority(t *testing.T) {

pkg/scheduler/objects/preemption.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ import (
2626

2727
"go.uber.org/zap"
2828

29+
"github.com/apache/yunikorn-core/pkg/common"
2930
"github.com/apache/yunikorn-core/pkg/common/resources"
3031
"github.com/apache/yunikorn-core/pkg/log"
3132
"github.com/apache/yunikorn-core/pkg/plugins"
@@ -203,7 +204,6 @@ func (p *Preemptor) checkPreemptionQueueGuarantees() bool {
203204
}
204205
}
205206
}
206-
207207
return false
208208
}
209209

@@ -558,6 +558,7 @@ func (p *Preemptor) tryNodes() (string, []*Allocation, bool) {
558558
func (p *Preemptor) TryPreemption() (*AllocationResult, bool) {
559559
// validate that sufficient capacity can be freed
560560
if !p.checkPreemptionQueueGuarantees() {
561+
p.ask.LogAllocationFailure(common.PreemptionDoesNotGuarantee, true)
561562
return nil, false
562563
}
563564

@@ -615,6 +616,7 @@ func (p *Preemptor) TryPreemption() (*AllocationResult, bool) {
615616

616617
if p.ask.GetAllocatedResource().StrictlyGreaterThanOnlyExisting(victimsTotalResource) {
617618
// there is shortfall, so preemption doesn't help
619+
p.ask.LogAllocationFailure(common.PreemptionShortfall, true)
618620
return nil, false
619621
}
620622

0 commit comments

Comments
 (0)