oc-scheduler -> scheduling + logs
This commit is contained in:
@@ -99,9 +99,11 @@ func UpdateExecutionState(payload []byte, dt tools.DataType) {
|
||||
switch dt {
|
||||
case tools.BOOKING:
|
||||
if exec.BookingsState == nil {
|
||||
exec.BookingsState = map[string]bool{}
|
||||
exec.BookingsState = map[string]workflow_execution.BookingState{}
|
||||
}
|
||||
exec.BookingsState[data.ID] = true
|
||||
st := exec.BookingsState[data.ID]
|
||||
st.IsBooked = true
|
||||
exec.BookingsState[data.ID] = st
|
||||
case tools.PURCHASE_RESOURCE:
|
||||
if exec.PurchasesState == nil {
|
||||
exec.PurchasesState = map[string]bool{}
|
||||
@@ -111,7 +113,7 @@ func UpdateExecutionState(payload []byte, dt tools.DataType) {
|
||||
|
||||
allConfirmed := true
|
||||
for _, st := range exec.BookingsState {
|
||||
if !st {
|
||||
if !st.IsBooked {
|
||||
allConfirmed = false
|
||||
break
|
||||
}
|
||||
@@ -145,7 +147,7 @@ func confirmSessionOrder(executionsID string, adminReq *tools.APIRequest) {
|
||||
results, _, _ := order.NewAccessor(adminReq).Search(
|
||||
&dbs.Filters{And: map[string][]dbs.Filter{
|
||||
"executions_id": {{Operator: dbs.EQUAL.String(), Value: executionsID}},
|
||||
}}, "", true)
|
||||
}}, "", true, 0, 10000)
|
||||
for _, obj := range results {
|
||||
if o, ok := obj.(*order.Order); ok {
|
||||
o.IsDraft = false
|
||||
@@ -301,7 +303,7 @@ func Unschedule(executionID string, request *tools.APIRequest) error {
|
||||
|
||||
func RecoverDraft() {
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
results, _, _ := workflow_execution.NewAccessor(adminReq).Search(nil, "*", true)
|
||||
results, _, _ := workflow_execution.NewAccessor(adminReq).Search(nil, "*", true, 0, 10000)
|
||||
for _, obj := range results {
|
||||
exec, ok := obj.(*workflow_execution.WorkflowExecution)
|
||||
if !ok {
|
||||
@@ -350,6 +352,13 @@ func HandleWorkflowDone(resp tools.NATSResponse) {
|
||||
if evt.RealEnd != nil {
|
||||
exec.EndDate = evt.RealEnd
|
||||
}
|
||||
// All bookings are no longer reserved and are done
|
||||
if exec.BookingsState == nil {
|
||||
exec.BookingsState = map[string]workflow_execution.BookingState{}
|
||||
}
|
||||
for id := range exec.BookingsState {
|
||||
exec.BookingsState[id] = workflow_execution.BookingState{IsBooked: false, IsDone: true}
|
||||
}
|
||||
utils.GenericRawUpdateOne(exec, exec.GetID(), workflow_execution.NewAccessor(adminReq))
|
||||
for _, step := range evt.Steps {
|
||||
applyStepToBooking(step, adminReq)
|
||||
@@ -379,6 +388,21 @@ func HandleWorkflowStepDone(resp tools.NATSResponse) {
|
||||
bk.RealEndDate = evt.RealEnd
|
||||
}
|
||||
utils.GenericRawUpdateOne(bk, bk.GetID(), booking.NewAccessor(adminReq))
|
||||
|
||||
// Update BookingsState in the parent WorkflowExecution: resource released, step done
|
||||
execRes, _, execErr := workflow_execution.NewAccessor(adminReq).LoadOne(bk.ExecutionID)
|
||||
if execErr == nil && execRes != nil {
|
||||
exec := execRes.(*workflow_execution.WorkflowExecution)
|
||||
if exec.BookingsState == nil {
|
||||
exec.BookingsState = map[string]workflow_execution.BookingState{}
|
||||
}
|
||||
st := exec.BookingsState[evt.BookingID]
|
||||
st.IsBooked = false
|
||||
st.IsDone = true
|
||||
exec.BookingsState[evt.BookingID] = st
|
||||
utils.GenericRawUpdateOne(exec, exec.GetID(), workflow_execution.NewAccessor(adminReq))
|
||||
}
|
||||
|
||||
switch bk.State {
|
||||
case enum.SUCCESS, enum.FAILURE, enum.FORGOTTEN, enum.CANCELLED:
|
||||
self, err := oclib.GetMySelf()
|
||||
@@ -439,7 +463,7 @@ func scanStaleExecutions() error {
|
||||
res := oclib.NewRequest(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), "", myself.GetID(), []string{}, nil).
|
||||
Search(&dbs.Filters{And: map[string][]dbs.Filter{
|
||||
"execution_date": {{Operator: dbs.LTE.String(), Value: primitive.NewDateTimeFromTime(deadline)}},
|
||||
}}, "", false)
|
||||
}}, "", false, 0, 10000)
|
||||
if res.Err != "" {
|
||||
return fmt.Errorf("stale execution search failed: %s", res.Err)
|
||||
}
|
||||
|
||||
@@ -84,6 +84,7 @@ func (s *PlannerService) HandleStore(resp tools.NATSResponse) {
|
||||
return
|
||||
}
|
||||
if err := json.Unmarshal(resp.Payload, &p); err != nil {
|
||||
fmt.Println("RETRIEVE PLANNER ERR", err)
|
||||
return
|
||||
}
|
||||
s.Store(fmt.Sprintf("%v", m["peer_id"]), &p)
|
||||
@@ -128,11 +129,12 @@ func (s *PlannerService) FindDate(wfID string, checkables map[string]utils.Booki
|
||||
if asap {
|
||||
next := s.findNextSlot(checkables, start, end, checkWindowHours)
|
||||
if next != nil {
|
||||
start = *next
|
||||
if end != nil {
|
||||
shifted := next.Add(end.Sub(start))
|
||||
end = &shifted
|
||||
duration := end.Sub(start) // capture before overwriting start
|
||||
e := next.Add(duration)
|
||||
end = &e
|
||||
}
|
||||
start = *next
|
||||
return start, end, true, false, warnings
|
||||
} else {
|
||||
return start, end, false, false, warnings
|
||||
@@ -142,20 +144,84 @@ func (s *PlannerService) FindDate(wfID string, checkables map[string]utils.Booki
|
||||
}
|
||||
|
||||
func (s *PlannerService) Fill(checkables map[string]utils.BookingResource, wfID string) {
|
||||
if missing := s.MissingPeers(checkables); len(missing) > 0 {
|
||||
const plannerFetchTimeout = 2 * time.Second
|
||||
tmpSession := "check-oneshot-" + wfID
|
||||
ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, missing...)
|
||||
owned := s.Refresh(missing, tmpSession)
|
||||
// Collect all peers involved in this check (not just missing ones).
|
||||
// We always re-request every peer because PB_CLOSE_PLANNER is emitted
|
||||
// after each check session, which stops the remote stream. The cached
|
||||
// snapshot may therefore be stale: re-fetching ensures the check is made
|
||||
// against up-to-date availability data.
|
||||
all := s.allPeers(checkables)
|
||||
if len(all) == 0 {
|
||||
return
|
||||
}
|
||||
const plannerFetchTimeout = 5 * time.Second
|
||||
tmpSession := "check-oneshot-" + wfID
|
||||
|
||||
// Mark pending entries and clear any stale planner so the wait loop below
|
||||
// will not return early with an old snapshot.
|
||||
s.Mu.Lock()
|
||||
myself, _ := oclib.GetMySelf()
|
||||
for _, peerID := range all {
|
||||
entry := s.Cache[peerID]
|
||||
if entry == nil {
|
||||
entry = &plannerEntry{}
|
||||
s.Cache[peerID] = entry
|
||||
s.AddedAt[peerID] = time.Now().UTC()
|
||||
go s.EvictAfter(peerID, plannerTTL)
|
||||
}
|
||||
// Reset so MissingPeers sees it as absent until the fresh snapshot arrives.
|
||||
entry.Planner = nil
|
||||
if !entry.Refreshing {
|
||||
entry.Refreshing = true
|
||||
entry.RefreshOwner = tmpSession
|
||||
}
|
||||
}
|
||||
s.Mu.Unlock()
|
||||
defer s.ReleaseRefreshOwnership(all, tmpSession)
|
||||
|
||||
for _, peerID := range all {
|
||||
if myself != nil && myself.PeerID == peerID {
|
||||
go s.RefreshSelf(peerID, &tools.APIRequest{Admin: true})
|
||||
} else {
|
||||
payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
|
||||
utils.Propalgate(peerID, tools.PropalgationMessage{
|
||||
Action: tools.PB_PLANNER,
|
||||
Payload: payload,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
deadline := time.Now().Add(plannerFetchTimeout)
|
||||
for {
|
||||
remaining := s.MissingPeers(checkables)
|
||||
if len(remaining) == 0 {
|
||||
return
|
||||
}
|
||||
wait := time.Until(deadline)
|
||||
if wait <= 0 {
|
||||
return
|
||||
}
|
||||
ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, remaining...)
|
||||
select {
|
||||
case <-ch:
|
||||
case <-time.After(plannerFetchTimeout):
|
||||
case <-time.After(wait):
|
||||
}
|
||||
cancelSub()
|
||||
s.ReleaseRefreshOwnership(owned, tmpSession)
|
||||
}
|
||||
}
|
||||
|
||||
// allPeers returns the deduplicated list of peer IDs for all checkable resources.
|
||||
func (s *PlannerService) allPeers(res map[string]utils.BookingResource) []string {
|
||||
seen := map[string]struct{}{}
|
||||
var out []string
|
||||
for _, r := range res {
|
||||
if _, ok := seen[r.PeerPID]; !ok {
|
||||
seen[r.PeerPID] = struct{}{}
|
||||
out = append(out, r.PeerPID)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// evictAfter waits ttl from first insertion then deletes the cache entry and
|
||||
// emits PB_CLOSE_PLANNER so oc-discovery stops streaming for this peer.
|
||||
// This is the only path that actually removes an entry from PlannerCache;
|
||||
@@ -206,6 +272,10 @@ func SubscribeUpdates[T interface{}](subs map[string][]chan T, mu *sync.RWMutex,
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func (s *PlannerService) Store(peerID string, p *planner.Planner) {
|
||||
if s == nil {
|
||||
fmt.Println("PLANNER IS NULL")
|
||||
return
|
||||
}
|
||||
s.Mu.Lock()
|
||||
entry := s.Cache[peerID]
|
||||
isNew := entry == nil
|
||||
@@ -216,8 +286,9 @@ func (s *PlannerService) Store(peerID string, p *planner.Planner) {
|
||||
go s.EvictAfter(peerID, plannerTTL)
|
||||
}
|
||||
entry.Planner = p
|
||||
s.Cache[peerID] = entry
|
||||
s.Mu.Unlock()
|
||||
utils.Notify[string](&s.SubMu, s.Subs, peerID, peerID)
|
||||
utils.Notify(&s.SubMu, s.Subs, peerID, peerID)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -388,9 +459,15 @@ func (s *PlannerService) checkResourceAvailability(res map[string]utils.BookingR
|
||||
s.Mu.RLock()
|
||||
entry := s.Cache[r.PeerPID]
|
||||
s.Mu.RUnlock()
|
||||
if entry == nil || entry.Planner == nil {
|
||||
fmt.Println("Retrieve", r.PeerPID, s.Cache, entry.Planner)
|
||||
if entry == nil {
|
||||
unavailable = append(unavailable, r.ID)
|
||||
warnings = append(warnings, fmt.Sprintf(
|
||||
"peer %s planner not in cache for resource %s – assuming available", r.PeerPID, r.ID))
|
||||
"resource %s is not available in [%s – %s] : Missing Planner",
|
||||
r.ID, start.Format(time.RFC3339), utils.FormatOptTime(end)))
|
||||
continue
|
||||
}
|
||||
if entry.Planner == nil {
|
||||
continue
|
||||
}
|
||||
if !s.checkInstance(entry.Planner, r.ID, r.InstanceID, start, end) {
|
||||
@@ -419,17 +496,17 @@ func (s *PlannerService) CheckResourceInstance(peerID, resourceID, instanceID st
|
||||
// SubscribePlannerUpdates returns a channel that receives a peerID each time
|
||||
// one of the given peers' planners is updated.
|
||||
func (s *PlannerService) SubscribePlannerUpdates(peerIDs ...string) (<-chan string, func()) {
|
||||
return SubscribeUpdates[string](s.Subs, &s.SubMu, peerIDs...)
|
||||
return SubscribeUpdates(s.Subs, &s.SubMu, peerIDs...)
|
||||
}
|
||||
|
||||
// SubscribeWorkflowUpdates returns a channel signalled when the workflow changes.
|
||||
func (s *PlannerService) SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
|
||||
return SubscribeUpdates[struct{}](s.WorkflowSubs, &s.WorkflowSubMu, wfID)
|
||||
return SubscribeUpdates(s.WorkflowSubs, &s.WorkflowSubMu, wfID)
|
||||
}
|
||||
|
||||
// NotifyWorkflow signals all subscribers watching wfID.
|
||||
func (s *PlannerService) NotifyWorkflow(wfID string) {
|
||||
utils.Notify[struct{}](&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
|
||||
utils.Notify(&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
|
||||
}
|
||||
|
||||
// checkInstance checks availability for the specific instance resolved by the
|
||||
|
||||
@@ -40,15 +40,20 @@ type WorkflowSchedule struct {
|
||||
DurationS float64 `json:"duration_s" default:"-1"`
|
||||
Cron string `json:"cron,omitempty"`
|
||||
|
||||
BookingMode booking.BookingMode `json:"booking_mode,omitempty"`
|
||||
SelectedInstances workflow.ConfigItem `json:"selected_instances"`
|
||||
SelectedPartnerships workflow.ConfigItem `json:"selected_partnerships"`
|
||||
SelectedBuyings workflow.ConfigItem `json:"selected_buyings"`
|
||||
SelectedStrategies workflow.ConfigItem `json:"selected_strategies"`
|
||||
BookingMode booking.BookingMode `json:"booking_mode,omitempty"`
|
||||
SelectedInstances workflow.ConfigItem `json:"selected_instances"`
|
||||
SelectedPartnerships workflow.ConfigItem `json:"selected_partnerships"`
|
||||
SelectedBuyings workflow.ConfigItem `json:"selected_buyings"`
|
||||
SelectedStrategies workflow.ConfigItem `json:"selected_strategies"`
|
||||
SelectedBillingStrategy pricing.BillingStrategy `json:"selected_billing_strategy"`
|
||||
|
||||
// Confirm, when true, triggers Schedule() to confirm the drafts held by this session.
|
||||
Confirm bool `json:"confirm,omitempty"`
|
||||
|
||||
// Asap and Preemption override the query-param mode on a per-message basis.
|
||||
// nil means "not set" (keep previous value).
|
||||
Asap *bool `json:"asap,omitempty"`
|
||||
Preemption *bool `json:"preemption,omitempty"`
|
||||
}
|
||||
|
||||
// CheckResult is the response payload for an availability check.
|
||||
@@ -68,7 +73,6 @@ type CheckResult struct {
|
||||
|
||||
// Check verifies whether the requested slot is available across all resource peers.
|
||||
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
|
||||
fmt.Println("CHECK", asap, "/", preemption)
|
||||
obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
|
||||
if code != 200 || err != nil {
|
||||
msg := "could not load workflow " + wfID
|
||||
@@ -81,7 +85,7 @@ func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, reque
|
||||
|
||||
prepLead := conf.GetConfig().PrepLead()
|
||||
start := ws.Start
|
||||
if asap || start.IsZero() {
|
||||
if asap || preemption || start.IsZero() {
|
||||
start = time.Now().UTC().Add(prepLead)
|
||||
} else if start.Before(time.Now().UTC().Add(prepLead)) {
|
||||
// Explicit date is within the prep window — impossible to guarantee on time.
|
||||
@@ -240,4 +244,3 @@ func (ws *WorkflowSchedule) GetDates() ([]Schedule, error) {
|
||||
}
|
||||
return schedule, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"oc-scheduler/infrastructure/planner"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/models/booking"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
||||
@@ -14,7 +16,6 @@ import (
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
"oc-scheduler/infrastructure/planner"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -31,7 +32,7 @@ type SchedulingResourcesService struct {
|
||||
|
||||
var singleton *SchedulingResourcesService
|
||||
|
||||
func init() {
|
||||
func InitSchedulingResource() {
|
||||
singleton = &SchedulingResourcesService{}
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ func (s *SessionExecutionsService) sessionIDFilter(field, id string) *dbs.Filter
|
||||
|
||||
func (s *SessionExecutionsService) loadSession(dt tools.DataType) []scheduling_resources.SchedulerObject {
|
||||
results := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).Search(
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true)
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true, 0, 10000)
|
||||
out := make([]scheduling_resources.SchedulerObject, 0, len(results.Data))
|
||||
for _, obj := range results.Data {
|
||||
out = append(out, scheduling_resources.ToSchedulerObject(dt, obj))
|
||||
@@ -55,7 +55,7 @@ func (s *SessionExecutionsService) loadSession(dt tools.DataType) []scheduling_r
|
||||
func (s *SessionExecutionsService) LoadSessionExecs() []*workflow_execution.WorkflowExecution {
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
results, _, _ := workflow_execution.NewAccessor(adminReq).Search(
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true)
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true, 0, 10000)
|
||||
out := make([]*workflow_execution.WorkflowExecution, 0)
|
||||
for _, obj := range results {
|
||||
if exec, ok := obj.(*workflow_execution.WorkflowExecution); ok {
|
||||
@@ -68,7 +68,7 @@ func (s *SessionExecutionsService) LoadSessionExecs() []*workflow_execution.Work
|
||||
func (s *SessionExecutionsService) loadSessionOrder() *order.Order {
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
results, _, _ := order.NewAccessor(adminReq).Search(
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true)
|
||||
s.sessionIDFilter("executions_id", s.ExecutionsSessionID), "", true, 0, 10000)
|
||||
for _, obj := range results {
|
||||
if o, ok := obj.(*order.Order); ok {
|
||||
return o
|
||||
@@ -174,8 +174,18 @@ func (s *SessionExecutionsService) upsertDrafts(
|
||||
|
||||
func (s *SessionExecutionsService) CleanupSession(request *tools.APIRequest) {
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
|
||||
// Delete bookings and purchases directly by executions_id.
|
||||
// We cannot rely on execution.Unschedule here because it uses
|
||||
// exec.PeerBookByGraph which is empty during the draft/check phase.
|
||||
for _, dt := range []tools.DataType{tools.BOOKING, tools.PURCHASE_RESOURCE} {
|
||||
for _, obj := range s.loadSession(dt) {
|
||||
scheduling_resources.GetService().Delete(dt, obj, request)
|
||||
}
|
||||
}
|
||||
|
||||
for _, exec := range s.LoadSessionExecs() {
|
||||
execution.Unschedule(exec.GetID(), request)
|
||||
execution.UnregisterExecLock(exec.GetID())
|
||||
workflow_execution.NewAccessor(adminReq).DeleteOne(exec.GetID())
|
||||
}
|
||||
if o := s.loadSessionOrder(); o != nil {
|
||||
|
||||
@@ -3,6 +3,7 @@ package utils
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -177,6 +178,7 @@ func Notify[T interface{}](mu *sync.RWMutex, registry map[string][]chan T, key s
|
||||
|
||||
func Propalgate(peerID string, message tools.PropalgationMessage) {
|
||||
b, _ := json.Marshal(message)
|
||||
fmt.Println("Propalgate")
|
||||
tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-scheduler",
|
||||
Datatype: -1,
|
||||
|
||||
Reference in New Issue
Block a user