Scheduler + Observe

This commit is contained in:
mr
2026-04-29 07:45:41 +02:00
parent 4b9b1b8b91
commit 3be023b9af
20 changed files with 1006 additions and 87 deletions

View File

@@ -10,6 +10,7 @@ import (
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/resources"
"cloud.o-forge.io/core/oc-lib/models/workflow"
"cloud.o-forge.io/core/oc-lib/models/workflow/graph"
"cloud.o-forge.io/core/oc-lib/tools"
@@ -509,6 +510,126 @@ func (s *PlannerService) NotifyWorkflow(wfID string) {
utils.Notify(&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
}
// FillForPeers fetches and waits for planners for an explicit list of peer PIDs.
// Same mechanic as Fill but decoupled from the BookingResource map — used for
// dynamic resource resolution where the peer set is not part of checkables.
//
// Blocking: returns once every requested planner is present in the cache, or
// after a fixed 5s timeout, whichever comes first. Peers that never answer
// simply stay without a planner in the cache; callers must tolerate that.
func (s *PlannerService) FillForPeers(peerPIDs []string, wfID string) {
if len(peerPIDs) == 0 {
return
}
// Hard ceiling on how long this round waits for planner responses.
const plannerFetchTimeout = 5 * time.Second
// Session tag identifying this refresh round; released via the defer below.
tmpSession := "check-dynamic-" + wfID
s.Mu.Lock()
myself, _ := oclib.GetMySelf()
for _, peerID := range peerPIDs {
entry := s.Cache[peerID]
if entry == nil {
// First time we see this peer: create the cache slot and arm its eviction.
entry = &plannerEntry{}
s.Cache[peerID] = entry
s.AddedAt[peerID] = time.Now().UTC()
go s.EvictAfter(peerID, plannerTTL)
}
// Drop any stale planner so the wait loop below treats the peer as unfetched.
entry.Planner = nil
// Claim refresh ownership unless another round already has it in flight.
if !entry.Refreshing {
entry.Refreshing = true
entry.RefreshOwner = tmpSession
}
}
s.Mu.Unlock()
// Releases only the entries whose RefreshOwner is this session.
defer s.ReleaseRefreshOwnership(peerPIDs, tmpSession)
for _, peerID := range peerPIDs {
if myself != nil && myself.PeerID == peerID {
// Local peer: refresh directly, no network round-trip.
go s.RefreshSelf(peerID, &tools.APIRequest{Admin: true})
} else {
// Remote peer: ask it to push its planner back to us.
payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
utils.Propalgate(peerID, tools.PropalgationMessage{
Action: tools.PB_PLANNER,
Payload: payload,
})
}
}
// Wait loop: subscribe to planner-update signals, block until one arrives
// (or the deadline passes), then re-check the cache for unanswered peers.
deadline := time.Now().Add(plannerFetchTimeout)
remaining := slices.Clone(peerPIDs)
for len(remaining) > 0 {
wait := time.Until(deadline)
if wait <= 0 {
return // timed out; whatever is cached so far is what callers get
}
// NOTE(review): an update landing between the cache re-check below and this
// subscribe is not signalled; the deadline bounds the resulting stall —
// confirm the SubscribeUpdates/notify ordering makes this acceptable.
ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, remaining...)
select {
case <-ch:
case <-time.After(wait):
cancelSub()
return
}
cancelSub()
// Recompute the not-yet-answered set (reuses remaining's backing array).
remaining = remaining[:0]
s.Mu.RLock()
for _, pid := range peerPIDs {
if entry := s.Cache[pid]; entry == nil || entry.Planner == nil {
remaining = append(remaining, pid)
}
}
s.Mu.RUnlock()
}
}
// FillDynamic resolves all peer DIDs across the given dynamic resources to PIDs,
// fetches their planners via FillForPeers, and returns the DID→PID mapping for use
// in ResolveDynamic. All dynamics are batched into a single planner fetch round.
//
// Fixes over the previous version: peer PIDs are deduplicated before the fetch
// (two distinct DIDs may resolve to the same peer), and records carrying an
// empty PID are skipped instead of being propagated as "" targets.
func (s *PlannerService) FillDynamic(dynamics []*resources.DynamicResource, wfID string) map[string]string {
	didToPID := map[string]string{}
	seenPID := map[string]struct{}{}
	var peerPIDs []string
	// Admin access to the peer store: DID -> peer record lookup.
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
	for _, d := range dynamics {
		for _, did := range d.PeerIds {
			// Skip blanks and DIDs already resolved earlier in this round.
			if did == "" || didToPID[did] != "" {
				continue
			}
			data := access.LoadOne(did)
			if data.Data == nil {
				continue // unknown DID; left unmapped so ResolveDynamic skips it
			}
			p := data.ToPeer()
			if p == nil || p.PeerID == "" {
				continue // record exists but carries no usable PID
			}
			didToPID[did] = p.PeerID
			// Deduplicate: several DIDs can point at the same peer.
			if _, dup := seenPID[p.PeerID]; !dup {
				seenPID[p.PeerID] = struct{}{}
				peerPIDs = append(peerPIDs, p.PeerID)
			}
		}
	}
	// One batched fetch-and-wait round covering every peer involved.
	s.FillForPeers(peerPIDs, wfID)
	return didToPID
}
// ResolveDynamic elects an instance of a DynamicResource for the window
// [start, end]. It repeatedly asks GetSelectedInstance for the next candidate
// and accepts the first one whose peer's cached planner confirms availability,
// leaving d.SelectedIndex pointing at the winner. Candidates whose DID was
// never resolved, or whose peer has no planner in the cache (it did not
// respond in time), are skipped. Returns false once candidates are exhausted.
// NOTE: termination relies on GetSelectedInstance advancing through the
// candidate list on every call.
func (s *PlannerService) ResolveDynamic(d *resources.DynamicResource, didToPID map[string]string, start time.Time, end *time.Time) bool {
	for candidate := d.GetSelectedInstance(nil); candidate != nil; candidate = d.GetSelectedInstance(nil) {
		idx := d.SelectedIndex
		peerDID, resID := d.PeerIds[idx], d.ResourceIds[idx]
		peerPID, known := didToPID[peerDID]
		if !known {
			// Peer DID was never resolved to a PID; try the next candidate.
			continue
		}
		s.Mu.RLock()
		cached := s.Cache[peerPID]
		s.Mu.RUnlock()
		if cached == nil || cached.Planner == nil {
			// No planner arrived for this peer; treat it as unavailable.
			continue
		}
		if s.checkInstance(cached.Planner, resID, candidate.GetID(), start, end) {
			return true // d.SelectedIndex now points to the elected instance
		}
	}
	return false // exhausted all candidates
}
// checkInstance checks availability for the specific instance resolved by the
// scheduler. When instanceID is empty (no instance selected / none resolvable),
// it falls back to checking all instances known in the planner and returns true