Scheduler + Observe
@@ -10,6 +10,7 @@ import (
	oclib "cloud.o-forge.io/core/oc-lib"
	"cloud.o-forge.io/core/oc-lib/models/booking/planner"
	"cloud.o-forge.io/core/oc-lib/models/resources"
	"cloud.o-forge.io/core/oc-lib/models/workflow"
	"cloud.o-forge.io/core/oc-lib/models/workflow/graph"
	"cloud.o-forge.io/core/oc-lib/tools"
@@ -509,6 +510,126 @@ func (s *PlannerService) NotifyWorkflow(wfID string) {
	utils.Notify(&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
}

// FillForPeers fetches and waits for planners for an explicit list of peer PIDs.
// Same mechanics as Fill, but decoupled from the BookingResource map; used for
// dynamic resource resolution, where the peer set is not part of checkables.
func (s *PlannerService) FillForPeers(peerPIDs []string, wfID string) {
	if len(peerPIDs) == 0 {
		return
	}
	const plannerFetchTimeout = 5 * time.Second
	tmpSession := "check-dynamic-" + wfID

	s.Mu.Lock()
	myself, _ := oclib.GetMySelf()
	for _, peerID := range peerPIDs {
		entry := s.Cache[peerID]
		if entry == nil {
			entry = &plannerEntry{}
			s.Cache[peerID] = entry
			s.AddedAt[peerID] = time.Now().UTC()
			go s.EvictAfter(peerID, plannerTTL)
		}
		entry.Planner = nil // force a fresh fetch even if a stale planner is cached
		if !entry.Refreshing {
			entry.Refreshing = true
			entry.RefreshOwner = tmpSession
		}
	}
	s.Mu.Unlock()
	defer s.ReleaseRefreshOwnership(peerPIDs, tmpSession)

	for _, peerID := range peerPIDs {
		if myself != nil && myself.PeerID == peerID {
			go s.RefreshSelf(peerID, &tools.APIRequest{Admin: true})
		} else {
			payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
			utils.Propalgate(peerID, tools.PropalgationMessage{
				Action:  tools.PB_PLANNER,
				Payload: payload,
			})
		}
	}

	deadline := time.Now().Add(plannerFetchTimeout)
	remaining := slices.Clone(peerPIDs)
	for len(remaining) > 0 {
		wait := time.Until(deadline)
		if wait <= 0 {
			return
		}
		ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, remaining...)
		select {
		case <-ch:
		case <-time.After(wait):
			cancelSub()
			return
		}
		cancelSub()
		remaining = remaining[:0]
		s.Mu.RLock()
		for _, pid := range peerPIDs {
			if entry := s.Cache[pid]; entry == nil || entry.Planner == nil {
				remaining = append(remaining, pid)
			}
		}
		s.Mu.RUnlock()
	}
}
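
// NOTE (sketch, not part of this commit): the wait loop above assumes that
// SubscribeUpdates registers one shared channel under every remaining peer ID
// and returns a cancel func that unregisters all of them, while the refresh
// path calls a Notify helper to wake waiters. A minimal illustration of that
// assumed contract; the map and channel types are guesses, and the real
// helpers in utils may differ:
//
//	func subscribeUpdates(subs map[string][]chan struct{}, mu *sync.Mutex, keys ...string) (<-chan struct{}, func()) {
//		ch := make(chan struct{}, 1) // buffered: a notifier never blocks on a slow waiter
//		mu.Lock()
//		for _, k := range keys {
//			subs[k] = append(subs[k], ch)
//		}
//		mu.Unlock()
//		return ch, func() { // cancel: drop this channel from every key
//			mu.Lock()
//			defer mu.Unlock()
//			for _, k := range keys {
//				for i, c := range subs[k] {
//					if c == ch {
//						subs[k] = append(subs[k][:i], subs[k][i+1:]...)
//						break
//					}
//				}
//			}
//		}
//	}
//
//	func notify(mu *sync.Mutex, subs map[string][]chan struct{}, key string) {
//		mu.Lock()
//		defer mu.Unlock()
//		for _, ch := range subs[key] {
//			select {
//			case ch <- struct{}{}: // wake a pending waiter
//			default: // wake-up already queued; never block
//			}
//		}
//	}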

// FillDynamic resolves all peer DIDs across the given dynamic resources to PIDs,
// fetches their planners via FillForPeers, and returns the DID→PID mapping for use
// in ResolveDynamic. All dynamics are batched into a single planner fetch round.
func (s *PlannerService) FillDynamic(dynamics []*resources.DynamicResource, wfID string) map[string]string {
	didToPID := map[string]string{}
	peerPIDs := []string{}
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
	for _, d := range dynamics {
		for _, did := range d.PeerIds {
			if did == "" || didToPID[did] != "" {
				continue
			}
			if data := access.LoadOne(did); data.Data != nil {
				if p := data.ToPeer(); p != nil {
					didToPID[did] = p.PeerID
					peerPIDs = append(peerPIDs, p.PeerID)
				}
			}
		}
	}
	s.FillForPeers(peerPIDs, wfID)
	return didToPID
}
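
// NOTE (sketch, not part of this commit): ResolveDynamic below indexes both
// d.PeerIds and d.ResourceIds with d.SelectedIndex, so the three fields are
// evidently parallel: candidate i lives on the peer whose DID is PeerIds[i]
// and maps to resource ResourceIds[i]. The authoritative type is
// resources.DynamicResource in oc-lib; the shape implied by these call sites
// alone is roughly:
//
//	type DynamicResource struct {
//		PeerIds       []string // peer DIDs, parallel to ResourceIds
//		ResourceIds   []string // resource ID on the owning peer, parallel to PeerIds
//		SelectedIndex int      // candidate currently under consideration
//	}
//
// GetSelectedInstance is assumed to advance through the sorted candidate list
// and return nil once it is exhausted; otherwise the continue statements in
// ResolveDynamic would loop forever on a skipped candidate.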

// ResolveDynamic walks the sorted instance list of a DynamicResource via
// GetSelectedInstance and returns true as soon as it finds an instance whose
// peer's planner confirms availability for [start, end].
// d.SelectedIndex is updated to the elected instance on success.
// Peers that did not respond (no planner in cache) are skipped.
func (s *PlannerService) ResolveDynamic(d *resources.DynamicResource, didToPID map[string]string, start time.Time, end *time.Time) bool {
	for {
		inst := d.GetSelectedInstance(nil)
		if inst == nil {
			return false // exhausted all candidates
		}
		did := d.PeerIds[d.SelectedIndex]
		resourceID := d.ResourceIds[d.SelectedIndex]
		pid, ok := didToPID[did]
		if !ok {
			continue // peer DID could not be resolved
		}
		s.Mu.RLock()
		entry := s.Cache[pid]
		s.Mu.RUnlock()
		if entry == nil || entry.Planner == nil {
			continue // peer did not respond in time
		}
		if s.checkInstance(entry.Planner, resourceID, inst.GetID(), start, end) {
			return true // d.SelectedIndex points to the elected instance
		}
	}
}
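
// NOTE (sketch, not part of this commit): per the doc comments above, the
// intended call sequence appears to be one batched FillDynamic per workflow
// followed by a per-resource ResolveDynamic pass. A hedged driver sketch; the
// surrounding scheduler function, its error handling, and the in-scope
// variables (dynamics, wfID, start, end) are assumptions:
//
//	didToPID := s.FillDynamic(dynamics, wfID) // one planner fetch round for all DIDs
//	for _, d := range dynamics {
//		if !s.ResolveDynamic(d, didToPID, start, end) {
//			// no candidate instance is available on any responding peer
//			// for [start, end]; the caller decides: fail or reschedule
//			return fmt.Errorf("dynamic resource could not be placed for workflow %s", wfID)
//		}
//		// on success d.SelectedIndex points at the elected instance
//	}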

// checkInstance checks availability for the specific instance resolved by the
// scheduler. When instanceID is empty (no instance selected / none resolvable),
// it falls back to checking all instances known in the planner and returns true