oc-scheduler -> scheduling + logs
@@ -84,6 +84,7 @@ func (s *PlannerService) HandleStore(resp tools.NATSResponse) {
 		return
 	}
 	if err := json.Unmarshal(resp.Payload, &p); err != nil {
+		fmt.Println("RETRIEVE PLANNER ERR", err)
 		return
 	}
 	s.Store(fmt.Sprintf("%v", m["peer_id"]), &p)
@@ -128,11 +129,12 @@ func (s *PlannerService) FindDate(wfID string, checkables map[string]utils.Booki
 	if asap {
 		next := s.findNextSlot(checkables, start, end, checkWindowHours)
 		if next != nil {
-			start = *next
 			if end != nil {
-				shifted := next.Add(end.Sub(start))
-				end = &shifted
+				duration := end.Sub(start) // capture before overwriting start
+				e := next.Add(duration)
+				end = &e
 			}
+			start = *next
 			return start, end, true, false, warnings
 		} else {
			return start, end, false, false, warnings
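The reordering above fixes a window-shrinking bug: end.Sub(start) must be taken before start is overwritten with *next, otherwise the requested duration collapses to end minus the shifted start. A standalone sketch of the arithmetic (hypothetical values, not project code):

package main

import (
	"fmt"
	"time"
)

func main() {
	start := time.Date(2024, 1, 1, 9, 0, 0, 0, time.UTC)
	end := start.Add(2 * time.Hour)     // requested window: 2h
	next := start.Add(30 * time.Minute) // next free slot found by the scheduler

	// Old order: start is overwritten first, so the window collapses.
	buggyStart := next
	buggyEnd := next.Add(end.Sub(buggyStart)) // 1h30m instead of 2h

	// New order: capture the duration before touching start.
	duration := end.Sub(start)
	fixedStart := next
	fixedEnd := fixedStart.Add(duration) // still a 2h window

	fmt.Println(buggyEnd.Sub(buggyStart), fixedEnd.Sub(fixedStart)) // 1h30m0s 2h0m0s
}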
@@ -142,20 +144,84 @@ func (s *PlannerService) FindDate(wfID string, checkables map[string]utils.Booki
 }
 
 func (s *PlannerService) Fill(checkables map[string]utils.BookingResource, wfID string) {
-	if missing := s.MissingPeers(checkables); len(missing) > 0 {
-		const plannerFetchTimeout = 2 * time.Second
-		tmpSession := "check-oneshot-" + wfID
-		ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, missing...)
-		owned := s.Refresh(missing, tmpSession)
+	// Collect all peers involved in this check (not just missing ones).
+	// We always re-request every peer because PB_CLOSE_PLANNER is emitted
+	// after each check session, which stops the remote stream. The cached
+	// snapshot may therefore be stale: re-fetching ensures the check is made
+	// against up-to-date availability data.
+	all := s.allPeers(checkables)
+	if len(all) == 0 {
+		return
+	}
+	const plannerFetchTimeout = 5 * time.Second
+	tmpSession := "check-oneshot-" + wfID
+
+	// Mark pending entries and clear any stale planner so the wait loop below
+	// will not return early with an old snapshot.
+	s.Mu.Lock()
+	myself, _ := oclib.GetMySelf()
+	for _, peerID := range all {
+		entry := s.Cache[peerID]
+		if entry == nil {
+			entry = &plannerEntry{}
+			s.Cache[peerID] = entry
+			s.AddedAt[peerID] = time.Now().UTC()
+			go s.EvictAfter(peerID, plannerTTL)
+		}
+		// Reset so MissingPeers sees it as absent until the fresh snapshot arrives.
+		entry.Planner = nil
+		if !entry.Refreshing {
+			entry.Refreshing = true
+			entry.RefreshOwner = tmpSession
+		}
+	}
+	s.Mu.Unlock()
+	defer s.ReleaseRefreshOwnership(all, tmpSession)
+
+	for _, peerID := range all {
+		if myself != nil && myself.PeerID == peerID {
+			go s.RefreshSelf(peerID, &tools.APIRequest{Admin: true})
+		} else {
+			payload, _ := json.Marshal(map[string]any{"peer_id": peerID})
+			utils.Propalgate(peerID, tools.PropalgationMessage{
+				Action:  tools.PB_PLANNER,
+				Payload: payload,
+			})
+		}
+	}
+
+	deadline := time.Now().Add(plannerFetchTimeout)
+	for {
+		remaining := s.MissingPeers(checkables)
+		if len(remaining) == 0 {
+			return
+		}
+		wait := time.Until(deadline)
+		if wait <= 0 {
+			return
+		}
+		ch, cancelSub := SubscribeUpdates(s.Subs, &s.SubMu, remaining...)
 		select {
 		case <-ch:
-		case <-time.After(plannerFetchTimeout):
+		case <-time.After(wait):
 		}
 		cancelSub()
-		s.ReleaseRefreshOwnership(owned, tmpSession)
 	}
 }
+
+// allPeers returns the deduplicated list of peer IDs for all checkable resources.
+func (s *PlannerService) allPeers(res map[string]utils.BookingResource) []string {
+	seen := map[string]struct{}{}
+	var out []string
+	for _, r := range res {
+		if _, ok := seen[r.PeerPID]; !ok {
+			seen[r.PeerPID] = struct{}{}
+			out = append(out, r.PeerPID)
+		}
+	}
+	return out
+}
 
 // evictAfter waits ttl from first insertion then deletes the cache entry and
 // emits PB_CLOSE_PLANNER so oc-discovery stops streaming for this peer.
 // This is the only path that actually removes an entry from PlannerCache;
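The rewritten Fill waits under a hard deadline instead of a single fixed timeout: it re-checks the missing set after every wakeup and only sleeps for the time still remaining. A standalone sketch of that wait pattern (hypothetical helpers, not the project's API):

package main

import (
	"fmt"
	"sync"
	"time"
)

// waitForPeers loops until missing() is empty or timeout has elapsed, waking
// either on an update signal or when the remaining share of the deadline runs out.
func waitForPeers(missing func() []string, updates <-chan string, timeout time.Duration) bool {
	deadline := time.Now().Add(timeout)
	for {
		if len(missing()) == 0 {
			return true // every peer delivered a fresh snapshot
		}
		wait := time.Until(deadline)
		if wait <= 0 {
			return false // deadline exhausted, proceed with what we have
		}
		select {
		case <-updates: // a snapshot arrived, re-check the missing set
		case <-time.After(wait): // no update before the deadline
		}
	}
}

func main() {
	updates := make(chan string, 1)
	var mu sync.Mutex
	remaining := []string{"peer-1"}
	missing := func() []string { mu.Lock(); defer mu.Unlock(); return remaining }
	go func() {
		time.Sleep(50 * time.Millisecond)
		mu.Lock()
		remaining = nil // pretend peer-1's planner landed in the cache
		mu.Unlock()
		updates <- "peer-1"
	}()
	fmt.Println(waitForPeers(missing, updates, time.Second)) // true
}

The real code re-subscribes to only the still-missing peers on each iteration and cancels the subscription after the select; the sketch glosses over that with one long-lived channel.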
@@ -206,6 +272,10 @@ func SubscribeUpdates[T interface{}](subs map[string][]chan T, mu *sync.RWMutex,
 // ---------------------------------------------------------------------------
 
 func (s *PlannerService) Store(peerID string, p *planner.Planner) {
+	if s == nil {
+		fmt.Println("PLANNER IS NULL")
+		return
+	}
 	s.Mu.Lock()
 	entry := s.Cache[peerID]
 	isNew := entry == nil
@@ -216,8 +286,9 @@ func (s *PlannerService) Store(peerID string, p *planner.Planner) {
 		go s.EvictAfter(peerID, plannerTTL)
 	}
 	entry.Planner = p
+	s.Cache[peerID] = entry
 	s.Mu.Unlock()
-	utils.Notify[string](&s.SubMu, s.Subs, peerID, peerID)
+	utils.Notify(&s.SubMu, s.Subs, peerID, peerID)
 }
 
 // ---------------------------------------------------------------------------
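Store mutates the cache under s.Mu and only notifies subscribers after unlocking. A minimal sketch of that store-then-notify shape (hypothetical generic cache, not the project's types; whether utils.Notify blocks on slow subscribers is unknown, so a non-blocking send is assumed here):

package main

import (
	"fmt"
	"sync"
)

type cache[T any] struct {
	mu   sync.Mutex
	data map[string]T
	subs map[string][]chan string
}

func (c *cache[T]) store(key string, v T) {
	c.mu.Lock()
	c.data[key] = v
	subs := append([]chan string(nil), c.subs[key]...) // snapshot subscribers under the lock
	c.mu.Unlock()
	for _, ch := range subs { // notify outside the lock
		select {
		case ch <- key:
		default: // assumption: never block on a full subscriber
		}
	}
}

func main() {
	c := &cache[int]{data: map[string]int{}, subs: map[string][]chan string{}}
	ch := make(chan string, 1)
	c.subs["peer-1"] = append(c.subs["peer-1"], ch)
	c.store("peer-1", 42)
	fmt.Println(<-ch, c.data["peer-1"]) // peer-1 42
}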
@@ -388,9 +459,15 @@ func (s *PlannerService) checkResourceAvailability(res map[string]utils.BookingR
 		s.Mu.RLock()
 		entry := s.Cache[r.PeerPID]
 		s.Mu.RUnlock()
-		if entry == nil || entry.Planner == nil {
+		fmt.Println("Retrieve", r.PeerPID, s.Cache, entry.Planner)
+		if entry == nil {
+			unavailable = append(unavailable, r.ID)
 			warnings = append(warnings, fmt.Sprintf(
-				"peer %s planner not in cache for resource %s – assuming available", r.PeerPID, r.ID))
+				"resource %s is not available in [%s – %s] : Missing Planner",
+				r.ID, start.Format(time.RFC3339), utils.FormatOptTime(end)))
 			continue
 		}
+		if entry.Planner == nil {
+			continue
+		}
 		if !s.checkInstance(entry.Planner, r.ID, r.InstanceID, start, end) {
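The new warning formats an optional end bound via utils.FormatOptTime; its exact behaviour is not shown in this diff, but a helper of that shape plausibly treats a nil *time.Time as an open-ended interval. A hypothetical sketch:

package main

import (
	"fmt"
	"time"
)

// formatOptTime is a stand-in for utils.FormatOptTime: nil means "no end bound".
func formatOptTime(t *time.Time) string {
	if t == nil {
		return "open-ended"
	}
	return t.Format(time.RFC3339)
}

func main() {
	start := time.Date(2024, 1, 1, 9, 0, 0, 0, time.UTC)
	end := start.Add(2 * time.Hour)
	for _, e := range []*time.Time{&end, nil} {
		fmt.Printf("resource %s is not available in [%s – %s] : Missing Planner\n",
			"res-1", start.Format(time.RFC3339), formatOptTime(e))
	}
}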
@@ -419,17 +496,17 @@ func (s *PlannerService) CheckResourceInstance(peerID, resourceID, instanceID st
 // SubscribePlannerUpdates returns a channel that receives a peerID each time
 // one of the given peers' planners is updated.
 func (s *PlannerService) SubscribePlannerUpdates(peerIDs ...string) (<-chan string, func()) {
-	return SubscribeUpdates[string](s.Subs, &s.SubMu, peerIDs...)
+	return SubscribeUpdates(s.Subs, &s.SubMu, peerIDs...)
 }
 
 // SubscribeWorkflowUpdates returns a channel signalled when the workflow changes.
 func (s *PlannerService) SubscribeWorkflowUpdates(wfID string) (<-chan struct{}, func()) {
-	return SubscribeUpdates[struct{}](s.WorkflowSubs, &s.WorkflowSubMu, wfID)
+	return SubscribeUpdates(s.WorkflowSubs, &s.WorkflowSubMu, wfID)
 }
 
 // NotifyWorkflow signals all subscribers watching wfID.
 func (s *PlannerService) NotifyWorkflow(wfID string) {
-	utils.Notify[struct{}](&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
+	utils.Notify(&s.WorkflowSubMu, s.WorkflowSubs, wfID, struct{}{})
 }
 
 // checkInstance checks availability for the specific instance resolved by the
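The last three changes drop explicit type arguments: with Go generics (1.18+), the compiler infers T from the subs map and from the value passed to Notify, so SubscribeUpdates[string](...) and SubscribeUpdates(...) compile to the same call. A standalone sketch with a simplified SubscribeUpdates-like signature (the body is illustrative, not the project's implementation):

package main

import (
	"fmt"
	"sync"
)

// SubscribeUpdates registers one channel for every key and returns it with a cancel func.
func SubscribeUpdates[T any](subs map[string][]chan T, mu *sync.RWMutex, keys ...string) (<-chan T, func()) {
	ch := make(chan T, len(keys))
	mu.Lock()
	for _, k := range keys {
		subs[k] = append(subs[k], ch)
	}
	mu.Unlock()
	cancel := func() {
		mu.Lock()
		defer mu.Unlock()
		for _, k := range keys {
			kept := subs[k][:0]
			for _, c := range subs[k] {
				if c != ch {
					kept = append(kept, c)
				}
			}
			subs[k] = kept
		}
	}
	return ch, cancel
}

func main() {
	subs := map[string][]chan string{}
	var mu sync.RWMutex

	ch1, cancel1 := SubscribeUpdates[string](subs, &mu, "peer-1") // explicit instantiation
	ch2, cancel2 := SubscribeUpdates(subs, &mu, "peer-1")         // T inferred from subs
	defer cancel1()
	defer cancel2()

	subs["peer-1"][0] <- "planner updated"
	fmt.Println(<-ch1, len(subs["peer-1"])) // planner updated 2
	_ = ch2
}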