package infrastructure import ( "encoding/json" "errors" "fmt" "strings" "time" oclib "cloud.o-forge.io/core/oc-lib" "cloud.o-forge.io/core/oc-lib/models/bill" "cloud.o-forge.io/core/oc-lib/models/booking" "cloud.o-forge.io/core/oc-lib/models/booking/planner" "cloud.o-forge.io/core/oc-lib/models/common/enum" "cloud.o-forge.io/core/oc-lib/models/common/pricing" "cloud.o-forge.io/core/oc-lib/models/order" "cloud.o-forge.io/core/oc-lib/models/peer" "cloud.o-forge.io/core/oc-lib/models/resources" "cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource" "cloud.o-forge.io/core/oc-lib/models/utils" "cloud.o-forge.io/core/oc-lib/models/workflow" "cloud.o-forge.io/core/oc-lib/models/workflow_execution" "cloud.o-forge.io/core/oc-lib/tools" "github.com/google/uuid" "github.com/robfig/cron" ) /* * WorkflowSchedule is a struct that contains the scheduling information of a workflow * It contains the mode of the schedule (Task or Service), the name of the schedule, the start and end time of the schedule and the cron expression */ // it's a flying object only use in a session time. 
// It's not stored in the database.
type WorkflowSchedule struct {
	// UUID identifies this schedule. It is copied into the ExecutionsID of the
	// executions and of the order generated from the schedule.
	UUID string `json:"id" validate:"required"`
	// Workflow is the workflow this schedule depends on; Schedules fills it in
	// once the workflow has been booked.
	Workflow *workflow.Workflow `json:"workflow,omitempty"`
	// WorkflowExecution is the list of executions generated for the workflow.
	WorkflowExecution []*workflow_execution.WorkflowExecution `json:"workflow_executions,omitempty"`
	// Message is the informational message of the schedule (estimated start and duration).
	Message string `json:"message,omitempty"`
	// Warning is the warning message of the schedule, e.g. when the estimated
	// run may not fit before End.
	Warning string `json:"warning,omitempty"`
	// Start is the start time of the schedule; it is required and must be
	// earlier than End.
	Start time.Time `json:"start" validate:"required,ltfield=End"`
	// End is the optional end time of the schedule. GetDates rejects a cron
	// schedule that has no End.
	End *time.Time `json:"end,omitempty"`
	// DurationS is the duration of one execution, in seconds. GetBuyAndBook
	// overwrites it with the longest duration reported by Planify, and GetDates
	// falls back to End-Start for cron schedules when it is <= 0.
	DurationS float64 `json:"duration_s" default:"-1"`
	// Cron is the cron expression, in the format: ss mm hh dd MM dw task.
	// Only the first six fields are parsed by GetDates.
	Cron string `json:"cron,omitempty"`
	// BookingMode qualifies the preemption order of the scheduling. If no
	// payment is allowed with preemption, set up When_Possible.
	BookingMode booking.BookingMode `json:"booking_mode,omitempty"`

	// The Selected* items carry the user's choices; they are forwarded to
	// Workflow.Planify, and SelectedInstances is also used to resolve the
	// instance checked against the planner.
	SelectedInstances    workflow.ConfigItem `json:"selected_instances"`
	SelectedPartnerships workflow.ConfigItem `json:"selected_partnerships"`
	SelectedBuyings      workflow.ConfigItem `json:"selected_buyings"`
	SelectedStrategies   workflow.ConfigItem `json:"selected_strategies"`

	// SelectedBillingStrategy is the billing strategy passed to each
	// execution's Buy call.
	SelectedBillingStrategy pricing.BillingStrategy `json:"selected_billing_strategy"`
}

// TODO PREEMPTION !
/*
Scheduling a preempted execution:
to do this we have to launch a priority execution that goes ahead of all the
others, according to a priority level.
Preemptible = 7; for now only 0 and 7 exist.
In the preemption case the execution is immediate and blocks everyone else
until it has been executed.
Une ressource doit pouvoir être preemptible pour être exécutée de la sorte. Se qui implique si on est sur une ressource par ressource que si un élement n'est pas préemptible, alors il devra être effectué dés que possible Dans le cas dés que possible, la start date est immédiate MAIS ! ne pourra se lancé que SI il n'existe pas d'exécution se lançant durant la période indicative. ( Ultra complexe ) */ func NewScheduler(mode int, start string, end string, durationInS float64, cron string) *WorkflowSchedule { ws := &WorkflowSchedule{ UUID: uuid.New().String(), Start: time.Now(), BookingMode: booking.BookingMode(mode), DurationS: durationInS, Cron: cron, } s, err := time.Parse("2006-01-02T15:04:05", start) if err == nil && ws.BookingMode == booking.PLANNED { ws.Start = s // can apply a defined start other than now, if planned } e, err := time.Parse("2006-01-02T15:04:05", end) if err == nil { ws.End = &e } return ws } func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest) (bool, *workflow.Workflow, []*workflow_execution.WorkflowExecution, []*purchase_resource.PurchaseResource, []*booking.Booking, error) { if request.Caller == nil && request.Caller.URLS == nil && request.Caller.URLS[tools.BOOKING] == nil || request.Caller.URLS[tools.BOOKING][tools.GET] == "" { return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("no caller defined") } access := workflow.NewAccessor(request) res, code, err := access.LoadOne(wfID) if code != 200 { return false, nil, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, errors.New("could not load the workflow with id: " + err.Error()) } wf := res.(*workflow.Workflow) isPreemptible, longest, priceds, wf, err := wf.Planify(ws.Start, ws.End, ws.SelectedInstances, ws.SelectedPartnerships, ws.SelectedBuyings, ws.SelectedStrategies, int(ws.BookingMode), request) if err != nil { return 
false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err } ws.DurationS = longest ws.Message = "We estimate that the workflow will start at " + ws.Start.String() + " and last " + fmt.Sprintf("%v", ws.DurationS) + " seconds." if ws.End != nil && ws.Start.Add(time.Duration(longest)*time.Second).After(*ws.End) { ws.Warning = "The workflow may be too long to be executed in the given time frame, we will try to book it anyway\n" } execs, err := ws.GetExecutions(wf, isPreemptible) if err != nil { return false, wf, []*workflow_execution.WorkflowExecution{}, []*purchase_resource.PurchaseResource{}, []*booking.Booking{}, err } purchased := []*purchase_resource.PurchaseResource{} bookings := []*booking.Booking{} for _, exec := range execs { purchased = append(purchased, exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds)...) bookings = append(bookings, exec.Book(ws.UUID, wfID, priceds)...) } return true, wf, execs, purchased, bookings, nil } func (ws *WorkflowSchedule) GenerateOrder(purchases []*purchase_resource.PurchaseResource, bookings []*booking.Booking, request *tools.APIRequest) error { newOrder := &order.Order{ AbstractObject: utils.AbstractObject{ Name: "order_" + request.PeerID + "_" + time.Now().UTC().Format("2006-01-02T15:04:05"), IsDraft: true, }, ExecutionsID: ws.UUID, Purchases: purchases, Bookings: bookings, Status: enum.PENDING, } if res, _, err := order.NewAccessor(request).StoreOne(newOrder); err == nil { if _, err := bill.DraftFirstBill(res.(*order.Order), request); err != nil { return err } return nil } else { return err } } func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*WorkflowSchedule, *workflow.Workflow, []*workflow_execution.WorkflowExecution, error) { if request == nil { return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no request found") } c := request.Caller if c == nil || c.URLS == nil || c.URLS[tools.BOOKING] == nil { 
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no caller defined") } methods := c.URLS[tools.BOOKING] if _, ok := methods[tools.GET]; !ok { return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no path found") } ok, wf, executions, purchases, bookings, err := ws.GetBuyAndBook(wfID, request) ws.WorkflowExecution = executions if !ok || err != nil { return ws, nil, executions, errors.New("could not book the workflow : " + fmt.Sprintf("%v", err)) } ws.Workflow = wf // Resolve our own peer MongoDB-ID once; used to decide local vs NATS routing. selfID, _ := oclib.GetMySelf() errCh := make(chan error, len(purchases)) for _, purchase := range purchases { purchase.IsDraft = true go propagateResource(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, selfID, request, errCh) } for i := 0; i < len(purchases); i++ { if err := <-errCh; err != nil { return ws, wf, executions, errors.New("could not propagate purchase: " + fmt.Sprintf("%v", err)) } } errCh = make(chan error, len(bookings)) for _, bk := range bookings { bk.IsDraft = true go propagateResource(bk, bk.DestPeerID, tools.BOOKING, selfID, request, errCh) } for i := 0; i < len(bookings); i++ { if err := <-errCh; err != nil { return ws, wf, executions, errors.New("could not propagate booking: " + fmt.Sprintf("%v", err)) } } if err := ws.GenerateOrder(purchases, bookings, request); err != nil { return ws, wf, executions, err } fmt.Println("Schedules") for _, exec := range executions { err := exec.PurgeDraft(request) if err != nil { return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("purge draft" + fmt.Sprintf("%v", err)) } exec.StoreDraftDefault() utils.GenericStoreOne(exec, workflow_execution.NewAccessor(request)) go EmitConsidersExecution(exec, wf) } fmt.Println("Schedules") wf.GetAccessor(&tools.APIRequest{Admin: true}).UpdateOne(wf, wf.GetID()) return ws, wf, executions, nil } // propagateResource routes a purchase or booking to its destination: // - If 
destPeerID matches our own peer (selfMongoID), the object is stored // directly in the local DB as draft and the local planner is refreshed. // - Otherwise a NATS CREATE_RESOURCE message is emitted so the destination // peer can process it asynchronously. // // The caller is responsible for setting obj.IsDraft = true before calling. func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfMongoID *peer.Peer, request *tools.APIRequest, errCh chan error) { if selfMongoID == nil { return } // booking or purchase if destPeerID == selfMongoID.GetID() { if _, _, err := obj.GetAccessor(request).StoreOne(obj); err != nil { errCh <- fmt.Errorf("could not store %s locally: %w", dt.String(), err) return } // The planner tracks booking time-slots only; purchases do not affect it. if dt == tools.BOOKING { go refreshSelfPlanner(selfMongoID.PeerID, request) } errCh <- nil return } payload, err := json.Marshal(obj) if err != nil { errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err) return } tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{ FromApp: "oc-scheduler", Datatype: dt, Method: int(tools.CREATE_RESOURCE), Payload: payload, }) errCh <- nil } /* BOOKING IMPLIED TIME, not of subscription but of execution so is processing time execution time applied on computes data can improve the processing time time should implied a security time border (10sec) if not from the same executions VERIFY THAT WE HANDLE DIFFERENCE BETWEEN LOCATION TIME && BOOKING */ /* * getExecutions is a function that returns the executions of a workflow * it returns an array of workflow_execution.WorkflowExecution */ func (ws *WorkflowSchedule) GetExecutions(workflow *workflow.Workflow, isPreemptible bool) ([]*workflow_execution.WorkflowExecution, error) { workflows_executions := []*workflow_execution.WorkflowExecution{} dates, err := ws.GetDates() if err != nil { return workflows_executions, err } for _, date := range dates { obj := 
&workflow_execution.WorkflowExecution{ AbstractObject: utils.AbstractObject{ UUID: uuid.New().String(), // set the uuid of the execution Name: workflow.Name + "_execution_" + date.Start.String(), // set the name of the execution }, Priority: 1, ExecutionsID: ws.UUID, ExecDate: date.Start, // set the execution date EndDate: date.End, // set the end date State: enum.DRAFT, // set the state to 1 (scheduled) WorkflowID: workflow.GetID(), // set the workflow id dependancy of the execution } if ws.BookingMode != booking.PLANNED { obj.Priority = 0 } if ws.BookingMode == booking.PREEMPTED && isPreemptible { obj.Priority = 7 } ws.SelectedStrategies = obj.SelectedStrategies ws.SelectedPartnerships = obj.SelectedPartnerships ws.SelectedBuyings = obj.SelectedBuyings ws.SelectedInstances = obj.SelectedInstances workflows_executions = append(workflows_executions, obj) } return workflows_executions, nil } func (ws *WorkflowSchedule) GetDates() ([]Schedule, error) { schedule := []Schedule{} if len(ws.Cron) > 0 { // if cron is set then end date should be set if ws.End == nil { return schedule, errors.New("a cron task should have an end date") } if ws.DurationS <= 0 { ws.DurationS = ws.End.Sub(ws.Start).Seconds() } cronStr := strings.Split(ws.Cron, " ") // split the cron string to treat it if len(cronStr) < 6 { // if the cron string is less than 6 fields, return an error because format is : ss mm hh dd MM dw (6 fields) return schedule, errors.New("Bad cron message: (" + ws.Cron + "). 
Should be at least ss mm hh dd MM dw") } subCron := strings.Join(cronStr[:6], " ") // cron should be parsed as ss mm hh dd MM dw t (min 6 fields) specParser := cron.NewParser(cron.Second | cron.Minute | cron.Hour | cron.Dom | cron.Month | cron.Dow) // create a new cron parser sched, err := specParser.Parse(subCron) // parse the cron string if err != nil { return schedule, errors.New("Bad cron message: " + err.Error()) } // loop through the cron schedule to set the executions for s := sched.Next(ws.Start); !s.IsZero() && s.Before(*ws.End); s = sched.Next(s) { e := s.Add(time.Duration(ws.DurationS) * time.Second) schedule = append(schedule, Schedule{ Start: s, End: &e, }) } } else { // if no cron, set the execution to the start date schedule = append(schedule, Schedule{ Start: ws.Start, End: ws.End, }) } return schedule, nil } type Schedule struct { Start time.Time End *time.Time } /* * TODO : LARGEST GRAIN PLANIFYING THE WORKFLOW WHEN OPTION IS SET * SET PROTECTION BORDER TIME */ // --------------------------------------------------------------------------- // Slot availability check // --------------------------------------------------------------------------- const ( checkWindowHours = 5 // how far ahead to scan for a free slot (hours) checkStepMin = 15 // time increment per scan step (minutes) ) // CheckResult holds the outcome of a slot availability check. type CheckResult struct { Available bool `json:"available"` Start time.Time `json:"start"` End *time.Time `json:"end,omitempty"` // NextSlot is the nearest free slot found within checkWindowHours when // the requested slot is unavailable, or the preferred (conflict-free) slot // when running in preemption mode. NextSlot *time.Time `json:"next_slot,omitempty"` Warnings []string `json:"warnings,omitempty"` // Preemptible is true when the check was run in preemption mode. 
Preemptible bool `json:"preemptible,omitempty"` } // bookingResource is the minimum info needed to verify a resource against the // planner cache. type bookingResource struct { id string peerID string instanceID string // resolved from WorkflowSchedule.SelectedInstances } // Check verifies that all booking-relevant resources (storage and compute) of // the given workflow have capacity for the requested time slot. // // - asap=true → ignore ws.Start, begin searching from time.Now() // - preemption → always return Available=true but populate Warnings with // conflicts and NextSlot with the nearest conflict-free alternative func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) { // 1. Load workflow obj, code, err := workflow.NewAccessor(request).LoadOne(wfID) if code != 200 || err != nil { msg := "could not load workflow " + wfID if err != nil { msg += ": " + err.Error() } return nil, errors.New(msg) } wf := obj.(*workflow.Workflow) // 2. Resolve start start := ws.Start if asap || start.IsZero() { start = time.Now() } // 3. Resolve end – use explicit end/duration or estimate via Planify end := ws.End if end == nil { if ws.DurationS > 0 { e := start.Add(time.Duration(ws.DurationS * float64(time.Second))) end = &e } else { _, longest, _, _, planErr := wf.Planify( start, nil, ws.SelectedInstances, ws.SelectedPartnerships, ws.SelectedBuyings, ws.SelectedStrategies, int(ws.BookingMode), request, ) if planErr == nil && longest > 0 { e := start.Add(time.Duration(longest) * time.Second) end = &e } } } // 4. Extract booking-relevant (storage + compute) resources from the graph, // resolving the selected instance for each resource. checkables := collectBookingResources(wf, ws.SelectedInstances) // 5. Check every resource against its peer's planner unavailable, warnings := checkResourceAvailability(checkables, start, end) result := &CheckResult{ Start: start, End: end, Warnings: warnings, } // 6. 
Preemption mode: mark as schedulable regardless of conflicts, but // surface warnings and the nearest conflict-free alternative. if preemption { result.Available = true result.Preemptible = true if len(unavailable) > 0 { result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours) } return result, nil } // 7. All resources are free if len(unavailable) == 0 { result.Available = true return result, nil } // 8. Slot unavailable – locate the nearest free slot within the window result.Available = false result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours) return result, nil } // collectBookingResources returns unique storage and compute resources from the // workflow graph. For each resource the selected instance ID is resolved from // selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner // check targets the exact instance chosen by the user. func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) []bookingResource { if wf.Graph == nil { return nil } seen := map[string]bool{} var result []bookingResource resolveInstanceID := func(res interface { GetID() string GetCreatorID() string }) string { idx := selectedInstances.Get(res.GetID()) switch r := res.(type) { case *resources.StorageResource: if inst := r.GetSelectedInstance(idx); inst != nil { return inst.GetID() } case *resources.ComputeResource: if inst := r.GetSelectedInstance(idx); inst != nil { return inst.GetID() } } return "" } for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) { i := item _, res := i.GetResource() if res == nil { continue } id, peerID := res.GetID(), res.GetCreatorID() if peerID == "" || seen[id] { continue } seen[id] = true result = append(result, bookingResource{ id: id, peerID: peerID, instanceID: resolveInstanceID(res), }) } for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) { i := item _, res := i.GetResource() if res == nil { continue } id, peerID := res.GetID(), res.GetCreatorID() if 
peerID == "" || seen[id] { continue } seen[id] = true result = append(result, bookingResource{ id: id, peerID: peerID, instanceID: resolveInstanceID(res), }) } return result } // checkResourceAvailability returns the IDs of unavailable resources and // human-readable warning messages. func checkResourceAvailability(res []bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) { for _, r := range res { plannerMu.RLock() p := PlannerCache[r.peerID] plannerMu.RUnlock() if p == nil { warnings = append(warnings, fmt.Sprintf( "peer %s planner not in cache for resource %s – assuming available", r.peerID, r.id)) continue } if !checkInstance(p, r.id, r.instanceID, start, end) { unavailable = append(unavailable, r.id) warnings = append(warnings, fmt.Sprintf( "resource %s is not available in [%s – %s]", r.id, start.Format(time.RFC3339), formatOptTime(end))) } } return } // checkInstance checks availability for the specific instance resolved by the // scheduler. When instanceID is empty (no instance selected / none resolvable), // it falls back to checking all instances known in the planner and returns true // if any one has remaining capacity. Returns true when no capacity is recorded. func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool { if instanceID != "" { return p.Check(resourceID, instanceID, nil, start, end) } // Fallback: accept if any known instance has free capacity caps, ok := p.Capacities[resourceID] if !ok || len(caps) == 0 { return true // no recorded usage → assume free } for id := range caps { if p.Check(resourceID, id, nil, start, end) { return true } } return false } // findNextSlot scans forward from 'from' in checkStepMin increments for up to // windowH hours and returns the first candidate start time at which all // resources are simultaneously free. 
func findNextSlot(resources []bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time { duration := time.Hour if originalEnd != nil { if d := originalEnd.Sub(from); d > 0 { duration = d } } step := time.Duration(checkStepMin) * time.Minute limit := from.Add(time.Duration(windowH) * time.Hour) for t := from.Add(step); t.Before(limit); t = t.Add(step) { e := t.Add(duration) if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 { return &t } } return nil } func formatOptTime(t *time.Time) string { if t == nil { return "open" } return t.Format(time.RFC3339) } // GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of // creator peer IDs for all its storage and compute resources. // These are the peers whose planners must be watched by a check stream. func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) { obj, code, err := workflow.NewAccessor(request).LoadOne(wfID) if code != 200 || err != nil { msg := "could not load workflow " + wfID if err != nil { msg += ": " + err.Error() } return nil, errors.New(msg) } wf := obj.(*workflow.Workflow) if wf.Graph == nil { return nil, nil } seen := map[string]bool{} var peerIDs []string for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) { i := item _, res := i.GetResource() if res == nil { continue } if id := res.GetCreatorID(); id != "" && !seen[id] { seen[id] = true peerIDs = append(peerIDs, id) } } for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) { i := item _, res := i.GetResource() if res == nil { continue } if id := res.GetCreatorID(); id != "" && !seen[id] { seen[id] = true peerIDs = append(peerIDs, id) } } realPeersID := []string{} access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil) for _, id := range peerIDs { if data := access.LoadOne(id); data.Data != nil { realPeersID = append(realPeersID, data.ToPeer().PeerID) } } return realPeersID, nil }