Decentralized

This commit is contained in:
mr
2026-02-23 18:10:47 +01:00
parent 2ccbfe93ed
commit c8b8955c4b
7 changed files with 1311 additions and 81 deletions


@@ -1,18 +1,21 @@
package infrastructure
import (
"encoding/json"
"errors"
"fmt"
"strings"
"sync"
"time"
oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/models/bill"
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
"cloud.o-forge.io/core/oc-lib/models/common/pricing"
"cloud.o-forge.io/core/oc-lib/models/order"
"cloud.o-forge.io/core/oc-lib/models/peer"
"cloud.o-forge.io/core/oc-lib/models/resources"
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
"cloud.o-forge.io/core/oc-lib/models/utils"
"cloud.o-forge.io/core/oc-lib/models/workflow"
@@ -112,20 +115,6 @@ func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest
purchased = append(purchased, exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds)...)
bookings = append(bookings, exec.Book(ws.UUID, wfID, priceds)...)
}
errCh := make(chan error, len(bookings))
var m sync.Mutex
for _, b := range bookings {
go getBooking(b, request, errCh, &m)
}
for i := 0; i < len(bookings); i++ {
if err := <-errCh; err != nil {
return false, wf, execs, purchased, bookings, err
}
}
return true, wf, execs, purchased, bookings, nil
}
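// Sketch (illustrative, not part of this commit): the loop above is a bounded
// fan-out/fan-in: one goroutine per booking, a channel buffered to the sender
// count so no goroutine blocks, and exactly len(bookings) receives. A minimal
// self-contained version of the same pattern, with a hypothetical check
// function:
//
//	func verifyAll(items []string, check func(string) error) error {
//		errCh := make(chan error, len(items)) // buffer = sender count
//		for _, it := range items {
//			go func(it string) { errCh <- check(it) }(it)
//		}
//		for range items {
//			if err := <-errCh; err != nil {
//				return err // late senders still complete: channel is buffered
//			}
//		}
//		return nil
//	}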
@@ -150,41 +139,6 @@ func (ws *WorkflowSchedule) GenerateOrder(purchases []*purchase_resource.Purchas
}
}
func getBooking(b *booking.Booking, request *tools.APIRequest, errCh chan error, m *sync.Mutex) {
	m.Lock()
	c, err := getCallerCopy(request)
	m.Unlock()
	if err != nil {
		errCh <- err
		return
	}
meth := c.URLS[tools.BOOKING][tools.GET]
meth = strings.ReplaceAll(meth, ":id", b.ResourceID)
meth = strings.ReplaceAll(meth, ":start_date", b.ExpectedStartDate.Format("2006-01-02T15:04:05"))
meth = strings.ReplaceAll(meth, ":end_date", b.ExpectedEndDate.Format("2006-01-02T15:04:05"))
c.URLS[tools.BOOKING][tools.GET] = meth
_, err = (&peer.Peer{}).LaunchPeerExecution(b.DestPeerID, b.ResourceID, tools.BOOKING, tools.GET, nil, &c)
if err != nil {
errCh <- fmt.Errorf("%s", "error on "+b.DestPeerID+err.Error())
return
}
errCh <- nil
}
func getCallerCopy(request *tools.APIRequest) (tools.HTTPCaller, error) {
	var c tools.HTTPCaller
	if err := request.Caller.DeepCopy(&c); err != nil {
		return tools.HTTPCaller{}, err
	}
	c.URLS = request.Caller.URLS
	return c, nil
}
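// Note (illustrative, not part of this commit): after the copy above, c.URLS
// still aliases request.Caller.URLS, so getBooking's write-back into
// c.URLS[tools.BOOKING][tools.GET] can race between goroutines; the mutex only
// guards the copy itself. A race-free variant would template a local string
// instead of mutating the shared map, e.g.:
//
//	route := c.URLS[tools.BOOKING][tools.GET]
//	route = strings.ReplaceAll(route, ":id", b.ResourceID)
//	route = strings.ReplaceAll(route, ":start_date", b.ExpectedStartDate.Format("2006-01-02T15:04:05"))
//	route = strings.ReplaceAll(route, ":end_date", b.ExpectedEndDate.Format("2006-01-02T15:04:05"))
//	// then pass route to the request instead of writing it back into c.URLS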
func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*WorkflowSchedule, *workflow.Workflow, []*workflow_execution.WorkflowExecution, error) {
if request == nil {
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no request found")
@@ -204,27 +158,28 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
}
ws.Workflow = wf
var errCh = make(chan error, len(bookings))
var m sync.Mutex
// Resolve our own peer MongoDB-ID once; used to decide local vs NATS routing.
selfID, _ := oclib.GetMySelf()
for _, purchase := range purchases { // TODO on Decentralize Stream.
go ws.CallDatacenter(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, request, errCh, &m)
errCh := make(chan error, len(purchases))
for _, purchase := range purchases {
purchase.IsDraft = true
go propagateResource(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, selfID, request, errCh)
}
for i := 0; i < len(purchases); i++ {
if err := <-errCh; err != nil {
return ws, wf, executions, errors.New("could not launch the peer execution : " + fmt.Sprintf("%v", err))
return ws, wf, executions, errors.New("could not propagate purchase: " + fmt.Sprintf("%v", err))
}
}
errCh = make(chan error, len(bookings))
for _, booking := range bookings { // TODO on Decentralize Stream.
go ws.CallDatacenter(booking, booking.DestPeerID, tools.BOOKING, request, errCh, &m)
for _, bk := range bookings {
bk.IsDraft = true
go propagateResource(bk, bk.DestPeerID, tools.BOOKING, selfID, request, errCh)
}
for i := 0; i < len(bookings); i++ {
if err := <-errCh; err != nil {
return ws, wf, executions, errors.New("could not launch the peer execution : " + fmt.Sprintf("%v", err))
return ws, wf, executions, errors.New("could not propagate booking: " + fmt.Sprintf("%v", err))
}
}
@@ -240,6 +195,7 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
}
exec.StoreDraftDefault()
utils.GenericStoreOne(exec, workflow_execution.NewAccessor(request))
go EmitConsidersExecution(exec, wf)
}
fmt.Println("Schedules")
@@ -248,21 +204,40 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
return ws, wf, executions, nil
}
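// Flow summary (not part of this commit): Schedules loads the workflow,
// propagates purchase drafts, then booking drafts, and finally stores each
// execution as a draft while emitting a considers event; any propagation
// failure short-circuits with the results gathered so far.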
func (ws *WorkflowSchedule) CallDatacenter(purchase utils.DBObject, destPeerID string, dt tools.DataType, request *tools.APIRequest, errCh chan error, m *sync.Mutex) {
m.Lock()
	c, err := getCallerCopy(request)
// propagateResource routes a purchase or booking to its destination:
// - If destPeerID matches our own peer (selfMongoID), the object is stored
// directly in the local DB as draft and the local planner is refreshed.
// - Otherwise a NATS CREATE_RESOURCE message is emitted so the destination
// peer can process it asynchronously.
//
// The caller is responsible for setting obj.IsDraft = true before calling.
func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfMongoID *peer.Peer, request *tools.APIRequest, errCh chan error) {
	if selfMongoID == nil {
		errCh <- errors.New("could not resolve own peer identity")
		return
	}
if destPeerID == selfMongoID.GetID() {
if _, _, err := obj.GetAccessor(request).StoreOne(obj); err != nil {
errCh <- fmt.Errorf("could not store %s locally: %w", dt.String(), err)
return
}
// The planner tracks booking time-slots only; purchases do not affect it.
if dt == tools.BOOKING {
go refreshSelfPlanner(selfMongoID.PeerID, request)
}
errCh <- nil
return
}
payload, err := json.Marshal(obj)
if err != nil {
errCh <- err
errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err)
return
}
m.Unlock()
if res, err := (&peer.Peer{}).LaunchPeerExecution(destPeerID, "", dt, tools.POST, purchase.Serialize(purchase), &c); err != nil {
errCh <- err
return
} else {
data := res["data"].(map[string]interface{})
purchase.SetID(fmt.Sprintf("%v", data["id"]))
}
tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
FromApp: "oc-scheduler",
Datatype: dt,
Method: int(tools.CREATE_RESOURCE),
Payload: payload,
})
errCh <- nil
}
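// Sketch (illustrative, not part of this commit): the destination peer is
// expected to consume the CREATE_RESOURCE message and persist the draft on
// its side. The subject name, the envelope decoding, and the storeDraft
// helper below are assumptions for illustration, not the oc-lib API; the
// client is the standard github.com/nats-io/nats.go package:
//
//	nc, err := nats.Connect(nats.DefaultURL)
//	if err != nil {
//		return err
//	}
//	nc.Subscribe("oc.create_resource", func(msg *nats.Msg) {
//		var env tools.NATSResponse
//		if err := json.Unmarshal(msg.Data, &env); err != nil {
//			return // malformed envelope: drop
//		}
//		storeDraft(env.Datatype, env.Payload) // hypothetical local persist
//	})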
@@ -360,3 +335,303 @@ type Schedule struct {
* TODO : LARGEST GRAIN PLANIFYING THE WORKFLOW WHEN OPTION IS SET
* SET PROTECTION BORDER TIME
*/
// ---------------------------------------------------------------------------
// Slot availability check
// ---------------------------------------------------------------------------
const (
checkWindowHours = 5 // how far ahead to scan for a free slot (hours)
checkStepMin = 15 // time increment per scan step (minutes)
)
// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
Available bool `json:"available"`
Start time.Time `json:"start"`
End *time.Time `json:"end,omitempty"`
// NextSlot is the nearest free slot found within checkWindowHours when
// the requested slot is unavailable, or the preferred (conflict-free) slot
// when running in preemption mode.
NextSlot *time.Time `json:"next_slot,omitempty"`
Warnings []string `json:"warnings,omitempty"`
// Preemptible is true when the check was run in preemption mode.
Preemptible bool `json:"preemptible,omitempty"`
}
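// Illustrative JSON shape of a CheckResult (example values, made up):
//
//	{
//	  "available": false,
//	  "start": "2026-02-23T18:00:00Z",
//	  "end": "2026-02-23T19:00:00Z",
//	  "next_slot": "2026-02-23T18:30:00Z",
//	  "warnings": ["resource abc is not available in [...]"]
//	}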
// bookingResource is the minimum info needed to verify a resource against the
// planner cache.
type bookingResource struct {
id string
peerID string
instanceID string // resolved from WorkflowSchedule.SelectedInstances
}
// Check verifies that all booking-relevant resources (storage and compute) of
// the given workflow have capacity for the requested time slot.
//
// - asap=true → ignore ws.Start, begin searching from time.Now()
// - preemption → always return Available=true but populate Warnings with
// conflicts and NextSlot with the nearest conflict-free alternative
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
// 1. Load workflow
obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
if code != 200 || err != nil {
msg := "could not load workflow " + wfID
if err != nil {
msg += ": " + err.Error()
}
return nil, errors.New(msg)
}
wf := obj.(*workflow.Workflow)
// 2. Resolve start
start := ws.Start
if asap || start.IsZero() {
start = time.Now()
}
// 3. Resolve end: use explicit end/duration or estimate via Planify
end := ws.End
if end == nil {
if ws.DurationS > 0 {
e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
end = &e
} else {
_, longest, _, _, planErr := wf.Planify(
start, nil,
ws.SelectedInstances, ws.SelectedPartnerships,
ws.SelectedBuyings, ws.SelectedStrategies,
int(ws.BookingMode), request,
)
if planErr == nil && longest > 0 {
e := start.Add(time.Duration(longest) * time.Second)
end = &e
}
}
}
// 4. Extract booking-relevant (storage + compute) resources from the graph,
// resolving the selected instance for each resource.
checkables := collectBookingResources(wf, ws.SelectedInstances)
// 5. Check every resource against its peer's planner
unavailable, warnings := checkResourceAvailability(checkables, start, end)
result := &CheckResult{
Start: start,
End: end,
Warnings: warnings,
}
// 6. Preemption mode: mark as schedulable regardless of conflicts, but
// surface warnings and the nearest conflict-free alternative.
if preemption {
result.Available = true
result.Preemptible = true
if len(unavailable) > 0 {
result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
}
return result, nil
}
// 7. All resources are free
if len(unavailable) == 0 {
result.Available = true
return result, nil
}
// 8. Slot unavailable: locate the nearest free slot within the window
result.Available = false
result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
return result, nil
}
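// Usage sketch (hypothetical call site, not part of this commit):
//
//	res, err := ws.Check(wfID, false /*asap*/, false /*preemption*/, request)
//	if err != nil {
//		return err
//	}
//	if !res.Available && res.NextSlot != nil {
//		// offer *res.NextSlot to the caller as the nearest free start
//	}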
// collectBookingResources returns unique storage and compute resources from the
// workflow graph. For each resource the selected instance ID is resolved from
// selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner
// check targets the exact instance chosen by the user.
func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) []bookingResource {
if wf.Graph == nil {
return nil
}
seen := map[string]bool{}
var result []bookingResource
resolveInstanceID := func(res interface {
GetID() string
GetCreatorID() string
}) string {
idx := selectedInstances.Get(res.GetID())
switch r := res.(type) {
case *resources.StorageResource:
if inst := r.GetSelectedInstance(idx); inst != nil {
return inst.GetID()
}
case *resources.ComputeResource:
if inst := r.GetSelectedInstance(idx); inst != nil {
return inst.GetID()
}
}
return ""
}
for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
i := item
_, res := i.GetResource()
if res == nil {
continue
}
id, peerID := res.GetID(), res.GetCreatorID()
if peerID == "" || seen[id] {
continue
}
seen[id] = true
result = append(result, bookingResource{
id: id,
peerID: peerID,
instanceID: resolveInstanceID(res),
})
}
for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
i := item
_, res := i.GetResource()
if res == nil {
continue
}
id, peerID := res.GetID(), res.GetCreatorID()
if peerID == "" || seen[id] {
continue
}
seen[id] = true
result = append(result, bookingResource{
id: id,
peerID: peerID,
instanceID: resolveInstanceID(res),
})
}
return result
}
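// Refactoring note (sketch, not part of this commit): the storage and compute
// loops above are identical. Assuming GetGraphItems returns a slice of graph
// items exposing GetResource() (the graph.GraphItem type name is an
// assumption), a shared closure removes the duplication:
//
//	appendUnique := func(items []graph.GraphItem) {
//		for _, item := range items {
//			_, res := item.GetResource()
//			if res == nil {
//				continue
//			}
//			id, peerID := res.GetID(), res.GetCreatorID()
//			if peerID == "" || seen[id] {
//				continue
//			}
//			seen[id] = true
//			result = append(result, bookingResource{id: id, peerID: peerID, instanceID: resolveInstanceID(res)})
//		}
//	}
//	appendUnique(wf.GetGraphItems(wf.Graph.IsStorage))
//	appendUnique(wf.GetGraphItems(wf.Graph.IsCompute))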
// checkResourceAvailability returns the IDs of unavailable resources and
// human-readable warning messages.
func checkResourceAvailability(res []bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
for _, r := range res {
plannerMu.RLock()
p := PlannerCache[r.peerID]
plannerMu.RUnlock()
if p == nil {
			warnings = append(warnings, fmt.Sprintf(
				"planner for peer %s not in cache for resource %s; assuming available", r.peerID, r.id))
continue
}
if !checkInstance(p, r.id, r.instanceID, start, end) {
unavailable = append(unavailable, r.id)
			warnings = append(warnings, fmt.Sprintf(
				"resource %s is not available in [%s, %s]",
				r.id, start.Format(time.RFC3339), formatOptTime(end)))
}
}
return
}
// checkInstance checks availability for the specific instance resolved by the
// scheduler. When instanceID is empty (no instance selected / none resolvable),
// it falls back to checking all instances known in the planner and returns true
// if any one has remaining capacity. Returns true when no capacity is recorded.
func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
if instanceID != "" {
return p.Check(resourceID, instanceID, nil, start, end)
}
// Fallback: accept if any known instance has free capacity
caps, ok := p.Capacities[resourceID]
if !ok || len(caps) == 0 {
return true // no recorded usage → assume free
}
for id := range caps {
if p.Check(resourceID, id, nil, start, end) {
return true
}
}
return false
}
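// Decision summary for checkInstance:
//
//	instanceID != ""            → exact planner check on that instance
//	instanceID == "", no caps   → assume free (nothing recorded for the resource)
//	instanceID == "", caps > 0  → free iff at least one instance passes p.Check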
// findNextSlot scans forward from 'from' in checkStepMin increments for up to
// windowH hours and returns the first candidate start time at which all
// resources are simultaneously free.
func findNextSlot(resources []bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
duration := time.Hour
if originalEnd != nil {
if d := originalEnd.Sub(from); d > 0 {
duration = d
}
}
step := time.Duration(checkStepMin) * time.Minute
limit := from.Add(time.Duration(windowH) * time.Hour)
for t := from.Add(step); t.Before(limit); t = t.Add(step) {
e := t.Add(duration)
if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 {
return &t
}
}
return nil
}
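// Cost note: with checkStepMin = 15 and checkWindowHours = 5 the scan tries at
// most 5*60/15 = 20 candidate starts, each re-checking every resource, i.e.
// O(window/step * len(resources)) planner-cache lookups.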
func formatOptTime(t *time.Time) string {
if t == nil {
return "open"
}
return t.Format(time.RFC3339)
}
// GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of
// creator peer IDs for all its storage and compute resources.
// These are the peers whose planners must be watched by a check stream.
func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
if code != 200 || err != nil {
msg := "could not load workflow " + wfID
if err != nil {
msg += ": " + err.Error()
}
return nil, errors.New(msg)
}
wf := obj.(*workflow.Workflow)
if wf.Graph == nil {
return nil, nil
}
seen := map[string]bool{}
var peerIDs []string
for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
i := item
_, res := i.GetResource()
if res == nil {
continue
}
if id := res.GetCreatorID(); id != "" && !seen[id] {
seen[id] = true
peerIDs = append(peerIDs, id)
}
}
for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
i := item
_, res := i.GetResource()
if res == nil {
continue
}
if id := res.GetCreatorID(); id != "" && !seen[id] {
seen[id] = true
peerIDs = append(peerIDs, id)
}
}
realPeersID := []string{}
access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
for _, id := range peerIDs {
if data := access.LoadOne(id); data.Data != nil {
realPeersID = append(realPeersID, data.ToPeer().PeerID)
}
}
return realPeersID, nil
}
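// Usage sketch (hypothetical, not part of this commit): a check stream would
// resolve the peers once, then keep their planners fresh before re-running
// Check. The watchPlanner helper is an assumption, not an existing oc-lib call:
//
//	peerIDs, err := GetWorkflowPeerIDs(wfID, request)
//	if err != nil {
//		return err
//	}
//	for _, id := range peerIDs {
//		go watchPlanner(id) // hypothetical: refresh PlannerCache[id] on updates
//	}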