Decentralized
This commit is contained in:
@@ -1,18 +1,21 @@
|
||||
package infrastructure
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/models/bill"
|
||||
"cloud.o-forge.io/core/oc-lib/models/booking"
|
||||
"cloud.o-forge.io/core/oc-lib/models/booking/planner"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/pricing"
|
||||
"cloud.o-forge.io/core/oc-lib/models/order"
|
||||
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources"
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
"cloud.o-forge.io/core/oc-lib/models/workflow"
|
||||
@@ -112,20 +115,6 @@ func (ws *WorkflowSchedule) GetBuyAndBook(wfID string, request *tools.APIRequest
|
||||
purchased = append(purchased, exec.Buy(ws.SelectedBillingStrategy, ws.UUID, wfID, priceds)...)
|
||||
bookings = append(bookings, exec.Book(ws.UUID, wfID, priceds)...)
|
||||
}
|
||||
|
||||
errCh := make(chan error, len(bookings))
|
||||
var m sync.Mutex
|
||||
|
||||
for _, b := range bookings {
|
||||
go getBooking(b, request, errCh, &m)
|
||||
}
|
||||
|
||||
for i := 0; i < len(bookings); i++ {
|
||||
if err := <-errCh; err != nil {
|
||||
return false, wf, execs, purchased, bookings, err
|
||||
}
|
||||
}
|
||||
|
||||
return true, wf, execs, purchased, bookings, nil
|
||||
}
|
||||
|
||||
@@ -150,41 +139,6 @@ func (ws *WorkflowSchedule) GenerateOrder(purchases []*purchase_resource.Purchas
|
||||
}
|
||||
}
|
||||
|
||||
func getBooking(b *booking.Booking, request *tools.APIRequest, errCh chan error, m *sync.Mutex) {
|
||||
m.Lock()
|
||||
c, err := getCallerCopy(request, errCh)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
m.Unlock()
|
||||
|
||||
meth := c.URLS[tools.BOOKING][tools.GET]
|
||||
meth = strings.ReplaceAll(meth, ":id", b.ResourceID)
|
||||
meth = strings.ReplaceAll(meth, ":start_date", b.ExpectedStartDate.Format("2006-01-02T15:04:05"))
|
||||
meth = strings.ReplaceAll(meth, ":end_date", b.ExpectedEndDate.Format("2006-01-02T15:04:05"))
|
||||
c.URLS[tools.BOOKING][tools.GET] = meth
|
||||
_, err = (&peer.Peer{}).LaunchPeerExecution(b.DestPeerID, b.ResourceID, tools.BOOKING, tools.GET, nil, &c)
|
||||
|
||||
if err != nil {
|
||||
errCh <- fmt.Errorf("%s", "error on "+b.DestPeerID+err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
errCh <- nil
|
||||
}
|
||||
|
||||
func getCallerCopy(request *tools.APIRequest, errCh chan error) (tools.HTTPCaller, error) {
|
||||
var c tools.HTTPCaller
|
||||
err := request.Caller.DeepCopy(c)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return tools.HTTPCaller{}, nil
|
||||
}
|
||||
c.URLS = request.Caller.URLS
|
||||
return c, err
|
||||
}
|
||||
|
||||
func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*WorkflowSchedule, *workflow.Workflow, []*workflow_execution.WorkflowExecution, error) {
|
||||
if request == nil {
|
||||
return ws, nil, []*workflow_execution.WorkflowExecution{}, errors.New("no request found")
|
||||
@@ -204,27 +158,28 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
|
||||
}
|
||||
ws.Workflow = wf
|
||||
|
||||
var errCh = make(chan error, len(bookings))
|
||||
var m sync.Mutex
|
||||
// Resolve our own peer MongoDB-ID once; used to decide local vs NATS routing.
|
||||
selfID, _ := oclib.GetMySelf()
|
||||
|
||||
for _, purchase := range purchases { // TODO on Decentralize Stream.
|
||||
go ws.CallDatacenter(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, request, errCh, &m)
|
||||
errCh := make(chan error, len(purchases))
|
||||
for _, purchase := range purchases {
|
||||
purchase.IsDraft = true
|
||||
go propagateResource(purchase, purchase.DestPeerID, tools.PURCHASE_RESOURCE, selfID, request, errCh)
|
||||
}
|
||||
for i := 0; i < len(purchases); i++ {
|
||||
if err := <-errCh; err != nil {
|
||||
return ws, wf, executions, errors.New("could not launch the peer execution : " + fmt.Sprintf("%v", err))
|
||||
return ws, wf, executions, errors.New("could not propagate purchase: " + fmt.Sprintf("%v", err))
|
||||
}
|
||||
}
|
||||
|
||||
errCh = make(chan error, len(bookings))
|
||||
|
||||
for _, booking := range bookings { // TODO on Decentralize Stream.
|
||||
go ws.CallDatacenter(booking, booking.DestPeerID, tools.BOOKING, request, errCh, &m)
|
||||
for _, bk := range bookings {
|
||||
bk.IsDraft = true
|
||||
go propagateResource(bk, bk.DestPeerID, tools.BOOKING, selfID, request, errCh)
|
||||
}
|
||||
|
||||
for i := 0; i < len(bookings); i++ {
|
||||
if err := <-errCh; err != nil {
|
||||
return ws, wf, executions, errors.New("could not launch the peer execution : " + fmt.Sprintf("%v", err))
|
||||
return ws, wf, executions, errors.New("could not propagate booking: " + fmt.Sprintf("%v", err))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,6 +195,7 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
|
||||
}
|
||||
exec.StoreDraftDefault()
|
||||
utils.GenericStoreOne(exec, workflow_execution.NewAccessor(request))
|
||||
go EmitConsidersExecution(exec, wf)
|
||||
}
|
||||
fmt.Println("Schedules")
|
||||
|
||||
@@ -248,21 +204,40 @@ func (ws *WorkflowSchedule) Schedules(wfID string, request *tools.APIRequest) (*
|
||||
return ws, wf, executions, nil
|
||||
}
|
||||
|
||||
func (ws *WorkflowSchedule) CallDatacenter(purchase utils.DBObject, destPeerID string, dt tools.DataType, request *tools.APIRequest, errCh chan error, m *sync.Mutex) {
|
||||
m.Lock()
|
||||
c, err := getCallerCopy(request, errCh)
|
||||
// propagateResource routes a purchase or booking to its destination:
|
||||
// - If destPeerID matches our own peer (selfMongoID), the object is stored
|
||||
// directly in the local DB as draft and the local planner is refreshed.
|
||||
// - Otherwise a NATS CREATE_RESOURCE message is emitted so the destination
|
||||
// peer can process it asynchronously.
|
||||
//
|
||||
// The caller is responsible for setting obj.IsDraft = true before calling.
|
||||
func propagateResource(obj utils.DBObject, destPeerID string, dt tools.DataType, selfMongoID *peer.Peer, request *tools.APIRequest, errCh chan error) {
|
||||
if selfMongoID == nil {
|
||||
return
|
||||
} // booking or purchase
|
||||
if destPeerID == selfMongoID.GetID() {
|
||||
if _, _, err := obj.GetAccessor(request).StoreOne(obj); err != nil {
|
||||
errCh <- fmt.Errorf("could not store %s locally: %w", dt.String(), err)
|
||||
return
|
||||
}
|
||||
// The planner tracks booking time-slots only; purchases do not affect it.
|
||||
if dt == tools.BOOKING {
|
||||
go refreshSelfPlanner(selfMongoID.PeerID, request)
|
||||
}
|
||||
errCh <- nil
|
||||
return
|
||||
}
|
||||
payload, err := json.Marshal(obj)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err)
|
||||
return
|
||||
}
|
||||
m.Unlock()
|
||||
if res, err := (&peer.Peer{}).LaunchPeerExecution(destPeerID, "", dt, tools.POST, purchase.Serialize(purchase), &c); err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
} else {
|
||||
data := res["data"].(map[string]interface{})
|
||||
purchase.SetID(fmt.Sprintf("%v", data["id"]))
|
||||
}
|
||||
tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
|
||||
FromApp: "oc-scheduler",
|
||||
Datatype: dt,
|
||||
Method: int(tools.CREATE_RESOURCE),
|
||||
Payload: payload,
|
||||
})
|
||||
errCh <- nil
|
||||
}
|
||||
|
||||
@@ -360,3 +335,303 @@ type Schedule struct {
|
||||
* TODO : LARGEST GRAIN PLANIFYING THE WORKFLOW WHEN OPTION IS SET
|
||||
* SET PROTECTION BORDER TIME
|
||||
*/
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Slot availability check
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
const (
	// checkWindowHours is how far ahead to scan for a free slot, in hours.
	checkWindowHours = 5
	// checkStepMin is the time increment per scan step, in minutes.
	checkStepMin = 15
)
|
||||
|
||||
// CheckResult holds the outcome of a slot availability check.
type CheckResult struct {
	// Available reports whether the requested slot can be scheduled.
	Available bool `json:"available"`
	// Start is the resolved start of the checked slot.
	Start time.Time `json:"start"`
	// End is the resolved end of the checked slot; nil when it is open-ended.
	End *time.Time `json:"end,omitempty"`
	// NextSlot is the nearest free slot found within checkWindowHours when
	// the requested slot is unavailable, or the preferred (conflict-free) slot
	// when running in preemption mode.
	NextSlot *time.Time `json:"next_slot,omitempty"`
	// Warnings lists human-readable notes gathered during the check.
	Warnings []string `json:"warnings,omitempty"`
	// Preemptible is true when the check was run in preemption mode.
	Preemptible bool `json:"preemptible,omitempty"`
}
|
||||
|
||||
// bookingResource is the minimum info needed to verify a resource against the
// planner cache.
type bookingResource struct {
	// id is the resource ID.
	id string
	// peerID is the resource's creator ID; it is the key used to look up the
	// planner for this resource.
	peerID string
	// instanceID is resolved from WorkflowSchedule.SelectedInstances; empty
	// when no instance could be resolved.
	instanceID string
}
|
||||
|
||||
// Check verifies that all booking-relevant resources (storage and compute) of
|
||||
// the given workflow have capacity for the requested time slot.
|
||||
//
|
||||
// - asap=true → ignore ws.Start, begin searching from time.Now()
|
||||
// - preemption → always return Available=true but populate Warnings with
|
||||
// conflicts and NextSlot with the nearest conflict-free alternative
|
||||
func (ws *WorkflowSchedule) Check(wfID string, asap bool, preemption bool, request *tools.APIRequest) (*CheckResult, error) {
|
||||
// 1. Load workflow
|
||||
obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
|
||||
if code != 200 || err != nil {
|
||||
msg := "could not load workflow " + wfID
|
||||
if err != nil {
|
||||
msg += ": " + err.Error()
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
wf := obj.(*workflow.Workflow)
|
||||
|
||||
// 2. Resolve start
|
||||
start := ws.Start
|
||||
if asap || start.IsZero() {
|
||||
start = time.Now()
|
||||
}
|
||||
|
||||
// 3. Resolve end – use explicit end/duration or estimate via Planify
|
||||
end := ws.End
|
||||
if end == nil {
|
||||
if ws.DurationS > 0 {
|
||||
e := start.Add(time.Duration(ws.DurationS * float64(time.Second)))
|
||||
end = &e
|
||||
} else {
|
||||
_, longest, _, _, planErr := wf.Planify(
|
||||
start, nil,
|
||||
ws.SelectedInstances, ws.SelectedPartnerships,
|
||||
ws.SelectedBuyings, ws.SelectedStrategies,
|
||||
int(ws.BookingMode), request,
|
||||
)
|
||||
if planErr == nil && longest > 0 {
|
||||
e := start.Add(time.Duration(longest) * time.Second)
|
||||
end = &e
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. Extract booking-relevant (storage + compute) resources from the graph,
|
||||
// resolving the selected instance for each resource.
|
||||
checkables := collectBookingResources(wf, ws.SelectedInstances)
|
||||
|
||||
// 5. Check every resource against its peer's planner
|
||||
unavailable, warnings := checkResourceAvailability(checkables, start, end)
|
||||
|
||||
result := &CheckResult{
|
||||
Start: start,
|
||||
End: end,
|
||||
Warnings: warnings,
|
||||
}
|
||||
|
||||
// 6. Preemption mode: mark as schedulable regardless of conflicts, but
|
||||
// surface warnings and the nearest conflict-free alternative.
|
||||
if preemption {
|
||||
result.Available = true
|
||||
result.Preemptible = true
|
||||
if len(unavailable) > 0 {
|
||||
result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// 7. All resources are free
|
||||
if len(unavailable) == 0 {
|
||||
result.Available = true
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// 8. Slot unavailable – locate the nearest free slot within the window
|
||||
result.Available = false
|
||||
result.NextSlot = findNextSlot(checkables, start, end, checkWindowHours)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// collectBookingResources returns unique storage and compute resources from the
|
||||
// workflow graph. For each resource the selected instance ID is resolved from
|
||||
// selectedInstances (the scheduler's SelectedInstances ConfigItem) so the planner
|
||||
// check targets the exact instance chosen by the user.
|
||||
func collectBookingResources(wf *workflow.Workflow, selectedInstances workflow.ConfigItem) []bookingResource {
|
||||
if wf.Graph == nil {
|
||||
return nil
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
var result []bookingResource
|
||||
|
||||
resolveInstanceID := func(res interface {
|
||||
GetID() string
|
||||
GetCreatorID() string
|
||||
}) string {
|
||||
idx := selectedInstances.Get(res.GetID())
|
||||
switch r := res.(type) {
|
||||
case *resources.StorageResource:
|
||||
if inst := r.GetSelectedInstance(idx); inst != nil {
|
||||
return inst.GetID()
|
||||
}
|
||||
case *resources.ComputeResource:
|
||||
if inst := r.GetSelectedInstance(idx); inst != nil {
|
||||
return inst.GetID()
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
|
||||
i := item
|
||||
_, res := i.GetResource()
|
||||
if res == nil {
|
||||
continue
|
||||
}
|
||||
id, peerID := res.GetID(), res.GetCreatorID()
|
||||
if peerID == "" || seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
result = append(result, bookingResource{
|
||||
id: id,
|
||||
peerID: peerID,
|
||||
instanceID: resolveInstanceID(res),
|
||||
})
|
||||
}
|
||||
|
||||
for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
|
||||
i := item
|
||||
_, res := i.GetResource()
|
||||
if res == nil {
|
||||
continue
|
||||
}
|
||||
id, peerID := res.GetID(), res.GetCreatorID()
|
||||
if peerID == "" || seen[id] {
|
||||
continue
|
||||
}
|
||||
seen[id] = true
|
||||
result = append(result, bookingResource{
|
||||
id: id,
|
||||
peerID: peerID,
|
||||
instanceID: resolveInstanceID(res),
|
||||
})
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// checkResourceAvailability returns the IDs of unavailable resources and
|
||||
// human-readable warning messages.
|
||||
func checkResourceAvailability(res []bookingResource, start time.Time, end *time.Time) (unavailable []string, warnings []string) {
|
||||
for _, r := range res {
|
||||
plannerMu.RLock()
|
||||
p := PlannerCache[r.peerID]
|
||||
plannerMu.RUnlock()
|
||||
if p == nil {
|
||||
warnings = append(warnings, fmt.Sprintf(
|
||||
"peer %s planner not in cache for resource %s – assuming available", r.peerID, r.id))
|
||||
continue
|
||||
}
|
||||
if !checkInstance(p, r.id, r.instanceID, start, end) {
|
||||
unavailable = append(unavailable, r.id)
|
||||
warnings = append(warnings, fmt.Sprintf(
|
||||
"resource %s is not available in [%s – %s]",
|
||||
r.id, start.Format(time.RFC3339), formatOptTime(end)))
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// checkInstance checks availability for the specific instance resolved by the
|
||||
// scheduler. When instanceID is empty (no instance selected / none resolvable),
|
||||
// it falls back to checking all instances known in the planner and returns true
|
||||
// if any one has remaining capacity. Returns true when no capacity is recorded.
|
||||
func checkInstance(p *planner.Planner, resourceID string, instanceID string, start time.Time, end *time.Time) bool {
|
||||
if instanceID != "" {
|
||||
return p.Check(resourceID, instanceID, nil, start, end)
|
||||
}
|
||||
// Fallback: accept if any known instance has free capacity
|
||||
caps, ok := p.Capacities[resourceID]
|
||||
if !ok || len(caps) == 0 {
|
||||
return true // no recorded usage → assume free
|
||||
}
|
||||
for id := range caps {
|
||||
if p.Check(resourceID, id, nil, start, end) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// findNextSlot scans forward from 'from' in checkStepMin increments for up to
|
||||
// windowH hours and returns the first candidate start time at which all
|
||||
// resources are simultaneously free.
|
||||
func findNextSlot(resources []bookingResource, from time.Time, originalEnd *time.Time, windowH int) *time.Time {
|
||||
duration := time.Hour
|
||||
if originalEnd != nil {
|
||||
if d := originalEnd.Sub(from); d > 0 {
|
||||
duration = d
|
||||
}
|
||||
}
|
||||
step := time.Duration(checkStepMin) * time.Minute
|
||||
limit := from.Add(time.Duration(windowH) * time.Hour)
|
||||
for t := from.Add(step); t.Before(limit); t = t.Add(step) {
|
||||
e := t.Add(duration)
|
||||
if unavail, _ := checkResourceAvailability(resources, t, &e); len(unavail) == 0 {
|
||||
return &t
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatOptTime renders an optional time as RFC 3339, or "open" when t is nil.
func formatOptTime(t *time.Time) string {
	if t == nil {
		return "open"
	}
	return t.Format(time.RFC3339)
}
|
||||
|
||||
// GetWorkflowPeerIDs loads the workflow and returns the deduplicated list of
|
||||
// creator peer IDs for all its storage and compute resources.
|
||||
// These are the peers whose planners must be watched by a check stream.
|
||||
func GetWorkflowPeerIDs(wfID string, request *tools.APIRequest) ([]string, error) {
|
||||
obj, code, err := workflow.NewAccessor(request).LoadOne(wfID)
|
||||
if code != 200 || err != nil {
|
||||
msg := "could not load workflow " + wfID
|
||||
if err != nil {
|
||||
msg += ": " + err.Error()
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
wf := obj.(*workflow.Workflow)
|
||||
if wf.Graph == nil {
|
||||
return nil, nil
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
var peerIDs []string
|
||||
for _, item := range wf.GetGraphItems(wf.Graph.IsStorage) {
|
||||
i := item
|
||||
_, res := i.GetResource()
|
||||
if res == nil {
|
||||
continue
|
||||
}
|
||||
if id := res.GetCreatorID(); id != "" && !seen[id] {
|
||||
seen[id] = true
|
||||
peerIDs = append(peerIDs, id)
|
||||
}
|
||||
}
|
||||
for _, item := range wf.GetGraphItems(wf.Graph.IsCompute) {
|
||||
i := item
|
||||
_, res := i.GetResource()
|
||||
if res == nil {
|
||||
continue
|
||||
}
|
||||
if id := res.GetCreatorID(); id != "" && !seen[id] {
|
||||
seen[id] = true
|
||||
peerIDs = append(peerIDs, id)
|
||||
}
|
||||
}
|
||||
realPeersID := []string{}
|
||||
access := oclib.NewRequestAdmin(oclib.LibDataEnum(tools.PEER), nil)
|
||||
for _, id := range peerIDs {
|
||||
if data := access.LoadOne(id); data.Data != nil {
|
||||
realPeersID = append(realPeersID, data.ToPeer().PeerID)
|
||||
}
|
||||
}
|
||||
return realPeersID, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user