475 lines
15 KiB
Go
475 lines
15 KiB
Go
package scheduling_resources
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
oclib "cloud.o-forge.io/core/oc-lib"
|
|
"cloud.o-forge.io/core/oc-lib/models/booking"
|
|
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
|
"cloud.o-forge.io/core/oc-lib/models/peer"
|
|
"cloud.o-forge.io/core/oc-lib/models/resources/purchase_resource"
|
|
"cloud.o-forge.io/core/oc-lib/models/utils"
|
|
"cloud.o-forge.io/core/oc-lib/tools"
|
|
"oc-scheduler/infrastructure/planner"
|
|
)
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Service
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// SchedulingResourcesService manages the lifecycle of Booking and PurchaseResource
|
|
// as SchedulerObjects. It caches the local peer identity so every operation can
|
|
// route correctly without calling oclib.GetMySelf() on each request.
|
|
type SchedulingResourcesService struct {
|
|
mu sync.RWMutex
|
|
selfPeer *peer.Peer
|
|
}
|
|
|
|
var singleton *SchedulingResourcesService
|
|
|
|
func init() {
|
|
singleton = &SchedulingResourcesService{}
|
|
}
|
|
|
|
// GetService returns the singleton SchedulingResourcesService.
|
|
func GetService() *SchedulingResourcesService {
|
|
return singleton
|
|
}
|
|
|
|
// Self returns the cached local peer, lazily resolving it on first call.
|
|
func (s *SchedulingResourcesService) Self() *peer.Peer {
|
|
s.mu.RLock()
|
|
p := s.selfPeer
|
|
s.mu.RUnlock()
|
|
if p != nil {
|
|
return p
|
|
}
|
|
p, _ = oclib.GetMySelf()
|
|
if p != nil {
|
|
s.mu.Lock()
|
|
s.selfPeer = p
|
|
s.mu.Unlock()
|
|
}
|
|
return p
|
|
}
|
|
|
|
// InvalidateSelf clears the cached self peer (e.g. after a peer re-registration).
|
|
func (s *SchedulingResourcesService) InvalidateSelf() {
|
|
s.mu.Lock()
|
|
s.selfPeer = nil
|
|
s.mu.Unlock()
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// RemoveResourcePayload
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// RemoveResourcePayload is the body of a NATS REMOVE_RESOURCE event. Besides
// the ID of the resource to delete it carries the scheduler peer and
// executions identifiers, which the receiving handlers compare with the
// stored resource before deleting it.
type RemoveResourcePayload struct {
	// ID identifies the booking or purchase to remove.
	ID string `json:"id"`
	// SchedulerPeerID must match the SchedulerPeerID of the stored resource.
	SchedulerPeerID string `json:"scheduler_peer_id"`
	// ExecutionsID must match the ExecutionsID of the stored resource.
	ExecutionsID string `json:"executions_id"`
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Propagation — creation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// PropagateCreate routes a new booking/purchase draft to its destination:
|
|
// - local peer → store in DB + refresh planner
|
|
// - remote peer → emit NATS PROPALGATION_EVENT/PB_CREATE
|
|
func (s *SchedulingResourcesService) PropagateCreate(
|
|
obj utils.DBObject,
|
|
destPeerID string,
|
|
dt tools.DataType,
|
|
request *tools.APIRequest,
|
|
errCh chan error,
|
|
) {
|
|
selfID := s.Self()
|
|
if selfID == nil {
|
|
errCh <- fmt.Errorf("PropagateCreate: local peer not available")
|
|
return
|
|
}
|
|
|
|
if destPeerID == selfID.GetID() {
|
|
stored := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).StoreOne(obj.Serialize(obj))
|
|
if stored.Err != "" || stored.Data == nil {
|
|
errCh <- fmt.Errorf("could not store %s locally: %s", dt.String(), stored.Err)
|
|
return
|
|
}
|
|
if dt == tools.BOOKING {
|
|
planner.GetPlannerService().RefreshSelf(selfID.PeerID, request)
|
|
}
|
|
errCh <- nil
|
|
return
|
|
}
|
|
|
|
m := obj.Serialize(obj)
|
|
if m["dest_peer_id"] != nil {
|
|
if data := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil).LoadOne(fmt.Sprintf("%v", m["dest_peer_id"])); data.Data != nil {
|
|
m["peer_id"] = data.Data.(*peer.Peer).PeerID
|
|
}
|
|
} else if m["peerless"] == true {
|
|
originRef := fmt.Sprintf("%v", m["origin_ref"])
|
|
if !isValidPeerlessRef(originRef) {
|
|
emitPeerBehaviorReport(request.PeerID, tools.BehaviorFraud,
|
|
"peerless booking with invalid or unrecognised Origin.Ref", originRef)
|
|
errCh <- fmt.Errorf("peerless booking rejected: invalid Origin.Ref %q", originRef)
|
|
return
|
|
}
|
|
stored := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).StoreOne(m)
|
|
if stored.Err != "" || stored.Data == nil {
|
|
errCh <- fmt.Errorf("could not store peerless %s locally: %s", dt.String(), stored.Err)
|
|
return
|
|
}
|
|
if dt == tools.BOOKING {
|
|
planner.GetPlannerService().RefreshSelf(selfID.PeerID, request)
|
|
}
|
|
errCh <- nil
|
|
return
|
|
} else {
|
|
fmt.Println("PropagateCreate: no dest_peer_id and not peerless, skipping")
|
|
errCh <- nil
|
|
return
|
|
}
|
|
|
|
payload, err := json.Marshal(m)
|
|
if err != nil {
|
|
errCh <- fmt.Errorf("could not serialize %s: %w", dt.String(), err)
|
|
return
|
|
}
|
|
b, err := json.Marshal(&tools.PropalgationMessage{
|
|
DataType: dt.EnumIndex(),
|
|
Action: tools.PB_CREATE,
|
|
Payload: payload,
|
|
})
|
|
if err == nil {
|
|
tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
|
FromApp: "oc-scheduler",
|
|
Datatype: dt,
|
|
Method: int(tools.PROPALGATION_EVENT),
|
|
Payload: b,
|
|
})
|
|
}
|
|
errCh <- nil
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Propagation — update / confirmation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// PropagateWrite routes a booking/purchase update to its destination.
|
|
// Returns true when the resource was confirmed locally (IsDraft=false on self peer)
|
|
// and the caller must trigger considers via execution.UpdateExecutionState.
|
|
func (s *SchedulingResourcesService) PropagateWrite(
|
|
obj utils.DBObject,
|
|
destPeerID string,
|
|
dt tools.DataType,
|
|
request *tools.APIRequest,
|
|
) bool {
|
|
selfID := s.Self()
|
|
if selfID == nil {
|
|
fmt.Println("PropagateWrite: local peer not available")
|
|
return false
|
|
}
|
|
|
|
if destPeerID == selfID.GetID() {
|
|
if _, _, err := utils.GenericRawUpdateOne(obj, obj.GetID(), obj.GetAccessor(request)); err != nil {
|
|
fmt.Printf("PropagateWrite: local update failed for %s %s: %v\n", dt, obj.GetID(), err)
|
|
return false
|
|
}
|
|
if dt == tools.BOOKING {
|
|
planner.GetPlannerService().RefreshSelf(selfID.PeerID, request)
|
|
}
|
|
return !obj.IsDrafted()
|
|
}
|
|
|
|
payload, err := json.Marshal(obj)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
|
|
FromApp: "oc-scheduler",
|
|
Datatype: dt,
|
|
Method: int(tools.CREATE_RESOURCE),
|
|
Payload: payload,
|
|
})
|
|
return false
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Deletion
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Delete removes a booking/purchase from its destination peer (local or NATS).
|
|
func (s *SchedulingResourcesService) Delete(dt tools.DataType, bk SchedulerObject, request *tools.APIRequest) {
|
|
selfID := s.Self()
|
|
if selfID == nil {
|
|
fmt.Println("Delete: local peer not available")
|
|
return
|
|
}
|
|
|
|
if bk.GetDestPeer() == selfID.GetID() {
|
|
data := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).DeleteOne(bk.GetID())
|
|
fmt.Println("Delete scheduling resource", bk.GetID(), data.Err)
|
|
if dt == tools.BOOKING {
|
|
planner.GetPlannerService().RefreshSelf(selfID.PeerID, request)
|
|
}
|
|
return
|
|
}
|
|
EmitNATSRemove(bk.GetID(), bk.GetPeerSession(), bk.GetExecutionsId(), dt)
|
|
}
|
|
|
|
// EmitNATSRemove sends a REMOVE_RESOURCE NATS event with auth fields.
|
|
func EmitNATSRemove(id, schedulerPeerID, executionsID string, dt tools.DataType) {
|
|
payload, _ := json.Marshal(RemoveResourcePayload{
|
|
ID: id,
|
|
SchedulerPeerID: schedulerPeerID,
|
|
ExecutionsID: executionsID,
|
|
})
|
|
tools.NewNATSCaller().SetNATSPub(tools.REMOVE_RESOURCE, tools.NATSResponse{
|
|
FromApp: "oc-scheduler",
|
|
Datatype: dt,
|
|
Method: int(tools.REMOVE_RESOURCE),
|
|
Payload: payload,
|
|
})
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Confirmation
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Confirm sets IsDraft=false on a booking or purchase.
|
|
// For bookings, also advances State to SCHEDULED and refreshes the self planner.
|
|
func Confirm(id string, dt tools.DataType) {
|
|
adminReq := &tools.APIRequest{Admin: true}
|
|
switch dt {
|
|
case tools.BOOKING:
|
|
res, _, err := booking.NewAccessor(adminReq).LoadOne(id)
|
|
if err != nil || res == nil {
|
|
fmt.Printf("Confirm: could not load booking %s: %v\n", id, err)
|
|
return
|
|
}
|
|
bk := res.(*booking.Booking)
|
|
bk.IsDraft = false
|
|
bk.State = enum.SCHEDULED
|
|
if _, _, err := utils.GenericRawUpdateOne(bk, id, booking.NewAccessor(adminReq)); err != nil {
|
|
fmt.Printf("Confirm: could not confirm booking %s: %v\n", id, err)
|
|
return
|
|
}
|
|
if self := GetService().Self(); self != nil {
|
|
planner.GetPlannerService().RefreshSelf(self.PeerID, adminReq)
|
|
}
|
|
case tools.PURCHASE_RESOURCE:
|
|
res, _, err := purchase_resource.NewAccessor(adminReq).LoadOne(id)
|
|
if err != nil || res == nil {
|
|
fmt.Printf("Confirm: could not load purchase %s: %v\n", id, err)
|
|
return
|
|
}
|
|
pr := res.(*purchase_resource.PurchaseResource)
|
|
pr.IsDraft = false
|
|
if _, _, err := utils.GenericRawUpdateOne(pr, id, purchase_resource.NewAccessor(adminReq)); err != nil {
|
|
fmt.Printf("Confirm: could not confirm purchase %s: %v\n", id, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// DraftTimeout deletes a booking/purchase if it is still a draft after 10 minutes.
|
|
func DraftTimeout(id string, dt tools.DataType) {
|
|
adminReq := &tools.APIRequest{Admin: true}
|
|
var res utils.DBObject
|
|
var loadErr error
|
|
switch dt {
|
|
case tools.BOOKING:
|
|
res, _, loadErr = booking.NewAccessor(adminReq).LoadOne(id)
|
|
case tools.PURCHASE_RESOURCE:
|
|
res, _, loadErr = purchase_resource.NewAccessor(adminReq).LoadOne(id)
|
|
default:
|
|
return
|
|
}
|
|
if loadErr != nil || res == nil || !res.IsDrafted() {
|
|
return
|
|
}
|
|
switch dt {
|
|
case tools.BOOKING:
|
|
booking.NewAccessor(adminReq).DeleteOne(id)
|
|
case tools.PURCHASE_RESOURCE:
|
|
purchase_resource.NewAccessor(adminReq).DeleteOne(id)
|
|
}
|
|
fmt.Printf("DraftTimeout: %s %s deleted (still draft after 10 min)\n", dt.String(), id)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// NATS handlers — incoming booking/purchase
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// HandleCreateBooking processes an incoming booking from NATS.
|
|
// Returns true if the booking was confirmed (IsDraft→false) and considers must be triggered.
|
|
func (s *SchedulingResourcesService) HandleCreateBooking(bk *booking.Booking, adminReq *tools.APIRequest) bool {
|
|
self := s.Self()
|
|
if self == nil {
|
|
return false
|
|
}
|
|
|
|
if existing, _, loadErr := booking.NewAccessor(adminReq).LoadOne(bk.GetID()); loadErr == nil && existing != nil {
|
|
prev := existing.(*booking.Booking)
|
|
if prev.SchedulerPeerID != bk.SchedulerPeerID || prev.ExecutionsID != bk.ExecutionsID {
|
|
fmt.Println("HandleCreateBooking: auth mismatch, ignoring", bk.GetID())
|
|
return false
|
|
}
|
|
if !prev.IsDrafted() && bk.IsDraft {
|
|
return false
|
|
}
|
|
if !bk.IsDraft && !prev.ExpectedStartDate.IsZero() && prev.ExpectedStartDate.Before(time.Now().UTC()) {
|
|
fmt.Println("HandleCreateBooking: expired, deleting", bk.GetID())
|
|
booking.NewAccessor(adminReq).DeleteOne(bk.GetID())
|
|
return false
|
|
}
|
|
if _, _, err := utils.GenericRawUpdateOne(bk, bk.GetID(), booking.NewAccessor(adminReq)); err != nil {
|
|
fmt.Println("HandleCreateBooking: update failed:", err)
|
|
return false
|
|
}
|
|
planner.GetPlannerService().RefreshSelf(self.PeerID, adminReq)
|
|
return !bk.IsDraft
|
|
}
|
|
|
|
// New booking
|
|
if !bk.ExpectedStartDate.IsZero() && bk.ExpectedStartDate.Before(time.Now().UTC()) {
|
|
fmt.Println("HandleCreateBooking: start date in the past, discarding")
|
|
return false
|
|
}
|
|
if !planner.GetPlannerService().CheckResourceInstance(self.PeerID, bk.ResourceID, bk.InstanceID, bk.ExpectedStartDate, bk.ExpectedEndDate) {
|
|
fmt.Println("HandleCreateBooking: conflicts with local planner, discarding")
|
|
return false
|
|
}
|
|
bk.IsDraft = true
|
|
stored, _, err := booking.NewAccessor(adminReq).StoreOne(bk)
|
|
if err != nil {
|
|
fmt.Println("HandleCreateBooking: could not store:", err)
|
|
return false
|
|
}
|
|
storedID := stored.GetID()
|
|
planner.GetPlannerService().RefreshSelf(self.PeerID, adminReq)
|
|
time.AfterFunc(10*time.Minute, func() { DraftTimeout(storedID, tools.BOOKING) })
|
|
return false
|
|
}
|
|
|
|
// HandleCreatePurchase processes an incoming purchase from NATS.
|
|
// Returns true if considers must be triggered.
|
|
func (s *SchedulingResourcesService) HandleCreatePurchase(pr *purchase_resource.PurchaseResource, adminReq *tools.APIRequest) bool {
|
|
self := s.Self()
|
|
if self == nil {
|
|
return false
|
|
}
|
|
if pr.DestPeerID != self.GetID() {
|
|
return false
|
|
}
|
|
|
|
if existing, _, loadErr := purchase_resource.NewAccessor(adminReq).LoadOne(pr.GetID()); loadErr == nil && existing != nil {
|
|
prev := existing.(*purchase_resource.PurchaseResource)
|
|
if prev.SchedulerPeerID != pr.SchedulerPeerID || prev.ExecutionsID != pr.ExecutionsID {
|
|
fmt.Println("HandleCreatePurchase: auth mismatch, ignoring", pr.GetID())
|
|
return false
|
|
}
|
|
if !prev.IsDrafted() && pr.IsDraft {
|
|
return false
|
|
}
|
|
if _, _, err := utils.GenericRawUpdateOne(pr, pr.GetID(), purchase_resource.NewAccessor(adminReq)); err != nil {
|
|
fmt.Println("HandleCreatePurchase: update failed:", err)
|
|
return false
|
|
}
|
|
return !pr.IsDraft
|
|
}
|
|
|
|
pr.IsDraft = true
|
|
stored, _, err := purchase_resource.NewAccessor(adminReq).StoreOne(pr)
|
|
if err != nil {
|
|
fmt.Println("HandleCreatePurchase: could not store:", err)
|
|
return false
|
|
}
|
|
storedID := stored.GetID()
|
|
time.AfterFunc(10*time.Minute, func() { DraftTimeout(storedID, tools.PURCHASE_RESOURCE) })
|
|
return false
|
|
}
|
|
|
|
// HandleRemoveBooking verifies auth and deletes the booking.
|
|
func (s *SchedulingResourcesService) HandleRemoveBooking(p RemoveResourcePayload, adminReq *tools.APIRequest) {
|
|
res, _, loadErr := booking.NewAccessor(adminReq).LoadOne(p.ID)
|
|
if loadErr != nil || res == nil {
|
|
return
|
|
}
|
|
existing := res.(*booking.Booking)
|
|
if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
|
|
fmt.Println("HandleRemoveBooking: auth mismatch, ignoring", p.ID)
|
|
return
|
|
}
|
|
booking.NewAccessor(adminReq).DeleteOne(p.ID)
|
|
if self := s.Self(); self != nil {
|
|
planner.GetPlannerService().RefreshSelf(self.PeerID, adminReq)
|
|
}
|
|
}
|
|
|
|
// HandleRemovePurchase verifies auth and deletes the purchase.
|
|
func (s *SchedulingResourcesService) HandleRemovePurchase(p RemoveResourcePayload, adminReq *tools.APIRequest) {
|
|
res, _, loadErr := purchase_resource.NewAccessor(adminReq).LoadOne(p.ID)
|
|
if loadErr != nil || res == nil {
|
|
return
|
|
}
|
|
existing := res.(*purchase_resource.PurchaseResource)
|
|
if existing.SchedulerPeerID != p.SchedulerPeerID || existing.ExecutionsID != p.ExecutionsID {
|
|
fmt.Println("HandleRemovePurchase: auth mismatch, ignoring", p.ID)
|
|
return
|
|
}
|
|
purchase_resource.NewAccessor(adminReq).DeleteOne(p.ID)
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Internal helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// knownRegistryPrefixes lists the registry prefixes that isValidPeerlessRef
// accepts at the start of a reference.
var knownRegistryPrefixes = []string{
	"docker.io/",
	"index.docker.io/",
	"ghcr.io/",
	"quay.io/",
	"registry.hub.docker.com/",
	"gcr.io/",
	"public.ecr.aws/",
}

// isValidPeerlessRef reports whether ref starts with one of
// knownRegistryPrefixes and has at least one character after that prefix.
// The empty string and "<nil>" (what fmt prints for a nil value) are rejected.
func isValidPeerlessRef(ref string) bool {
	switch ref {
	case "", "<nil>":
		return false
	}
	for i := range knownRegistryPrefixes {
		registry := knownRegistryPrefixes[i]
		// A bare prefix with nothing after it is not a usable reference.
		if len(ref) > len(registry) && strings.HasPrefix(ref, registry) {
			return true
		}
	}
	return false
}
|
|
|
|
func emitPeerBehaviorReport(targetPeerDID string, severity tools.BehaviorSeverity, reason, evidence string) {
|
|
if targetPeerDID == "" {
|
|
return
|
|
}
|
|
report := tools.PeerBehaviorReport{
|
|
ReporterApp: "oc-scheduler",
|
|
TargetPeerID: targetPeerDID,
|
|
Severity: severity,
|
|
Reason: reason,
|
|
Evidence: evidence,
|
|
At: time.Now().UTC(),
|
|
}
|
|
payload, err := json.Marshal(report)
|
|
if err != nil {
|
|
return
|
|
}
|
|
tools.NewNATSCaller().SetNATSPub(tools.PEER_BEHAVIOR_EVENT, tools.NATSResponse{
|
|
FromApp: "oc-scheduler",
|
|
Datatype: tools.PEER,
|
|
Method: int(tools.PEER_BEHAVIOR_EVENT),
|
|
Payload: payload,
|
|
})
|
|
}
|