2024-07-11 18:25:40 +02:00
|
|
|
package daemons
|
|
|
|
|
|
2024-07-23 12:16:20 +02:00
|
|
|
import (
|
2026-03-26 11:14:29 +01:00
|
|
|
"fmt"
|
2026-05-28 08:33:13 +02:00
|
|
|
"strings"
|
2024-07-23 12:16:20 +02:00
|
|
|
"time"
|
2024-08-19 11:42:26 +02:00
|
|
|
|
2026-03-26 11:14:29 +01:00
|
|
|
"oc-schedulerd/conf"
|
|
|
|
|
|
2024-08-19 11:42:26 +02:00
|
|
|
oclib "cloud.o-forge.io/core/oc-lib"
|
2026-01-14 15:16:19 +01:00
|
|
|
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
2026-05-28 08:33:13 +02:00
|
|
|
"cloud.o-forge.io/core/oc-lib/models/resources"
|
|
|
|
|
wf "cloud.o-forge.io/core/oc-lib/models/workflow"
|
2024-08-21 14:20:13 +02:00
|
|
|
workflow_execution "cloud.o-forge.io/core/oc-lib/models/workflow_execution"
|
2026-05-28 08:33:13 +02:00
|
|
|
"cloud.o-forge.io/core/oc-lib/tools"
|
|
|
|
|
"github.com/rs/zerolog"
|
2024-07-23 12:16:20 +02:00
|
|
|
)
|
2024-07-11 18:25:40 +02:00
|
|
|
|
2025-05-27 15:48:21 +02:00
|
|
|
// Executions is the package-level collection of workflow executions waiting to
// be launched. Entries live in Execs, keyed by execution ID; callers in this
// file take Executions.Mu before reading or mutating the map.
var Executions = ScheduledExecution{
	Execs: make(map[string]workflow_execution.WorkflowExecution),
}
|
2024-08-20 15:24:46 +02:00
|
|
|
|
|
|
|
|
// ExecutionManager carries no state of its own; it groups the methods that
// poll the shared Executions list, validate the referenced workflows and hand
// due executions over to a monitor executor.
type ExecutionManager struct{}
|
2024-07-23 12:16:20 +02:00
|
|
|
|
2025-02-14 11:59:32 +01:00
|
|
|
// Loop every second on the Execution's list and move the Execution that must start to a new list
|
2024-07-23 12:16:20 +02:00
|
|
|
// that will be looped over to start them
|
2024-08-19 11:42:26 +02:00
|
|
|
func (em *ExecutionManager) RetrieveNextExecutions() {
|
|
|
|
|
logger := oclib.GetLogger()
|
|
|
|
|
for {
|
2025-02-14 11:59:32 +01:00
|
|
|
Executions.Mu.Lock()
|
|
|
|
|
if len(Executions.Execs) > 0 {
|
|
|
|
|
executions := Executions.Execs
|
2026-01-14 15:16:19 +01:00
|
|
|
orderedExec := map[int]map[string]workflow_execution.WorkflowExecution{}
|
2025-05-20 20:06:48 +02:00
|
|
|
for execId, exec := range executions {
|
2026-01-14 15:16:19 +01:00
|
|
|
if orderedExec[exec.Priority] == nil {
|
|
|
|
|
orderedExec[exec.Priority] = map[string]workflow_execution.WorkflowExecution{}
|
2024-07-23 12:16:20 +02:00
|
|
|
}
|
2026-01-14 15:16:19 +01:00
|
|
|
orderedExec[exec.Priority][execId] = exec
|
2024-07-23 12:16:20 +02:00
|
|
|
}
|
2026-01-14 15:16:19 +01:00
|
|
|
for i := range []int{7, 6, 5, 4, 3, 2, 1, 0} { // priority in reversed
|
|
|
|
|
if orderedExec[i] == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2026-04-10 15:12:39 +02:00
|
|
|
fmt.Println("Next exec", i)
|
2026-03-26 11:14:29 +01:00
|
|
|
lead := time.Duration(conf.GetConfig().PrepLeadSeconds) * time.Second
|
2026-01-14 15:16:19 +01:00
|
|
|
for execId, exec := range orderedExec[i] {
|
2026-04-10 15:12:39 +02:00
|
|
|
fmt.Println("ExecDate Before", exec.ExecDate.Before(time.Now().UTC().Add(lead)))
|
2026-03-26 11:14:29 +01:00
|
|
|
// Fire PrepLeadSeconds before the scheduled start so oc-monitord
|
|
|
|
|
// has time to pre-pull images and set up infra before ExecDate.
|
|
|
|
|
if exec.ExecDate.Before(time.Now().UTC().Add(lead)) {
|
|
|
|
|
logger.Info().Msg(fmt.Sprintf("Launching prep for %s (scheduled %s, lead %s)",
|
|
|
|
|
execId, exec.ExecDate.Format(time.RFC3339), lead))
|
|
|
|
|
// Mark as STARTED immediately (before goroutine) so the next
|
|
|
|
|
// SchedulePolling cycle doesn't re-pick this execution from DB.
|
2026-05-28 08:33:13 +02:00
|
|
|
emitExecStateUpdate(exec.GetID(), enum.STARTED)
|
2026-01-14 15:16:19 +01:00
|
|
|
go em.executeExecution(&exec)
|
|
|
|
|
delete(executions, execId)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-08-19 11:42:26 +02:00
|
|
|
}
|
2025-02-14 11:59:32 +01:00
|
|
|
Executions.Mu.Unlock()
|
2024-07-23 12:16:20 +02:00
|
|
|
time.Sleep(time.Second)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-28 08:33:13 +02:00
|
|
|
// validateWorkflowIntegrity loads the workflow referenced by the execution and
|
|
|
|
|
// runs structural integrity checks before any resource is booked or any pod is
|
|
|
|
|
// started. This is the sovereign enforcement layer — oc-front may be bypassed
|
|
|
|
|
// via direct API calls, so oc-schedulerd re-validates independently.
|
|
|
|
|
//
|
|
|
|
|
// Two layers of validation are applied in order:
|
|
|
|
|
// 1. Structural integrity (cycles, missing compute links, variable refs, …).
|
|
|
|
|
// 2. Autorisation d'Exploitation (AE) — coupling and peer-usage constraints
|
|
|
|
|
// published by resource owners in oc-catalog. Violations are fraudulent and
|
|
|
|
|
// trigger a PEER_BEHAVIOR_EVENT(BehaviorFraud) against the consumer peer.
|
|
|
|
|
//
|
|
|
|
|
// Returns true when the execution is safe to proceed.
|
|
|
|
|
// On failure: emits FAILURE state, logs each violation, and returns false.
|
|
|
|
|
func (em *ExecutionManager) validateWorkflowIntegrity(execution *workflow_execution.WorkflowExecution, logger zerolog.Logger) bool {
|
|
|
|
|
res := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW), nil).LoadOne(execution.WorkflowID)
|
|
|
|
|
if res.Err != "" || res.Data == nil {
|
|
|
|
|
return true // can't load workflow — let the existing error path handle it downstream
|
|
|
|
|
}
|
|
|
|
|
workflow, ok := res.Data.(*wf.Workflow)
|
|
|
|
|
if !ok {
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── 1. Structural integrity ───────────────────────────────────────────────
|
|
|
|
|
violations := workflow.ValidateIntegrity()
|
|
|
|
|
var structErrors []wf.IntegrityViolation
|
|
|
|
|
for _, v := range violations {
|
|
|
|
|
if v.IsError() {
|
|
|
|
|
structErrors = append(structErrors, v)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(structErrors) > 0 {
|
|
|
|
|
msgs := make([]string, 0, len(structErrors))
|
|
|
|
|
for _, v := range structErrors {
|
|
|
|
|
msgs = append(msgs, fmt.Sprintf("[%s] %s", v.Type, v.Message))
|
|
|
|
|
}
|
|
|
|
|
logger.Error().Msg(fmt.Sprintf(
|
|
|
|
|
"workflow '%s' (exec %s) rejected — %d integrity violation(s):\n %s",
|
|
|
|
|
execution.WorkflowID, execution.GetID(), len(structErrors), strings.Join(msgs, "\n "),
|
|
|
|
|
))
|
|
|
|
|
emitExecStateUpdate(execution.GetID(), enum.FAILURE)
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ── 2. Autorisation d'Exploitation (AE) ──────────────────────────────────
|
|
|
|
|
// Build a per-type map of resource IDs referenced in the workflow.
|
|
|
|
|
// The workflow's ResourceSet stores raw IDs in Datas/Processings/etc.
|
|
|
|
|
resourcesByType := map[tools.DataType][]string{
|
|
|
|
|
tools.DATA_RESOURCE: workflow.Datas,
|
|
|
|
|
tools.PROCESSING_RESOURCE: workflow.Processings,
|
|
|
|
|
tools.STORAGE_RESOURCE: workflow.Storages,
|
|
|
|
|
tools.COMPUTE_RESOURCE: workflow.Computes,
|
|
|
|
|
tools.WORKFLOW_RESOURCE: workflow.Workflows,
|
|
|
|
|
tools.SERVICE_RESOURCE: workflow.Services,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Build a flat ID set for coupling membership checks.
|
|
|
|
|
idSet := map[string]struct{}{}
|
|
|
|
|
for _, ids := range resourcesByType {
|
|
|
|
|
for _, id := range ids {
|
|
|
|
|
idSet[id] = struct{}{}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Determine the consumer peer (this peer is executing the workflow).
|
|
|
|
|
consumerPeerID := ""
|
|
|
|
|
if self, err := oclib.GetMySelf(); err == nil && self != nil {
|
|
|
|
|
consumerPeerID = self.GetID()
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
aeViolations := checkWorkflowAE(execution.WorkflowID, consumerPeerID, resourcesByType, idSet)
|
|
|
|
|
if len(aeViolations) > 0 {
|
|
|
|
|
msgs := make([]string, 0, len(aeViolations))
|
|
|
|
|
for _, v := range aeViolations {
|
|
|
|
|
msgs = append(msgs, fmt.Sprintf("[%s] %s", v.Type, v.Message))
|
|
|
|
|
}
|
|
|
|
|
logger.Error().Msg(fmt.Sprintf(
|
|
|
|
|
"workflow '%s' (exec %s) rejected — %d AE violation(s):\n %s",
|
|
|
|
|
execution.WorkflowID, execution.GetID(), len(aeViolations), strings.Join(msgs, "\n "),
|
|
|
|
|
))
|
|
|
|
|
resources.EmitAEBehaviorReport(consumerPeerID, aeViolations)
|
|
|
|
|
emitExecStateUpdate(execution.GetID(), enum.FAILURE)
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// checkWorkflowAE loads each workflow resource from the DB and checks its
|
|
|
|
|
// embedded ExploitationAuthorizations against the execution context.
|
|
|
|
|
// Kept in oc-schedulerd (not oc-lib/models/resources) to avoid a circular
|
|
|
|
|
// import: resources → oclib → models → resources.
|
|
|
|
|
func checkWorkflowAE(
|
|
|
|
|
workflowID string,
|
|
|
|
|
consumerPeerID string,
|
|
|
|
|
resourcesByType map[tools.DataType][]string,
|
|
|
|
|
idSet map[string]struct{},
|
|
|
|
|
) []resources.AEViolation {
|
|
|
|
|
now := time.Now().UTC()
|
|
|
|
|
var violations []resources.AEViolation
|
|
|
|
|
|
|
|
|
|
type hasAE interface {
|
|
|
|
|
GetExploitationAuthorizations() []resources.ExploitationAuthorization
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for dt, ids := range resourcesByType {
|
|
|
|
|
for _, id := range ids {
|
|
|
|
|
res := oclib.NewRequestAdmin(oclib.LibDataEnum(dt), nil).LoadOne(id)
|
|
|
|
|
if res.Err != "" || res.Data == nil {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
ra, ok := res.Data.(hasAE)
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
for _, ae := range ra.GetExploitationAuthorizations() {
|
|
|
|
|
vs := ae.CheckAE(id, workflowID, consumerPeerID, idSet, now)
|
|
|
|
|
violations = append(violations, vs...)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return violations
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-12 12:35:49 +02:00
|
|
|
func (em *ExecutionManager) executeExecution(execution *workflow_execution.WorkflowExecution) {
|
2024-08-19 11:42:26 +02:00
|
|
|
// start execution
|
2024-07-29 15:45:32 +02:00
|
|
|
// create the yaml that describes the pod : filename, path/url to Loki
|
2025-04-25 11:14:54 +02:00
|
|
|
var executor Executor
|
|
|
|
|
// exec_method := os.Getenv("MONITOR_METHOD")
|
2024-08-19 11:42:26 +02:00
|
|
|
logger := oclib.GetLogger()
|
2025-05-12 12:35:49 +02:00
|
|
|
|
2026-05-28 08:33:13 +02:00
|
|
|
// Sovereign integrity check — reject before touching any resource.
|
|
|
|
|
if !em.validateWorkflowIntegrity(execution, logger) {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-25 11:14:54 +02:00
|
|
|
duration := 0
|
2025-05-12 12:35:49 +02:00
|
|
|
if execution.EndDate != nil {
|
|
|
|
|
duration = int(execution.EndDate.Sub(execution.ExecDate).Seconds())
|
2024-07-29 15:45:32 +02:00
|
|
|
}
|
2025-04-25 11:14:54 +02:00
|
|
|
|
2026-03-26 11:14:29 +01:00
|
|
|
if conf.GetConfig().Mode == "kubernetes" {
|
|
|
|
|
executor = NewContainerMonitor(execution.UUID, execution.CreatorID, duration, execution.ExecDate)
|
|
|
|
|
} else {
|
|
|
|
|
executor = NewLocalMonitor(execution.UUID, execution.CreatorID, duration, execution.ExecDate)
|
|
|
|
|
}
|
2025-04-25 11:14:54 +02:00
|
|
|
|
|
|
|
|
if executor == nil {
|
2025-05-12 12:35:49 +02:00
|
|
|
logger.Fatal().Msg("Could not create executor")
|
2026-05-28 08:33:13 +02:00
|
|
|
emitExecStateUpdate(execution.GetID(), enum.FAILURE)
|
2026-02-25 13:19:46 +01:00
|
|
|
return
|
2025-04-25 11:14:54 +02:00
|
|
|
}
|
2025-05-12 12:35:49 +02:00
|
|
|
|
2025-04-25 11:14:54 +02:00
|
|
|
args := executor.PrepareMonitorExec()
|
2026-03-26 11:14:29 +01:00
|
|
|
executor.LaunchMonitor(args, execution.GetID(), conf.GetConfig().KubeNamespace, logger)
|
2024-07-29 15:45:32 +02:00
|
|
|
}
|