Prepull for pod & Asym Jobs

2026-03-25 11:13:12 +01:00
parent 56bc342d24
commit a9284314ef
17 changed files with 754 additions and 512 deletions
@@ -8,13 +8,13 @@ import (
 	"fmt"
 	"oc-monitord/conf"
 	. "oc-monitord/models"
+	"sync"

 	"os"
 	"strings"
 	"time"

 	oclib "cloud.o-forge.io/core/oc-lib"
-	oclib_config "cloud.o-forge.io/core/oc-lib/config"
 	"cloud.o-forge.io/core/oc-lib/logs"
 	"cloud.o-forge.io/core/oc-lib/models/common/enum"
 	"cloud.o-forge.io/core/oc-lib/models/peer"
@@ -24,7 +24,6 @@ import (
 	"cloud.o-forge.io/core/oc-lib/models/workflow/graph"
 	"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
 	"cloud.o-forge.io/core/oc-lib/tools"
-	"github.com/nats-io/nats.go"
 	"github.com/nwtgck/go-fakelish"
 	"github.com/rs/zerolog"
 	"gopkg.in/yaml.v3"
@@ -48,6 +47,12 @@ type ArgoBuilder struct {
 	Timeout int
 	// RemotePeers contient les IDs des peers distants détectés via Admiralty.
 	RemotePeers []string
+	// HasLocalCompute indique qu'au moins un processing s'exécute sur le kube local.
+	// Le kube local doit recevoir son propre ArgoKubeEvent COMPUTE_RESOURCE.
+	HasLocalCompute bool
+	// PeerImages associe chaque peer aux images de conteneurs qu'il doit exécuter.
+	// Clé "" désigne le peer local. Utilisé pour le pre-pull et le release post-exec.
+	PeerImages map[string][]string
 }

 // Workflow est la structure racine du fichier YAML Argo Workflow.
@@ -73,16 +78,25 @@ func (b *Workflow) getDag() *Dag {
 	return b.Spec.Templates[len(b.Spec.Templates)-1].Dag
 }

+// PodSecurityContext is the pod-level security context written under the
+// "securityContext" key of the workflow Spec. Each field is a pointer tagged
+// omitempty, so a nil field is left out of the generated YAML.
+type PodSecurityContext struct {
+	RunAsUser  *int64 `yaml:"runAsUser,omitempty"`
+	RunAsGroup *int64 `yaml:"runAsGroup,omitempty"`
+	FSGroup    *int64 `yaml:"fsGroup,omitempty"`
+}
+
 // Spec holds the complete Argo Workflow specification: service account,
 // entry point, arguments, volumes, templates, timeout and security context.
+// Each field is serialized under the YAML key given by its struct tag;
+// ArtifactRepositoryRef is embedded.
 type Spec struct {
 	ArtifactRepositoryRef
-	ServiceAccountName string                `yaml:"serviceAccountName,omitempty"`
-	Entrypoint         string                `yaml:"entrypoint"`
-	Arguments          []Parameter           `yaml:"arguments,omitempty"`
+	ServiceAccountName string              `yaml:"serviceAccountName,omitempty"`
+	Entrypoint         string              `yaml:"entrypoint"`
+	Arguments          []Parameter         `yaml:"arguments,omitempty"`
+	// Volumes is written under "volumeClaimTemplates"; ExistingVolumes is the
+	// field written under the plain "volumes" key.
 	Volumes            []VolumeClaimTemplate `yaml:"volumeClaimTemplates,omitempty"`
-	Templates          []Template            `yaml:"templates"`
-	Timeout            int                   `yaml:"activeDeadlineSeconds,omitempty"`
+	ExistingVolumes    []ExistingVolume    `yaml:"volumes,omitempty"`
+	Templates          []Template          `yaml:"templates"`
+	Timeout            int                 `yaml:"activeDeadlineSeconds,omitempty"`
+	SecurityContext    *PodSecurityContext `yaml:"securityContext,omitempty"`
 }

 // CreateDAG est le point d'entrée de la construction du DAG Argo.
@@ -100,7 +114,10 @@ func (b *ArgoBuilder) CreateDAG(exec *workflow_execution.WorkflowExecution, name
 	logger = logs.GetLogger()
 	logger.Info().Msg(fmt.Sprint("Creating DAG ", b.OriginWorkflow.Graph.Items))
 	// Crée un template Argo pour chaque nœud du graphe et collecte les volumes.
-	firstItems, lastItems, volumes := b.createTemplates(exec, namespace)
+	firstItems, lastItems, volumes, err := b.createTemplates(exec, namespace)
+	if err != nil {
+		return 0, firstItems, lastItems, err
+	}
 	b.createVolumes(exec, volumes)

 	if b.Timeout > 0 {
@@ -122,7 +139,7 @@ func (b *ArgoBuilder) CreateDAG(exec *workflow_execution.WorkflowExecution, name
 // Elle gère également le recâblage des dépendances DAG entre sous-workflows
 // imbriqués, et l'ajout du pod de service si nécessaire.
 // Retourne les premières tâches, les dernières tâches et les volumes à créer.
-func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution, namespace string) ([]string, []string, []VolumeMount) {
+func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution, namespace string) ([]string, []string, []VolumeMount, error) {
 	volumes := []VolumeMount{}
 	firstItems := []string{}
 	lastItems := []string{}
@@ -138,11 +155,18 @@ func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution
 		logger.Info().Msg(fmt.Sprint("Creating template for", item.Processing.GetName(), instance))
 		if instance == nil || instance.(*resources.ProcessingInstance).Access == nil && instance.(*resources.ProcessingInstance).Access.Container != nil {
 			logger.Error().Msg("Not enough configuration setup, template can't be created : " + item.Processing.GetName())
-			return firstItems, lastItems, volumes
+			return firstItems, lastItems, volumes, nil
+		}
+		// Un même processing peut être bookié sur plusieurs peers : on crée
+		// un template Argo distinct par peer, déployés en parallèle.
+		for _, pb := range getAllPeersForItem(exec, item.ID) {
+			var err error
+			volumes, firstItems, lastItems, err = b.createArgoTemplates(exec,
+				namespace, item.ID, pb.PeerID, pb.BookingID, item.Processing, volumes, firstItems, lastItems)
+			if err != nil {
+				return firstItems, lastItems, volumes, err
+			}
 		}
-		volumes, firstItems, lastItems = b.createArgoTemplates(exec,
-			namespace,
-			item.ID, item.Processing, volumes, firstItems, lastItems)
 	}

 	// --- Native Tools de type WORKFLOW_EVENT uniquement ---
@@ -157,8 +181,12 @@ func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution
 		}
 		instance := item.NativeTool.GetSelectedInstance(&index)
 		logger.Info().Msg(fmt.Sprint("Creating template for", item.NativeTool.GetName(), instance))
-		volumes, firstItems, lastItems = b.createArgoTemplates(exec,
-			namespace, item.ID, item.NativeTool, volumes, firstItems, lastItems)
+		var err error
+		volumes, firstItems, lastItems, err = b.createArgoTemplates(exec,
+			namespace, item.ID, "", item.ID, item.NativeTool, volumes, firstItems, lastItems)
+		if err != nil {
+			return firstItems, lastItems, volumes, err
+		}
 	}

 	// --- Sous-workflows : chargement, construction récursive et fusion du DAG ---
@@ -178,7 +206,7 @@ func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution
 			continue
 		}
 		firstWfTasks[wf] = fi
-		if ok, depsOfIds := subBuilder.isArgoDependancy(wf); ok { // le sous-workflow est une dépendance d'autre chose
+		if ok, depsOfIds := subBuilder.isArgoDependancy(exec, wf); ok { // le sous-workflow est une dépendance d'autre chose
 			latestWfTasks[wf] = li
 			relatedWfTasks[wf] = depsOfIds
 		}
@@ -217,7 +245,7 @@ func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution
 	// Les premières tâches du sous-workflow héritent des dépendances
 	// que le sous-workflow avait vis-à-vis du DAG principal.
 	for wfID, fi := range firstWfTasks {
-		deps := b.getArgoDependencies(wfID)
+		deps := b.getArgoDependencies(exec, wfID)
 		if len(deps) > 0 {
 			for _, dep := range fi {
 				for _, task := range b.Workflow.getDag().Tasks {
@@ -235,53 +263,67 @@ func (b *ArgoBuilder) createTemplates(exec *workflow_execution.WorkflowExecution
 		dag.Tasks = append(dag.Tasks, Task{Name: "workflow-service-pod", Template: "workflow-service-pod"})
 		b.addServiceToArgo()
 	}
-	return firstItems, lastItems, volumes
+	return firstItems, lastItems, volumes, nil
 }

 // createArgoTemplates creates the Argo template for one graph node (processing
-// ou native tool).  Il :
+// or native tool) on a given peer.  It:
 //  1. Adds the task to the DAG together with its dependencies.
 //  2. Creates the container template (or the event template for native tools).
-//  3. Ajoute les annotations Admiralty si le processing est hébergé sur un peer distant.
+//  3. Adds the Admiralty annotations when peerID designates a remote peer, and
+//     records the container image to pre-pull for the peer that runs it.
 //  4. Creates a Kubernetes service when the processing is declared IsService.
 //  5. Configures the storage annotations (S3, local volumes).
+//
+// graphID is the node ID in the workflow graph (used for dependencies, the
+// service and the storage lookup); bookingID is used to build the unique
+// task/template name; peerID is the peer the node is booked on ("" = local).
+// It returns the updated volumes, firstItems and lastItems, plus the error
+// reported by addStorageAnnotations, if any.
+//
+// The image is recorded under the key CompleteBuild reads back: "" when the
+// node runs locally (even if peerID is the local peer's real ID), and the
+// remote peer's ID otherwise. Recording it under the raw peerID would leave a
+// locally booked image out of the local pre-pull list.
 func (b *ArgoBuilder) createArgoTemplates(
 	exec *workflow_execution.WorkflowExecution,
 	namespace string,
-	id string,
+	graphID string,
+	peerID string,
+	bookingID string,
 	obj resources.ResourceInterface,
 	volumes []VolumeMount,
 	firstItems []string,
-	lastItems []string) ([]VolumeMount, []string, []string) {
+	lastItems []string,
+) ([]VolumeMount, []string, []string, error) {

-	_, firstItems, lastItems = b.addTaskToArgo(exec, b.Workflow.getDag(), id, obj, firstItems, lastItems)
-	template := &Template{Name: getArgoName(obj.GetName(), id)}
+	_, firstItems, lastItems = b.addTaskToArgo(exec, b.Workflow.getDag(), graphID, bookingID, obj, firstItems, lastItems)
+	template := &Template{Name: getArgoName(obj.GetName(), bookingID)}
 	logger.Info().Msg(fmt.Sprint("Creating template for", template.Name))
-	// Vérifie si le processing est sur un peer distant (Admiralty).
-	isReparted, peer := b.isReparted(obj, id)
+
 	if obj.GetType() == tools.PROCESSING_RESOURCE.String() {
 		template.CreateContainer(exec, obj.(*resources.ProcessingResource), b.Workflow.getDag())
 	} else if obj.GetType() == tools.NATIVE_TOOL.String() {
 		template.CreateEventContainer(exec, obj.(*resources.NativeTool), b.Workflow.getDag())
 	}

+	// Resolve locality first: it decides both the Admiralty annotations and
+	// the PeerImages key under which the image must be recorded.
+	isReparted, remotePeer := b.isPeerReparted(peerID)
 	if isReparted {
-		logger.Debug().Msg("Reparted processing, on " + peer.GetID())
-		b.RemotePeers = append(b.RemotePeers, peer.GetID())
-		template.AddAdmiraltyAnnotations(peer.GetID())
+		logger.Debug().Msg("Reparted processing, on " + remotePeer.GetID())
+		b.RemotePeers = append(b.RemotePeers, remotePeer.GetID())
+		template.AddAdmiraltyAnnotations(remotePeer.GetID())
+		// Same key as the one CompleteBuild uses for remote peers.
+		b.addPeerImage(remotePeer.GetID(), template.Container.Image)
+	} else {
+		// Local processing: the local kube must be configured as well, and
+		// its images are kept under the "" key.
+		b.HasLocalCompute = true
+		b.addPeerImage("", template.Container.Image)
 	}

 	// If the processing exposes a Kubernetes service, register it and set the
 	// "app" label so that the Service can select the pod.
 	if obj.GetType() == tools.PROCESSING_RESOURCE.String() && obj.(*resources.ProcessingResource).IsService {
-		b.CreateService(exec, id, obj)
+		b.CreateService(exec, graphID, obj)
 		template.Metadata.Labels = make(map[string]string)
 		template.Metadata.Labels["app"] = "oc-service-" + obj.GetName()
 	}

-	volumes = b.addStorageAnnotations(exec, id, template, namespace, volumes)
+	var err error
+	volumes, err = b.addStorageAnnotations(exec, graphID, template, namespace, volumes, isReparted)
+	if err != nil {
+		return volumes, firstItems, lastItems, err
+	}
 	b.Workflow.Spec.Templates = append(b.Workflow.Spec.Templates, *template)
-	return volumes, firstItems, lastItems
+	return volumes, firstItems, lastItems, nil
 }

 // addStorageAnnotations parcourt tous les nœuds de stockage liés au processing
@@ -290,7 +332,10 @@ func (b *ArgoBuilder) createArgoTemplates(
 //   - Pour les stockages S3 : appelle waitForConsiders (STORAGE_RESOURCE) pour
 //     attendre la validation PB_CONSIDERS avant de configurer les annotations S3.
 //   - Pour les volumes locaux : ajoute un VolumeMount dans le container.
-func (b *ArgoBuilder) addStorageAnnotations(exec *workflow_execution.WorkflowExecution, id string, template *Template, namespace string, volumes []VolumeMount) []VolumeMount {
+//     Si isReparted est true (step Admiralty), le volume local est marqué comme
+//     réparti afin que createVolumes ne génère pas de PVC local-path incompatible
+//     avec les virtual kubelets.
+func (b *ArgoBuilder) addStorageAnnotations(exec *workflow_execution.WorkflowExecution, id string, template *Template, namespace string, volumes []VolumeMount, isReparted bool) ([]VolumeMount, error) {
 	// Récupère tous les nœuds de stockage connectés au processing courant.
 	related := b.OriginWorkflow.GetByRelatedProcessing(id, b.OriginWorkflow.Graph.IsStorage)

@@ -315,14 +360,27 @@ func (b *ArgoBuilder) addStorageAnnotations(exec *workflow_execution.WorkflowExe
 					// Pour chaque ressource de compute liée à ce stockage S3,
 					// on notifie via NATS et on attend la validation PB_CONSIDERS
 					// avec DataType = STORAGE_RESOURCE avant de continuer.
-					for _, r := range b.getStorageRelatedProcessing(storage.GetID()) {
-						waitForConsiders(exec.ExecutionsID, tools.STORAGE_RESOURCE, ArgoKubeEvent{
+					// Les goroutines tournent en parallèle ; un timeout sur l'une
+					// d'elles est une erreur fatale qui stoppe la suite du build.
+					relatedProcessing := b.getStorageRelatedProcessing(storage.GetID())
+					var wg sync.WaitGroup
+					errCh := make(chan error, len(relatedProcessing))
+					for _, r := range relatedProcessing {
+						wg.Add(1)
+						go waitForConsiders(exec.ExecutionsID, tools.STORAGE_RESOURCE, ArgoKubeEvent{
 							ExecutionsID: exec.ExecutionsID,
 							DestPeerID:   r.GetID(),
 							Type:         tools.STORAGE_RESOURCE,
 							SourcePeerID: storage.GetCreatorID(),
 							OriginID:     conf.GetConfig().PeerID,
-						})
+						}, &wg, errCh)
+					}
+					wg.Wait()
+					close(errCh)
+					for err := range errCh {
+						if err != nil {
+							return volumes, err
+						}
 					}
 					// Configure la référence au dépôt d'artefacts S3 dans le Spec.
 					b.addS3annotations(storage, namespace)
@@ -336,21 +394,43 @@ func (b *ArgoBuilder) addStorageAnnotations(exec *workflow_execution.WorkflowExe
 			}
 		}

-		// Si l'instance de stockage est locale, on monte un volume persistant.
+		// Si l'instance de stockage est locale, on pré-provisionne le PVC via
+		// oc-datacenter (même pattern que MinIO) puis on monte un volume existant.
 		index := 0
 		if s, ok := exec.SelectedInstances[storage.GetID()]; ok {
 			index = s
 		}
 		s := storage.Instances[index]
 		if s.Local {
+			var pvcWg sync.WaitGroup
+			pvcErrCh := make(chan error, 1)
+			pvcWg.Add(1)
+			go waitForConsiders(exec.ExecutionsID, tools.STORAGE_RESOURCE, ArgoKubeEvent{
+				ExecutionsID: exec.ExecutionsID,
+				Type:         tools.STORAGE_RESOURCE,
+				SourcePeerID: conf.GetConfig().PeerID,
+				DestPeerID:   conf.GetConfig().PeerID,
+				OriginID:     conf.GetConfig().PeerID,
+				MinioID:      storage.GetID(),
+				Local:        true,
+				StorageName:  storage.GetName(),
+			}, &pvcWg, pvcErrCh)
+			pvcWg.Wait()
+			close(pvcErrCh)
+			for err := range pvcErrCh {
+				if err != nil {
+					return volumes, err
+				}
+			}
 			volumes = template.Container.AddVolumeMount(VolumeMount{
-				Name:      strings.ReplaceAll(strings.ToLower(storage.GetName()), " ", "-"),
-				MountPath: s.Source,
-				Storage:   storage,
+				Name:       strings.ReplaceAll(strings.ToLower(storage.GetName()), " ", "-"),
+				MountPath:  s.Source,
+				Storage:    storage,
+				IsReparted: isReparted,
 			}, volumes)
 		}
 	}
-	return volumes
+	return volumes, nil
 }

 // getStorageRelatedProcessing retourne la liste des ressources de compute
@@ -408,10 +488,11 @@ func (b *ArgoBuilder) addS3annotations(storage *resources.StorageResource, names
 // Elle résout les dépendances DAG, propage les paramètres d'environnement,
 // d'entrée et de sortie de l'instance sélectionnée, et met à jour les listes
 // firstItems / lastItems utilisées pour le recâblage des sous-workflows.
-func (b *ArgoBuilder) addTaskToArgo(exec *workflow_execution.WorkflowExecution, dag *Dag, graphItemID string, processing resources.ResourceInterface,
+// bookingID est le nom unique de cette instance (peut varier par peer).
+func (b *ArgoBuilder) addTaskToArgo(exec *workflow_execution.WorkflowExecution, dag *Dag, graphItemID string, bookingID string, processing resources.ResourceInterface,
 	firstItems []string, lastItems []string) (*Dag, []string, []string) {

-	unique_name := getArgoName(processing.GetName(), graphItemID)
+	unique_name := getArgoName(processing.GetName(), bookingID)
 	step := Task{Name: unique_name, Template: unique_name}

 	index := 0
@@ -442,7 +523,7 @@ func (b *ArgoBuilder) addTaskToArgo(exec *workflow_execution.WorkflowExecution,
 		}
 	}

-	step.Dependencies = b.getArgoDependencies(graphItemID)
+	step.Dependencies = b.getArgoDependencies(exec, graphItemID)

 	// Détermine si ce nœud est une première ou une dernière tâche du DAG.
 	name := ""
@@ -453,31 +534,43 @@ func (b *ArgoBuilder) addTaskToArgo(exec *workflow_execution.WorkflowExecution,
 		name = b.OriginWorkflow.Graph.Items[graphItemID].Workflow.GetName()
 	}
 	if len(step.Dependencies) == 0 && name != "" {
-		firstItems = append(firstItems, getArgoName(name, graphItemID))
+		firstItems = append(firstItems, getArgoName(name, bookingID))
 	}
-	if ok, _ := b.isArgoDependancy(graphItemID); !ok && name != "" {
-		lastItems = append(lastItems, getArgoName(name, graphItemID))
+	if ok, _ := b.isArgoDependancy(exec, graphItemID); !ok && name != "" {
+		lastItems = append(lastItems, getArgoName(name, bookingID))
 	}

 	dag.Tasks = append(dag.Tasks, step)
 	return dag, firstItems, lastItems
 }

-// createVolumes crée les PersistentVolumeClaims Argo (volumeClaimTemplates)
-// pour chaque volume local référencé dans les templates de processing.
-// TODO: gérer les volumes distants.
+// createVolumes references the PVCs pre-provisioned by oc-datacenter as
+// existing volumes (ExistingVolumes) in the Argo Spec, once per distinct
+// normalized volume name.
+// The PVC name is computed deterministically as <storageName>-<executionsID>;
+// per the original author it is identical to ClaimName() in
+// oc-datacenter/infrastructure/storage/pvc_setter.go.
+// When at least one existing volume is referenced and the Spec has no
+// SecurityContext yet, the security context is set to uid/gid/fsGroup 0.
+// NOTE(review): volume.IsReparted is not consulted here — confirm that
+// referencing the PVC is also intended for volumes mounted by Admiralty steps.
 func (b *ArgoBuilder) createVolumes(exec *workflow_execution.WorkflowExecution, volumes []VolumeMount) {
+	// seen holds the normalized names already referenced, so each PVC is
+	// listed only once even when several templates mount the same storage.
+	seen := make(map[string]struct{})
 	for _, volume := range volumes {
-		index := 0
-		if s, ok := exec.SelectedInstances[volume.Storage.GetID()]; ok {
-			index = s
+		name := strings.ReplaceAll(strings.ToLower(volume.Name), " ", "-")
+		if _, ok := seen[name]; ok {
+			continue
+		}
+		seen[name] = struct{}{}
+		claimName := name + "-" + exec.ExecutionsID
+		ev := ExistingVolume{}
+		ev.Name = name
+		ev.PersistentVolumeClaim.ClaimName = claimName
+		b.Workflow.Spec.ExistingVolumes = append(b.Workflow.Spec.ExistingVolumes, ev)
+	}
+	// hostPath PVs are created as root:root 0755. Ensure pods can read/write
+	// by running as root when local volumes are present.
+	if len(b.Workflow.Spec.ExistingVolumes) > 0 && b.Workflow.Spec.SecurityContext == nil {
+		zero := int64(0)
+		b.Workflow.Spec.SecurityContext = &PodSecurityContext{
+			RunAsUser:  &zero,
+			RunAsGroup: &zero,
+			FSGroup:    &zero,
 		}
-		storage := volume.Storage.Instances[index]
-		new_volume := VolumeClaimTemplate{}
-		new_volume.Metadata.Name = strings.ReplaceAll(strings.ToLower(volume.Name), " ", "-")
-		new_volume.Spec.AccessModes = []string{"ReadWriteOnce"}
-		new_volume.Spec.Resources.Requests.Storage = fmt.Sprintf("%v", storage.SizeGB) + storage.SizeType.ToArgo()
-		b.Workflow.Spec.Volumes = append(b.Workflow.Spec.Volumes, new_volume)
 	}
 }

@@ -485,7 +578,7 @@ func (b *ArgoBuilder) createVolumes(exec *workflow_execution.WorkflowExecution,
 // d'au moins un autre nœud du DAG (i.e. s'il existe un lien sortant vers
 // un processing ou un workflow).
 // Retourne true + la liste des noms Argo des nœuds qui en dépendent.
-func (b *ArgoBuilder) isArgoDependancy(id string) (bool, []string) {
+func (b *ArgoBuilder) isArgoDependancy(exec *workflow_execution.WorkflowExecution, id string) (bool, []string) {
 	dependancyOfIDs := []string{}
 	isDeps := false
 	for _, link := range b.OriginWorkflow.Graph.Links {
@@ -496,12 +589,16 @@ func (b *ArgoBuilder) isArgoDependancy(id string) (bool, []string) {
 		source := b.OriginWorkflow.Graph.Items[link.Destination.ID].Processing
 		if id == link.Source.ID && source != nil {
 			isDeps = true
-			dependancyOfIDs = append(dependancyOfIDs, getArgoName(source.GetName(), link.Destination.ID))
+			for _, pb := range getAllPeersForItem(exec, link.Destination.ID) {
+				dependancyOfIDs = append(dependancyOfIDs, getArgoName(source.GetName(), pb.BookingID))
+			}
 		}
 		wourceWF := b.OriginWorkflow.Graph.Items[link.Destination.ID].Workflow
 		if id == link.Source.ID && wourceWF != nil {
 			isDeps = true
-			dependancyOfIDs = append(dependancyOfIDs, getArgoName(wourceWF.GetName(), link.Destination.ID))
+			for _, pb := range getAllPeersForItem(exec, link.Destination.ID) {
+				dependancyOfIDs = append(dependancyOfIDs, getArgoName(wourceWF.GetName(), pb.BookingID))
+			}
 		}
 	}
 	return isDeps, dependancyOfIDs
@@ -509,7 +606,9 @@ func (b *ArgoBuilder) isArgoDependancy(id string) (bool, []string) {

 // getArgoDependencies retourne la liste des noms de tâches Argo dont dépend
 // le nœud identifié par id (liens entrants depuis des processings).
-func (b *ArgoBuilder) getArgoDependencies(id string) (dependencies []string) {
+// Si le processing source est bookié sur N peers, toutes ses instances sont
+// retournées comme dépendances (la tâche courante attend toutes les instances).
+func (b *ArgoBuilder) getArgoDependencies(exec *workflow_execution.WorkflowExecution, id string) (dependencies []string) {
 	for _, link := range b.OriginWorkflow.Graph.Links {
 		if _, ok := b.OriginWorkflow.Graph.Items[link.Source.ID]; !ok {
 			logger.Info().Msg(fmt.Sprint("Could not find the source of the link", link.Source.ID))
@@ -517,9 +616,9 @@ func (b *ArgoBuilder) getArgoDependencies(id string) (dependencies []string) {
 		}
 		source := b.OriginWorkflow.Graph.Items[link.Source.ID].Processing
 		if id == link.Destination.ID && source != nil {
-			dependency_name := getArgoName(source.GetName(), link.Source.ID)
-			dependencies = append(dependencies, dependency_name)
-			continue
+			for _, pb := range getAllPeersForItem(exec, link.Source.ID) {
+				dependencies = append(dependencies, getArgoName(source.GetName(), pb.BookingID))
+			}
 		}
 	}
 	return
@@ -535,139 +634,93 @@ func getArgoName(raw_name string, component_id string) (formatedName string) {
 	return
 }

-// isReparted vérifie si le processing est hébergé sur un Compute appartenant
-// à un peer distant (Relation != 1, i.e. pas le peer local).
-// Si c'est le cas, elle retourne true et le Peer concerné pour qu'Admiralty
-// puisse router les pods vers le bon cluster.
-func (b *ArgoBuilder) isReparted(processing resources.ResourceInterface, graphID string) (bool, *peer.Peer) {
-	computeAttached := b.retrieveProcessingCompute(graphID)
-	if computeAttached == nil {
-		logger.Error().Msg("No compute was found attached to processing " + processing.GetName() + " : " + processing.GetID())
-		panic(0)
+// peerBooking pairs a peer identifier with the booking identifier recorded
+// for that peer on one graph item. PeerID comes first, BookingID second.
+type peerBooking struct {
+	PeerID, BookingID string
+}
+
+// getAllPeersForItem returns every (peerID, bookingID) pair recorded in
+// exec.PeerBookByGraph for the graph item graphItemID, ordered by PeerID.
+//
+// For each peer only the first booking listed for the item is used. When no
+// peer has a booking for the item, a single fallback entry is returned with an
+// empty PeerID (local execution) and BookingID set to graphItemID.
+//
+// Go randomizes map iteration order, so the pairs are kept sorted by PeerID:
+// callers turn them into Argo task names and dependency lists, and a stable
+// order makes the generated workflow identical from one run to the next.
+func getAllPeersForItem(exec *workflow_execution.WorkflowExecution, graphItemID string) []peerBooking {
+	var result []peerBooking
+	for peerID, byGraph := range exec.PeerBookByGraph {
+		bookings, ok := byGraph[graphItemID]
+		if !ok || len(bookings) == 0 {
+			continue
+		}
+		// Append, then bubble the new entry down to its sorted position.
+		// The list holds at most one entry per peer, so this stays cheap
+		// and needs no import beyond what the block already uses.
+		result = append(result, peerBooking{PeerID: peerID, BookingID: bookings[0]})
+		for i := len(result) - 1; i > 0 && result[i-1].PeerID > result[i].PeerID; i-- {
+			result[i-1], result[i] = result[i], result[i-1]
+		}
+	}
+	if len(result) == 0 {
+		return []peerBooking{{PeerID: "", BookingID: graphItemID}}
+	}
+	return result
+}
+
+// isPeerReparted reports whether peerID designates a remote peer
+// (Relation != 1) and returns the loaded peer alongside.
+// An empty peerID means local execution: it returns (false, nil) without
+// issuing any request.
+// NOTE(review): a failed request creation or lookup also yields (false, nil),
+// so the caller cannot tell "local" from "lookup failed" — confirm that this
+// fallback to local execution is intended.
+func (b *ArgoBuilder) isPeerReparted(peerID string) (bool, *peer.Peer) {
+	if peerID == "" {
+		return false, nil
 	}

-	// Résolution du Peer propriétaire du Compute via l'API oc-lib.
 	req := oclib.NewRequest(oclib.LibDataEnum(oclib.PEER), "", "", nil, nil)
 	if req == nil {
 		fmt.Println("TODO : handle error when trying to create a request on the Peer Collection")
 		return false, nil
 	}

-	res := req.LoadOne(computeAttached.CreatorID)
+	res := req.LoadOne(peerID)
 	if res.Err != "" {
-		fmt.Print("TODO : handle error when requesting PeerID")
-		fmt.Print(res.Err)
+		fmt.Print("TODO : handle error when requesting PeerID: " + res.Err)
 		return false, nil
 	}

-	peer := res.ToPeer()
-
+	// NOTE(review): p is dereferenced below without a nil check — presumably
+	// ToPeer never returns nil when res.Err is empty; verify.
+	p := res.ToPeer()
 	// Relation == 1 means "myself": the processing is local.
-	isNotReparted := peer.Relation == 1
-	logger.Info().Msg(fmt.Sprint("Result IsMySelf for ", peer.UUID, " : ", isNotReparted))
-
-	return !isNotReparted, peer
+	isNotReparted := p.Relation == 1
+	logger.Info().Msg(fmt.Sprint("Result IsMySelf for ", p.UUID, " : ", isNotReparted))
+	return !isNotReparted, p
 }

-// retrieveProcessingCompute parcourt les liens du graphe pour retrouver
-// la ressource de Compute directement connectée au nœud graphID.
-// Retourne nil si aucun Compute n'est trouvé.
-func (b *ArgoBuilder) retrieveProcessingCompute(graphID string) *resources.ComputeResource {
-	for _, link := range b.OriginWorkflow.Graph.Links {
-		var oppositeId string
-		if link.Source.ID == graphID {
-			oppositeId = link.Destination.ID
-		} else if link.Destination.ID == graphID {
-			oppositeId = link.Source.ID
-		}
-
-		if oppositeId != "" {
-			dt, res := b.OriginWorkflow.Graph.GetResource(oppositeId)
-			if dt == oclib.COMPUTE_RESOURCE {
-				return res.(*resources.ComputeResource)
-			} else {
-				continue
-			}
-		}
-	}
-	return nil
-}
-
-// waitForConsiders publie un ArgoKubeEvent sur le canal NATS ARGO_KUBE_EVENT
-// puis se bloque jusqu'à réception d'un PropalgationMessage vérifiant :
-//   - Action == PB_CONSIDERS
-//   - DataType == dataType (COMPUTE_RESOURCE ou STORAGE_RESOURCE)
-//   - Payload décodé en JSON contenant "executions_id" == executionsId
-//
-// Cela garantit que l'infrastructure distante (Admiralty ou Minio) a bien
-// pris en compte la demande avant que la construction du workflow continue.
+// waitForConsiders publishes an ArgoKubeEvent on NATS and then blocks until
+// the matching PB_CONSIDERS confirmation is delivered through the global
+// cache (globalConsidersCache), without opening an additional NATS
+// connection. Per the original author, the centralized listener
+// (StartConsidersListener) dispatches the confirmation to the right channel.
+//
+// It is meant to run as a goroutine: it calls wg.Done() on return and sends
+// exactly one value on errCh on every path (nil on confirmation, the error on
+// marshal failure or timeout). The callers in this file only drain errCh
+// after wg.Wait(), so errCh must be buffered with room for one value per
+// goroutine.
+//
+// The waiter is registered BEFORE the event is published, so a confirmation
+// that arrives right after the publish always finds a registered waiter.
 // A 5-minute timeout is applied so that the wait can never block forever.
-func waitForConsiders(executionsId string, dataType tools.DataType, event ArgoKubeEvent) {
+func waitForConsiders(executionsId string, dataType tools.DataType, event ArgoKubeEvent, wg *sync.WaitGroup, errCh chan<- error) {
+	defer wg.Done()
+
 	// Serialize the event; it is published on ARGO_KUBE_EVENT further down.
 	b, err := json.Marshal(event)
 	if err != nil {
 		logger.Error().Msg("Cannot marshal ArgoKubeEvent: " + err.Error())
+		errCh <- err
 		return
 	}
+
+	// Register in the cache first, then publish.
+	// For COMPUTE_RESOURCE, SourcePeerID tells the compute peers apart (local or remote).
+	// For STORAGE_RESOURCE, SourcePeerID is the peer hosting the storage.
+	// NOTE(review): DestPeerID is not part of the key, so several waiters for
+	// the same storage share one key — confirm the cache supports that.
+	key := considersKey(executionsId, dataType, event.SourcePeerID)
+	ch, unregister := globalConsidersCache.register(key)
+	defer unregister()
+
 	tools.NewNATSCaller().SetNATSPub(tools.ARGO_KUBE_EVENT, tools.NATSResponse{
 		FromApp:  "oc-monitord",
 		Datatype: dataType,
 		User:     "root",
-		Method:   int(tools.PROPALGATION_EVENT),
+		Method:   int(tools.ARGO_KUBE_EVENT),
 		Payload:  b,
 	})

-	// Connexion NATS pour écouter la réponse PB_CONSIDERS.
-	natsURL := oclib_config.GetConfig().NATSUrl
-	if natsURL == "" {
-		logger.Error().Msg("NATS_SERVER not set, skipping PB_CONSIDERS wait")
-		return
-	}
-	nc, err := nats.Connect(natsURL)
-	if err != nil {
-		logger.Error().Msg("NATS connect error waiting for PB_CONSIDERS: " + err.Error())
-		return
-	}
-	defer nc.Close()
-
-	// Souscription au canal PROPALGATION_EVENT avec un buffer de 64 messages.
-	ch := make(chan *nats.Msg, 64)
-	sub, err := nc.ChanSubscribe(tools.PROPALGATION_EVENT.GenerateKey(), ch)
-	if err != nil {
-		logger.Error().Msg("NATS subscribe error waiting for PB_CONSIDERS: " + err.Error())
-		return
-	}
-	defer sub.Unsubscribe()
-
-	timeout := time.After(5 * time.Minute)
-	for {
-		select {
-		case msg := <-ch:
-			// Désérialise le message en PropalgationMessage.
-			var pm tools.PropalgationMessage
-			if err := json.Unmarshal(msg.Data, &pm); err != nil {
-				continue
-			}
-			// Filtre : action, type de données.
-			if pm.Action != tools.PB_CONSIDERS || pm.DataType != int(dataType) {
-				continue
-			}
-			// Filtre : executions_id dans le Payload du PropalgationMessage.
-			var body struct {
-				ExecutionsID string `json:"executions_id"`
-			}
-			if err := json.Unmarshal(pm.Payload, &body); err != nil {
-				continue
-			}
-			if body.ExecutionsID != executionsId {
-				continue
-			}
-			logger.Info().Msg(fmt.Sprintf("PB_CONSIDERS received for executions_id=%s datatype=%s", executionsId, dataType.String()))
-			return
-		case <-timeout:
-			logger.Warn().Msg(fmt.Sprintf("Timeout waiting for PB_CONSIDERS executions_id=%s datatype=%s", executionsId, dataType.String()))
-			return
-		}
+	select {
+	case <-ch:
+		logger.Info().Msg(fmt.Sprintf("PB_CONSIDERS received for executions_id=%s datatype=%s source_peer=%s dest_peer=%s", executionsId, dataType.String(), event.SourcePeerID, event.DestPeerID))
+		errCh <- nil
+	case <-time.After(5 * time.Minute):
+		err := fmt.Errorf("timeout waiting for PB_CONSIDERS executions_id=%s datatype=%s", executionsId, dataType.String())
+		logger.Error().Msg(err.Error())
+		errCh <- err
 	}
 }

@@ -687,8 +740,35 @@ type ArgoKubeEvent struct {
 	// OriginID est le peer qui a initié la demande de provisionnement ;
 	// la réponse PB_CONSIDERS lui sera renvoyée.
 	OriginID string `json:"origin_id"`
+	// MinioID est l'ID de la ressource storage (Minio ou local PVC).
+	MinioID string `json:"minio_id,omitempty"`
+	// Local signale un storage Local=true (PVC pré-provisionné par oc-datacenter).
+	Local bool `json:"local,omitempty"`
+	// StorageName est le nom normalisé du storage, utilisé pour calculer le claimName.
+	StorageName string `json:"storage_name,omitempty"`
+	// Images est la liste des images de conteneurs à pre-pull sur le peer cible
+	// avant le démarrage du workflow. Vide pour les events STORAGE_RESOURCE.
+	Images []string `json:"images,omitempty"`
 }

+// addPeerImage records image as one to pre-pull for peerID. The empty key ""
+// stands for the local peer. An empty image name is ignored, and an image
+// already recorded for that peer is not added a second time.
+func (b *ArgoBuilder) addPeerImage(peerID, image string) {
+	if image == "" {
+		return
+	}
+	// Reading from a nil map is safe, so the duplicate scan can run before
+	// the map is lazily allocated.
+	known := b.PeerImages[peerID]
+	for i := range known {
+		if known[i] == image {
+			return
+		}
+	}
+	if b.PeerImages == nil {
+		b.PeerImages = make(map[string][]string)
+	}
+	b.PeerImages[peerID] = append(known, image)
+}
+
+
 // CompleteBuild finalise la construction du workflow Argo après la génération
 // du DAG.  Elle effectue dans l'ordre :
 //  1. Pour chaque peer distant (Admiralty) : publie un ArgoKubeEvent de type
@@ -701,30 +781,68 @@ type ArgoKubeEvent struct {
 func (b *ArgoBuilder) CompleteBuild(executionsId string) (string, error) {
 	logger.Info().Msg("DEV :: Completing build")

-	// --- Étape 1 : validation Admiralty pour chaque peer distant ---
-	for _, peer := range b.RemotePeers {
-		logger.Info().Msg(fmt.Sprint("DEV :: Launching Admiralty Setup for ", peer))
-		// Publie l'événement COMPUTE_RESOURCE et attend PB_CONSIDERS (bloquant).
-		waitForConsiders(executionsId, tools.COMPUTE_RESOURCE, ArgoKubeEvent{
-			ExecutionsID: executionsId,
-			Type:         tools.COMPUTE_RESOURCE,
-			DestPeerID:   conf.GetConfig().PeerID,
-			SourcePeerID: peer,
-			OriginID:     conf.GetConfig().PeerID,
-		})
+	// --- Étape 1 : validation kube pour tous les peers (local + distants) ---
+	// Les goroutines tournent en parallèle ; un timeout est une erreur fatale.
+	// Déduplique RemotePeers : plusieurs processings peuvent pointer vers le même
+	// peer distant, on ne doit envoyer qu'un seul ArgoKubeEvent par peer.
+	seen := make(map[string]struct{})
+	uniqueRemotePeers := b.RemotePeers[:0]
+	for _, p := range b.RemotePeers {
+		if _, ok := seen[p]; !ok {
+			seen[p] = struct{}{}
+			uniqueRemotePeers = append(uniqueRemotePeers, p)
+		}
 	}
+	b.RemotePeers = uniqueRemotePeers

-	// --- Étape 2 : mise à jour du nom de cluster Admiralty ---
-	// Le nom final du cluster cible est "target-<peerId>-<executionsId>".
-	for _, template := range b.Workflow.Spec.Templates {
-		if len(template.Metadata.Annotations) > 0 {
-			if peerId, ok := template.Metadata.Annotations["multicluster.admiralty.io/clustername"]; ok {
-				template.Metadata.Annotations["multicluster.admiralty.io/clustername"] = "target-" + tools.GetConcatenatedName(peerId, executionsId)
-			}
+	total := len(b.RemotePeers)
+	if b.HasLocalCompute {
+		total++
+	}
+	var wg sync.WaitGroup
+	errCh := make(chan error, total)
+
+	// Le kube local doit aussi être configuré s'il porte au moins un processing.
+	if b.HasLocalCompute {
+		if localPeer, err := oclib.GetMySelf(); err == nil {
+			logger.Info().Msg("DEV :: Launching local kube setup for " + localPeer.GetID())
+			wg.Add(1)
+			go waitForConsiders(executionsId, tools.COMPUTE_RESOURCE, ArgoKubeEvent{
+				ExecutionsID: executionsId,
+				Type:         tools.COMPUTE_RESOURCE,
+				DestPeerID:   localPeer.GetID(),
+				SourcePeerID: localPeer.GetID(),
+				OriginID:     localPeer.GetID(),
+				Images:       b.PeerImages[""], // images à pre-pull sur le cluster local
+			}, &wg, errCh)
 		}
 	}

-	// --- Étape 3 : génération et écriture du fichier YAML ---
+	// Peers distants via Admiralty.
+	for _, peer := range b.RemotePeers {
+		logger.Info().Msg(fmt.Sprint("DEV :: Launching Admiralty Setup for ", peer))
+		if self, err := oclib.GetMySelf(); err == nil {
+			wg.Add(1)
+			go waitForConsiders(executionsId, tools.COMPUTE_RESOURCE, ArgoKubeEvent{
+				ExecutionsID: executionsId,
+				Type:         tools.COMPUTE_RESOURCE,
+				DestPeerID:   self.GetID(),
+				SourcePeerID: peer,
+				OriginID:     self.GetID(),
+				Images:       b.PeerImages[peer], // images à pre-pull sur le cluster distant (via kubeconfig Admiralty)
+			}, &wg, errCh)
+		}
+
+	}
+	wg.Wait()
+	close(errCh)
+	for err := range errCh {
+		if err != nil {
+			return "", err
+		}
+	}
+
+	// --- Étape 2 : génération et écriture du fichier YAML ---
 	random_name := fakelish.GenerateFakeWord(5, 8) + "-" + fakelish.GenerateFakeWord(5, 8)
 	b.Workflow.Metadata.Name = "oc-monitor-" + random_name
 	logger = oclib.GetLogger()
@@ -734,7 +852,7 @@ func (b *ArgoBuilder) CompleteBuild(executionsId string) (string, error) {
 		return "", err
 	}
 	// Nom de fichier horodaté au format DD_MM_YYYY_hhmmss.
-	current_timestamp := time.Now().Format("02_01_2006_150405")
+	current_timestamp := time.Now().UTC().Format("02_01_2006_150405")
 	file_name := random_name + "_" + current_timestamp + ".yml"
 	workflows_dir := "./argo_workflows/"
 	err = os.WriteFile(workflows_dir+file_name, []byte(yamlified), 0660)