Corrected how pods generated by the argo workflow are logged

2025-05-20 09:25:54 +02:00 · 2025-05-20 09:25:54 +02:00 · bdbbd7697a
commit bdbbd7697a
parent 483f747754 6917295fbd
2 changed files with 24 additions and 9 deletions
--- a/logger/argo_logs.go
+++ b/logger/argo_logs.go
@ -7,6 +7,8 @@ import (
 	"oc-monitord/tools"
 	"oc-monitord/utils"
 	"slices"
+	"strings"
+	"sync"
 	"time"

 	"github.com/rs/zerolog"
@ -114,6 +116,8 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
 	wfl := utils.GetWFLogger("")
 	wfl.Debug().Msg("Starting to log " + wfName)

+	var wg sync.WaitGroup
+	
 	for event := range (watcher.ResultChan()) {
 		wf, ok := event.Object.(*wfv1.Workflow)
 		if !ok {
@ -164,7 +168,9 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
 			if !slices.Contains(pods,pod.Name){
 				pl := wfl.With().Str("pod",  pod.Name).Logger()
 				if wfName == pod.Name { pods = append(pods, pod.Name); continue }	// One of the node is the Workflow, the others are the pods so don't try to log on the wf name
-				go logKubernetesPods(namespace, wfName, pod.Name, pl)
+				pl.Info().Msg("Found a new pod to log : "  + pod.Name)
+				wg.Add(1)
+				go logKubernetesPods(namespace, wfName, pod.Name, pl, &wg)
 				pods = append(pods, pod.Name)
 			} 
 		}
@ -172,6 +178,7 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
 		// Stop listening to the chan when the Workflow is completed or something bad happened
 		if node.Phase.Completed() {
 			wfl.Info().Msg(wfName + " worflow completed")
+			wg.Wait()
 			break
 		}
 		if node.Phase.FailedOrError() {
@ -197,7 +204,12 @@ func retrieveCondition(wf *wfv1.Workflow) (c Conditions) {
 }

 // Function needed to be executed as a go thread 
-func logKubernetesPods(executionId string, wfName string,podName string, logger zerolog.Logger){
+func logKubernetesPods(executionId string, wfName string,podName string, logger zerolog.Logger, wg *sync.WaitGroup){
+	
+	s := strings.Split(podName, ".")
+	name := s[0] + "-" + s[1]
+	step := s[1]
+	
 	k, err := tools.NewKubernetesTool()
 	if err != nil {
 		logger.Error().Msg("Could not get Kubernetes tools")
@ -213,8 +225,9 @@ func logKubernetesPods(executionId string, wfName string,podName string, logger
 	scanner := bufio.NewScanner(reader)
 	for scanner.Scan() {
 		log := scanner.Text()
-		podLog := NewArgoPodLog(wfName,podName,log)
+		podLog := NewArgoPodLog(name,step,log)
 		jsonified, _ := json.Marshal(podLog)
 		logger.Info().Msg(string(jsonified))
 	}
+	
 }
--- a/tools/kubernetes.go
+++ b/tools/kubernetes.go
@ -132,6 +132,7 @@ func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch
 func (k *KubernetesTools) GetPodLogger(ns string, wfName string, nodeName string) (io.ReadCloser, error) {
 	var targetPod v1.Pod

+	
 	pods, err := k.Set.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{
 		LabelSelector: "workflows.argoproj.io/workflow="+wfName,
    })
@ -172,7 +173,8 @@ func (k *KubernetesTools) testPodReady(pod v1.Pod, ns string) {
 	
 		var initialized bool
 		for _, cond := range pod.Status.Conditions {
-			if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
+			// It seems that for remote pods the pod gets the Succeeded status before .status.conditions has time to show that it is ready to run, so we added the OR condition on the pod phase
+			if (cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue) || pod.Status.Phase == v1.PodSucceeded {		
 				initialized = true
 				return
 			}