Modified how logging with the monitord container is implemented, with simpler logic thanks to the Argo client library and k8s client-go for pod logs

pb
2025-04-17 16:51:29 +02:00
parent 6f7acee2df
commit fb8d994be3
5 changed files with 322 additions and 355 deletions
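
The diff below removes the pod-polling logWorkflow/streamLogs pair and replaces it with two focused Tool methods: GetArgoWatch, which watches the workflow through the Argo clientset, and GetPodLogger, which streams a single node's logs through client-go. A minimal sketch of how a caller might combine the two — followWorkflow, its package and its event handling are illustrative assumptions, not code from this commit:

package logfollow

import (
"bufio"
"fmt"
"io"
"oc-monitord/tools"
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
)

// followWorkflow (hypothetical) watches the workflow and, as pod nodes start
// running, fans their logs out to stdout. A real caller would de-duplicate
// nodes it is already streaming.
func followWorkflow(t tools.Tool, execID string, wfName string) error {
watcher, err := t.GetArgoWatch(execID, wfName)
if err != nil {
return err
}
defer watcher.Stop()
for event := range watcher.ResultChan() {
wf, ok := event.Object.(*wfv1.Workflow)
if !ok {
continue
}
for _, node := range wf.Status.Nodes {
if node.Type != wfv1.NodeTypePod || node.Phase != wfv1.NodeRunning {
continue
}
// node.Name matches the pod's workflows.argoproj.io/node-name
// annotation, which is what GetPodLogger filters on.
logs, err := t.GetPodLogger(execID, wfName, node.Name)
if err != nil {
continue
}
go func(rc io.ReadCloser) {
defer rc.Close()
sc := bufio.NewScanner(rc)
for sc.Scan() {
fmt.Println(sc.Text())
}
}(logs)
}
}
return nil
}

Because GetPodLogger itself blocks until the pod reports ready, the caller no longer needs the old list-and-retry loop.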

View File

@@ -3,17 +3,15 @@ package tools
import (
"errors"
"io"
"oc-monitord/models"
"sync"
"k8s.io/apimachinery/pkg/watch"
)
type Tool interface {
CreateArgoWorkflow(path string, ns string) (string, error)
CreateAccessSecret(ns string, login string, password string) (string, error)
LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
argoLogs *models.ArgoLogs, seen []string,
logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error
GetArgoWatch(executionId string, wfName string) (watch.Interface, error)
GetPodLogger(ns string, wfName string, podName string) (io.ReadCloser, error)
}
var _service = map[string]func() (Tool, error){

View File

@@ -7,21 +7,18 @@ import (
"fmt"
"io"
"oc-monitord/conf"
"oc-monitord/models"
"oc-monitord/utils"
"os"
"sync"
"time"
"cloud.o-forge.io/core/oc-lib/models/common/enum"
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
"github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
"github.com/google/uuid"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
)
@@ -59,73 +56,6 @@ func NewKubernetesTool() (Tool, error) {
}, nil
}
func (k *KubernetesTools) LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
seen []string, logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
exec := utils.GetExecution(execID)
if exec == nil {
return errors.New("Could not retrieve workflow ID from execution ID " + execID)
}
if exec.State == enum.DRAFT || exec.State == enum.FAILURE || exec.State == enum.SUCCESS {
l := utils.GetWFLogger("")
l.Error().Msg("The execution's state doesn't meet requirement, state is : " + exec.State.String())
return nil
}
k.logWorkflow(namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
return k.LogWorkflow(execID, namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
}
func (k *KubernetesTools) logWorkflow(namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
seen []string,
logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
// List pods related to the Argo workflow
fmt.Println("\n!!!!!!!! !!!!!!!!!! !!!!!!!! &&&& & STARTING LOG\n\n")
labelSelector := fmt.Sprintf("workflows.argoproj.io/workflow=%s", workflowName)
for retries := 0; retries < 10; retries++ { // Retry for up to ~20 seconds
// List workflow pods
wfPods, err := k.Set.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: labelSelector,
})
if err != nil {
return err
}
// If we found pods, stream logs
if len(wfPods.Items) > 0 {
var wg sync.WaitGroup
// Stream logs from all matching pods
for _, pod := range wfPods.Items {
for _, container := range pod.Spec.Containers {
wg.Add(1)
go k.streamLogs(namespace, pod.Name, container.Name, argoFilePath, stepMax, &wg, current_watch, previous_watch, argoLogs, seen, logFunc)
}
}
wg.Wait()
return nil
}
time.Sleep(2 * time.Second) // Wait before retrying
}
return errors.New("no pods found for the workflow")
}
// Function to stream logs
func (k *KubernetesTools) streamLogs(namespace string, podName string, containerName string,
argoFilePath string, stepMax int, wg *sync.WaitGroup, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string,
logFunc func(argo_file_path string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) {
req := k.Set.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
Container: containerName, // Main container
Follow: true, // Equivalent to -f flag in kubectl logs
})
defer wg.Done()
// Open stream
stream, err := req.Stream(context.Background())
if err != nil {
return
}
defer stream.Close()
var internalWg sync.WaitGroup
logFunc(argoFilePath, stepMax, stream, current_watch, previous_watch, argoLogs, seen, &internalWg)
internalWg.Wait()
}
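// CreateArgoWorkflow reads the manifest at path, decodes it into a wfv1.Workflow and submits it to the given namespace through the Argo clientset.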
func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, error) {
// Read workflow YAML file
workflowYAML, err := os.ReadFile(path)
@@ -149,7 +79,7 @@ func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, er
}
// Create the workflow in the "argo" namespace
createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.Background(), workflow, metav1.CreateOptions{})
createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.TODO(), workflow, metav1.CreateOptions{})
if err != nil {
return "", errors.New("failed to create workflow: " + err.Error())
}
@@ -177,9 +107,81 @@ func (k *KubernetesTools) CreateAccessSecret(ns string, login string, password s
Data: secretData,
}
// Create the Secret in Kubernetes
_, err := k.Set.CoreV1().Secrets(namespace).Create(context.Background(), secret, metav1.CreateOptions{})
_, err := k.Set.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
if err != nil {
return "", errors.New("Error creating secret: " + err.Error())
}
return name, nil
}
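// GetArgoWatch opens a watch on the oc-monitor-<wfName> workflow through the Argo clientset, the programmatic equivalent of running 'argo watch <wfName> -n <executionId>'.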
func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch.Interface, error) {
wfl := utils.GetWFLogger("")
wfl.Debug().Msg("Starting argo watch with argo lib")
fmt.Println("metadata.name=oc-monitor-"+wfName + " in namespace : " + executionId)
options := metav1.ListOptions{FieldSelector: "metadata.name=oc-monitor-"+wfName}
fmt.Println(options)
watcher, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(executionId).Watch(context.TODO(), options)
if err != nil {
return nil, errors.New("Error executing 'argo watch " + wfName + " -n " + executionId + " with ArgoprojV1alpha1 client")
}
return watcher, nil
}
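// GetPodLogger returns a follow-mode log stream for the workflow pod whose workflows.argoproj.io/node-name annotation matches nodeName, waiting first for the pod to become ready.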
func (k *KubernetesTools) GetPodLogger(ns string, wfName string, nodeName string) (io.ReadCloser, error) {
var targetPod v1.Pod
pods, err := k.Set.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{
LabelSelector: "workflows.argoproj.io/workflow="+wfName,
})
if err != nil {
return nil, fmt.Errorf("failed to list pods: " + err.Error())
}
if len(pods.Items) == 0 {
return nil, fmt.Errorf("no pods found with label workflows.argoproj.io/node-name=" + nodeName)
}
for _, pod := range pods.Items {
if pod.Annotations["workflows.argoproj.io/node-name"] == nodeName {
targetPod = pod
break
}
}
if targetPod.Name == "" {
return nil, fmt.Errorf("no pod with annotation workflows.argoproj.io/node-name=%s in workflow %s", nodeName, wfName)
}
// The k8s API throws an error if we try getting logs while the containers are not yet initialized, so we wait for the pod to be ready first
k.testPodReady(targetPod, ns)
// When running kubectl logs for a pod it contacts /api/v1/namespaces/NAMESPACE/pods/oc-monitor-PODNAME/log?container=main, so we set Container: "main" in the call
req, err := k.Set.CoreV1().Pods(ns).GetLogs(targetPod.Name, &v1.PodLogOptions{Follow: true, Container: "main"}).Stream(context.Background())
if err != nil {
return nil, fmt.Errorf("error when trying to get logs for %s: %w", targetPod.Name, err)
}
return req, nil
}
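// testPodReady polls the pod every two seconds until it reports the PodReady condition, so GetLogs is not called before the containers are initialized.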
func (k *KubernetesTools) testPodReady(pod v1.Pod, ns string) {
for {
pod, err := k.Set.CoreV1().Pods(ns).Get(context.Background(), pod.Name, metav1.GetOptions{})
if err != nil {
wfl := utils.GetWFLogger("")
wfl.Error().Msg("Error fetching pod: " + err.Error() + "\n")
break
}
ready := false
for _, cond := range pod.Status.Conditions {
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
ready = true
break
}
}
if ready {
return
}
time.Sleep(2 * time.Second) // avoid hammering the API
}
}