Modified how logging with the monitord container is implemented; the logic is simpler thanks to the Argo client library for workflow watches and the Kubernetes client-go library for pods' logs

pb 2025-04-17 16:51:29 +02:00
parent 6f7acee2df
commit fb8d994be3
5 changed files with 322 additions and 355 deletions
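In essence, the commit replaces scraping of `argo` CLI output with two API-level mechanisms: a watch on Workflow objects through the Argo clientset, and a per-pod log stream through client-go. A minimal, self-contained sketch of that pattern (this is not the commit's code; the namespace, workflow name, and pod name are placeholders, and the client config is assumed to come from rest.InClusterConfig):

package main

import (
    "context"
    "fmt"
    "io"
    "os"

    "github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/client-go/kubernetes"
    "k8s.io/client-go/rest"
)

func main() {
    cfg, err := rest.InClusterConfig() // assumption: running inside the cluster
    if err != nil {
        panic(err)
    }
    argoClient := versioned.NewForConfigOrDie(cfg) // Argo clientset, for Workflow objects
    k8sClient := kubernetes.NewForConfigOrDie(cfg) // client-go clientset, for pods

    ns, wfName, podName := "demo-ns", "oc-monitor-demo", "demo-pod" // placeholder values

    // Watch the Workflow object itself instead of scraping `argo watch` output.
    watcher, err := argoClient.ArgoprojV1alpha1().Workflows(ns).Watch(context.TODO(),
        metav1.ListOptions{FieldSelector: "metadata.name=" + wfName})
    if err != nil {
        panic(err)
    }
    go func() {
        for event := range watcher.ResultChan() {
            fmt.Println("workflow event:", event.Type)
        }
    }()

    // Stream a pod's logs the way `kubectl logs -f POD -c main` does.
    stream, err := k8sClient.CoreV1().Pods(ns).GetLogs(podName,
        &v1.PodLogOptions{Follow: true, Container: "main"}).Stream(context.TODO())
    if err != nil {
        panic(err)
    }
    defer stream.Close()
    io.Copy(os.Stdout, stream) // forward the pod's logs to stdout
}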

219
logger/argo_logs.go Normal file
View File

@@ -0,0 +1,219 @@
package logger

import (
    "bufio"
    "encoding/json"
    "fmt"
    "slices"
    "time"

    "oc-monitord/tools"
    "oc-monitord/utils"

    wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
    "github.com/rs/zerolog"
    "k8s.io/apimachinery/pkg/watch"
)

// An object to monitor the logs generated by a specific pod from a workflow execution
type ArgoWatch struct {
    Name      string
    Namespace string
    Status    string
    Conditions
    Created  string
    Started  string
    Duration string
    Progress string
    Logs     []string
}

type Conditions struct {
    PodRunning bool
    Completed  bool
}

func (a *ArgoWatch) Equals(arg *ArgoWatch) bool {
    if arg == nil {
        return false
    }
    return a.Status == arg.Status && a.Progress == arg.Progress && a.Conditions.PodRunning == arg.Conditions.PodRunning && a.Conditions.Completed == arg.Conditions.Completed
}

func NewArgoLogs(name string, namespace string, stepMax int) *ArgoLogs {
    return &ArgoLogs{
        Name:        "oc-monitor-" + name,
        Namespace:   namespace,
        CreatedDate: time.Now().Format("2006-01-02 15:04:05"),
        StepCount:   0,
        StepMax:     stepMax,
        stop:        false,
        Seen:        []string{},
    }
}

// An object to monitor and log the output of an argo submit
type ArgoLogs struct {
    Name        string
    Namespace   string
    CreatedDate string
    StepCount   int
    StepMax     int
    stop        bool
    Started     time.Time
    Seen        []string
    Logs        []string
    IsStreaming bool
}

func (a *ArgoLogs) NewWatch() *ArgoWatch {
    return &ArgoWatch{
        Name:      a.Name,
        Namespace: a.Namespace,
        Status:    "Pending",
        Created:   a.CreatedDate,
        Started:   a.Started.Format("2006-01-02 15:04:05"),
        Conditions: Conditions{
            PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
            Completed:  a.StepCount == a.StepMax,
        },
        Progress: fmt.Sprintf("%v/%v", a.StepCount, a.StepMax),
        Duration: "0s",
        Logs:     []string{},
    }
}

func (a *ArgoLogs) StartStepRecording(current_watch *ArgoWatch, logger zerolog.Logger) {
    jsonified, _ := json.Marshal(current_watch)
    logger.Info().Msg(string(jsonified))
    a.StepCount += 1
    a.Started = time.Now()
}

type ArgoPodLog struct {
    PodName string
    Step    string
    Message string
}

func NewArgoPodLog(name string, step string, msg string) ArgoPodLog {
    return ArgoPodLog{
        PodName: name,
        Step:    step,
        Message: msg,
    }
}

func LogKubernetesArgo(wfName string, executionID string, watcher watch.Interface) {
    var argoWatcher *ArgoWatch
    var pods []string
    var node wfv1.NodeStatus

    wfl := utils.GetWFLogger("")

    for event := range watcher.ResultChan() {
        wf, ok := event.Object.(*wfv1.Workflow)
        if !ok {
            wfl.Error().Msg("unexpected type")
            continue
        }
        if len(wf.Status.Nodes) == 0 {
            wfl.Debug().Msg("No node status yet") // The first event on the channel doesn't contain Nodes, so we skip it
            continue
        }

        conditions := retrieveCondition(wf)

        // Retrieve the status of the main node, which is named after the workflow
        if node, ok = wf.Status.Nodes[wfName]; !ok {
            bytified, _ := json.MarshalIndent(wf.Status.Nodes, "", "\t")
            wfl.Fatal().Msg("Could not find the " + wfName + " node in \n" + string(bytified))
        }

        now := time.Now()
        // node.StartedAt is a metav1.Time: use its embedded time.Time directly rather than a lossy string round-trip
        duration := now.Sub(node.StartedAt.Time)

        newWatcher := ArgoWatch{
            Name:       node.Name,
            Namespace:  executionID,
            Status:     string(node.Phase),
            Created:    node.StartedAt.String(),
            Started:    node.StartedAt.String(),
            Progress:   string(node.Progress),
            Duration:   duration.String(),
            Conditions: conditions,
        }

        if argoWatcher == nil {
            argoWatcher = &newWatcher
        }

        if !newWatcher.Equals(argoWatcher) {
            jsonified, _ := json.Marshal(newWatcher)
            wfl.Info().Msg(string(jsonified))
            argoWatcher = &newWatcher
        }

        // No WaitGroup is needed here: the loop itself blocks the main thread,
        // because the Argo watch never closes the channel
        for _, pod := range wf.Status.Nodes {
            if !slices.Contains(pods, pod.Name) {
                pl := wfl.With().Str("pod", pod.Name).Logger()
                if wfName == pod.Name { // one of the nodes is the Workflow itself, the others are pods, so don't try to log on the wf name
                    pods = append(pods, pod.Name)
                    continue
                }
                go logKubernetesPods(executionID, wfName, pod.Name, pl)
                pods = append(pods, pod.Name)
            }
        }

        // Stop listening to the channel when the Workflow has completed or failed
        if node.Phase.Completed() {
            wfl.Info().Msg(wfName + " workflow completed")
            break
        }
        if node.Phase.FailedOrError() {
            wfl.Error().Msg(wfName + " has failed, please refer to the logs")
            wfl.Error().Msg(node.Message)
            break
        }
    }
}

func retrieveCondition(wf *wfv1.Workflow) (c Conditions) {
    for _, cond := range wf.Status.Conditions {
        if cond.Type == "PodRunning" {
            c.PodRunning = cond.Status == "True"
        }
        if cond.Type == "Completed" {
            c.Completed = cond.Status == "True"
        }
    }
    return
}

// Meant to be executed as a goroutine, one per pod
func logKubernetesPods(executionId string, wfName string, podName string, logger zerolog.Logger) {
    k, err := tools.NewKubernetesTool()
    if err != nil {
        logger.Error().Msg("Could not get Kubernetes tools")
        return
    }
    reader, err := k.GetPodLogger(executionId, wfName, podName)
    if err != nil {
        logger.Error().Msg(err.Error())
        return
    }
    defer reader.Close() // close the log stream once the pod stops producing output

    scanner := bufio.NewScanner(reader)
    for scanner.Scan() {
        log := scanner.Text()
        podLog := NewArgoPodLog(wfName, podName, log)
        jsonified, _ := json.Marshal(podLog)
        logger.Info().Msg(string(jsonified))
    }
}
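For orientation before the diffs below: the watcher consumed by LogKubernetesArgo comes from the new GetArgoWatch method added to the Kubernetes tool (last file in this commit). A condensed, hypothetical wiring sketch; the monitor helper itself is not in the commit. Note the asymmetry: GetArgoWatch prepends "oc-monitor-" to its wfName argument, while LogKubernetesArgo expects the full name returned by CreateArgoWorkflow:

package main

import (
    l "oc-monitord/logger"
    "oc-monitord/tools"
)

// monitor condenses what executeInside in main.go does below.
// executionID doubles as the namespace the workflow runs in.
func monitor(executionID string, workflowName string, createdName string) error {
    t, err := tools.NewKubernetesTool()
    if err != nil {
        return err
    }
    // GetArgoWatch adds the "oc-monitor-" prefix itself, so pass the bare workflow name.
    watcher, err := t.GetArgoWatch(executionID, workflowName)
    if err != nil {
        return err
    }
    // LogKubernetesArgo blocks, ranging over the watch channel until the workflow
    // completes or fails; it spawns one goroutine per pod for log streaming.
    l.LogKubernetesArgo(createdName, executionID, watcher)
    return nil
}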

137
main.go
View File

@@ -3,19 +3,17 @@ package main
 import (
     "bufio"
     "encoding/base64"
-    "encoding/json"
     "fmt"
     "io"
     "os"
     "os/exec"
     "regexp"
-    "slices"
     "strings"
     "sync"
     "time"

     "oc-monitord/conf"
-    "oc-monitord/models"
+    l "oc-monitord/logger"
     u "oc-monitord/utils"
     "oc-monitord/workflow_builder"
@@ -45,7 +43,6 @@ import (
 var logger zerolog.Logger
 var wf_logger zerolog.Logger
-var pods_logger zerolog.Logger

 var parser argparse.Parser
 var workflowName string
@@ -90,7 +87,7 @@ func main() {
         logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
     }
-    builder, stepMax, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout)
+    builder, _, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout) // Removed stepMax so far, I don't know if we need it anymore
     if err != nil {
         logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
         logger.Error().Msg(err.Error())
@@ -105,22 +102,21 @@ func main() {
     wf_logger := u.GetWFLogger(workflowName)
     wf_logger.Debug().Msg("Testing argo name")
-    _ = stepMax
     if conf.GetConfig().KubeHost == "" {
         // Not in a k8s environment, get conf from parameters
         fmt.Println("Executes outside of k8s")
-        executeOutside(argoFilePath, stepMax, builder.Workflow)
+        executeOutside(argoFilePath, builder.Workflow)
     } else {
         // Executed in a k8s environment
         fmt.Println("Executes inside a k8s")
         // executeInside(exec.GetID(), "argo", argo_file_path, stepMax) // commenting to use conf.ExecutionID instead of exec.GetID()
-        executeInside(conf.GetConfig().ExecutionID, conf.GetConfig().ExecutionID, argoFilePath, stepMax)
+        executeInside(conf.GetConfig().ExecutionID, conf.GetConfig().ExecutionID, argoFilePath)
     }
 }

 // So far we only log the output from
-func executeInside(execID string, ns string, argo_file_path string, stepMax int) {
+func executeInside(execID string, ns string, argo_file_path string) {
     t, err := tools2.NewService(conf.GetConfig().Mode)
     if err != nil {
         logger.Error().Msg("Could not create KubernetesTool")
@@ -128,14 +124,20 @@ func executeInside(execID string, ns string, argo_file_path string, stepMax int)
     }
     name, err := t.CreateArgoWorkflow(argo_file_path, ns)
-    _ = name
     if err != nil {
         logger.Error().Msg("Could not create argo workflow : " + err.Error())
+        fmt.Println("CA :" + conf.GetConfig().KubeCA)
+        fmt.Println("Cert :" + conf.GetConfig().KubeCert)
+        fmt.Println("Data :" + conf.GetConfig().KubeData)
         return
     } else {
-        argoLogs := models.NewArgoLogs(workflowName, "argo", stepMax)
-        argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
-        err := t.LogWorkflow(execID, ns, name, argo_file_path, stepMax, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, logWorkflow)
+        watcher, err := t.GetArgoWatch(execID, workflowName)
+        if err != nil {
+            logger.Error().Msg("Could not retrieve Watcher : " + err.Error())
+        }
+        l.LogKubernetesArgo(name, execID, watcher)
         if err != nil {
             logger.Error().Msg("Could not log workflow : " + err.Error())
         }
@@ -143,11 +145,9 @@ func executeInside(execID string, ns string, argo_file_path string, stepMax int)
 }

-func executeOutside(argo_file_path string, stepMax int, workflow workflow_builder.Workflow) {
-    // var stdoutSubmit, stderrSubmit, stdout_logs, stderr_logs io.ReadCloser
+func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
     var stdoutSubmit, stderrSubmit io.ReadCloser
     var stdoutLogs, stderrLogs io.ReadCloser
-    // var stderr io.ReadCloser
     var wg sync.WaitGroup
     var err error
@@ -159,28 +159,19 @@ func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
         return
     }
-    // //======== Code block that implemented a method that logs both locally and container executed wf
-    // // Need to be improved, did not log well for local executions
-    // split := strings.Split(argo_file_path, "_")
-    // argoLogs := models.NewArgoLogs(split[0], conf.GetConfig().ExecutionID, stepMax)
-    // argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
-    // argoLogs.IsStreaming = true // Used to determine wether or not the logs are read from a docker container or on localhost
-    // // go logWorkflow(argo_file_path, stepMax, stdout, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, &wg)
-    // // =======
-    var steps []string
-    for _, template := range workflow.Spec.Templates {
-        steps = append(steps, template.Name)
-    }
     cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow", "--no-color")
     if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
         wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs'" + err.Error())
         return
     }
-    go models.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
-    go models.LogPods(workflowName, stdoutLogs, steps, &wg)
+    var steps []string
+    for _, template := range workflow.Spec.Templates {
+        steps = append(steps, template.Name)
+    }
+    go l.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
+    go l.LogLocalPod(workflowName, stdoutLogs, steps, &wg)

     fmt.Println("Starting argo submit")
     if err := cmdSubmit.Start(); err != nil {
@@ -209,68 +200,6 @@ func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
     wg.Wait()
 }

-// !!!! BUGGED !!!!
-// Should be refactored to create a function dedicated to logging output from execution in a container
-// LogLocalWorkflow() has been implemented to be used when oc-monitord is executed locally
-// We could improve this function by creating an object with the same attribute as the output
-// and only send a new log if the current object has different values than the previous
-func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser,
-    current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
-    argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup) {
-    scanner := bufio.NewScanner(pipe)
-    count := 0
-    see := ""
-    seeit := 0
-    for scanner.Scan() {
-        log := scanner.Text()
-        if strings.Contains(log, "capturing logs") && count == 0 {
-            if !argoLogs.IsStreaming {
-                wg.Add(1)
-            }
-            seeit++
-        } else if count == 0 && !argoLogs.IsStreaming {
-            break
-        }
-        if count == 1 {
-            see = log
-            if slices.Contains(argoLogs.Seen, see) && !argoLogs.IsStreaming {
-                wg.Done()
-                seeit--
-                break
-            }
-        }
-        if !slices.Contains(current_watch.Logs, log) {
-            current_watch.Logs = append(current_watch.Logs, strings.ReplaceAll(log, "\"", ""))
-        }
-        count++
-        if strings.Contains(log, "sub-process exited") || argoLogs.IsStreaming {
-            current_watch = argoLogs.StopStepRecording(current_watch)
-            argoLogs.Seen = append(argoLogs.Seen, see)
-            if checkStatus(current_watch, previous_watch, argoLogs) {
-                count = 0
-                if !argoLogs.IsStreaming {
-                    wg.Done()
-                }
-                seeit--
-            }
-            jsonified, err := json.Marshal(current_watch)
-            if err != nil {
-                logger.Error().Msg("Could not create watch log")
-            }
-            if current_watch.Status == "Failed" {
-                wf_logger.Error().Msg(string(jsonified))
-            } else {
-                wf_logger.Info().Msg(string(jsonified))
-            }
-            previous_watch = current_watch
-            current_watch = &models.ArgoWatch{}
-            if argoLogs.IsStreaming {
-                current_watch.Logs = []string{}
-            }
-        }
-    }
-}

 func loadConfig(is_k8s bool, parser *argparse.Parser) {
     var o *onion.Onion
@@ -375,26 +304,6 @@ func getContainerName(argo_file string) string {
     return container_name
 }

-// Uses the ArgoWatch object to update status of the workflow execution object
-func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch, argoLogs *models.ArgoLogs) bool {
-    if previous == nil || current.Status != previous.Status || argoLogs.IsStreaming {
-        argoLogs.StepCount += 1
-        if len(current.Logs) > 0 {
-            newLogs := []string{}
-            for _, log := range current.Logs {
-                if !slices.Contains(argoLogs.Logs, log) {
-                    newLogs = append(newLogs, log)
-                }
-            }
-            updateStatus(current.Status, strings.Join(newLogs, "\n"))
-            current.Logs = newLogs
-            argoLogs.Logs = append(argoLogs.Logs, newLogs...)
-        } else {
-            updateStatus(current.Status, "")
-        }
-    }
-    return previous == nil || current.Status != previous.Status || argoLogs.IsStreaming
-}

 func updateStatus(status string, log string) {
     exec_id := conf.GetConfig().ExecutionID

View File

@@ -1,161 +0,0 @@
package models

import (
    "encoding/json"
    "fmt"
    "strconv"
    "strings"
    "time"

    "github.com/acarl005/stripansi"
    "github.com/rs/zerolog"
)

// An object to monitor the logs generated by a specific pod from a workflow execution
type ArgoWatch struct {
    Name      string
    Namespace string
    Status    string
    Conditions
    Created  string
    Started  string
    Duration string
    Progress string
    Logs     []string
}

type Conditions struct {
    PodRunning bool
    Completed  bool
}

func (a *ArgoWatch) Equals(arg *ArgoWatch) bool {
    if arg == nil {
        return false
    }
    return a.Status == arg.Status && a.Progress == arg.Progress && a.Conditions.PodRunning == arg.Conditions.PodRunning && a.Conditions.Completed == arg.Conditions.Completed
}

func NewArgoLogs(name string, namespace string, stepMax int) *ArgoLogs {
    return &ArgoLogs{
        Name:        "oc-monitor-" + name,
        Namespace:   namespace,
        CreatedDate: time.Now().Format("2006-01-02 15:04:05"),
        StepCount:   0,
        StepMax:     stepMax,
        stop:        false,
        Seen:        []string{},
    }
}

// An object to monitor and log the output of an argo submit
type ArgoLogs struct {
    Name        string
    Namespace   string
    CreatedDate string
    StepCount   int
    StepMax     int
    stop        bool
    Started     time.Time
    Seen        []string
    Logs        []string
    IsStreaming bool
}

func (a *ArgoLogs) NewWatch() *ArgoWatch {
    return &ArgoWatch{
        Name:      a.Name,
        Namespace: a.Namespace,
        Status:    "Pending",
        Created:   a.CreatedDate,
        Started:   a.Started.Format("2006-01-02 15:04:05"),
        Conditions: Conditions{
            PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
            Completed:  a.StepCount == a.StepMax,
        },
        Progress: fmt.Sprintf("%v/%v", a.StepCount, a.StepMax),
        Duration: "0s",
        Logs:     []string{},
    }
}

func (a *ArgoLogs) StartStepRecording(current_watch *ArgoWatch, logger zerolog.Logger) {
    jsonified, _ := json.Marshal(current_watch)
    logger.Info().Msg(string(jsonified))
    a.StepCount += 1
    a.Started = time.Now()
}

func (a *ArgoLogs) StopStepRecording(current *ArgoWatch) *ArgoWatch {
    fn := strings.Split(a.Name, "_")
    logs := []string{}
    err := false
    end := ""
    for _, input := range current.Logs {
        line := strings.TrimSpace(input)
        if line == "" || !strings.Contains(line, fn[0]) || !strings.Contains(line, ":") {
            continue
        }
        step := strings.Split(line, ":")
        if strings.Contains(line, "sub-process exited") {
            b := strings.Split(line, "time=\"")
            if len(b) > 1 {
                end = b[1][:19]
            }
        }
        if len(step) < 2 || strings.Contains(line, "time=") || strings.TrimSpace(strings.Join(step[1:], " : ")) == "" || strings.TrimSpace(strings.Join(step[1:], " : ")) == a.Name {
            continue
        }
        log := stripansi.Strip(strings.TrimSpace(strings.Join(step[1:], " : ")))
        t, e := strconv.Unquote(log)
        if e == nil {
            logs = append(logs, t)
        } else {
            logs = append(logs, strings.ReplaceAll(log, "\"", "`"))
        }
        if strings.Contains(logs[len(logs)-1], "Error") {
            err = true
        }
    }
    status := "Pending"
    if a.StepCount > 0 {
        status = "Running"
    }
    if a.StepCount == a.StepMax {
        if err {
            status = "Failed"
        } else {
            status = "Succeeded"
        }
    }
    duration := float64(0)
    if end != "" {
        timeE, _ := time.Parse("2006-01-02T15:04:05", end)
        duration = timeE.Sub(a.Started).Seconds()
    }
    current.Conditions = Conditions{
        PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
        Completed:  a.StepCount == a.StepMax,
    }
    current.Progress = fmt.Sprintf("%v/%v", a.StepCount, a.StepMax)
    current.Duration = fmt.Sprintf("%v", fmt.Sprintf("%.2f", duration)+"s")
    current.Status = status
    return current
}

type ArgoPodLog struct {
    PodName string
    Step    string
    Message string
}

func NewArgoPodLog(name string, step string, msg string) ArgoPodLog {
    return ArgoPodLog{
        PodName: name,
        Step:    step,
        Message: msg,
    }
}

View File

@@ -3,17 +3,15 @@ package tools
 import (
     "errors"
     "io"
-    "oc-monitord/models"
-    "sync"
+
+    "k8s.io/apimachinery/pkg/watch"
 )

 type Tool interface {
     CreateArgoWorkflow(path string, ns string) (string, error)
     CreateAccessSecret(ns string, login string, password string) (string, error)
-    LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
-        argoLogs *models.ArgoLogs, seen []string,
-        logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
-            argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error
+    GetArgoWatch(executionId string, wfName string) (watch.Interface, error)
+    GetPodLogger(ns string, wfName string, podName string) (io.ReadCloser, error)
 }

 var _service = map[string]func() (Tool, error){

View File

@@ -7,21 +7,18 @@ import (
     "fmt"
     "io"
     "oc-monitord/conf"
-    "oc-monitord/models"
     "oc-monitord/utils"
     "os"
-    "sync"
     "time"

-    "cloud.o-forge.io/core/oc-lib/models/common/enum"
     wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
     "github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
     "github.com/google/uuid"
-    corev1 "k8s.io/api/core/v1"
     v1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     "k8s.io/apimachinery/pkg/runtime"
     "k8s.io/apimachinery/pkg/runtime/serializer"
+    "k8s.io/apimachinery/pkg/watch"
     "k8s.io/client-go/kubernetes"
     "k8s.io/client-go/rest"
 )
@@ -59,73 +56,6 @@ func NewKubernetesTool() (Tool, error) {
     }, nil
 }

-func (k *KubernetesTools) LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
-    seen []string, logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
-    exec := utils.GetExecution(execID)
-    if exec == nil {
-        return errors.New("Could not retrieve workflow ID from execution ID " + execID)
-    }
-    if exec.State == enum.DRAFT || exec.State == enum.FAILURE || exec.State == enum.SUCCESS {
-        l := utils.GetWFLogger("")
-        l.Error().Msg("The execution's state doesn't meet requirement, state is : " + exec.State.String())
-        return nil
-    }
-    k.logWorkflow(namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
-    return k.LogWorkflow(execID, namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
-}
-
-func (k *KubernetesTools) logWorkflow(namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
-    seen []string,
-    logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
-    // List pods related to the Argo workflow
-    fmt.Println("\n!!!!!!!! !!!!!!!!!! !!!!!!!! &&&& & STARTING LOG\n\n")
-    labelSelector := fmt.Sprintf("workflows.argoproj.io/workflow=%s", workflowName)
-    for retries := 0; retries < 10; retries++ { // Retry for up to ~20 seconds
-        // List workflow pods
-        wfPods, err := k.Set.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
-            LabelSelector: labelSelector,
-        })
-        if err != nil {
-            return err
-        }
-        // If we found pods, stream logs
-        if len(wfPods.Items) > 0 {
-            var wg sync.WaitGroup
-            // Stream logs from all matching pods
-            for _, pod := range wfPods.Items {
-                for _, container := range pod.Spec.Containers {
-                    wg.Add(1)
-                    go k.streamLogs(namespace, pod.Name, container.Name, argoFilePath, stepMax, &wg, current_watch, previous_watch, argoLogs, seen, logFunc)
-                }
-            }
-            wg.Wait()
-            return nil
-        }
-        time.Sleep(2 * time.Second) // Wait before retrying
-    }
-    return errors.New("no pods found for the workflow")
-}
-
-// Function to stream logs
-func (k *KubernetesTools) streamLogs(namespace string, podName string, containerName string,
-    argoFilePath string, stepMax int, wg *sync.WaitGroup, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string,
-    logFunc func(argo_file_path string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) {
-    req := k.Set.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
-        Container: containerName, // Main container
-        Follow:    true,          // Equivalent to -f flag in kubectl logs
-    })
-    defer wg.Done()
-    // Open stream
-    stream, err := req.Stream(context.Background())
-    if err != nil {
-        return
-    }
-    defer stream.Close()
-    var internalWg sync.WaitGroup
-    logFunc(argoFilePath, stepMax, stream, current_watch, previous_watch, argoLogs, seen, &internalWg)
-    internalWg.Wait()
-}

 func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, error) {
     // Read workflow YAML file
     workflowYAML, err := os.ReadFile(path)
@@ -149,7 +79,7 @@ func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, error) {
     }
     // Create the workflow in the "argo" namespace
-    createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.Background(), workflow, metav1.CreateOptions{})
+    createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.TODO(), workflow, metav1.CreateOptions{})
     if err != nil {
         return "", errors.New("failed to create workflow: " + err.Error())
     }
@@ -177,9 +107,81 @@ func (k *KubernetesTools) CreateAccessSecret(ns string, login string, password string) (string, error) {
         Data: secretData,
     }
     // Create the Secret in Kubernetes
-    _, err := k.Set.CoreV1().Secrets(namespace).Create(context.Background(), secret, metav1.CreateOptions{})
+    _, err := k.Set.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
     if err != nil {
         return "", errors.New("Error creating secret: " + err.Error())
     }
     return name, nil
 }
+
+func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch.Interface, error) {
+    wfl := utils.GetWFLogger("")
+    wfl.Debug().Msg("Starting argo watch with argo lib")
+    fmt.Println("metadata.name=oc-monitor-" + wfName + " in namespace : " + executionId)
+    options := metav1.ListOptions{FieldSelector: "metadata.name=oc-monitor-" + wfName}
+    fmt.Println(options)
+    watcher, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(executionId).Watch(context.TODO(), options)
+    if err != nil {
+        return nil, errors.New("error executing 'argo watch " + wfName + " -n " + executionId + "' with the ArgoprojV1alpha1 client")
+    }
+    return watcher, nil
+}
+
+func (k *KubernetesTools) GetPodLogger(ns string, wfName string, nodeName string) (io.ReadCloser, error) {
+    var targetPod v1.Pod
+    pods, err := k.Set.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{
+        LabelSelector: "workflows.argoproj.io/workflow=" + wfName,
+    })
+    if err != nil {
+        return nil, fmt.Errorf("failed to list pods: %s", err.Error())
+    }
+    if len(pods.Items) == 0 {
+        return nil, fmt.Errorf("no pods found with label workflows.argoproj.io/workflow=%s", wfName)
+    }
+    for _, pod := range pods.Items {
+        if pod.Annotations["workflows.argoproj.io/node-name"] == nodeName {
+            targetPod = pod
+        }
+    }
+    // The k8s API throws an error if we request logs while the containers are not initialized, so we poll readiness first
+    k.testPodReady(targetPod, ns)
+    // `kubectl logs` for a pod contacts /api/v1/namespaces/NAMESPACE/pods/PODNAME/log?container=main, so we pass Container: "main" in the call as well
+    req, err := k.Set.CoreV1().Pods(ns).GetLogs(targetPod.Name, &v1.PodLogOptions{Follow: true, Container: "main"}).Stream(context.Background())
+    if err != nil {
+        return nil, fmt.Errorf("error when trying to get logs for %s: %s", targetPod.Name, err.Error())
+    }
+    return req, nil
+}
+
+func (k *KubernetesTools) testPodReady(pod v1.Pod, ns string) {
+    for {
+        pod, err := k.Set.CoreV1().Pods(ns).Get(context.Background(), pod.Name, metav1.GetOptions{})
+        if err != nil {
+            wfl := utils.GetWFLogger("")
+            wfl.Error().Msg("Error fetching pod: " + err.Error() + "\n")
+            break
+        }
+        for _, cond := range pod.Status.Conditions {
+            if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
+                return
+            }
+        }
+        time.Sleep(2 * time.Second) // avoid hammering the API
+    }
+}