Modified how logging with the monitord container is implemented, with simpler logic thanks to the Argo client library and the k8s client-go library for pods' logs
This commit is contained in:
parent
6f7acee2df
commit
fb8d994be3
219
logger/argo_logs.go
Normal file
219
logger/argo_logs.go
Normal file
@ -0,0 +1,219 @@
|
|||||||
|
package logger
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"oc-monitord/tools"
|
||||||
|
"oc-monitord/utils"
|
||||||
|
"slices"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/rs/zerolog"
|
||||||
|
"k8s.io/apimachinery/pkg/watch"
|
||||||
|
|
||||||
|
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
|
||||||
|
)
|
||||||
|
|
||||||
|
// An object to monitor the logs generated by a specific pod from a workflow execution
type ArgoWatch struct {
	Name      string // name of the workflow node being watched
	Namespace string // Kubernetes namespace the workflow runs in
	Status    string // current phase reported by Argo (e.g. "Pending", "Running")
	Conditions
	Created  string // creation timestamp, formatted as a string
	Started  string // start timestamp, formatted as a string
	Duration string // elapsed time since the node started, as a string
	Progress string // Argo progress indicator, e.g. "2/5"
	Logs     []string // log lines collected for this node
}
|
||||||
|
|
||||||
|
// Conditions mirrors the workflow-level condition flags reported by Argo:
// whether a pod is currently running and whether the workflow has completed.
type Conditions struct {
	PodRunning bool
	Completed  bool
}
|
||||||
|
|
||||||
|
func (a *ArgoWatch) Equals(arg *ArgoWatch) bool {
|
||||||
|
if arg == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return a.Status == arg.Status && a.Progress == arg.Progress && a.Conditions.PodRunning == arg.Conditions.PodRunning && a.Conditions.Completed == arg.Conditions.Completed
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewArgoLogs(name string, namespace string, stepMax int) *ArgoLogs {
|
||||||
|
return &ArgoLogs{
|
||||||
|
Name: "oc-monitor-" + name,
|
||||||
|
Namespace: namespace,
|
||||||
|
CreatedDate: time.Now().Format("2006-01-02 15:04:05"),
|
||||||
|
StepCount: 0,
|
||||||
|
StepMax: stepMax,
|
||||||
|
stop: false,
|
||||||
|
Seen: []string{},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// An object to monitor and log the output of an argo submit
type ArgoLogs struct {
	Name        string // workflow name, prefixed with "oc-monitor-"
	Namespace   string // Kubernetes namespace of the execution
	CreatedDate string // creation timestamp, formatted "2006-01-02 15:04:05"
	StepCount   int    // number of steps started so far
	StepMax     int    // total number of steps expected in the workflow
	stop        bool   // internal stop flag
	Started     time.Time // time the current step started
	Seen        []string  // step identifiers already processed
	Logs        []string  // log lines collected so far
	IsStreaming bool      // whether logs are read from a container stream rather than localhost
}
|
||||||
|
|
||||||
|
func (a *ArgoLogs) NewWatch() *ArgoWatch {
|
||||||
|
return &ArgoWatch{
|
||||||
|
Name: a.Name,
|
||||||
|
Namespace: a.Namespace,
|
||||||
|
Status: "Pending",
|
||||||
|
Created: a.CreatedDate,
|
||||||
|
Started: a.Started.Format("2006-01-02 15:04:05"),
|
||||||
|
Conditions: Conditions{
|
||||||
|
PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
|
||||||
|
Completed: a.StepCount == a.StepMax,
|
||||||
|
},
|
||||||
|
Progress: fmt.Sprintf("%v/%v", a.StepCount, a.StepMax),
|
||||||
|
Duration: "0s",
|
||||||
|
Logs: []string{},
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *ArgoLogs) StartStepRecording(current_watch *ArgoWatch, logger zerolog.Logger) {
|
||||||
|
jsonified, _ := json.Marshal(current_watch)
|
||||||
|
logger.Info().Msg(string(jsonified))
|
||||||
|
a.StepCount += 1
|
||||||
|
a.Started = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ArgoPodLog is the JSON-serializable record emitted for each log line
// read from a workflow pod.
type ArgoPodLog struct {
	PodName string // workflow name the pod belongs to
	Step    string // pod (step) name the line came from
	Message string // raw log line
}
|
||||||
|
|
||||||
|
func NewArgoPodLog(name string, step string, msg string) ArgoPodLog {
|
||||||
|
return ArgoPodLog{
|
||||||
|
PodName: name,
|
||||||
|
Step: step,
|
||||||
|
Message: msg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func LogKubernetesArgo(wfName string, executionID string, watcher watch.Interface) {
|
||||||
|
var argoWatcher *ArgoWatch
|
||||||
|
var pods []string
|
||||||
|
var node wfv1.NodeStatus
|
||||||
|
|
||||||
|
wfl := utils.GetWFLogger("")
|
||||||
|
|
||||||
|
for event := range (watcher.ResultChan()) {
|
||||||
|
wf, ok := event.Object.(*wfv1.Workflow)
|
||||||
|
if !ok {
|
||||||
|
wfl.Error().Msg("unexpected type")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(wf.Status.Nodes) == 0 {
|
||||||
|
wfl.Debug().Msg("No node status yet") // The first output of the channel doesn't contain Nodes so we skip it
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
conditions := retrieveCondition(wf)
|
||||||
|
|
||||||
|
// Retrieving the Status for the main node, which is named after the workflow
|
||||||
|
if node, ok = wf.Status.Nodes[wfName]; !ok {
|
||||||
|
bytified, _ := json.MarshalIndent(wf.Status.Nodes,"","\t")
|
||||||
|
wfl.Fatal().Msg("Could not find the " + wfName + " node in \n" + string(bytified))
|
||||||
|
}
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
start, _ := time.Parse(time.RFC3339, node.StartedAt.String() )
|
||||||
|
duration := now.Sub(start)
|
||||||
|
|
||||||
|
newWatcher := ArgoWatch{
|
||||||
|
Name: node.Name,
|
||||||
|
Namespace: executionID,
|
||||||
|
Status: string(node.Phase),
|
||||||
|
Created: node.StartedAt.String(),
|
||||||
|
Started: node.StartedAt.String(),
|
||||||
|
Progress: string(node.Progress),
|
||||||
|
Duration: duration.String(),
|
||||||
|
Conditions: conditions,
|
||||||
|
}
|
||||||
|
|
||||||
|
if argoWatcher == nil {
|
||||||
|
argoWatcher = &newWatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
if !newWatcher.Equals(argoWatcher){
|
||||||
|
jsonified, _ := json.Marshal(newWatcher)
|
||||||
|
wfl.Info().Msg(string(jsonified))
|
||||||
|
argoWatcher = &newWatcher
|
||||||
|
}
|
||||||
|
|
||||||
|
// I don't think we need to use WaitGroup here, because the loop itself
|
||||||
|
// acts as blocking process for the main thread, because Argo watch never closes the channel
|
||||||
|
for _, pod := range wf.Status.Nodes{
|
||||||
|
if !slices.Contains(pods,pod.Name){
|
||||||
|
pl := wfl.With().Str("pod", pod.Name).Logger()
|
||||||
|
if wfName == pod.Name { pods = append(pods, pod.Name); continue } // One of the node is the Workflow, the others are the pods so don't try to log on the wf name
|
||||||
|
go logKubernetesPods(executionID, wfName, pod.Name, pl)
|
||||||
|
pods = append(pods, pod.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop listening to the chan when the Workflow is completed or something bad happened
|
||||||
|
if node.Phase.Completed() {
|
||||||
|
wfl.Info().Msg(wfName + " worflow completed")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if node.Phase.FailedOrError() {
|
||||||
|
wfl.Error().Msg(wfName + "has failed, please refer to the logs")
|
||||||
|
wfl.Error().Msg(node.Message)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func retrieveCondition(wf *wfv1.Workflow) (c Conditions) {
|
||||||
|
for _, cond := range wf.Status.Conditions {
|
||||||
|
if cond.Type == "PodRunning" {
|
||||||
|
c.PodRunning = cond.Status == "True"
|
||||||
|
}
|
||||||
|
if cond.Type == "Completed" {
|
||||||
|
c.Completed = cond.Status == "True"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function needed to be executed as a go thread
|
||||||
|
func logKubernetesPods(executionId string, wfName string,podName string, logger zerolog.Logger){
|
||||||
|
k, err := tools.NewKubernetesTool()
|
||||||
|
if err != nil {
|
||||||
|
logger.Error().Msg("Could not get Kubernetes tools")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, err := k.GetPodLogger(executionId, wfName, podName)
|
||||||
|
if err != nil {
|
||||||
|
logger.Error().Msg(err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
scanner := bufio.NewScanner(reader)
|
||||||
|
for scanner.Scan() {
|
||||||
|
log := scanner.Text()
|
||||||
|
podLog := NewArgoPodLog(wfName,podName,log)
|
||||||
|
jsonified, _ := json.Marshal(podLog)
|
||||||
|
logger.Info().Msg(string(jsonified))
|
||||||
|
}
|
||||||
|
}
|
137
main.go
137
main.go
@ -3,19 +3,17 @@ package main
|
|||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"regexp"
|
"regexp"
|
||||||
"slices"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"oc-monitord/conf"
|
"oc-monitord/conf"
|
||||||
"oc-monitord/models"
|
l "oc-monitord/logger"
|
||||||
u "oc-monitord/utils"
|
u "oc-monitord/utils"
|
||||||
"oc-monitord/workflow_builder"
|
"oc-monitord/workflow_builder"
|
||||||
|
|
||||||
@ -45,7 +43,6 @@ import (
|
|||||||
|
|
||||||
var logger zerolog.Logger
|
var logger zerolog.Logger
|
||||||
var wf_logger zerolog.Logger
|
var wf_logger zerolog.Logger
|
||||||
var pods_logger zerolog.Logger
|
|
||||||
var parser argparse.Parser
|
var parser argparse.Parser
|
||||||
var workflowName string
|
var workflowName string
|
||||||
|
|
||||||
@ -90,7 +87,7 @@ func main() {
|
|||||||
logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
|
logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
|
||||||
}
|
}
|
||||||
|
|
||||||
builder, stepMax, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout)
|
builder, _, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout) // Removed stepMax so far, I don't know if we need it anymore
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
|
logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
|
||||||
logger.Error().Msg(err.Error())
|
logger.Error().Msg(err.Error())
|
||||||
@ -105,22 +102,21 @@ func main() {
|
|||||||
|
|
||||||
wf_logger := u.GetWFLogger(workflowName)
|
wf_logger := u.GetWFLogger(workflowName)
|
||||||
wf_logger.Debug().Msg("Testing argo name")
|
wf_logger.Debug().Msg("Testing argo name")
|
||||||
_ = stepMax
|
|
||||||
|
|
||||||
if conf.GetConfig().KubeHost == "" {
|
if conf.GetConfig().KubeHost == "" {
|
||||||
// Not in a k8s environment, get conf from parameters
|
// Not in a k8s environment, get conf from parameters
|
||||||
fmt.Println("Executes outside of k8s")
|
fmt.Println("Executes outside of k8s")
|
||||||
executeOutside(argoFilePath, stepMax, builder.Workflow)
|
executeOutside(argoFilePath, builder.Workflow)
|
||||||
} else {
|
} else {
|
||||||
// Executed in a k8s environment
|
// Executed in a k8s environment
|
||||||
fmt.Println("Executes inside a k8s")
|
fmt.Println("Executes inside a k8s")
|
||||||
// executeInside(exec.GetID(), "argo", argo_file_path, stepMax) // commenting to use conf.ExecutionID instead of exec.GetID()
|
// executeInside(exec.GetID(), "argo", argo_file_path, stepMax) // commenting to use conf.ExecutionID instead of exec.GetID()
|
||||||
executeInside(conf.GetConfig().ExecutionID, conf.GetConfig().ExecutionID, argoFilePath, stepMax)
|
executeInside(conf.GetConfig().ExecutionID, conf.GetConfig().ExecutionID, argoFilePath)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// So far we only log the output from
|
// So far we only log the output from
|
||||||
func executeInside(execID string, ns string, argo_file_path string, stepMax int) {
|
func executeInside(execID string, ns string, argo_file_path string) {
|
||||||
t, err := tools2.NewService(conf.GetConfig().Mode)
|
t, err := tools2.NewService(conf.GetConfig().Mode)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error().Msg("Could not create KubernetesTool")
|
logger.Error().Msg("Could not create KubernetesTool")
|
||||||
@ -128,14 +124,20 @@ func executeInside(execID string, ns string, argo_file_path string, stepMax int)
|
|||||||
}
|
}
|
||||||
|
|
||||||
name, err := t.CreateArgoWorkflow(argo_file_path, ns)
|
name, err := t.CreateArgoWorkflow(argo_file_path, ns)
|
||||||
|
_ = name
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error().Msg("Could not create argo workflow : " + err.Error())
|
logger.Error().Msg("Could not create argo workflow : " + err.Error())
|
||||||
|
fmt.Println("CA :" + conf.GetConfig().KubeCA)
|
||||||
|
fmt.Println("Cert :" + conf.GetConfig().KubeCert)
|
||||||
|
fmt.Println("Data :" + conf.GetConfig().KubeData)
|
||||||
return
|
return
|
||||||
} else {
|
} else {
|
||||||
argoLogs := models.NewArgoLogs(workflowName, "argo", stepMax)
|
watcher, err := t.GetArgoWatch(execID, workflowName)
|
||||||
argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
|
if err != nil {
|
||||||
err := t.LogWorkflow(execID, ns, name, argo_file_path, stepMax, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, logWorkflow)
|
logger.Error().Msg("Could not retrieve Watcher : " + err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
l.LogKubernetesArgo(name, execID, watcher)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error().Msg("Could not log workflow : " + err.Error())
|
logger.Error().Msg("Could not log workflow : " + err.Error())
|
||||||
}
|
}
|
||||||
@ -143,11 +145,9 @@ func executeInside(execID string, ns string, argo_file_path string, stepMax int)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func executeOutside(argo_file_path string, stepMax int, workflow workflow_builder.Workflow) {
|
func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
|
||||||
// var stdoutSubmit, stderrSubmit, stdout_logs, stderr_logs io.ReadCloser
|
|
||||||
var stdoutSubmit, stderrSubmit io.ReadCloser
|
var stdoutSubmit, stderrSubmit io.ReadCloser
|
||||||
var stdoutLogs, stderrLogs io.ReadCloser
|
var stdoutLogs, stderrLogs io.ReadCloser
|
||||||
// var stderr io.ReadCloser
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
@ -159,28 +159,19 @@ func executeOutside(argo_file_path string, stepMax int, workflow workflow_builde
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// //======== Code block that implemented a method that logs both locally and container executed wf
|
|
||||||
// // Need to be improved, did not log well for local executions
|
|
||||||
// split := strings.Split(argo_file_path, "_")
|
|
||||||
// argoLogs := models.NewArgoLogs(split[0], conf.GetConfig().ExecutionID, stepMax)
|
|
||||||
// argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
|
|
||||||
// argoLogs.IsStreaming = true // Used to determine wether or not the logs are read from a docker container or on localhost
|
|
||||||
// // go logWorkflow(argo_file_path, stepMax, stdout, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, &wg)
|
|
||||||
// // =======
|
|
||||||
|
|
||||||
var steps []string
|
|
||||||
for _, template := range workflow.Spec.Templates {
|
|
||||||
steps = append(steps, template.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow","--no-color")
|
cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow","--no-color")
|
||||||
if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
|
if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
|
||||||
wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs'" + err.Error())
|
wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs'" + err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
go models.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
|
var steps []string
|
||||||
go models.LogPods(workflowName, stdoutLogs, steps, &wg)
|
for _, template := range workflow.Spec.Templates {
|
||||||
|
steps = append(steps, template.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
go l.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
|
||||||
|
go l.LogLocalPod(workflowName, stdoutLogs, steps, &wg)
|
||||||
|
|
||||||
fmt.Println("Starting argo submit")
|
fmt.Println("Starting argo submit")
|
||||||
if err := cmdSubmit.Start(); err != nil {
|
if err := cmdSubmit.Start(); err != nil {
|
||||||
@ -209,68 +200,6 @@ func executeOutside(argo_file_path string, stepMax int, workflow workflow_builde
|
|||||||
wg.Wait()
|
wg.Wait()
|
||||||
}
|
}
|
||||||
|
|
||||||
// !!!! BUGGED !!!!
|
|
||||||
// Should be refactored to create a function dedicated to logging output from execution in a container
|
|
||||||
// LogLocalWorkflow() has been implemented to be used when oc-monitord is executed locally
|
|
||||||
|
|
||||||
// We could improve this function by creating an object with the same attribute as the output
|
|
||||||
// and only send a new log if the current object has different values than the previous
|
|
||||||
func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser,
|
|
||||||
current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
|
|
||||||
argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup) {
|
|
||||||
scanner := bufio.NewScanner(pipe)
|
|
||||||
count := 0
|
|
||||||
see := ""
|
|
||||||
seeit := 0
|
|
||||||
for scanner.Scan() {
|
|
||||||
log := scanner.Text()
|
|
||||||
if strings.Contains(log, "capturing logs") && count == 0 {
|
|
||||||
if !argoLogs.IsStreaming {
|
|
||||||
wg.Add(1)
|
|
||||||
}
|
|
||||||
seeit++
|
|
||||||
} else if count == 0 && !argoLogs.IsStreaming {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
if count == 1 {
|
|
||||||
see = log
|
|
||||||
if slices.Contains(argoLogs.Seen, see) && !argoLogs.IsStreaming {
|
|
||||||
wg.Done()
|
|
||||||
seeit--
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !slices.Contains(current_watch.Logs, log) {
|
|
||||||
current_watch.Logs = append(current_watch.Logs, strings.ReplaceAll(log, "\"", ""))
|
|
||||||
}
|
|
||||||
count++
|
|
||||||
if strings.Contains(log, "sub-process exited") || argoLogs.IsStreaming {
|
|
||||||
current_watch = argoLogs.StopStepRecording(current_watch)
|
|
||||||
argoLogs.Seen = append(argoLogs.Seen, see)
|
|
||||||
if checkStatus(current_watch, previous_watch, argoLogs) {
|
|
||||||
count = 0
|
|
||||||
if !argoLogs.IsStreaming {
|
|
||||||
wg.Done()
|
|
||||||
}
|
|
||||||
seeit--
|
|
||||||
}
|
|
||||||
jsonified, err := json.Marshal(current_watch)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error().Msg("Could not create watch log")
|
|
||||||
}
|
|
||||||
if current_watch.Status == "Failed" {
|
|
||||||
wf_logger.Error().Msg(string(jsonified))
|
|
||||||
} else {
|
|
||||||
wf_logger.Info().Msg(string(jsonified))
|
|
||||||
}
|
|
||||||
previous_watch = current_watch
|
|
||||||
current_watch = &models.ArgoWatch{}
|
|
||||||
if argoLogs.IsStreaming {
|
|
||||||
current_watch.Logs = []string{}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func loadConfig(is_k8s bool, parser *argparse.Parser) {
|
func loadConfig(is_k8s bool, parser *argparse.Parser) {
|
||||||
var o *onion.Onion
|
var o *onion.Onion
|
||||||
@ -375,26 +304,6 @@ func getContainerName(argo_file string) string {
|
|||||||
return container_name
|
return container_name
|
||||||
}
|
}
|
||||||
|
|
||||||
// Uses the ArgoWatch object to update status of the workflow execution object
|
|
||||||
func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch, argoLogs *models.ArgoLogs) bool {
|
|
||||||
if previous == nil || current.Status != previous.Status || argoLogs.IsStreaming {
|
|
||||||
argoLogs.StepCount += 1
|
|
||||||
if len(current.Logs) > 0 {
|
|
||||||
newLogs := []string{}
|
|
||||||
for _, log := range current.Logs {
|
|
||||||
if !slices.Contains(argoLogs.Logs, log) {
|
|
||||||
newLogs = append(newLogs, log)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
updateStatus(current.Status, strings.Join(newLogs, "\n"))
|
|
||||||
current.Logs = newLogs
|
|
||||||
argoLogs.Logs = append(argoLogs.Logs, newLogs...)
|
|
||||||
} else {
|
|
||||||
updateStatus(current.Status, "")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return previous == nil || current.Status != previous.Status || argoLogs.IsStreaming
|
|
||||||
}
|
|
||||||
|
|
||||||
func updateStatus(status string, log string) {
|
func updateStatus(status string, log string) {
|
||||||
exec_id := conf.GetConfig().ExecutionID
|
exec_id := conf.GetConfig().ExecutionID
|
||||||
|
@ -1,161 +0,0 @@
|
|||||||
package models
|
|
||||||
|
|
||||||
import (
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/acarl005/stripansi"
|
|
||||||
"github.com/rs/zerolog"
|
|
||||||
)
|
|
||||||
|
|
||||||
// An object to monitor the logs generated by a specific pod from a workflow execution
|
|
||||||
type ArgoWatch struct {
|
|
||||||
Name string
|
|
||||||
Namespace string
|
|
||||||
Status string
|
|
||||||
Conditions
|
|
||||||
Created string
|
|
||||||
Started string
|
|
||||||
Duration string
|
|
||||||
Progress string
|
|
||||||
Logs []string
|
|
||||||
}
|
|
||||||
|
|
||||||
type Conditions struct {
|
|
||||||
PodRunning bool
|
|
||||||
Completed bool
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *ArgoWatch) Equals(arg *ArgoWatch) bool {
|
|
||||||
if arg == nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return a.Status == arg.Status && a.Progress == arg.Progress && a.Conditions.PodRunning == arg.Conditions.PodRunning && a.Conditions.Completed == arg.Conditions.Completed
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewArgoLogs(name string, namespace string, stepMax int) *ArgoLogs {
|
|
||||||
return &ArgoLogs{
|
|
||||||
Name: "oc-monitor-" + name,
|
|
||||||
Namespace: namespace,
|
|
||||||
CreatedDate: time.Now().Format("2006-01-02 15:04:05"),
|
|
||||||
StepCount: 0,
|
|
||||||
StepMax: stepMax,
|
|
||||||
stop: false,
|
|
||||||
Seen: []string{},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// An object to monitor and log the output of an argo submit
|
|
||||||
type ArgoLogs struct {
|
|
||||||
Name string
|
|
||||||
Namespace string
|
|
||||||
CreatedDate string
|
|
||||||
StepCount int
|
|
||||||
StepMax int
|
|
||||||
stop bool
|
|
||||||
Started time.Time
|
|
||||||
Seen []string
|
|
||||||
Logs []string
|
|
||||||
IsStreaming bool
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *ArgoLogs) NewWatch() *ArgoWatch {
|
|
||||||
return &ArgoWatch{
|
|
||||||
Name: a.Name,
|
|
||||||
Namespace: a.Namespace,
|
|
||||||
Status: "Pending",
|
|
||||||
Created: a.CreatedDate,
|
|
||||||
Started: a.Started.Format("2006-01-02 15:04:05"),
|
|
||||||
Conditions: Conditions{
|
|
||||||
PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
|
|
||||||
Completed: a.StepCount == a.StepMax,
|
|
||||||
},
|
|
||||||
Progress: fmt.Sprintf("%v/%v", a.StepCount, a.StepMax),
|
|
||||||
Duration: "0s",
|
|
||||||
Logs: []string{},
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *ArgoLogs) StartStepRecording(current_watch *ArgoWatch, logger zerolog.Logger) {
|
|
||||||
jsonified, _ := json.Marshal(current_watch)
|
|
||||||
logger.Info().Msg(string(jsonified))
|
|
||||||
a.StepCount += 1
|
|
||||||
a.Started = time.Now()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *ArgoLogs) StopStepRecording(current *ArgoWatch) *ArgoWatch {
|
|
||||||
fn := strings.Split(a.Name, "_")
|
|
||||||
logs := []string{}
|
|
||||||
err := false
|
|
||||||
end := ""
|
|
||||||
for _, input := range current.Logs {
|
|
||||||
line := strings.TrimSpace(input)
|
|
||||||
if line == "" || !strings.Contains(line, fn[0]) || !strings.Contains(line, ":") {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
step := strings.Split(line, ":")
|
|
||||||
if strings.Contains(line, "sub-process exited") {
|
|
||||||
b := strings.Split(line, "time=\"")
|
|
||||||
if len(b) > 1 {
|
|
||||||
end = b[1][:19]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(step) < 2 || strings.Contains(line, "time=") || strings.TrimSpace(strings.Join(step[1:], " : ")) == "" || strings.TrimSpace(strings.Join(step[1:], " : ")) == a.Name {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
log := stripansi.Strip(strings.TrimSpace(strings.Join(step[1:], " : ")))
|
|
||||||
t, e := strconv.Unquote(log)
|
|
||||||
if e == nil {
|
|
||||||
logs = append(logs, t)
|
|
||||||
} else {
|
|
||||||
logs = append(logs, strings.ReplaceAll(log, "\"", "`"))
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.Contains(logs[len(logs)-1], "Error") {
|
|
||||||
err = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
status := "Pending"
|
|
||||||
if a.StepCount > 0 {
|
|
||||||
status = "Running"
|
|
||||||
}
|
|
||||||
if a.StepCount == a.StepMax {
|
|
||||||
if err {
|
|
||||||
status = "Failed"
|
|
||||||
} else {
|
|
||||||
status = "Succeeded"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
duration := float64(0)
|
|
||||||
if end != "" {
|
|
||||||
timeE, _ := time.Parse("2006-01-02T15:04:05", end)
|
|
||||||
duration = timeE.Sub(a.Started).Seconds()
|
|
||||||
}
|
|
||||||
current.Conditions = Conditions{
|
|
||||||
PodRunning: a.StepCount > 0 && a.StepCount < a.StepMax,
|
|
||||||
Completed: a.StepCount == a.StepMax,
|
|
||||||
}
|
|
||||||
current.Progress = fmt.Sprintf("%v/%v", a.StepCount, a.StepMax)
|
|
||||||
current.Duration = fmt.Sprintf("%v", fmt.Sprintf("%.2f", duration)+"s")
|
|
||||||
|
|
||||||
current.Status = status
|
|
||||||
return current
|
|
||||||
}
|
|
||||||
|
|
||||||
type ArgoPodLog struct {
|
|
||||||
PodName string
|
|
||||||
Step string
|
|
||||||
Message string
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewArgoPodLog(name string, step string, msg string) ArgoPodLog {
|
|
||||||
return ArgoPodLog{
|
|
||||||
PodName: name,
|
|
||||||
Step: step,
|
|
||||||
Message: msg,
|
|
||||||
}
|
|
||||||
}
|
|
@ -3,17 +3,15 @@ package tools
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"io"
|
"io"
|
||||||
"oc-monitord/models"
|
|
||||||
"sync"
|
"k8s.io/apimachinery/pkg/watch"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Tool interface {
|
type Tool interface {
|
||||||
CreateArgoWorkflow(path string, ns string) (string, error)
|
CreateArgoWorkflow(path string, ns string) (string, error)
|
||||||
CreateAccessSecret(ns string, login string, password string) (string, error)
|
CreateAccessSecret(ns string, login string, password string) (string, error)
|
||||||
LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
|
GetArgoWatch(executionId string, wfName string) (watch.Interface, error)
|
||||||
argoLogs *models.ArgoLogs, seen []string,
|
GetPodLogger(ns string, wfName string, podName string) (io.ReadCloser, error)
|
||||||
logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
|
|
||||||
argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var _service = map[string]func() (Tool, error){
|
var _service = map[string]func() (Tool, error){
|
||||||
|
@ -7,21 +7,18 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"oc-monitord/conf"
|
"oc-monitord/conf"
|
||||||
"oc-monitord/models"
|
|
||||||
"oc-monitord/utils"
|
"oc-monitord/utils"
|
||||||
"os"
|
"os"
|
||||||
"sync"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
|
||||||
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
|
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
|
||||||
"github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
|
"github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
corev1 "k8s.io/api/core/v1"
|
|
||||||
v1 "k8s.io/api/core/v1"
|
v1 "k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
"k8s.io/apimachinery/pkg/runtime/serializer"
|
"k8s.io/apimachinery/pkg/runtime/serializer"
|
||||||
|
"k8s.io/apimachinery/pkg/watch"
|
||||||
"k8s.io/client-go/kubernetes"
|
"k8s.io/client-go/kubernetes"
|
||||||
"k8s.io/client-go/rest"
|
"k8s.io/client-go/rest"
|
||||||
)
|
)
|
||||||
@ -59,73 +56,6 @@ func NewKubernetesTool() (Tool, error) {
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (k *KubernetesTools) LogWorkflow(execID string, namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
|
|
||||||
seen []string, logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
|
|
||||||
exec := utils.GetExecution(execID)
|
|
||||||
if exec == nil {
|
|
||||||
return errors.New("Could not retrieve workflow ID from execution ID " + execID)
|
|
||||||
}
|
|
||||||
if exec.State == enum.DRAFT || exec.State == enum.FAILURE || exec.State == enum.SUCCESS {
|
|
||||||
l := utils.GetWFLogger("")
|
|
||||||
l.Error().Msg("The execution's state doesn't meet requirement, state is : " + exec.State.String())
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
k.logWorkflow(namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
|
|
||||||
return k.LogWorkflow(execID, namespace, workflowName, argoFilePath, stepMax, current_watch, previous_watch, argoLogs, seen, logFunc)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (k *KubernetesTools) logWorkflow(namespace string, workflowName string, argoFilePath string, stepMax int, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs,
|
|
||||||
seen []string,
|
|
||||||
logFunc func(argoFilePath string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) error {
|
|
||||||
// List pods related to the Argo workflow
|
|
||||||
fmt.Println("\n!!!!!!!! !!!!!!!!!! !!!!!!!! &&&& & STARTING LOG\n\n")
|
|
||||||
labelSelector := fmt.Sprintf("workflows.argoproj.io/workflow=%s", workflowName)
|
|
||||||
for retries := 0; retries < 10; retries++ { // Retry for up to ~20 seconds
|
|
||||||
// List workflow pods
|
|
||||||
wfPods, err := k.Set.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
|
|
||||||
LabelSelector: labelSelector,
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// If we found pods, stream logs
|
|
||||||
if len(wfPods.Items) > 0 {
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
// Stream logs from all matching pods
|
|
||||||
for _, pod := range wfPods.Items {
|
|
||||||
for _, container := range pod.Spec.Containers {
|
|
||||||
wg.Add(1)
|
|
||||||
go k.streamLogs(namespace, pod.Name, container.Name, argoFilePath, stepMax, &wg, current_watch, previous_watch, argoLogs, seen, logFunc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
wg.Wait()
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
time.Sleep(2 * time.Second) // Wait before retrying
|
|
||||||
}
|
|
||||||
return errors.New("no pods found for the workflow")
|
|
||||||
}
|
|
||||||
|
|
||||||
// Function to stream logs
|
|
||||||
func (k *KubernetesTools) streamLogs(namespace string, podName string, containerName string,
|
|
||||||
argoFilePath string, stepMax int, wg *sync.WaitGroup, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string,
|
|
||||||
logFunc func(argo_file_path string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch, argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup)) {
|
|
||||||
req := k.Set.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
|
|
||||||
Container: containerName, // Main container
|
|
||||||
Follow: true, // Equivalent to -f flag in kubectl logs
|
|
||||||
})
|
|
||||||
defer wg.Done()
|
|
||||||
// Open stream
|
|
||||||
stream, err := req.Stream(context.Background())
|
|
||||||
if err != nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
defer stream.Close()
|
|
||||||
var internalWg sync.WaitGroup
|
|
||||||
logFunc(argoFilePath, stepMax, stream, current_watch, previous_watch, argoLogs, seen, &internalWg)
|
|
||||||
internalWg.Wait()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, error) {
|
func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, error) {
|
||||||
// Read workflow YAML file
|
// Read workflow YAML file
|
||||||
workflowYAML, err := os.ReadFile(path)
|
workflowYAML, err := os.ReadFile(path)
|
||||||
@ -149,7 +79,7 @@ func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, er
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create the workflow in the "argo" namespace
|
// Create the workflow in the "argo" namespace
|
||||||
createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.Background(), workflow, metav1.CreateOptions{})
|
createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.TODO(), workflow, metav1.CreateOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", errors.New("failed to create workflow: " + err.Error())
|
return "", errors.New("failed to create workflow: " + err.Error())
|
||||||
}
|
}
|
||||||
@ -177,9 +107,81 @@ func (k *KubernetesTools) CreateAccessSecret(ns string, login string, password s
|
|||||||
Data: secretData,
|
Data: secretData,
|
||||||
}
|
}
|
||||||
// Create the Secret in Kubernetes
|
// Create the Secret in Kubernetes
|
||||||
_, err := k.Set.CoreV1().Secrets(namespace).Create(context.Background(), secret, metav1.CreateOptions{})
|
_, err := k.Set.CoreV1().Secrets(namespace).Create(context.TODO(), secret, metav1.CreateOptions{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", errors.New("Error creating secret: " + err.Error())
|
return "", errors.New("Error creating secret: " + err.Error())
|
||||||
}
|
}
|
||||||
return name, nil
|
return name, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch.Interface, error){
|
||||||
|
wfl := utils.GetWFLogger("")
|
||||||
|
wfl.Debug().Msg("Starting argo watch with argo lib")
|
||||||
|
fmt.Println("metadata.name=oc-monitor-"+wfName + " in namespace : " + executionId)
|
||||||
|
options := metav1.ListOptions{FieldSelector: "metadata.name=oc-monitor-"+wfName}
|
||||||
|
fmt.Println(options)
|
||||||
|
watcher, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(executionId).Watch(context.TODO(), options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("Error executing 'argo watch " + wfName + " -n " + executionId + " with ArgoprojV1alpha1 client")
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return watcher, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *KubernetesTools) GetPodLogger(ns string, wfName string, nodeName string) (io.ReadCloser, error) {
|
||||||
|
var targetPod v1.Pod
|
||||||
|
|
||||||
|
pods, err := k.Set.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{
|
||||||
|
LabelSelector: "workflows.argoproj.io/workflow="+wfName,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to list pods: " + err.Error())
|
||||||
|
}
|
||||||
|
if len(pods.Items) == 0 {
|
||||||
|
return nil, fmt.Errorf("no pods found with label workflows.argoproj.io/node-name=" + nodeName)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, pod := range pods.Items {
|
||||||
|
if pod.Annotations["workflows.argoproj.io/node-name"] == nodeName {
|
||||||
|
targetPod = pod
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// k8s API throws an error if we try getting logs while the container are not initialized, so we repeat status check there
|
||||||
|
k.testPodReady(targetPod, ns)
|
||||||
|
|
||||||
|
// When using kubec logs for a pod we see it contacts /api/v1/namespaces/NAMESPACE/pods/oc-monitor-PODNAME/log?container=main so we add this container: main to the call
|
||||||
|
req, err := k.Set.CoreV1().Pods(ns).GetLogs(targetPod.Name, &v1.PodLogOptions{Follow: true, Container: "main"}). Stream(context.Background())
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf(" Error when trying to get logs for " + targetPod.Name + " : " + err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
return req, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (k *KubernetesTools) testPodReady(pod v1.Pod, ns string) {
|
||||||
|
for {
|
||||||
|
pod, err := k.Set.CoreV1().Pods(ns).Get(context.Background(), pod.Name, metav1.GetOptions{})
|
||||||
|
if err != nil {
|
||||||
|
wfl := utils.GetWFLogger("")
|
||||||
|
wfl.Error().Msg("Error fetching pod: " + err.Error() + "\n")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
var initialized bool
|
||||||
|
for _, cond := range pod.Status.Conditions {
|
||||||
|
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
|
||||||
|
initialized = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if initialized {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
time.Sleep(2 * time.Second) // avoid hammering the API
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user