package main

import (
	"bufio"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"regexp"
	"slices"
	"strings"
	"sync"
	"time"

	"oc-monitord/conf"
	"oc-monitord/models"
	u "oc-monitord/utils"
	"oc-monitord/workflow_builder"

	oclib "cloud.o-forge.io/core/oc-lib"
	"cloud.o-forge.io/core/oc-lib/logs"
	"cloud.o-forge.io/core/oc-lib/models/booking"
	"cloud.o-forge.io/core/oc-lib/models/peer"
	"cloud.o-forge.io/core/oc-lib/models/utils"
	"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
	"cloud.o-forge.io/core/oc-lib/tools"

	tools2 "oc-monitord/tools"

	"github.com/akamensky/argparse"
	"github.com/google/uuid"
	"github.com/goraz/onion"
	"github.com/rs/zerolog"
)

// Command-line args:
// - url: Loki URL (default: "http://127.0.0.1:3100")
// - execution: Workflow Execution ID (required), identifies the current execution and allows retrieving the Workflow
// - mongo: MongoDB URL (default: "mongodb://127.0.0.1:27017")
// - db: MongoDB database name (default: "DC_myDC")
// - timeout: Execution timeout (default: -1)

var logger zerolog.Logger
var wf_logger zerolog.Logger
var pods_logger zerolog.Logger
var parser argparse.Parser
var workflowName string

const defaultConfigFile = "/etc/oc/ocmonitord_conf.json"
const localConfigFile = "./conf/local_ocmonitord_conf.json"

func main() {
	os.Setenv("test_service", "true") // Only for the service demo, delete before merging on main

	parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and send the produced logs to a Loki database")
	loadConfig(false, &parser)
	oclib.InitDaemon("oc-monitord")

	oclib.SetConfig(
		conf.GetConfig().MongoURL,
		conf.GetConfig().Database,
		conf.GetConfig().NatsURL,
		conf.GetConfig().LokiURL,
		conf.GetConfig().Logs,
	)

	logger = logs.CreateLogger("oc-monitord")
	logger.Debug().Msg("Loki URL : " + conf.GetConfig().LokiURL)
	logger.Debug().Msg("Workflow executed : " + conf.GetConfig().ExecutionID)

	exec := u.GetExecution(conf.GetConfig().ExecutionID)
	conf.GetConfig().WorkflowID = exec.WorkflowID

	logger.Debug().Msg("Starting construction of the Argo YAML for workflow : " + exec.WorkflowID)

	if _, err := os.Stat("./argo_workflows/"); os.IsNotExist(err) {
		os.Mkdir("./argo_workflows/", 0755)
		logger.Info().Msg("Created argo_workflows/")
	}

	// Create the Argo workflow file
	new_wf := workflow_builder.WorflowDB{}

	err := new_wf.LoadFrom(conf.GetConfig().WorkflowID, conf.GetConfig().PeerID)
	if err != nil {
		logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
	}

	builder, stepMax, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout)
	if err != nil {
		logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
		logger.Error().Msg(err.Error())
	}

	argo_file_path, err := builder.CompleteBuild(exec.ExecutionsID)
	if err != nil {
		logger.Error().Msg(err.Error())
	}

	workflowName = getContainerName(argo_file_path)

	wf_logger = logger.With().
		Str("argo_name", workflowName).
		Str("workflow_id", conf.GetConfig().WorkflowID).
		Str("workflow_execution_id", conf.GetConfig().ExecutionID).
		Logger()
	wf_logger.Debug().Msg("Testing argo name")

	if conf.GetConfig().KubeHost == "" {
		// Not in a k8s environment, get conf from parameters
		fmt.Println("Executes outside of k8s")
		executeOutside(argo_file_path, stepMax, builder.Workflow)
	} else {
		// Executed in a k8s environment
		fmt.Println("Executes inside a k8s")
		executeInside(exec.GetID(), "argo", argo_file_path, stepMax)
	}
}
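// loadConfig below keeps the execution-ID validation commented out. A minimal
// sketch of how it could be re-enabled; mustValidateExecutionID is a
// hypothetical helper, not part of the original flow, and assumes the logger
// has already been initialised (initOnion does this) before the check runs.
func mustValidateExecutionID() {
	if !IsValidUUID(conf.GetConfig().ExecutionID) {
		logger.Fatal().Msg("Provided ID is not an UUID")
	}
}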
// So far we only log the output from
func executeInside(execID string, ns string, argo_file_path string, stepMax int) {
	t, err := tools2.NewService(conf.GetConfig().Mode)
	if err != nil {
		logger.Error().Msg("Could not create KubernetesTool")
		return // no service handle to work with
	}

	name, err := t.CreateArgoWorkflow(argo_file_path, ns)
	if err != nil {
		logger.Error().Msg("Could not create argo workflow : " + err.Error())
	} else {
		split := strings.Split(argo_file_path, "_")
		argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
		argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
		err := t.LogWorkflow(execID, ns, name, argo_file_path, stepMax, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, logWorkflow)
		if err != nil {
			logger.Error().Msg("Could not log workflow : " + err.Error())
		}
	}
}

func executeOutside(argo_file_path string, stepMax int, workflow workflow_builder.Workflow) {
	var stdoutSubmit, stdoutLogs io.ReadCloser
	// Capture stderr into buffers so the error paths below can report what the
	// argo CLI printed
	var stderrSubmit, stderrLogs strings.Builder
	var wg sync.WaitGroup
	var err error

	logger.Debug().Msg("executing : argo submit --watch " + argo_file_path + " --serviceaccount sa-" + conf.GetConfig().ExecutionID + " -n " + conf.GetConfig().ExecutionID)

	cmdSubmit := exec.Command("argo", "submit", "--watch", argo_file_path, "--serviceaccount", "sa-"+conf.GetConfig().ExecutionID, "-n", conf.GetConfig().ExecutionID)
	if stdoutSubmit, err = cmdSubmit.StdoutPipe(); err != nil {
		wf_logger.Error().Msg("Could not retrieve stdoutpipe " + err.Error())
		return
	}
	cmdSubmit.Stderr = &stderrSubmit

	// ======== Code block that implemented a method that logs both locally and container executed wf
	// Needs to be improved, it did not log well for local executions
	// split := strings.Split(argo_file_path, "_")
	// argoLogs := models.NewArgoLogs(split[0], conf.GetConfig().ExecutionID, stepMax)
	// argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
	// argoLogs.IsStreaming = true // Used to determine whether the logs are read from a docker container or on localhost
	// go logWorkflow(argo_file_path, stepMax, stdout, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, &wg)
	// ========

	var steps []string
	for _, template := range workflow.Spec.Templates {
		steps = append(steps, template.Name)
	}

	cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow", "--no-color")
	if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
		wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs' " + err.Error())
		return
	}
	cmdLogs.Stderr = &stderrLogs

	go models.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
	go models.LogPods(workflowName, stdoutLogs, steps, &wg)

	fmt.Println("Starting argo submit")
	if err := cmdSubmit.Start(); err != nil {
		wf_logger.Error().Msg("Could not start argo submit")
		wf_logger.Error().Msg(err.Error() + " " + stderrSubmit.String())
		updateStatus("fatal", "")
	}

	time.Sleep(5 * time.Second)

	fmt.Println("Running argo logs")
	if err := cmdLogs.Run(); err != nil {
		wf_logger.Error().Msg("Could not run '" + strings.Join(cmdLogs.Args, " ") + "'")
		wf_logger.Fatal().Msg(err.Error() + " " + stderrLogs.String())
	}

	fmt.Println("Waiting argo submit")
	if err := cmdSubmit.Wait(); err != nil {
		wf_logger.Error().Msg("Could not execute argo submit")
		wf_logger.Error().Msg(err.Error() + " " + stderrSubmit.String())
		updateStatus("fatal", "")
	}

	wg.Wait()
}
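// Alternative sketch to the stderr buffering used in executeOutside: drain the
// stream concurrently so errors surface while the command runs. relayStderr is
// a hypothetical helper, not part of the original flow; it assumes the caller
// wires it to a StderrPipe and calls wg.Add(1) before launching it.
func relayStderr(tag string, stream io.Reader, wg *sync.WaitGroup) {
	defer wg.Done()
	scanner := bufio.NewScanner(stream)
	for scanner.Scan() {
		wf_logger.Error().Msg(tag + " stderr: " + scanner.Text())
	}
}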
// !!!! BUGGED !!!!
// Should be refactored to create a function dedicated to logging output from an execution in a container
// LogLocalWorkflow() has been implemented to be used when oc-monitord is executed locally
// We could improve this function by creating an object with the same attributes as the output
// and only send a new log if the current object has different values than the previous one
func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser, current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
	argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup) {
	scanner := bufio.NewScanner(pipe)
	count := 0
	see := ""
	seeit := 0
	for scanner.Scan() {
		log := scanner.Text()
		if strings.Contains(log, "capturing logs") && count == 0 {
			if !argoLogs.IsStreaming {
				wg.Add(1)
			}
			seeit++
		} else if count == 0 && !argoLogs.IsStreaming {
			break
		}
		if count == 1 {
			see = log
			if slices.Contains(argoLogs.Seen, see) && !argoLogs.IsStreaming {
				wg.Done()
				seeit--
				break
			}
		}
		if !slices.Contains(current_watch.Logs, log) {
			current_watch.Logs = append(current_watch.Logs, strings.ReplaceAll(log, "\"", ""))
		}
		count++
		if strings.Contains(log, "sub-process exited") || argoLogs.IsStreaming {
			current_watch = argoLogs.StopStepRecording(current_watch)
			argoLogs.Seen = append(argoLogs.Seen, see)
			if checkStatus(current_watch, previous_watch, argoLogs) {
				count = 0
				if !argoLogs.IsStreaming {
					wg.Done()
				}
				seeit--
			}
			jsonified, err := json.Marshal(current_watch)
			if err != nil {
				logger.Error().Msg("Could not create watch log")
			}
			if current_watch.Status == "Failed" {
				wf_logger.Error().Msg(string(jsonified))
			} else {
				wf_logger.Info().Msg(string(jsonified))
			}
			previous_watch = current_watch
			current_watch = &models.ArgoWatch{}
			if argoLogs.IsStreaming {
				current_watch.Logs = []string{}
			}
		}
	}
}
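// A minimal sketch of the improvement suggested in the comment block above
// logWorkflow: keep the previous watch and only emit a log when the new one
// differs. watchChanged is a hypothetical helper; it compares only the fields
// logWorkflow already uses (Status, Logs), anything richer would depend on the
// full models.ArgoWatch definition.
func watchChanged(current *models.ArgoWatch, previous *models.ArgoWatch) bool {
	if previous == nil {
		return true
	}
	return current.Status != previous.Status || len(current.Logs) != len(previous.Logs)
}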
func loadConfig(is_k8s bool, parser *argparse.Parser) {
	var o *onion.Onion
	o = initOnion(o)
	setConf(is_k8s, o, parser)

	// if !IsValidUUID(conf.GetConfig().ExecutionID) {
	// 	logger.Fatal().Msg("Provided ID is not an UUID")
	// }
}

func setConf(is_k8s bool, o *onion.Onion, parser *argparse.Parser) {
	// Flags documented with a default are optional; a Required flag would make
	// argparse reject a missing value before the default could apply
	url := parser.String("u", "url", &argparse.Options{Required: false, Default: "http://127.0.0.1:3100", Help: "URL of the Loki database logs will be sent to"})
	mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "", Help: "Mode of the execution"})
	execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
	peer := parser.String("p", "peer", &argparse.Options{Required: false, Default: "", Help: "Peer ID of the workflow to request from oc-catalog API"})
	mongo := parser.String("m", "mongo", &argparse.Options{Required: false, Default: "mongodb://127.0.0.1:27017", Help: "URL to reach the MongoDB"})
	db := parser.String("d", "database", &argparse.Options{Required: false, Default: "DC_myDC", Help: "Name of the database to query in MongoDB"})
	timeout := parser.Int("t", "timeout", &argparse.Options{Required: false, Default: -1, Help: "Timeout for the execution of the workflow"})
	ca := parser.String("c", "ca", &argparse.Options{Required: false, Default: "", Help: "CA file for the Kubernetes cluster"})
	cert := parser.String("C", "cert", &argparse.Options{Required: false, Default: "", Help: "Cert file for the Kubernetes cluster"})
	data := parser.String("D", "data", &argparse.Options{Required: false, Default: "", Help: "Data file for the Kubernetes cluster"})
	host := parser.String("H", "host", &argparse.Options{Required: false, Default: "", Help: "Host for the Kubernetes cluster"})
	port := parser.String("P", "port", &argparse.Options{Required: false, Default: "6443", Help: "Port for the Kubernetes cluster"})

	err := parser.Parse(os.Args)
	if err != nil {
		fmt.Println(parser.Usage(err))
		os.Exit(1)
	}

	conf.GetConfig().Logs = "debug"
	conf.GetConfig().LokiURL = *url
	conf.GetConfig().MongoURL = *mongo
	conf.GetConfig().Database = *db
	conf.GetConfig().Timeout = *timeout
	conf.GetConfig().Mode = *mode
	conf.GetConfig().ExecutionID = *execution
	conf.GetConfig().PeerID = *peer
	conf.GetConfig().KubeHost = *host
	conf.GetConfig().KubePort = *port

	decoded, err := base64.StdEncoding.DecodeString(*ca)
	if err == nil {
		conf.GetConfig().KubeCA = string(decoded)
	}
	decoded, err = base64.StdEncoding.DecodeString(*cert)
	if err == nil {
		conf.GetConfig().KubeCert = string(decoded)
	}
	decoded, err = base64.StdEncoding.DecodeString(*data)
	if err == nil {
		conf.GetConfig().KubeData = string(decoded)
	}
}

func initOnion(o *onion.Onion) *onion.Onion {
	logger = logs.CreateLogger("oc-monitord")
	configFile := ""
	l3 := onion.NewEnvLayerPrefix("_", "OCMONITORD")
	l2, err := onion.NewFileLayer(defaultConfigFile, nil)
	if err == nil {
		logger.Info().Msg("Config file found : " + defaultConfigFile)
		configFile = defaultConfigFile
	}
	l1, err := onion.NewFileLayer(localConfigFile, nil)
	if err == nil {
		logger.Info().Msg("Local config file found " + localConfigFile + ", overriding default file")
		configFile = localConfigFile
	}

	if configFile == "" {
		logger.Info().Msg("No config file found, using env")
		o = onion.New(l3)
	} else if l1 != nil && l2 != nil { // both config files loaded, local layer takes precedence
		o = onion.New(l1, l2, l3)
	} else if l1 == nil {
		o = onion.New(l2, l3)
	} else if l2 == nil {
		o = onion.New(l1, l3)
	}
	return o
}

func IsValidUUID(u string) bool {
	_, err := uuid.Parse(u)
	return err == nil
}

func getContainerName(argo_file string) string {
	regex := "([a-zA-Z]+-[a-zA-Z]+)"
	re := regexp.MustCompile(regex)
	container_name := re.FindString(argo_file)
	return container_name
}
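// Illustrative only (hypothetical file name): the regex above returns the
// first "letters-letters" pair found in the path, so for an Argo file such as
// "oc-monitor-myworkflow_1234.yml" it yields "oc-monitor".
//
//	getContainerName("oc-monitor-myworkflow_1234.yml") // "oc-monitor"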
// Uses the ArgoWatch object to update the status of the workflow execution object
func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch, argoLogs *models.ArgoLogs) bool {
	if previous == nil || current.Status != previous.Status || argoLogs.IsStreaming {
		argoLogs.StepCount += 1
		if len(current.Logs) > 0 {
			newLogs := []string{}
			for _, log := range current.Logs {
				if !slices.Contains(argoLogs.Logs, log) {
					newLogs = append(newLogs, log)
				}
			}
			updateStatus(current.Status, strings.Join(newLogs, "\n"))
			current.Logs = newLogs
			argoLogs.Logs = append(argoLogs.Logs, newLogs...)
		} else {
			updateStatus(current.Status, "")
		}
	}
	return previous == nil || current.Status != previous.Status || argoLogs.IsStreaming
}

func updateStatus(status string, log string) {
	exec_id := conf.GetConfig().ExecutionID

	wf_exec := &workflow_execution.WorkflowExecution{AbstractObject: utils.AbstractObject{UUID: conf.GetConfig().ExecutionID}}
	wf_exec.ArgoStatusToState(status)
	exec, _, err := workflow_execution.NewAccessor(&tools.APIRequest{
		PeerID: conf.GetConfig().PeerID,
	}).UpdateOne(wf_exec, exec_id)
	if err != nil {
		logger.Error().Msg("Could not update status for workflow execution " + exec_id + " : " + err.Error())
		return // without a successful update there is no execution object to notify peers with
	}
	splitted := strings.Split(log, "-")
	if len(splitted) > 1 {
		we := exec.(*workflow_execution.WorkflowExecution)
		itemID := splitted[len(splitted)-1] // TODO: in logs found item ID
		caller := &tools.HTTPCaller{
			URLS: map[tools.DataType]map[tools.METHOD]string{
				tools.PEER: {
					tools.POST: "/status/",
				},
				tools.BOOKING: {
					tools.PUT: "http://localhost:8080/booking/:id",
				},
			},
		}
		if we.PeerBookByGraph != nil {
			for peerID, val := range we.PeerBookByGraph {
				if val[itemID] == nil {
					continue
				}
				for _, log := range val[itemID] {
					(&peer.Peer{}).LaunchPeerExecution(peerID, log, tools.BOOKING, tools.PUT, &booking.Booking{
						State: we.State,
					}, caller)
				}
			}
		}
	}
}
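// Illustrative only: updateStatus assumes the booked item's ID is the last
// "-"-separated token of the log line it receives (see the TODO above). For a
// hypothetical log line "booking item-0f3a":
//
//	splitted := strings.Split("booking item-0f3a", "-")
//	itemID := splitted[len(splitted)-1] // "0f3a"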