Docker OC-MONITORD

This commit is contained in:
mr
2025-02-17 16:54:25 +01:00
parent 34547e8b2f
commit 91a87fbc4d
7 changed files with 244 additions and 150 deletions

212
main.go
View File

@@ -2,18 +2,20 @@ package main
import (
"bufio"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"regexp"
"strconv"
"slices"
"strings"
"sync"
"oc-monitord/conf"
"oc-monitord/models"
u "oc-monitord/utils"
"oc-monitord/workflow_builder"
oclib "cloud.o-forge.io/core/oc-lib"
@@ -49,19 +51,10 @@ const defaultConfigFile = "/etc/oc/ocmonitord_conf.json"
const localConfigFile = "./conf/local_ocmonitord_conf.json"
func main() {
os.Setenv("test_service", "true") // Only for service demo, delete before merging on main
// Test if monitor is launched outside (with parameters) or in a k8s environment (env variables sets)
if os.Getenv("KUBERNETES_SERVICE_HOST") == "" {
// Not in a k8s environment, get conf from parameters
fmt.Println("Executes outside of k8s")
parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and sends the produced logs to a loki database")
loadConfig(false, &parser)
} else {
// Executed in a k8s environment
fmt.Println("Executes inside a k8s")
loadConfig(true, nil)
}
os.Setenv("test_service", "true") // Only for service demo, delete before merging on main
parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and sends the produced logs to a loki database")
loadConfig(false, &parser)
oclib.InitDaemon("oc-monitord")
oclib.SetConfig(
@@ -76,7 +69,7 @@ func main() {
logger.Debug().Msg("Loki URL : " + conf.GetConfig().LokiURL)
logger.Debug().Msg("Workflow executed : " + conf.GetConfig().ExecutionID)
exec := getExecution(conf.GetConfig().ExecutionID)
exec := u.GetExecution(conf.GetConfig().ExecutionID)
conf.GetConfig().WorkflowID = exec.WorkflowID
logger.Debug().Msg("Starting construction of yaml argo for workflow :" + exec.WorkflowID)
@@ -106,33 +99,36 @@ func main() {
wf_logger = logger.With().Str("argo_name", workflowName).Str("workflow_id", conf.GetConfig().WorkflowID).Str("workflow_execution_id", conf.GetConfig().ExecutionID).Logger()
wf_logger.Debug().Msg("Testing argo name")
if os.Getenv("KUBERNETES_SERVICE_HOST") == "" {
if conf.GetConfig().KubeHost == "" {
// Not in a k8s environment, get conf from parameters
fmt.Println("Executes outside of k8s")
executeInside(argo_file_path, stepMax)
executeOutside(argo_file_path, stepMax)
} else {
// Executed in a k8s environment
fmt.Println("Executes inside a k8s")
loadConfig(true, nil)
executeInside(exec.GetID(), "argo", argo_file_path, stepMax)
}
}
// getExecution loads the workflow execution identified by exec_id through an
// oclib request built with the peer ID taken from the monitor configuration.
// When the response code is not 200 an error is logged and nil is returned,
// so callers must check the result before dereferencing it.
func getExecution(exec_id string) *workflow_execution.WorkflowExecution {
	request := oclib.NewRequest(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), "", conf.GetConfig().PeerID, []string{}, nil)
	loaded := request.LoadOne(exec_id)
	if loaded.Code == 200 {
		return loaded.ToWorkflowExecution()
	}
	logger.Error().Msg("Could not retrieve workflow ID from execution ID " + exec_id)
	return nil
}
// executeInside creates a service with tools2.NewService for the configured
// mode, asks it to create the Argo workflow described by argo_file_path, and
// passes logWorkflow to the service's LogWorkflow so the output gets processed.
// Failures are reported through logger; nothing is returned to the caller.
// So far we only log the output from
// NOTE(review): two signatures and two CreateArgoWorkflow/LogWorkflow call
// styles appear back to back below; this looks like the pre- and post-commit
// revisions shown together without +/- markers — confirm against the repository.
func executeInside(argo_file_path string, stepMax int) {
func executeInside(execID string, ns string, argo_file_path string, stepMax int) {
t, err := tools2.NewService(conf.GetConfig().Mode)
if err != nil {
// NOTE(review): the error is only logged and t is still used below —
// confirm whether an early return is needed here.
logger.Error().Msg("Could not create KubernetesTool")
}
t.CreateArgoWorkflow(argo_file_path)
t.LogWorkflow("argo", workflowName, argo_file_path, stepMax, logWorkflow)
name, err := t.CreateArgoWorkflow(argo_file_path, ns)
if err != nil {
logger.Error().Msg("Could not create argo workflow : " + err.Error())
} else {
// The text before the first "_" of argo_file_path is the first argument of NewArgoLogs.
split := strings.Split(argo_file_path, "_")
argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
// Two fresh watches are passed to LogWorkflow along with an empty string slice.
err := t.LogWorkflow(execID, ns, name, argo_file_path, stepMax, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, logWorkflow)
if err != nil {
logger.Error().Msg("Could not log workflow : " + err.Error())
}
}
}
func executeOutside(argo_file_path string, stepMax int) {
@@ -140,7 +136,7 @@ func executeOutside(argo_file_path string, stepMax int) {
var stdout, stderr io.ReadCloser
// var stderr io.ReadCloser
var err error
cmd := exec.Command("argo", "submit", "--log", "./argo_workflows/"+argo_file_path, "--serviceaccount=argo", "-n", "argo")
cmd := exec.Command("argo", "submit", "--log", argo_file_path, "--serviceaccount=argo", "-n", "argo")
if stdout, err = cmd.StdoutPipe(); err != nil {
wf_logger.Error().Msg("Could not retrieve stdoutpipe " + err.Error())
return
@@ -149,7 +145,11 @@ func executeOutside(argo_file_path string, stepMax int) {
panic(err)
}
var wg sync.WaitGroup
go logWorkflow(argo_file_path, stepMax, stdout, &wg)
split := strings.Split(argo_file_path, "_")
argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
argoLogs.IsStreaming = true
go logWorkflow(argo_file_path, stepMax, stdout, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, &wg)
if err := cmd.Wait(); err != nil {
wf_logger.Error().Msg("Could not execute argo submit")
@@ -161,33 +161,53 @@ func executeOutside(argo_file_path string, stepMax int) {
// logWorkflow reads pipe line by line with a bufio.Scanner and groups the
// lines into steps: a line containing "capturing logs" while count is 0 opens
// a step, and a line containing "sub-process exited" closes it. On close the
// watch is finalized with argoLogs.StopStepRecording, passed to checkStatus,
// serialized to JSON and emitted through wf_logger.
//
// We could improve this function by creating an object with the same attribute as the output
// and only send a new log if the current object has different values than the previous
//
// NOTE(review): the lines below appear to interleave the pre- and post-commit
// revisions (two signatures, plus both watch_output and count/see bookkeeping)
// and contain a diff hunk header — confirm against the repository.
func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser, wg *sync.WaitGroup) {
var current_watch, previous_watch *models.ArgoWatch
split := strings.Split(argo_file_path, "_")
argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
watch_output := make([]string, 0)
// NOTE(review): the seen parameter is not read in the visible body; argoLogs.Seen is used instead.
func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser,
current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup) {
scanner := bufio.NewScanner(pipe)
// count is the number of lines recorded for the step currently open (0 = no open step).
count := 0
// see holds the line read when count == 1; it is matched against argoLogs.Seen.
see := ""
// NOTE(review): seeit is incremented and decremented but never read in the visible lines.
seeit := 0
for scanner.Scan() {
log := scanner.Text()
watch_output = append(watch_output, log)
if strings.Contains(log, "Progress:") {
current_watch = argoLogs.StopStepRecording(watch_output)
watch_output = []string{}
} else if strings.Contains(log, "sub-process exited") {
current_watch = argoLogs.StopStepRecording(watch_output)
if strings.Contains(log, "capturing logs") && count == 0 {
// The WaitGroup is only touched when argoLogs.IsStreaming is false.
if !argoLogs.IsStreaming {
wg.Add(1)
}
seeit++
} else if count == 0 {
// No step is open and this line does not open one: skip it when
// streaming, stop reading otherwise.
if argoLogs.IsStreaming {
continue
} else {
break
}
}
if current_watch != nil && !current_watch.Equals(previous_watch) && current_watch.Name != "" {
wg.Add(1)
checkStatus(current_watch, previous_watch)
if count == 1 {
see = log
// If this line is already in argoLogs.Seen and we are not streaming, stop reading.
if slices.Contains(argoLogs.Seen, see) && !argoLogs.IsStreaming {
wg.Done()
seeit--
break
}
}
// Each distinct line is stored only once per watch.
if !slices.Contains(current_watch.Logs, log) {
current_watch.Logs = append(current_watch.Logs, log)
}
count++
if strings.Contains(log, "sub-process exited") {
current_watch = argoLogs.StopStepRecording(current_watch)
argoLogs.Seen = append(argoLogs.Seen, see)
// count is reset only when checkStatus returns true.
if checkStatus(current_watch, previous_watch, argoLogs) {
count = 0
if !argoLogs.IsStreaming {
wg.Done()
}
seeit--
}
jsonified, err := json.Marshal(current_watch)
if err != nil {
// The marshalling error is only logged; jsonified is still used below.
logger.Error().Msg("Could not create watch log")
}
// NOTE(review): any text containing "error" also contains "err", so the
// first test is redundant; the substring "err" will also match unrelated
// words — confirm this heuristic is intended.
if strings.Contains(strings.ToLower(strings.Join(current_watch.Logs, " ")), "error") || strings.Contains(strings.ToLower(strings.ToLower(strings.Join(current_watch.Logs, " "))), "err") {
current_watch.Status = "Failed"
}
if current_watch.Status == "Failed" {
wf_logger.Error().Msg(string(jsonified))
} else {
@@ -195,8 +215,6 @@ func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser, wg *syn
}
// Start a fresh, empty watch for the next step.
previous_watch = current_watch
current_watch = &models.ArgoWatch{}
watch_output = []string{}
wg.Done()
}
}
// NOTE(review): scanner.Err() is not checked after the loop in the visible lines.
}
@@ -212,44 +230,50 @@ func loadConfig(is_k8s bool, parser *argparse.Parser) {
}
// setConf fills the shared configuration returned by conf.GetConfig().
// The visible lines read values either from the onion o (keys "lokiurl",
// "timeout", "workflow", "peer", "mongourl", "database") or from command-line
// flags registered on parser and parsed from os.Args. A flag parsing error
// prints the usage text and exits the process with status 1.
// NOTE(review): the body appears to interleave the pre- and post-commit
// revisions (the same flags are registered more than once and parser.Parse is
// called twice) — confirm against the repository which lines are current.
func setConf(is_k8s bool, o *onion.Onion, parser *argparse.Parser) {
if is_k8s {
conf.GetConfig().LokiURL = o.GetStringDefault("lokiurl", "http://127.0.0.1:3100")
// A timeout that does not parse as an integer is logged and the Timeout field is left untouched.
i, err := strconv.Atoi(o.GetString("timeout"))
if err == nil {
conf.GetConfig().Timeout = i
} else {
logger.Error().Msg("Could not parse timeout, using default value")
}
conf.GetConfig().ExecutionID = o.GetString("workflow")
conf.GetConfig().PeerID = o.GetString("peer")
mongo := o.GetStringDefault("mongourl", "mongodb://127.0.0.1:27017")
db := o.GetStringDefault("database", "DC_myDC")
// Flag registration; the returned pointers are dereferenced after parser.Parse.
url := parser.String("u", "url", &argparse.Options{Required: true, Default: "http://127.0.0.1:3100", Help: "Url to the Loki database logs will be sent to"})
mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "", Help: "Mode of the execution"})
execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
peer := parser.String("p", "peer", &argparse.Options{Required: false, Default: "", Help: "Peer ID of the workflow to request from oc-catalog API"})
mongo := parser.String("m", "mongo", &argparse.Options{Required: true, Default: "mongodb://127.0.0.1:27017", Help: "URL to reach the MongoDB"})
db := parser.String("d", "database", &argparse.Options{Required: true, Default: "DC_myDC", Help: "Name of the database to query in MongoDB"})
timeout := parser.Int("t", "timeout", &argparse.Options{Required: false, Default: -1, Help: "Timeout for the execution of the workflow"})
conf.GetConfig().MongoURL = mongo
conf.GetConfig().Database = db
} else {
url := parser.String("u", "url", &argparse.Options{Required: true, Default: "http://127.0.0.1:3100", Help: "Url to the Loki database logs will be sent to"})
mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "kubernetes", Help: "Mode of the execution"})
execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
peer := parser.String("p", "peer", &argparse.Options{Required: false, Default: "", Help: "Peer ID of the workflow to request from oc-catalog API"})
mongo := parser.String("m", "mongo", &argparse.Options{Required: true, Default: "mongodb://127.0.0.1:27017", Help: "URL to reach the MongoDB"})
db := parser.String("d", "database", &argparse.Options{Required: true, Default: "DC_myDC", Help: "Name of the database to query in MongoDB"})
timeout := parser.Int("t", "timeout", &argparse.Options{Required: false, Default: -1, Help: "Timeout for the execution of the workflow"})
err := parser.Parse(os.Args)
if err != nil {
fmt.Println(parser.Usage(err))
os.Exit(1)
}
conf.GetConfig().Logs = "debug"
conf.GetConfig().LokiURL = *url
conf.GetConfig().MongoURL = *mongo
conf.GetConfig().Database = *db
conf.GetConfig().Timeout = *timeout
conf.GetConfig().Mode = *mode
conf.GetConfig().ExecutionID = *execution
conf.GetConfig().PeerID = *peer
// Optional flags describing the Kubernetes cluster connection.
ca := parser.String("c", "ca", &argparse.Options{Required: false, Default: "", Help: "CA file for the Kubernetes cluster"})
cert := parser.String("C", "cert", &argparse.Options{Required: false, Default: "", Help: "Cert file for the Kubernetes cluster"})
data := parser.String("D", "data", &argparse.Options{Required: false, Default: "", Help: "Data file for the Kubernetes cluster"})
host := parser.String("H", "host", &argparse.Options{Required: false, Default: "", Help: "Host for the Kubernetes cluster"})
port := parser.String("P", "port", &argparse.Options{Required: false, Default: "6443", Help: "Port for the Kubernetes cluster"})
err := parser.Parse(os.Args)
if err != nil {
fmt.Println(parser.Usage(err))
os.Exit(1)
}
// The log level is hard-coded to "debug".
conf.GetConfig().Logs = "debug"
conf.GetConfig().LokiURL = *url
conf.GetConfig().MongoURL = *mongo
conf.GetConfig().Database = *db
conf.GetConfig().Timeout = *timeout
conf.GetConfig().Mode = *mode
conf.GetConfig().ExecutionID = *execution
conf.GetConfig().PeerID = *peer
conf.GetConfig().KubeHost = *host
conf.GetConfig().KubePort = *port
// ca, cert and data are base64-decoded before being stored.
// NOTE(review): a value that fails to decode is silently skipped, leaving
// the corresponding field unset with no log — confirm this is intended.
decoded, err := base64.StdEncoding.DecodeString(*ca)
if err == nil {
conf.GetConfig().KubeCA = string(decoded)
}
decoded, err = base64.StdEncoding.DecodeString(*cert)
if err == nil {
conf.GetConfig().KubeCert = string(decoded)
}
decoded, err = base64.StdEncoding.DecodeString(*data)
if err == nil {
conf.GetConfig().KubeData = string(decoded)
}
}
func initOnion(o *onion.Onion) *onion.Onion {
@@ -294,14 +318,24 @@ func getContainerName(argo_file string) string {
}
// Uses the ArgoWatch object to update status of the workflow execution object
//
// In the three-argument form below, the update happens when previous is nil,
// when the status differs from previous, or when argoLogs.IsStreaming is set;
// the same condition is the returned boolean. Only the lines of current.Logs
// not already in argoLogs.Logs are forwarded to updateStatus, joined with
// "\n"; they then replace current.Logs and are appended to argoLogs.Logs.
// NOTE(review): two signatures and two opening conditions appear back to back;
// this looks like the pre- and post-commit revisions shown together — confirm
// against the repository.
func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch) {
if previous != nil && current.Status != previous.Status {
func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch, argoLogs *models.ArgoLogs) bool {
if previous == nil || current.Status != previous.Status || argoLogs.IsStreaming {
argoLogs.StepCount += 1
if len(current.Logs) > 0 {
updateStatus(current.Status, current.Logs[0])
// Keep only the lines that have not been recorded in argoLogs.Logs yet.
newLogs := []string{}
for _, log := range current.Logs {
if !slices.Contains(argoLogs.Logs, log) {
newLogs = append(newLogs, log)
}
}
updateStatus(current.Status, strings.Join(newLogs, "\n"))
current.Logs = newLogs
argoLogs.Logs = append(argoLogs.Logs, newLogs...)
} else {
// No log lines at all: the status is still reported, with an empty message.
updateStatus(current.Status, "")
}
}
return previous == nil || current.Status != previous.Status || argoLogs.IsStreaming
}
func updateStatus(status string, log string) {