oc-monitord/main.go

347 lines
11 KiB
Go
Raw Normal View History

2024-07-04 09:14:25 +02:00
package main
import (
2024-08-06 11:40:30 +02:00
"bufio"
2025-02-17 16:54:25 +01:00
"encoding/base64"
2024-08-06 11:40:30 +02:00
"fmt"
"io"
"os"
2024-08-06 11:40:30 +02:00
"os/exec"
"regexp"
"strings"
2024-08-07 17:24:09 +02:00
"sync"
"time"
"oc-monitord/conf"
l "oc-monitord/logger"
2025-02-17 16:54:25 +01:00
u "oc-monitord/utils"
"oc-monitord/workflow_builder"
2024-08-06 11:40:30 +02:00
oclib "cloud.o-forge.io/core/oc-lib"
2024-08-07 17:24:09 +02:00
"cloud.o-forge.io/core/oc-lib/logs"
2025-02-14 12:00:29 +01:00
"cloud.o-forge.io/core/oc-lib/models/booking"
"cloud.o-forge.io/core/oc-lib/models/peer"
2024-09-24 11:43:42 +02:00
"cloud.o-forge.io/core/oc-lib/models/utils"
2024-08-07 17:24:09 +02:00
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
2025-02-05 08:36:26 +01:00
"cloud.o-forge.io/core/oc-lib/tools"
2024-08-07 17:24:09 +02:00
2025-02-14 12:00:29 +01:00
tools2 "oc-monitord/tools"
"github.com/akamensky/argparse"
2024-08-06 11:40:30 +02:00
"github.com/google/uuid"
"github.com/goraz/onion"
"github.com/rs/zerolog"
2024-07-04 09:14:25 +02:00
)
2024-08-29 10:17:31 +02:00
// Command-line args:
// - url: Loki URL (default: "http://127.0.0.1:3100")
// - execution: Workflow Execution ID (required) to identify the current execution, allows to retrieve Workflow
// - mongo: MongoDB URL (default: "mongodb://127.0.0.1:27017")
// - db: MongoDB database name (default: "DC_myDC")
// - timeout: Execution timeout (default: -1)
var logger zerolog.Logger    // process-wide logger, created in initOnion
var wf_logger zerolog.Logger // logger dedicated to the monitored workflow
var parser argparse.Parser   // command-line parser, flags declared in setConf
var workflowName string      // name extracted from the generated Argo file path (see getContainerName)
// Configuration files: the local file, when present, overrides the default one (see initOnion).
const defaultConfigFile = "/etc/oc/ocmonitord_conf.json"
const localConfigFile = "./conf/local_ocmonitord_conf.json"
2024-07-04 09:14:25 +02:00
func main() {
2024-08-29 10:17:31 +02:00
2025-02-17 16:54:25 +01:00
os.Setenv("test_service", "true") // Only for service demo, delete before merging on main
parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and sends the produced logs to a loki database")
loadConfig(false, &parser)
2024-11-07 13:36:28 +01:00
oclib.InitDaemon("oc-monitord")
2024-09-05 11:23:43 +02:00
oclib.SetConfig(
conf.GetConfig().MongoURL,
conf.GetConfig().Database,
conf.GetConfig().NatsURL,
conf.GetConfig().LokiURL,
conf.GetConfig().Logs,
)
2024-09-25 15:46:39 +02:00
logger = u.GetLogger()
2024-09-25 15:46:39 +02:00
logger.Debug().Msg("Loki URL : " + conf.GetConfig().LokiURL)
logger.Debug().Msg("Workflow executed : " + conf.GetConfig().ExecutionID)
2025-02-17 16:54:25 +01:00
exec := u.GetExecution(conf.GetConfig().ExecutionID)
2025-02-14 12:00:29 +01:00
conf.GetConfig().WorkflowID = exec.WorkflowID
2025-02-14 12:00:29 +01:00
logger.Debug().Msg("Starting construction of yaml argo for workflow :" + exec.WorkflowID)
2024-08-13 16:15:39 +02:00
if _, err := os.Stat("./argo_workflows/"); os.IsNotExist(err) {
os.Mkdir("./argo_workflows/", 0755)
2024-08-13 16:15:39 +02:00
logger.Info().Msg("Created argo_workflows/")
}
2024-08-07 17:24:09 +02:00
// // create argo
2024-08-06 11:40:30 +02:00
new_wf := workflow_builder.WorflowDB{}
2025-02-05 08:36:26 +01:00
err := new_wf.LoadFrom(conf.GetConfig().WorkflowID, conf.GetConfig().PeerID)
2024-08-06 11:40:30 +02:00
if err != nil {
2024-08-06 11:40:30 +02:00
logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
}
builder, _, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout) // Removed stepMax so far, I don't know if we need it anymore
2024-08-06 11:40:30 +02:00
if err != nil {
logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
2024-08-06 11:40:30 +02:00
logger.Error().Msg(err.Error())
2024-07-04 09:14:25 +02:00
}
argoFilePath, err := builder.CompleteBuild(exec.ExecutionsID)
if err != nil {
logger.Error().Msg(err.Error())
}
2024-08-06 11:40:30 +02:00
workflowName = getContainerName(argoFilePath)
2024-08-06 11:40:30 +02:00
wf_logger := u.GetWFLogger(workflowName)
2024-08-06 11:40:30 +02:00
wf_logger.Debug().Msg("Testing argo name")
if conf.GetConfig().KubeHost == "" {
// Not in a k8s environment, get conf from parameters
fmt.Println("Executes outside of k8s")
executeOutside(argoFilePath, builder.Workflow)
} else {
// Executed in a k8s environment
fmt.Println("Executes inside a k8s")
// executeInside(exec.GetID(), "argo", argo_file_path, stepMax) // commenting to use conf.ExecutionID instead of exec.GetID()
executeInside(conf.GetConfig().ExecutionID, conf.GetConfig().ExecutionID, argoFilePath)
2024-08-07 17:24:09 +02:00
}
2024-07-04 09:14:25 +02:00
}
// So far we only log the output from
func executeInside(execID string, ns string, argo_file_path string) {
2025-02-14 12:00:29 +01:00
t, err := tools2.NewService(conf.GetConfig().Mode)
if err != nil {
logger.Error().Msg("Could not create KubernetesTool")
return
2025-02-14 12:00:29 +01:00
}
2025-02-17 16:54:25 +01:00
name, err := t.CreateArgoWorkflow(argo_file_path, ns)
_ = name
2025-02-17 16:54:25 +01:00
if err != nil {
logger.Error().Msg("Could not create argo workflow : " + err.Error())
fmt.Println("CA :" + conf.GetConfig().KubeCA)
fmt.Println("Cert :" + conf.GetConfig().KubeCert)
fmt.Println("Data :" + conf.GetConfig().KubeData)
return
2025-02-17 16:54:25 +01:00
} else {
watcher, err := t.GetArgoWatch(execID, workflowName)
if err != nil {
logger.Error().Msg("Could not retrieve Watcher : " + err.Error())
}
l.LogKubernetesArgo(name, execID, watcher)
2025-02-17 16:54:25 +01:00
if err != nil {
logger.Error().Msg("Could not log workflow : " + err.Error())
}
}
2025-02-14 12:00:29 +01:00
}
2024-09-03 15:57:35 +02:00
func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
var stdoutSubmit, stderrSubmit io.ReadCloser
var stdoutLogs, stderrLogs io.ReadCloser
var wg sync.WaitGroup
2024-08-06 11:40:30 +02:00
var err error
2025-04-10 11:10:16 +02:00
logger.Debug().Msg("executing :" + "argo submit --watch " + argo_file_path + " --serviceaccount sa-" + conf.GetConfig().ExecutionID + " -n " + conf.GetConfig().ExecutionID )
cmdSubmit := exec.Command("argo", "submit", "--watch", argo_file_path, "--serviceaccount", "sa-"+conf.GetConfig().ExecutionID, "-n", conf.GetConfig().ExecutionID)
if stdoutSubmit, err = cmdSubmit.StdoutPipe(); err != nil {
2024-08-06 11:40:30 +02:00
wf_logger.Error().Msg("Could not retrieve stdoutpipe " + err.Error())
return
}
2025-04-10 11:10:16 +02:00
cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow","--no-color")
if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs'" + err.Error())
return
2024-08-08 10:11:40 +02:00
}
var steps []string
for _, template := range workflow.Spec.Templates {
steps = append(steps, template.Name)
}
go l.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
go l.LogLocalPod(workflowName, stdoutLogs, steps, &wg)
fmt.Println("Starting argo submit")
if err := cmdSubmit.Start(); err != nil {
wf_logger.Error().Msg("Could not start argo submit")
wf_logger.Error().Msg(err.Error() + bufio.NewScanner(stderrSubmit).Text())
updateStatus("fatal", "")
}
time.Sleep(5 * time.Second)
fmt.Println("Running argo logs")
if err := cmdLogs.Run(); err != nil {
2025-04-10 11:10:16 +02:00
wf_logger.Error().Msg("Could not run '" + strings.Join(cmdLogs.Args, " ") + "'")
wf_logger.Fatal().Msg(err.Error() + bufio.NewScanner(stderrLogs).Text())
}
fmt.Println("Waiting argo submit")
if err := cmdSubmit.Wait(); err != nil {
2024-08-06 11:40:30 +02:00
wf_logger.Error().Msg("Could not execute argo submit")
wf_logger.Error().Msg(err.Error() + bufio.NewScanner(stderrSubmit).Text())
2025-02-14 12:00:29 +01:00
updateStatus("fatal", "")
2024-08-06 11:40:30 +02:00
}
2024-08-07 17:24:09 +02:00
wg.Wait()
2024-08-06 11:40:30 +02:00
}
func loadConfig(is_k8s bool, parser *argparse.Parser) {
var o *onion.Onion
o = initOnion(o)
2024-08-06 11:40:30 +02:00
setConf(is_k8s, o, parser)
// if !IsValidUUID(conf.GetConfig().ExecutionID) {
// logger.Fatal().Msg("Provided ID is not an UUID")
// }
2024-08-06 11:40:30 +02:00
}
func setConf(is_k8s bool, o *onion.Onion, parser *argparse.Parser) {
2025-02-17 16:54:25 +01:00
url := parser.String("u", "url", &argparse.Options{Required: true, Default: "http://127.0.0.1:3100", Help: "Url to the Loki database logs will be sent to"})
mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "", Help: "Mode of the execution"})
execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
peer := parser.String("p", "peer", &argparse.Options{Required: false, Default: "", Help: "Peer ID of the workflow to request from oc-catalog API"})
mongo := parser.String("m", "mongo", &argparse.Options{Required: true, Default: "mongodb://127.0.0.1:27017", Help: "URL to reach the MongoDB"})
db := parser.String("d", "database", &argparse.Options{Required: true, Default: "DC_myDC", Help: "Name of the database to query in MongoDB"})
timeout := parser.Int("t", "timeout", &argparse.Options{Required: false, Default: -1, Help: "Timeout for the execution of the workflow"})
ca := parser.String("c", "ca", &argparse.Options{Required: false, Default: "", Help: "CA file for the Kubernetes cluster"})
cert := parser.String("C", "cert", &argparse.Options{Required: false, Default: "", Help: "Cert file for the Kubernetes cluster"})
data := parser.String("D", "data", &argparse.Options{Required: false, Default: "", Help: "Data file for the Kubernetes cluster"})
host := parser.String("H", "host", &argparse.Options{Required: false, Default: "", Help: "Host for the Kubernetes cluster"})
port := parser.String("P", "port", &argparse.Options{Required: false, Default: "6443", Help: "Port for the Kubernetes cluster"})
// argoHost := parser.String("h", "argoHost", &argparse.Options{Required: false, Default: "", Help: "Host where Argo is running from"}) // can't use -h because its reserved to help
2025-02-17 16:54:25 +01:00
err := parser.Parse(os.Args)
if err != nil {
fmt.Println(parser.Usage(err))
os.Exit(1)
}
conf.GetConfig().Logs = "debug"
conf.GetConfig().LokiURL = *url
conf.GetConfig().MongoURL = *mongo
conf.GetConfig().Database = *db
conf.GetConfig().Timeout = *timeout
conf.GetConfig().Mode = *mode
conf.GetConfig().ExecutionID = *execution
conf.GetConfig().PeerID = *peer
conf.GetConfig().KubeHost = *host
conf.GetConfig().KubePort = *port
// conf.GetConfig().ArgoHost = *argoHost
2025-02-17 16:54:25 +01:00
decoded, err := base64.StdEncoding.DecodeString(*ca)
if err == nil {
conf.GetConfig().KubeCA = string(decoded)
}
decoded, err = base64.StdEncoding.DecodeString(*cert)
if err == nil {
conf.GetConfig().KubeCert = string(decoded)
}
decoded, err = base64.StdEncoding.DecodeString(*data)
if err == nil {
conf.GetConfig().KubeData = string(decoded)
2024-08-06 11:40:30 +02:00
}
}
func initOnion(o *onion.Onion) *onion.Onion {
2024-09-05 11:23:43 +02:00
logger = logs.CreateLogger("oc-monitord")
configFile := ""
l3 := onion.NewEnvLayerPrefix("_", "OCMONITORD")
l2, err := onion.NewFileLayer(defaultConfigFile, nil)
if err == nil {
logger.Info().Msg("Config file found : " + defaultConfigFile)
configFile = defaultConfigFile
}
l1, err := onion.NewFileLayer(localConfigFile, nil)
if err == nil {
logger.Info().Msg("Local config file found " + localConfigFile + ", overriding default file")
configFile = localConfigFile
}
if configFile == "" {
logger.Info().Msg("No config file found, using env")
o = onion.New(l3)
} else if l1 == nil && l2 == nil {
o = onion.New(l1, l2, l3)
} else if l1 == nil {
o = onion.New(l2, l3)
} else if l2 == nil {
o = onion.New(l1, l3)
}
2024-08-06 11:40:30 +02:00
return o
}
2024-08-06 11:40:30 +02:00
func IsValidUUID(u string) bool {
_, err := uuid.Parse(u)
return err == nil
}
// containerNameRegex matches the first "letters-letters" pair in the Argo
// file path (e.g. "oc-monitor" in "argo_workflows/oc-monitor-x.yaml").
// Compiled once at package level instead of on every call.
var containerNameRegex = regexp.MustCompile("([a-zA-Z]+-[a-zA-Z]+)")

// getContainerName extracts the workflow/container name from the path of
// the generated Argo file; it returns "" when no match is found.
func getContainerName(argo_file string) string {
	return containerNameRegex.FindString(argo_file)
}
2024-08-07 17:24:09 +02:00
2025-02-14 12:00:29 +01:00
func updateStatus(status string, log string) {
2024-08-07 17:24:09 +02:00
exec_id := conf.GetConfig().ExecutionID
2025-02-14 12:00:29 +01:00
wf_exec := &workflow_execution.WorkflowExecution{AbstractObject: utils.AbstractObject{UUID: conf.GetConfig().ExecutionID}}
2024-08-07 17:24:09 +02:00
wf_exec.ArgoStatusToState(status)
2025-02-14 12:00:29 +01:00
exec, _, err := workflow_execution.NewAccessor(&tools.APIRequest{
2025-02-05 08:36:26 +01:00
PeerID: conf.GetConfig().PeerID,
}).UpdateOne(wf_exec, exec_id)
if err != nil {
logger.Error().Msg("Could not update status for workflow execution " + exec_id + err.Error())
2024-08-07 17:24:09 +02:00
}
2025-02-14 12:00:29 +01:00
splitted := strings.Split(log, "-")
if len(splitted) > 1 {
we := exec.(*workflow_execution.WorkflowExecution)
itemID := splitted[len(splitted)-1] // TODO: in logs found item ID
caller := &tools.HTTPCaller{
URLS: map[tools.DataType]map[tools.METHOD]string{
tools.PEER: {
tools.POST: "/status/",
},
tools.BOOKING: {
tools.PUT: "http://localhost:8080/booking/:id",
},
},
}
if we.PeerBookByGraph != nil {
for peerID, val := range we.PeerBookByGraph {
if val[itemID] == nil {
continue
}
for _, log := range val[itemID] {
(&peer.Peer{}).LaunchPeerExecution(peerID, log, tools.BOOKING, tools.PUT, &booking.Booking{
State: we.State,
}, caller)
}
}
}
}
}