package main
|
|
|
|
import (
|
|
"bufio"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"regexp"
|
|
"slices"
|
|
"strings"
|
|
"sync"
|
|
|
|
"oc-monitord/conf"
|
|
"oc-monitord/models"
|
|
u "oc-monitord/utils"
|
|
"oc-monitord/workflow_builder"
|
|
|
|
oclib "cloud.o-forge.io/core/oc-lib"
|
|
|
|
"cloud.o-forge.io/core/oc-lib/logs"
|
|
"cloud.o-forge.io/core/oc-lib/models/booking"
|
|
"cloud.o-forge.io/core/oc-lib/models/peer"
|
|
"cloud.o-forge.io/core/oc-lib/models/utils"
|
|
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
|
|
"cloud.o-forge.io/core/oc-lib/tools"
|
|
|
|
tools2 "oc-monitord/tools"
|
|
|
|
"github.com/akamensky/argparse"
|
|
"github.com/google/uuid"
|
|
"github.com/goraz/onion"
|
|
"github.com/rs/zerolog"
|
|
)
|
|
|
|
// Command-line args:
|
|
// - url: Loki URL (default: "http://127.0.0.1:3100")
|
|
// - execution: Workflow Execution ID (required) to identify the current execution, allows to retrieve Workflow
|
|
// - mongo: MongoDB URL (default: "mongodb://127.0.0.1:27017")
|
|
// - db: MongoDB database name (default: "DC_myDC")
|
|
// - timeout: Execution timeout (default: -1)
|
|
|
|
// logger is the daemon-wide logger; wf_logger adds per-workflow context
// (argo name, workflow ID, execution ID) once the workflow file is built.
var logger zerolog.Logger
var wf_logger zerolog.Logger

// parser is the CLI argument parser shared by loadConfig/setConf.
var parser argparse.Parser

// workflowName is the Argo container name derived from the generated file
// (see getContainerName).
var workflowName string

// Config file locations: the local file, when present, overrides the default.
const defaultConfigFile = "/etc/oc/ocmonitord_conf.json"
const localConfigFile = "./conf/local_ocmonitord_conf.json"
|
|
|
|
func main() {
|
|
|
|
os.Setenv("test_service", "true") // Only for service demo, delete before merging on main
|
|
parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and sends the produced logs to a loki database")
|
|
loadConfig(false, &parser)
|
|
oclib.InitDaemon("oc-monitord")
|
|
|
|
oclib.SetConfig(
|
|
conf.GetConfig().MongoURL,
|
|
conf.GetConfig().Database,
|
|
conf.GetConfig().NatsURL,
|
|
conf.GetConfig().LokiURL,
|
|
conf.GetConfig().Logs,
|
|
)
|
|
|
|
logger = logs.CreateLogger("oc-monitord")
|
|
|
|
logger.Debug().Msg("Loki URL : " + conf.GetConfig().LokiURL)
|
|
logger.Debug().Msg("Workflow executed : " + conf.GetConfig().ExecutionID)
|
|
exec := u.GetExecution(conf.GetConfig().ExecutionID)
|
|
conf.GetConfig().WorkflowID = exec.WorkflowID
|
|
|
|
logger.Debug().Msg("Starting construction of yaml argo for workflow :" + exec.WorkflowID)
|
|
|
|
if _, err := os.Stat("./argo_workflows/"); os.IsNotExist(err) {
|
|
os.Mkdir("./argo_workflows/", 0755)
|
|
logger.Info().Msg("Created argo_workflows/")
|
|
}
|
|
|
|
// // create argo
|
|
new_wf := workflow_builder.WorflowDB{}
|
|
|
|
err := new_wf.LoadFrom(conf.GetConfig().WorkflowID, conf.GetConfig().PeerID)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not retrieve workflow " + conf.GetConfig().WorkflowID + " from oc-catalog API")
|
|
}
|
|
|
|
argo_file_path, stepMax, err := new_wf.ExportToArgo(exec.ExecutionsID, conf.GetConfig().Timeout)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
|
|
logger.Error().Msg(err.Error())
|
|
}
|
|
logger.Debug().Msg("Created :" + argo_file_path)
|
|
|
|
workflowName = getContainerName(argo_file_path)
|
|
|
|
wf_logger = logger.With().Str("argo_name", workflowName).Str("workflow_id", conf.GetConfig().WorkflowID).Str("workflow_execution_id", conf.GetConfig().ExecutionID).Logger()
|
|
wf_logger.Debug().Msg("Testing argo name")
|
|
|
|
if conf.GetConfig().KubeHost == "" {
|
|
// Not in a k8s environment, get conf from parameters
|
|
fmt.Println("Executes outside of k8s")
|
|
executeOutside(argo_file_path, stepMax)
|
|
} else {
|
|
// Executed in a k8s environment
|
|
fmt.Println("Executes inside a k8s")
|
|
executeInside(exec.GetID(), "argo", argo_file_path, stepMax)
|
|
}
|
|
}
|
|
|
|
// So far we only log the output from
|
|
func executeInside(execID string, ns string, argo_file_path string, stepMax int) {
|
|
t, err := tools2.NewService(conf.GetConfig().Mode)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not create KubernetesTool")
|
|
}
|
|
name, err := t.CreateArgoWorkflow(argo_file_path, ns)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not create argo workflow : " + err.Error())
|
|
} else {
|
|
split := strings.Split(argo_file_path, "_")
|
|
argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
|
|
argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
|
|
err := t.LogWorkflow(execID, ns, name, argo_file_path, stepMax, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, logWorkflow)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not log workflow : " + err.Error())
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func executeOutside(argo_file_path string, stepMax int) {
|
|
// var stdout, stderr, stdout_logs, stderr_logs io.ReadCloser
|
|
var stdout, stderr io.ReadCloser
|
|
// var stderr io.ReadCloser
|
|
var err error
|
|
cmd := exec.Command("argo", "submit", "--log", argo_file_path, "--serviceaccount=argo", "-n", "argo")
|
|
if stdout, err = cmd.StdoutPipe(); err != nil {
|
|
wf_logger.Error().Msg("Could not retrieve stdoutpipe " + err.Error())
|
|
return
|
|
}
|
|
if err := cmd.Start(); err != nil {
|
|
panic(err)
|
|
}
|
|
var wg sync.WaitGroup
|
|
split := strings.Split(argo_file_path, "_")
|
|
argoLogs := models.NewArgoLogs(split[0], "argo", stepMax)
|
|
argoLogs.StartStepRecording(argoLogs.NewWatch(), wf_logger)
|
|
argoLogs.IsStreaming = true
|
|
go logWorkflow(argo_file_path, stepMax, stdout, argoLogs.NewWatch(), argoLogs.NewWatch(), argoLogs, []string{}, &wg)
|
|
|
|
if err := cmd.Wait(); err != nil {
|
|
wf_logger.Error().Msg("Could not execute argo submit")
|
|
wf_logger.Error().Msg(err.Error() + bufio.NewScanner(stderr).Text())
|
|
updateStatus("fatal", "")
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
// We could improve this function by creating an object with the same attribute as the output
// and only send a new log if the current object has different values than the previous
//
// logWorkflow consumes the argo CLI output line by line and groups it into
// ArgoWatch records. A record starts at a "capturing logs" line and ends at
// "sub-process exited"; every line in between is appended (deduplicated) to
// current_watch.Logs. In non-streaming mode the WaitGroup holds the caller
// open until every captured step has been flushed.
// NOTE(review): the wg.Add/wg.Done pairing and the count/seeit bookkeeping
// depend on argo's exact output interleaving — confirm against the argo CLI
// version in use before touching this logic.
func logWorkflow(argo_file_path string, stepMax int, pipe io.ReadCloser,
	current_watch *models.ArgoWatch, previous_watch *models.ArgoWatch,
	argoLogs *models.ArgoLogs, seen []string, wg *sync.WaitGroup) {
	scanner := bufio.NewScanner(pipe)
	count := 0  // lines seen since the current step block began
	see := ""   // second line of the block, used as the step's identity marker
	seeit := 0  // number of step blocks currently being captured
	for scanner.Scan() {
		log := scanner.Text()
		if strings.Contains(log, "capturing logs") && count == 0 {
			// Start of a new step block.
			if !argoLogs.IsStreaming {
				wg.Add(1)
			}
			seeit++
		} else if count == 0 {
			// Line outside of any step block: skip it while streaming,
			// otherwise stop reading entirely.
			if argoLogs.IsStreaming {
				continue
			} else {
				break
			}
		}
		if count == 1 {
			see = log
			// Step already handled in a previous pass: undo the Add and stop.
			if slices.Contains(argoLogs.Seen, see) && !argoLogs.IsStreaming {
				wg.Done()
				seeit--
				break
			}
		}
		// Deduplicate lines within the current record.
		if !slices.Contains(current_watch.Logs, log) {
			current_watch.Logs = append(current_watch.Logs, log)
		}
		count++
		if strings.Contains(log, "sub-process exited") {
			// End of the step: close the record, remember it, publish status.
			current_watch = argoLogs.StopStepRecording(current_watch)
			argoLogs.Seen = append(argoLogs.Seen, see)
			if checkStatus(current_watch, previous_watch, argoLogs) {
				count = 0
				if !argoLogs.IsStreaming {
					wg.Done()
				}
				seeit--
			}
			jsonified, err := json.Marshal(current_watch)
			if err != nil {
				logger.Error().Msg("Could not create watch log")
			}
			if current_watch.Status == "Failed" {
				wf_logger.Error().Msg(string(jsonified))
			} else {
				wf_logger.Info().Msg(string(jsonified))
			}
			// Roll the window: current becomes previous, start a fresh record.
			previous_watch = current_watch
			current_watch = &models.ArgoWatch{}
		}
	}
}
|
|
|
|
func loadConfig(is_k8s bool, parser *argparse.Parser) {
|
|
var o *onion.Onion
|
|
o = initOnion(o)
|
|
setConf(is_k8s, o, parser)
|
|
|
|
if !IsValidUUID(conf.GetConfig().ExecutionID) {
|
|
logger.Fatal().Msg("Provided ID is not an UUID")
|
|
}
|
|
}
|
|
|
|
func setConf(is_k8s bool, o *onion.Onion, parser *argparse.Parser) {
|
|
url := parser.String("u", "url", &argparse.Options{Required: true, Default: "http://127.0.0.1:3100", Help: "Url to the Loki database logs will be sent to"})
|
|
mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "", Help: "Mode of the execution"})
|
|
execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
|
|
peer := parser.String("p", "peer", &argparse.Options{Required: false, Default: "", Help: "Peer ID of the workflow to request from oc-catalog API"})
|
|
mongo := parser.String("m", "mongo", &argparse.Options{Required: true, Default: "mongodb://127.0.0.1:27017", Help: "URL to reach the MongoDB"})
|
|
db := parser.String("d", "database", &argparse.Options{Required: true, Default: "DC_myDC", Help: "Name of the database to query in MongoDB"})
|
|
timeout := parser.Int("t", "timeout", &argparse.Options{Required: false, Default: -1, Help: "Timeout for the execution of the workflow"})
|
|
|
|
ca := parser.String("c", "ca", &argparse.Options{Required: false, Default: "", Help: "CA file for the Kubernetes cluster"})
|
|
cert := parser.String("C", "cert", &argparse.Options{Required: false, Default: "", Help: "Cert file for the Kubernetes cluster"})
|
|
data := parser.String("D", "data", &argparse.Options{Required: false, Default: "", Help: "Data file for the Kubernetes cluster"})
|
|
|
|
host := parser.String("H", "host", &argparse.Options{Required: false, Default: "", Help: "Host for the Kubernetes cluster"})
|
|
port := parser.String("P", "port", &argparse.Options{Required: false, Default: "6443", Help: "Port for the Kubernetes cluster"})
|
|
|
|
err := parser.Parse(os.Args)
|
|
if err != nil {
|
|
fmt.Println(parser.Usage(err))
|
|
os.Exit(1)
|
|
}
|
|
conf.GetConfig().Logs = "debug"
|
|
conf.GetConfig().LokiURL = *url
|
|
conf.GetConfig().MongoURL = *mongo
|
|
conf.GetConfig().Database = *db
|
|
conf.GetConfig().Timeout = *timeout
|
|
conf.GetConfig().Mode = *mode
|
|
conf.GetConfig().ExecutionID = *execution
|
|
conf.GetConfig().PeerID = *peer
|
|
|
|
conf.GetConfig().KubeHost = *host
|
|
conf.GetConfig().KubePort = *port
|
|
|
|
decoded, err := base64.StdEncoding.DecodeString(*ca)
|
|
if err == nil {
|
|
conf.GetConfig().KubeCA = string(decoded)
|
|
}
|
|
decoded, err = base64.StdEncoding.DecodeString(*cert)
|
|
if err == nil {
|
|
conf.GetConfig().KubeCert = string(decoded)
|
|
}
|
|
decoded, err = base64.StdEncoding.DecodeString(*data)
|
|
if err == nil {
|
|
conf.GetConfig().KubeData = string(decoded)
|
|
}
|
|
}
|
|
|
|
func initOnion(o *onion.Onion) *onion.Onion {
|
|
logger = logs.CreateLogger("oc-monitord")
|
|
configFile := ""
|
|
|
|
l3 := onion.NewEnvLayerPrefix("_", "OCMONITORD")
|
|
l2, err := onion.NewFileLayer(defaultConfigFile, nil)
|
|
if err == nil {
|
|
logger.Info().Msg("Config file found : " + defaultConfigFile)
|
|
configFile = defaultConfigFile
|
|
}
|
|
l1, err := onion.NewFileLayer(localConfigFile, nil)
|
|
if err == nil {
|
|
logger.Info().Msg("Local config file found " + localConfigFile + ", overriding default file")
|
|
configFile = localConfigFile
|
|
}
|
|
if configFile == "" {
|
|
logger.Info().Msg("No config file found, using env")
|
|
o = onion.New(l3)
|
|
} else if l1 == nil && l2 == nil {
|
|
o = onion.New(l1, l2, l3)
|
|
} else if l1 == nil {
|
|
o = onion.New(l2, l3)
|
|
} else if l2 == nil {
|
|
o = onion.New(l1, l3)
|
|
}
|
|
return o
|
|
}
|
|
|
|
func IsValidUUID(u string) bool {
|
|
_, err := uuid.Parse(u)
|
|
return err == nil
|
|
}
|
|
|
|
// containerNameRe matches the first "<letters>-<letters>" token of a path,
// which is how the exported Argo file name encodes the container name.
// FIX: compiled once at package scope instead of on every call.
var containerNameRe = regexp.MustCompile(`([a-zA-Z]+-[a-zA-Z]+)`)

// getContainerName returns the first letters-hyphen-letters token found in
// the Argo file path, or "" when no such token exists.
func getContainerName(argo_file string) string {
	return containerNameRe.FindString(argo_file)
}
|
|
|
|
// Uses the ArgoWatch object to update status of the workflow execution object
|
|
func checkStatus(current *models.ArgoWatch, previous *models.ArgoWatch, argoLogs *models.ArgoLogs) bool {
|
|
if previous == nil || current.Status != previous.Status || argoLogs.IsStreaming {
|
|
argoLogs.StepCount += 1
|
|
if len(current.Logs) > 0 {
|
|
newLogs := []string{}
|
|
for _, log := range current.Logs {
|
|
if !slices.Contains(argoLogs.Logs, log) {
|
|
newLogs = append(newLogs, log)
|
|
}
|
|
}
|
|
updateStatus(current.Status, strings.Join(newLogs, "\n"))
|
|
current.Logs = newLogs
|
|
argoLogs.Logs = append(argoLogs.Logs, newLogs...)
|
|
} else {
|
|
updateStatus(current.Status, "")
|
|
}
|
|
}
|
|
return previous == nil || current.Status != previous.Status || argoLogs.IsStreaming
|
|
}
|
|
|
|
func updateStatus(status string, log string) {
|
|
exec_id := conf.GetConfig().ExecutionID
|
|
|
|
wf_exec := &workflow_execution.WorkflowExecution{AbstractObject: utils.AbstractObject{UUID: conf.GetConfig().ExecutionID}}
|
|
wf_exec.ArgoStatusToState(status)
|
|
exec, _, err := workflow_execution.NewAccessor(&tools.APIRequest{
|
|
PeerID: conf.GetConfig().PeerID,
|
|
}).UpdateOne(wf_exec, exec_id)
|
|
if err != nil {
|
|
logger.Error().Msg("Could not update status for workflow execution " + exec_id + err.Error())
|
|
}
|
|
splitted := strings.Split(log, "-")
|
|
if len(splitted) > 1 {
|
|
we := exec.(*workflow_execution.WorkflowExecution)
|
|
itemID := splitted[len(splitted)-1] // TODO: in logs found item ID
|
|
caller := &tools.HTTPCaller{
|
|
URLS: map[tools.DataType]map[tools.METHOD]string{
|
|
tools.PEER: {
|
|
tools.POST: "/status/",
|
|
},
|
|
tools.BOOKING: {
|
|
tools.PUT: "http://localhost:8080/booking/:id",
|
|
},
|
|
},
|
|
}
|
|
if we.PeerBookByGraph != nil {
|
|
for peerID, val := range we.PeerBookByGraph {
|
|
if val[itemID] == nil {
|
|
continue
|
|
}
|
|
for _, log := range val[itemID] {
|
|
(&peer.Peer{}).LaunchPeerExecution(peerID, log, tools.BOOKING, tools.PUT, &booking.Booking{
|
|
State: we.State,
|
|
}, caller)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|