Execution workflow execute change
This commit is contained in:
@@ -11,6 +11,8 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
||||
"github.com/rs/zerolog"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
|
||||
@@ -93,7 +95,6 @@ func (a *ArgoLogs) StartStepRecording(current_watch *ArgoWatch, logger zerolog.L
|
||||
a.Started = time.Now()
|
||||
}
|
||||
|
||||
|
||||
type ArgoPodLog struct {
|
||||
PodName string
|
||||
Step string
|
||||
@@ -108,7 +109,7 @@ func NewArgoPodLog(name string, step string, msg string) ArgoPodLog {
|
||||
}
|
||||
}
|
||||
|
||||
func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface) {
|
||||
func LogKubernetesArgo(wfName string, execID string, namespace string, watcher watch.Interface) {
|
||||
var argoWatcher *ArgoWatch
|
||||
var pods []string
|
||||
var node wfv1.NodeStatus
|
||||
@@ -118,7 +119,7 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
|
||||
for event := range (watcher.ResultChan()) {
|
||||
for event := range watcher.ResultChan() {
|
||||
wf, ok := event.Object.(*wfv1.Workflow)
|
||||
if !ok {
|
||||
wfl.Error().Msg("unexpected type")
|
||||
@@ -167,7 +168,10 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
|
||||
for _, pod := range wf.Status.Nodes {
|
||||
if !slices.Contains(pods, pod.Name) {
|
||||
pl := wfl.With().Str("pod", pod.Name).Logger()
|
||||
if wfName == pod.Name { pods = append(pods, pod.Name); continue } // One of the node is the Workflow, the others are the pods so don't try to log on the wf name
|
||||
if wfName == pod.Name {
|
||||
pods = append(pods, pod.Name)
|
||||
continue
|
||||
} // One of the node is the Workflow, the others are the pods so don't try to log on the wf name
|
||||
pl.Info().Msg("Found a new pod to log : " + pod.Name)
|
||||
wg.Add(1)
|
||||
go logKubernetesPods(namespace, wfName, pod.Name, pl, &wg)
|
||||
@@ -180,10 +184,16 @@ func LogKubernetesArgo(wfName string, namespace string, watcher watch.Interface)
|
||||
wfl.Info().Msg(wfName + " worflow completed")
|
||||
wg.Wait()
|
||||
wfl.Info().Msg(wfName + " exiting")
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.SUCCESS.EnumIndex(),
|
||||
}, execID)
|
||||
break
|
||||
}
|
||||
if node.Phase.FailedOrError() {
|
||||
wfl.Error().Msg(wfName + "has failed, please refer to the logs")
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.FAILURE.EnumIndex(),
|
||||
}, execID)
|
||||
wfl.Error().Msg(node.Message)
|
||||
break
|
||||
}
|
||||
|
||||
97
main.go
97
main.go
@@ -1,16 +1,11 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"oc-monitord/conf"
|
||||
l "oc-monitord/logger"
|
||||
@@ -21,6 +16,7 @@ import (
|
||||
|
||||
"cloud.o-forge.io/core/oc-lib/logs"
|
||||
"cloud.o-forge.io/core/oc-lib/models/booking"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
||||
"cloud.o-forge.io/core/oc-lib/models/peer"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
|
||||
@@ -53,7 +49,7 @@ func main() {
|
||||
|
||||
os.Setenv("test_service", "true") // Only for service demo, delete before merging on main
|
||||
parser = *argparse.NewParser("oc-monitord", "Launch the execution of a workflow given as a parameter and sends the produced logs to a loki database")
|
||||
loadConfig(false, &parser)
|
||||
loadConfig(&parser)
|
||||
oclib.InitDaemon("oc-monitord")
|
||||
|
||||
logger = u.GetLogger()
|
||||
@@ -63,6 +59,10 @@ func main() {
|
||||
exec := u.GetExecution(conf.GetConfig().ExecutionID)
|
||||
if exec == nil {
|
||||
logger.Fatal().Msg("Could not retrieve workflow ID from execution ID " + conf.GetConfig().ExecutionID + " on peer " + conf.GetConfig().PeerID)
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.FAILURE.EnumIndex(),
|
||||
}, conf.GetConfig().ExecutionID)
|
||||
return
|
||||
}
|
||||
conf.GetConfig().WorkflowID = exec.WorkflowID
|
||||
|
||||
@@ -85,29 +85,36 @@ func main() {
|
||||
if err != nil {
|
||||
logger.Error().Msg("Could not create the Argo file for " + conf.GetConfig().WorkflowID)
|
||||
logger.Error().Msg(err.Error())
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.FAILURE.EnumIndex(),
|
||||
}, exec.GetID())
|
||||
return
|
||||
}
|
||||
|
||||
argoFilePath, err := builder.CompleteBuild(exec.ExecutionsID)
|
||||
if err != nil {
|
||||
logger.Error().Msg("Error when completing the build of the workflow: " + err.Error())
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.FAILURE.EnumIndex(),
|
||||
}, exec.GetID())
|
||||
return
|
||||
}
|
||||
|
||||
workflowName = getContainerName(argoFilePath)
|
||||
|
||||
if conf.GetConfig().KubeHost == "" {
|
||||
// Not in a k8s environment, get conf from parameters
|
||||
logger.Info().Msg("Executes outside of k8s")
|
||||
executeOutside(argoFilePath, builder.Workflow)
|
||||
panic("can't exec with no kube for argo deployment")
|
||||
} else {
|
||||
// Executed in a k8s environment
|
||||
logger.Info().Msg("Executes inside a k8s")
|
||||
// executeInside(exec.GetID(), "argo", argo_file_path, stepMax) // commenting to use conf.ExecutionID instead of exec.GetID()
|
||||
executeInside(exec.ExecutionsID, argoFilePath)
|
||||
executeInside(exec.ExecutionsID, exec.GetID(), argoFilePath)
|
||||
}
|
||||
}
|
||||
|
||||
// So far we only log the output from
|
||||
func executeInside(ns string, argo_file_path string) {
|
||||
func executeInside(ns string, execID string, argo_file_path string) {
|
||||
t, err := tools2.NewService(conf.GetConfig().Mode)
|
||||
if err != nil {
|
||||
logger.Error().Msg("Could not create KubernetesTool")
|
||||
@@ -126,84 +133,28 @@ func executeInside(ns string, argo_file_path string) {
|
||||
watcher, err := t.GetArgoWatch(ns, workflowName)
|
||||
if err != nil {
|
||||
logger.Error().Msg("Could not retrieve Watcher : " + err.Error())
|
||||
oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.WORKFLOW_EXECUTION), nil).UpdateOne(map[string]interface{}{
|
||||
"state": enum.FAILURE.EnumIndex(),
|
||||
}, execID)
|
||||
}
|
||||
|
||||
l.LogKubernetesArgo(name, ns, watcher)
|
||||
if err != nil {
|
||||
logger.Error().Msg("Could not log workflow : " + err.Error())
|
||||
}
|
||||
|
||||
l.LogKubernetesArgo(name, execID, ns, watcher)
|
||||
logger.Info().Msg("Finished, exiting...")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func executeOutside(argo_file_path string, workflow workflow_builder.Workflow) {
|
||||
var stdoutSubmit, stderrSubmit io.ReadCloser
|
||||
var stdoutLogs, stderrLogs io.ReadCloser
|
||||
var wg sync.WaitGroup
|
||||
var err error
|
||||
|
||||
logger.Debug().Msg("executing :" + "argo submit --watch " + argo_file_path + " --serviceaccount sa-" + conf.GetConfig().ExecutionID + " -n " + conf.GetConfig().ExecutionID)
|
||||
|
||||
cmdSubmit := exec.Command("argo", "submit", "--watch", argo_file_path, "--serviceaccount", "sa-"+conf.GetConfig().ExecutionID, "-n", conf.GetConfig().ExecutionID)
|
||||
if stdoutSubmit, err = cmdSubmit.StdoutPipe(); err != nil {
|
||||
wf_logger.Error().Msg("Could not retrieve stdoutpipe " + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
cmdLogs := exec.Command("argo", "logs", "oc-monitor-"+workflowName, "-n", conf.GetConfig().ExecutionID, "--follow", "--no-color")
|
||||
if stdoutLogs, err = cmdLogs.StdoutPipe(); err != nil {
|
||||
wf_logger.Error().Msg("Could not retrieve stdoutpipe for 'argo logs'" + err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var steps []string
|
||||
for _, template := range workflow.Spec.Templates {
|
||||
steps = append(steps, template.Name)
|
||||
}
|
||||
|
||||
go l.LogLocalWorkflow(workflowName, stdoutSubmit, &wg)
|
||||
go l.LogLocalPod(workflowName, stdoutLogs, steps, &wg)
|
||||
|
||||
logger.Info().Msg("Starting argo submit")
|
||||
if err := cmdSubmit.Start(); err != nil {
|
||||
wf_logger.Error().Msg("Could not start argo submit")
|
||||
wf_logger.Error().Msg(err.Error() + bufio.NewScanner(stderrSubmit).Text())
|
||||
updateStatus("fatal", "")
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
logger.Info().Msg("Running argo logs")
|
||||
if err := cmdLogs.Run(); err != nil {
|
||||
wf_logger.Error().Msg("Could not run '" + strings.Join(cmdLogs.Args, " ") + "'")
|
||||
|
||||
wf_logger.Fatal().Msg(err.Error() + bufio.NewScanner(stderrLogs).Text())
|
||||
|
||||
}
|
||||
|
||||
logger.Info().Msg("Waiting argo submit")
|
||||
if err := cmdSubmit.Wait(); err != nil {
|
||||
wf_logger.Error().Msg("Could not execute argo submit")
|
||||
wf_logger.Error().Msg(err.Error() + bufio.NewScanner(stderrSubmit).Text())
|
||||
updateStatus("fatal", "")
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func loadConfig(is_k8s bool, parser *argparse.Parser) {
|
||||
func loadConfig(parser *argparse.Parser) {
|
||||
var o *onion.Onion
|
||||
o = initOnion(o)
|
||||
setConf(is_k8s, o, parser)
|
||||
setConf(parser)
|
||||
|
||||
// if !IsValidUUID(conf.GetConfig().ExecutionID) {
|
||||
// logger.Fatal().Msg("Provided ID is not an UUID")
|
||||
// }
|
||||
}
|
||||
|
||||
func setConf(is_k8s bool, o *onion.Onion, parser *argparse.Parser) {
|
||||
func setConf(parser *argparse.Parser) {
|
||||
url := parser.String("u", "url", &argparse.Options{Required: true, Default: "http://127.0.0.1:3100", Help: "Url to the Loki database logs will be sent to"})
|
||||
mode := parser.String("M", "mode", &argparse.Options{Required: false, Default: "", Help: "Mode of the execution"})
|
||||
execution := parser.String("e", "execution", &argparse.Options{Required: true, Help: "Execution ID of the workflow to request from oc-catalog API"})
|
||||
|
||||
Reference in New Issue
Block a user