Prepull for pod & Asym Jobs
This commit is contained in:
@@ -21,6 +21,7 @@ var _service = map[string]func() (Tool, error){
|
||||
}
|
||||
|
||||
func NewService(name string) (Tool, error) {
|
||||
return NewKubernetesTool()
|
||||
service, ok := _service[name]
|
||||
if !ok {
|
||||
return nil, errors.New("service not found")
|
||||
|
||||
@@ -8,7 +8,6 @@ import (
|
||||
"oc-monitord/conf"
|
||||
"oc-monitord/utils"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
wfv1 "github.com/argoproj/argo-workflows/v3/pkg/apis/workflow/v1alpha1"
|
||||
"github.com/argoproj/argo-workflows/v3/pkg/client/clientset/versioned"
|
||||
@@ -76,7 +75,7 @@ func (k *KubernetesTools) CreateArgoWorkflow(path string, ns string) (string, er
|
||||
if !ok {
|
||||
return "", errors.New("decoded object is not a Workflow")
|
||||
}
|
||||
|
||||
fmt.Println("NAMESPACE", ns)
|
||||
// Create the workflow in the namespace given by ns
|
||||
createdWf, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(ns).Create(context.TODO(), workflow, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
@@ -96,7 +95,7 @@ func (k *KubernetesTools) CreateAccessSecret(access string, password string, sto
|
||||
}
|
||||
|
||||
// Define the Secret object
|
||||
name := storageId+"-secret-s3"
|
||||
name := storageId + "-secret-s3"
|
||||
secret := &v1.Secret{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
@@ -116,9 +115,9 @@ func (k *KubernetesTools) CreateAccessSecret(access string, password string, sto
|
||||
|
||||
func (k *KubernetesTools) GetS3Secret(storageId string, namespace string) *v1.Secret {
|
||||
|
||||
secret, err := k.Set.CoreV1().Secrets(namespace).Get(context.TODO(), storageId + "-secret-s3", metav1.GetOptions{})
|
||||
secret, err := k.Set.CoreV1().Secrets(namespace).Get(context.TODO(), storageId+"-secret-s3", metav1.GetOptions{})
|
||||
// Get(context.TODO(),storageId + "-artifact-server", metav1.GetOptions{})
|
||||
|
||||
|
||||
if err != nil && !k8serrors.IsNotFound(err) {
|
||||
l := utils.GetLogger()
|
||||
l.Fatal().Msg("An error happened when retrieving secret in " + namespace + " : " + err.Error())
|
||||
@@ -128,77 +127,83 @@ func (k *KubernetesTools) GetS3Secret(storageId string, namespace string) *v1.Se
|
||||
}
|
||||
|
||||
return secret
|
||||
// return secret
|
||||
// return secret
|
||||
}
|
||||
|
||||
|
||||
func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch.Interface, error){
|
||||
options := metav1.ListOptions{FieldSelector: "metadata.name=oc-monitor-"+wfName}
|
||||
func (k *KubernetesTools) GetArgoWatch(executionId string, wfName string) (watch.Interface, error) {
|
||||
options := metav1.ListOptions{FieldSelector: "metadata.name=oc-monitor-" + wfName}
|
||||
|
||||
watcher, err := k.VersionedSet.ArgoprojV1alpha1().Workflows(executionId).Watch(context.Background(), options)
|
||||
if err != nil {
|
||||
return nil, errors.New("Error executing 'argo watch " + wfName + " -n " + executionId + " with ArgoprojV1alpha1 client")
|
||||
}
|
||||
|
||||
return watcher, nil
|
||||
return watcher, nil
|
||||
|
||||
}
|
||||
|
||||
func (k *KubernetesTools) GetPodLogger(ns string, wfName string, nodeName string) (io.ReadCloser, error) {
|
||||
var targetPod v1.Pod
|
||||
|
||||
|
||||
pods, err := k.Set.CoreV1().Pods(ns).List(context.Background(), metav1.ListOptions{
|
||||
LabelSelector: "workflows.argoproj.io/workflow="+wfName,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list pods: " + err.Error())
|
||||
}
|
||||
if len(pods.Items) == 0 {
|
||||
|
||||
return nil, fmt.Errorf("no pods found with label workflows.argoproj.io/workflow="+ wfName + " no pods found with label workflows.argoproj.io/node-name=" + nodeName + " in namespace " + ns)
|
||||
}
|
||||
|
||||
for _, pod := range pods.Items {
|
||||
LabelSelector: "workflows.argoproj.io/workflow=" + wfName,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s", "failed to list pods: "+err.Error())
|
||||
}
|
||||
if len(pods.Items) == 0 {
|
||||
|
||||
return nil, fmt.Errorf("%s", "no pods found with label workflows.argoproj.io/workflow="+wfName+" no pods found with label workflows.argoproj.io/node-name="+nodeName+" in namespace "+ns)
|
||||
}
|
||||
|
||||
for _, pod := range pods.Items {
|
||||
if pod.Annotations["workflows.argoproj.io/node-name"] == nodeName {
|
||||
targetPod = pod
|
||||
}
|
||||
}
|
||||
|
||||
if targetPod.Name == "" {
|
||||
return nil, fmt.Errorf("no pod found matching node-name %s in namespace %s", nodeName, ns)
|
||||
}
|
||||
|
||||
// The k8s API returns an error if we try to get logs while the containers are not yet initialized, so we repeat the status check here
|
||||
k.testPodReady(targetPod, ns)
|
||||
|
||||
|
||||
// When using kubectl logs for a pod, we see it contacts /api/v1/namespaces/NAMESPACE/pods/oc-monitor-PODNAME/log?container=main, so we add this container (main) to the call
|
||||
req, err := k.Set.CoreV1().Pods(ns).GetLogs(targetPod.Name, &v1.PodLogOptions{Follow: true, Container: "main"}). Stream(context.Background())
|
||||
req, err := k.Set.CoreV1().Pods(ns).GetLogs(targetPod.Name, &v1.PodLogOptions{Follow: true, Container: "main"}).Stream(context.Background())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf(" Error when trying to get logs for " + targetPod.Name + " : " + err.Error())
|
||||
return nil, fmt.Errorf("%s", " Error when trying to get logs for "+targetPod.Name+" : "+err.Error())
|
||||
}
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (k *KubernetesTools) testPodReady(pod v1.Pod, ns string) {
|
||||
for {
|
||||
pod, err := k.Set.CoreV1().Pods(ns).Get(context.Background(), pod.Name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
wfl := utils.GetWFLogger("")
|
||||
wfl.Error().Msg("Error fetching pod: " + err.Error() + "\n")
|
||||
break
|
||||
wfl := utils.GetWFLogger("")
|
||||
|
||||
watcher, err := k.Set.CoreV1().Pods(ns).Watch(context.Background(), metav1.ListOptions{
|
||||
FieldSelector: "metadata.name=" + pod.Name,
|
||||
ResourceVersion: pod.ResourceVersion,
|
||||
})
|
||||
if err != nil {
|
||||
wfl.Error().Msg("Error watching pod: " + err.Error() + "\n")
|
||||
return
|
||||
}
|
||||
defer watcher.Stop()
|
||||
|
||||
for event := range watcher.ResultChan() {
|
||||
p, ok := event.Object.(*v1.Pod)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
var initialized bool
|
||||
for _, cond := range pod.Status.Conditions {
|
||||
// It seems that for remote pods the pod reaches the Succeeded phase before it has time to report that it is ready to run in .status.conditions, so we added the OR condition
|
||||
if (cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue) || pod.Status.Phase == v1.PodSucceeded {
|
||||
initialized = true
|
||||
// It seems that for remote pods the pod reaches the Succeeded phase before it has time to report that it is ready to run in .status.conditions, so we also check for the Succeeded phase
|
||||
if p.Status.Phase == v1.PodSucceeded {
|
||||
return
|
||||
}
|
||||
for _, cond := range p.Status.Conditions {
|
||||
if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
if initialized {
|
||||
return
|
||||
}
|
||||
|
||||
time.Sleep(2 * time.Second) // avoid hammering the API
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user