improved graph/workflow retrieval

This commit is contained in:
pb
2024-07-23 12:16:20 +02:00
parent 0856c90930
commit 559bac5eb9
5 changed files with 143 additions and 57 deletions

View File

@@ -0,0 +1,277 @@
// Package workflow_builder translates the information held in a Graph
// object (its lists of components) into an Argo workflow file, using the
// list of link IDs to build the DAG.
package workflow_builder
import (
"fmt"
"os"
"slices"
"strings"
"time"
. "oc-scheduler/models"
"github.com/beego/beego/v2/core/logs"
"github.com/nwtgck/go-fakelish"
"gopkg.in/yaml.v3"
)
// ArgoBuilder assembles an Argo Workflow document from a Graph.
// CreateDAG fills the Workflow field and writes it out as YAML.
type ArgoBuilder struct {
// graph is the source graph; its Computings and Links are read to build
// the templates and the DAG tasks.
graph Graph
// branches holds lists of link IDs; in the code visible here it is only
// printed by CreateDAG.
branches [][]string
// Workflow is the document populated by CreateDAG and marshalled to YAML.
Workflow Workflow
}
// Workflow is the root of the YAML document produced by ArgoBuilder.
// The yaml tags define the key names used in the generated file.
type Workflow struct {
// ApiVersion is set to "argoproj.io/v1alpha1" by CreateDAG.
ApiVersion string `yaml:"apiVersion"`
// Kind is set to "Workflow" by CreateDAG.
Kind string `yaml:"kind"`
// Metadata only carries the generateName prefix of the workflow.
Metadata struct {
GenerateName string `yaml:"generateName"`
} `yaml:"metadata"`
// Spec holds the entrypoint, volumes and templates; it is omitted from
// the YAML output when empty.
Spec Spec `yaml:"spec,omitempty"`
}
// Spec is the "spec" section of the generated workflow document.
type Spec struct {
// Entrypoint is the name of the template to start from; CreateDAG sets
// it to "dag".
Entrypoint string `yaml:"entrypoint"`
// Arguments is never filled by the code visible in this file.
// NOTE(review): Argo usually expects spec.arguments to be an object
// holding a "parameters" list rather than a bare list — confirm.
Arguments []Parameter `yaml:"arguments,omitempty"`
// Volumes is serialized under the "volumeClaimTemplates" key and is
// filled by createVolumes.
Volumes []VolumeClaimTemplate `yaml:"volumeClaimTemplates,omitempty"`
// Templates receives one template per computing component plus the
// "dag" template.
Templates []Template `yaml:"templates"`
}
// CreateDAG builds the complete Argo workflow (one template per computing
// component, the "dag" template and the shared volume claim), marshals it to
// YAML and writes it into the argo_workflows/ directory.
//
// It returns the name of the generated file (not its full path). An error is
// returned, wrapped with the step that failed, when the workflow cannot be
// marshalled, when the output directory cannot be created or when the file
// cannot be written.
func (b *ArgoBuilder) CreateDAG() (string, error) {
	fmt.Println("list of branches : ", b.branches)

	b.createTemplates()
	b.createDAGstep()
	b.createVolumes()

	b.Workflow.Spec.Entrypoint = "dag"
	b.Workflow.ApiVersion = "argoproj.io/v1alpha1"
	b.Workflow.Kind = "Workflow"
	random_name := generateWfName()
	b.Workflow.Metadata.GenerateName = "oc-test-" + random_name

	yamlified, err := yaml.Marshal(b.Workflow)
	if err != nil {
		logs.Error("Could not transform object to yaml file")
		return "", fmt.Errorf("marshalling workflow %q to yaml: %w", random_name, err)
	}

	// Give a unique name to each argo file with its timestamp DD_MM_YYYY_hhmmss
	current_timestamp := time.Now().Format("02_01_2006_150405")
	file_name := random_name + "_" + current_timestamp + ".yml"
	workflows_dir := "argo_workflows/"

	// os.WriteFile does not create missing parent directories, so make sure
	// the output directory exists before writing into it.
	if err = os.MkdirAll(workflows_dir, 0750); err != nil {
		logs.Error("Could not create the directory for the yaml file")
		return "", fmt.Errorf("creating directory %q: %w", workflows_dir, err)
	}

	if err = os.WriteFile(workflows_dir+file_name, yamlified, 0660); err != nil {
		logs.Error("Could not write the yaml file")
		return "", fmt.Errorf("writing workflow file %q: %w", workflows_dir+file_name, err)
	}

	return file_name, nil
}
// createTemplates appends one container template per computing component of
// the graph to the workflow spec. Each template is named after the component
// (see getArgoName), declares one input parameter per environment variable
// of the component and mounts the "workdir" volume on /mnt/vol.
func (b *ArgoBuilder) createTemplates() {
	for _, computing := range b.graph.Computings {
		// TODO : decide where to store the image name, GUI or models.computing.Image
		// For now the first word of the command is used as the image name.
		image := strings.Split(computing.Command, " ")[0]

		container := Container{
			Image:   image,
			Command: getComputingCommands(computing.Command),
			Args:    getComputingArgs(computing.Arguments, computing.Command),
		}
		// TODO : replace this with a search of the storage / data source name
		container.VolumeMounts = append(container.VolumeMounts, VolumeMount{Name: "workdir", MountPath: "/mnt/vol"})

		var parameters []Parameter
		for _, env_name := range getComputingEnvironmentName(computing.Environment) {
			parameters = append(parameters, Parameter{Name: env_name})
		}

		template := Template{Name: getArgoName(computing.Name, computing.ID), Container: container}
		template.Inputs.Parameters = parameters

		b.Workflow.Spec.Templates = append(b.Workflow.Spec.Templates, template)
	}
}
// createDAGstep appends the "dag" template to the workflow spec. The DAG
// holds one task per computing component; each task references the template
// of the same name, passes the component's environment variables as
// parameters and depends on the computing components linked to it.
//
// Parameters are emitted sorted by name so that the generated YAML is the
// same from one run to the next (map iteration order is random in Go).
func (b *ArgoBuilder) createDAGstep() {
	new_dag := Dag{}

	for _, comp := range b.graph.Computings {
		unique_name := getArgoName(comp.Name, comp.ID)
		step := Task{Name: unique_name, Template: unique_name}

		comp_envs := getComputingEnvironment(comp.Environment)
		env_names := make([]string, 0, len(comp_envs))
		for name := range comp_envs {
			env_names = append(env_names, name)
		}
		slices.Sort(env_names)

		for _, name := range env_names {
			step.Arguments.Parameters = append(step.Arguments.Parameters, Parameter{Name: name, Value: comp_envs[name]})
		}

		// retrieves the name (computing.name-computing.ID)
		step.Dependencies = b.getDependency(comp.ID)

		new_dag.Tasks = append(new_dag.Tasks, step)
	}

	b.Workflow.Spec.Templates = append(b.Workflow.Spec.Templates, Template{Name: "dag", Dag: new_dag})
}
// createVolumes appends the volume claim templates to the workflow spec.
// For testing purposes a single 1Gi ReadWriteOnce claim named "workdir" is
// declared; createTemplates mounts it in every computing container.
func (b *ArgoBuilder) createVolumes() {
	var workdir VolumeClaimTemplate
	workdir.Metadata.Name = "workdir"
	workdir.Spec.AccessModes = []string{"ReadWriteOnce"}
	workdir.Spec.Resources.Requests.Storage = "1Gi"

	volumes := &b.Workflow.Spec.Volumes
	*volumes = append(*volumes, workdir)
}
// getDependency returns the Argo names (see getArgoName) of the computing
// components that are the source of a link whose destination is
// current_computing_id. Each source is listed once.
//
// The result is sorted: links are stored in a map, so without sorting the
// order of the dependencies would change from one run to the next.
// It returns nil when the component has no computing dependency.
func (b *ArgoBuilder) getDependency(current_computing_id string) (dependencies []string) {
	var dependencies_id []string

	for _, link := range b.graph.Links {
		if link.Destination != current_computing_id {
			continue
		}
		if b.graph.getComponentType(link.Source) != "computing" {
			continue
		}
		if slices.Contains(dependencies_id, link.Source) {
			continue
		}
		dependencies_id = append(dependencies_id, link.Source)
	}

	for _, dependency := range dependencies_id {
		dependency_name := getArgoName(b.graph.getComponentName(dependency), dependency)
		dependencies = append(dependencies, dependency_name)
	}
	slices.Sort(dependencies)

	return dependencies
}
// func (b *ArgoBuilder) componentInBranch(component_id string, branch []string) bool {
// for _, link := range branch {
// if b.graph.Links[link].Source == component_id || b.graph.Links[link].Destination == component_id {
// return true
// }
// }
// return false
// }
// func (b *ArgoBuilder) findPreviousComputing(computing_id string, branch []string, index int) string {
// for i := index; i >= 0 ; i-- {
// previousLink := b.graph.Links[branch[i]]
// if previousLink.Source != computing_id && b.graph.getComponentType(previousLink.Source) == "computing"{
// name := getArgoName(b.graph.getComponentName(previousLink.Source),previousLink.Source)
// return name
// }
// if previousLink.Destination != computing_id && b.graph.getComponentType(previousLink.Destination) == "computing"{
// name := getArgoName(b.graph.getComponentName(previousLink.Destination),previousLink.Destination)
// return name
// }
// }
// return ""
// }
// getComputingCommands returns the command of a computing component as a
// list of words. The first word of user_input is the image name and is
// dropped (see removeImageName); the remainder is split on single spaces.
// It returns nil when nothing follows the image name.
func getComputingCommands(user_input string) (list_command []string) {
	user_input = removeImageName(user_input)
	if len(user_input) == 0 {
		return nil
	}
	return strings.Split(user_input, " ")
}
// getComputingArgs builds the container arguments of a computing component.
// When the command contains "sh -c" all the arguments are joined with spaces
// into a single string, since the shell expects one script argument;
// otherwise a copy of user_input is returned.
// It returns nil when user_input is empty.
func getComputingArgs(user_input []string, command string) (list_args []string) {
	switch {
	case len(user_input) == 0:
		return nil
	case strings.Contains(command, "sh -c"):
		// quickfix that might need improvement
		return []string{strings.Join(user_input, " ")}
	default:
		return append(list_args, user_input...)
	}
}
// getComputingEnvironment converts a list of "NAME=VALUE" strings into a
// map from variable name to value.
//
// To overcome problems in the data structure, a list holding a single
// element is treated as a comma separated list of pairs
// ("A=1,B=2"). Only the first "=" separates the name from the value, so
// values may themselves contain "=".
//
// It returns nil when user_input is empty and panics, after logging, when
// an entry holds no "=" at all.
func getComputingEnvironment(user_input []string) (map_env map[string]string) {
	if len(user_input) == 0 {
		return nil
	}

	if len(user_input) == 1 {
		user_input = strings.Split(user_input[0], ",")
	}

	map_env = make(map[string]string, len(user_input))
	for _, str := range user_input {
		name, value, found := strings.Cut(str, "=")
		if !found {
			logs.Error("Error extracting the environment variable from ", str)
			panic(fmt.Sprintf("malformed environment variable %q: expected NAME=VALUE", str))
		}
		map_env[name] = value
	}

	return map_env
}
// getComputingEnvironmentName returns the names of the environment
// variables described by user_input (see getComputingEnvironment).
// The names are sorted so that the result does not depend on the random
// iteration order of the underlying map. It returns nil when there is no
// variable.
func getComputingEnvironmentName(user_input []string) (list_names []string) {
	env_map := getComputingEnvironment(user_input)
	for name := range env_map {
		list_names = append(list_names, name)
	}
	slices.Sort(list_names)
	return list_names
}
// generateWfName returns a random workflow name made of two fake words of
// 5 to 8 characters joined by a dash.
func generateWfName() (Name string) {
	first := fakelish.GenerateFakeWord(5, 8)
	second := fakelish.GenerateFakeWord(5, 8)
	return first + "-" + second
}
// getArgoName builds the name used for a component in the Argo file:
// the component name with spaces replaced by dashes, followed by a dash and
// the component ID, all in lower case.
func getArgoName(raw_name string, component_id string) (formatedName string) {
	dashed := strings.ReplaceAll(raw_name, " ", "-")
	return strings.ToLower(dashed + "-" + component_id)
}
// printYAML is a debugging helper: it prints the YAML form of data on the
// standard output, or an error message when data cannot be marshalled.
func printYAML(data interface{}) {
	if yamlData, err := yaml.Marshal(data); err == nil {
		fmt.Println(string(yamlData))
	} else {
		fmt.Printf("Error marshalling YAML: %v\n", err)
	}
}
// removeImageName drops the first word of user_input, which for now is the
// name of the container image, and returns what follows the first space.
// It returns an empty string when user_input holds no space.
func removeImageName(user_input string) string {
	// strings.Cut yields an empty remainder when no space is found.
	_, remainder, _ := strings.Cut(user_input, " ")
	return remainder
}

358
workflow_builder/graph.go Normal file
View File

@@ -0,0 +1,358 @@
package workflow_builder
import (
"encoding/json"
"fmt"
"maps"
"oc-scheduler/conf"
"oc-scheduler/logger"
models "oc-scheduler/models"
catalog_models "cloud.o-forge.io/core/oc-catalog/models" // this will be replaced with oc-lib
"github.com/beego/beego/v2/core/logs"
"github.com/tidwall/gjson"
)
// Graph holds the components and the links of a workflow retrieved from the
// catalog, as filled by LoadFrom.
type Graph struct {
workflow_name string // used to test if the graph has been instantiated, private so can only be set by a graph's method
// Datas, Computings, Datacenters and Storages hold the components of the
// workflow, one slice per component type; they are appended to by the
// Add*Model methods.
Datas []catalog_models.DataModel
Computings []catalog_models.ComputingModel
Datacenters []catalog_models.DatacenterModel
Storages []catalog_models.StorageModel
// Links maps a link ID to the link; it is rebuilt by GetLinks.
Links map[string]catalog_models.Link
// ws is the HTTP client used to query the catalog API.
ws models.HttpQuery
}
// GetGraphList queries "v1/workspace/list" on apiurl and returns a
// dictionary associating each entry found under the "Workflows" key of the
// response with the JSON representation of its content.
// The error of the HTTP query, if any, is returned as is.
func (g *Graph) GetGraphList(apiurl string) (map[string]string, error) {
	g.ws.Init(apiurl)

	body, err := g.ws.Get("v1/workspace/list")
	if err != nil {
		return nil, err
	}

	workflows := gjson.Get(string(body), "Workflows")
	graphs := make(map[string]string)
	workflows.ForEach(func(name, content gjson.Result) bool {
		graphs[name.Str] = content.String()
		// returning true keeps the iteration going
		return true
	})

	return graphs, nil
}
// GetGraph queries "v1/workflow/<workflow>" on apiurl and returns the body
// of the response as a string. The error of the HTTP query, if any, is
// returned as is.
//
// Should the apiurl parameter be removed, since we have oc-catalog url in
// the conf ?
func (g *Graph) GetGraph(apiurl string, workflow string) (string, error) {
	g.ws.Init(apiurl)

	endpoint := "v1/workflow/" + workflow
	body, err := g.ws.Get(endpoint)
	if err != nil {
		return "", err
	}

	return string(body), nil
}
// LoadFrom retrieves the workflow named workflow_name from the catalog
// (conf.GetConfig().OcCatalogUrl) and creates the components and the links
// it describes.
//
// Components loaded by a previous call are discarded first, the same way
// GetLinks rebuilds the links, so that loading twice does not leave stale or
// duplicated components in the graph.
//
// It returns an error when the workflow cannot be retrieved; the graph is
// then left untouched and is still considered as not loaded.
func (g *Graph) LoadFrom(workflow_name string) error {
	graph, err := g.GetGraph(conf.GetConfig().OcCatalogUrl, workflow_name)
	if err != nil {
		return fmt.Errorf("retrieving workflow %q: %w", workflow_name, err)
	}

	g.Datas = nil
	g.Computings = nil
	g.Datacenters = nil
	g.Storages = nil

	g.GetWorkflowComponents(graph)
	g.GetLinks(graph)

	// A non-empty name marks the graph as loaded (see ExportToArgo).
	g.workflow_name = workflow_name

	return nil
}
// GetWorkflowComponents creates the objects that correspond to each
// component of a workflow, combining the user input and the base component
// attributes. workflow is the JSON description of the workflow; for each
// component type it holds a dictionary from the ID of the component in the
// workflow to its settings. Entries without a "referenceID" are ignored.
//
// A component that cannot be loaded is logged and skipped, the others are
// still processed.
func (g *Graph) GetWorkflowComponents(workflow string) {
	types := []string{"computing", "datacenter", "data", "storage"} // create a constant for more maintainability OR even better get the list of all component's type for this WF

	for _, component_type := range types {
		// Retrieve the dict of component for a specific type in the workflow
		result := gjson.Get(workflow, component_type)
		if result.Type == gjson.Null {
			continue
		}

		result.ForEach(func(id, value gjson.Result) bool {
			comp_id := value.Get("referenceID").Str
			if comp_id == "" {
				return true
			}

			var err error
			switch component_type {
			case "computing":
				err = g.AddComputingModel(comp_id, value, id.Str)
			case "data":
				err = g.AddDataModel(comp_id, value, id.Str)
			case "datacenter":
				err = g.AddDatacenterModel(comp_id, value, id.Str)
			case "storage":
				err = g.AddStorageModel(comp_id, value, id.Str)
			default:
				logs.Critical("Component type doesn't match a know type : " + component_type)
			}
			if err != nil {
				logs.Error("Could not load the " + component_type + " component " + comp_id + " : " + err.Error())
			}

			// returning true keeps the iteration going
			return true
		})
	}
}
// GetLinks rebuilds g.Links from the "link" entry of workflow, the JSON
// description of a workflow, which maps a link ID to the description of the
// link. Links from a previous call are discarded.
//
// A link that cannot be decoded is logged and left out of the map instead
// of being stored partially filled.
func (g *Graph) GetLinks(workflow string) {
	g.Links = make(map[string]catalog_models.Link)

	result := gjson.Get(workflow, "link")
	if result.Type == gjson.Null {
		return
	}

	result.ForEach(func(id, value gjson.Result) bool {
		var l catalog_models.Link
		if err := json.Unmarshal([]byte(value.Raw), &l); err != nil {
			logs.Error("Could not decode the link " + id.Str + " : " + err.Error())
			return true
		}
		g.Links[id.Str] = l
		// returning true keeps the iteration going
		return true
	})
}
// AddDataModel fetches the data component id from the catalog
// ("v1/data/<id>"), overlays the settings given by the user in the workflow
// (user_input) and appends the result to g.Datas with wf_id, the ID of the
// component inside the workflow, as its ID.
//
// It returns an error, and adds nothing, when the component cannot be
// fetched or when either JSON document cannot be decoded.
func (g *Graph) AddDataModel(id string, user_input gjson.Result, wf_id string) error {
	resp, err := g.ws.Get("v1/data/" + id)
	if err != nil {
		return fmt.Errorf("fetching data %q: %w", id, err)
	}

	var d catalog_models.DataModel
	if err := json.Unmarshal(resp, &d); err != nil {
		return fmt.Errorf("decoding data %q: %w", id, err)
	}
	if err := json.Unmarshal([]byte(user_input.Raw), &d.DataNEWModel); err != nil {
		return fmt.Errorf("decoding the workflow settings of data %q: %w", wf_id, err)
	}

	d.ID = wf_id
	g.Datas = append(g.Datas, d)

	return nil
}
// AddDatacenterModel fetches the datacenter component id from the catalog
// ("v1/datacenter/<id>"), overlays the settings given by the user in the
// workflow (user_input) and appends the result to g.Datacenters with wf_id,
// the ID of the component inside the workflow, as its ID.
//
// It returns an error, and adds nothing, when the component cannot be
// fetched or when either JSON document cannot be decoded.
func (g *Graph) AddDatacenterModel(id string, user_input gjson.Result, wf_id string) error {
	resp, err := g.ws.Get("v1/datacenter/" + id)
	if err != nil {
		return fmt.Errorf("fetching datacenter %q: %w", id, err)
	}

	var d catalog_models.DatacenterModel
	if err := json.Unmarshal(resp, &d); err != nil {
		return fmt.Errorf("decoding datacenter %q: %w", id, err)
	}
	if err := json.Unmarshal([]byte(user_input.Raw), &d.DatacenterNEWModel); err != nil {
		return fmt.Errorf("decoding the workflow settings of datacenter %q: %w", wf_id, err)
	}

	d.ID = wf_id
	g.Datacenters = append(g.Datacenters, d)

	return nil
}
// AddComputingModel fetches the computing component id from the catalog
// ("v1/computing/<id>"), overlays the settings given by the user in the
// workflow (user_input) and appends the result to g.Computings with wf_id,
// the ID of the component inside the workflow, as its ID.
//
// It returns an error, and adds nothing, when the component cannot be
// fetched or when either JSON document cannot be decoded.
func (g *Graph) AddComputingModel(id string, user_input gjson.Result, wf_id string) error {
	resp, err := g.ws.Get("v1/computing/" + id)
	if err != nil {
		return fmt.Errorf("fetching computing %q: %w", id, err)
	}

	var c catalog_models.ComputingModel
	if err := json.Unmarshal(resp, &c); err != nil {
		return fmt.Errorf("decoding computing %q: %w", id, err)
	}
	if err := json.Unmarshal([]byte(user_input.Raw), &c.ComputingNEWModel); err != nil {
		return fmt.Errorf("decoding the workflow settings of computing %q: %w", wf_id, err)
	}

	c.ID = wf_id
	g.Computings = append(g.Computings, c)

	return nil
}
// AddStorageModel fetches the storage component id from the catalog
// ("v1/storage/<id>"), overlays the settings given by the user in the
// workflow (user_input) and appends the result to g.Storages with wf_id,
// the ID of the component inside the workflow, as its ID.
//
// It returns an error, and adds nothing, when the component cannot be
// fetched or when either JSON document cannot be decoded.
func (g *Graph) AddStorageModel(id string, user_input gjson.Result, wf_id string) error {
	resp, err := g.ws.Get("v1/storage/" + id)
	if err != nil {
		return fmt.Errorf("fetching storage %q: %w", id, err)
	}

	var s catalog_models.StorageModel
	if err := json.Unmarshal(resp, &s); err != nil {
		return fmt.Errorf("decoding storage %q: %w", id, err)
	}
	if err := json.Unmarshal([]byte(user_input.Raw), &s.StorageNEWModel); err != nil {
		return fmt.Errorf("decoding the workflow settings of storage %q: %w", wf_id, err)
	}

	s.ID = wf_id
	g.Storages = append(g.Storages, s)

	return nil
}
// ExportToArgo writes the Argo workflow file that corresponds to the graph
// and returns the name of the created file.
//
// The links that end a branch are identified first (links that are not
// datacenter links and whose destination is not the source of another
// link), then the branches leading to them are computed and printed, and
// finally the file is generated by an ArgoBuilder working on a copy of the
// graph.
//
// It returns an error when the graph has not been loaded yet or when the
// file could not be created.
func (g *Graph) ExportToArgo() (string, error) {
	if g.workflow_name == "" {
		return "", fmt.Errorf("can't export a graph that has not been loaded yet")
	}

	end_links := make(map[string]catalog_models.Link)
	for link_id, link := range g.Links {
		if link.DCLink || g.isSource(link.Destination, link_id) {
			continue
		}
		end_links[link_id] = link
	}

	list_branches := g.getListBranches(end_links, nil, nil)

	for _, branch := range list_branches {
		description := ""
		for _, link_id := range branch {
			link := g.Links[link_id]
			description += " --> " + g.getComponentName(link.Source) + " linked with " + g.getComponentName(link.Destination)
		}
		fmt.Println(description)
	}
	fmt.Println("Identified branches : ", list_branches)

	builder := ArgoBuilder{graph: *g, branches: list_branches}
	filename, err := builder.CreateDAG()
	if err != nil {
		logger.Logger.Error().Msg("Could not create the argo file for " + g.workflow_name)
		return "", err
	}

	return filename, nil
}
// Return a list containing the IDs of each link that make up a branch in the graph.
//
// end_links holds the links to start from; each one is followed backwards
// through getPreviousLink. A link with no previous link forms a branch on
// its own; otherwise one recursive call is made per previous link and the
// current link ID is appended to every branch it returns, so a branch lists
// its link IDs from the earliest link to the end link.
//
// unvisited_links_list and current_branch are meant for the recursive calls:
// the initial caller passes nil for both (see ExportToArgo).
// unvisited_links_list is then initialised with a copy of g.Links. The same
// map is shared by all the recursive calls, which remove links from it.
//
// NOTE(review): the content of the incoming current_branch never ends up in
// the returned branches (it is only extended, passed down and then
// overwritten) — confirm whether this parameter is still needed.
// NOTE(review): since visited links are removed from the shared map, a link
// already followed for one end link looks like it cannot be found again for
// another one — confirm this is the intended behaviour for graphs where
// branches share links.
func (g *Graph) getListBranches(end_links map[string]catalog_models.Link, unvisited_links_list map[string]catalog_models.Link, current_branch []string) (list_branches [][]string) {
if current_branch == nil {
current_branch = make([]string, 0)
}
// First call: work on a copy so that g.Links itself is never modified.
if unvisited_links_list == nil {
unvisited_links_list = make(map[string]catalog_models.Link,len(g.Links))
maps.Copy(unvisited_links_list,g.Links)
fmt.Println(unvisited_links_list)
}
for link_id, _ := range end_links {
j := link_id
new_branches := make([][]string,0)
// IDs of the unvisited links whose destination is the source of this link
previous_index := g.getPreviousLink(j, unvisited_links_list)
// No previous link: this link is the start of a branch
if len(previous_index) == 0 {
list_branches = append(list_branches, []string{link_id})
}
for _, id_link := range previous_index {
current_branch = append([]string{link_id},current_branch...)
// Mark the current link as visited for all the following lookups
delete(unvisited_links_list, link_id)
// create a new branch for each previous link, appending the current path to this node to the created branch
new_end_link := make(map[string]catalog_models.Link,0)
new_end_link[id_link] = g.Links[id_link]
new_branches = g.getListBranches(new_end_link,unvisited_links_list,current_branch)
// Extend each branch leading to the previous link with the current link
for _, new_branch := range new_branches{
current_branch = append(new_branch,link_id)
list_branches = append(list_branches, current_branch)
}
}
}
return
}
// ExportToHelm is a placeholder: it currently ignores id, does nothing and
// always returns nil.
func (g *Graph) ExportToHelm(id string) error {
return nil
}
// isDestination reports whether a link other than link_id has comp_id as
// its Destination.
func (g *Graph) isDestination(comp_id string, link_id string) bool {
	for other_id, other := range g.Links {
		if other_id == link_id {
			continue
		}
		if other.Destination == comp_id {
			return true
		}
	}
	return false
}
// isSource reports whether a link other than link_id has comp_id as its
// Source. Datacenter links (DCLink) are not taken into account.
func (g *Graph) isSource(comp_id string, link_id string) bool {
	for other_id, other := range g.Links {
		if other_id == link_id || other.DCLink {
			continue
		}
		if other.Source == comp_id {
			return true
		}
	}
	return false
}
// getPreviousLink returns the IDs of the links of map_link, other than
// link_id itself, whose Destination is the Source of g.Links[link_id].
// It returns nil when there is no such link.
func (g *Graph) getPreviousLink(link_id string, map_link map[string]catalog_models.Link) (previous_id []string) {
	origin := g.Links[link_id].Source

	for candidate_id, candidate := range map_link {
		if candidate_id == link_id || candidate.Destination != origin {
			continue
		}
		previous_id = append(previous_id, candidate_id)
	}

	return previous_id
}
// getComponentName returns the name of the component whose ID is id,
// looking in the computings first, then in the storages, then in the datas.
// It returns an empty string when no such component is found.
func (g *Graph) getComponentName(id string) string {
	for i := range g.Computings {
		if g.Computings[i].ID == id {
			return g.Computings[i].Name
		}
	}

	for i := range g.Storages {
		if g.Storages[i].ID == id {
			return g.Storages[i].Name
		}
	}

	for i := range g.Datas {
		if g.Datas[i].ID == id {
			return g.Datas[i].Name
		}
	}

	return ""
}
// getComponentType returns "computing", "data" or "storage" depending on
// the list in which component_id is found (searched in that order), or an
// empty string when it is in none of them.
func (g *Graph) getComponentType(component_id string) string {
	for i := range g.Computings {
		if g.Computings[i].ID == component_id {
			return "computing"
		}
	}

	for i := range g.Datas {
		if g.Datas[i].ID == component_id {
			return "data"
		}
	}

	for i := range g.Storages {
		if g.Storages[i].ID == component_id {
			return "storage"
		}
	}

	return ""
}
// getComponentByType returns the IDs of the ends of link (source first,
// then destination) whose component type is compType. A slice is returned
// because both ends of a link may be of the same type; it is nil when
// neither end matches.
func (g *Graph) getComponentByType(compType string, link catalog_models.Link) (ids []string) {
	for _, endpoint := range []string{link.Source, link.Destination} {
		if g.getComponentType(endpoint) == compType {
			ids = append(ids, endpoint)
		}
	}
	return ids
}