Merge branch 'argo_workflow'

This commit is contained in:
pb 2024-04-16 18:54:05 +02:00
commit 9bd5be7758
10 changed files with 368 additions and 112 deletions

View File

@ -0,0 +1,47 @@
@startuml class_links
class LinkGraph {
+ []Link links
+ void AddLinkToGraph(Link link)
+ boolean,str HasPrevious(Link)
+ boolean,str HasNext(Link)
}
class Link {
+ str source
+ str destination
+ boolean DCLink
+ *Link NewLink(interface{} src, interface{} dst)
+ void AddLinkToDataCenter()
}
note left of LinkGraph::HasPrevious
checks whether the component whose ID is in src is the dst of
any other Link in the list of Links
end note
note top of Link
Links need to be redefined in the sense that they are currently used
both :
- to connect a component to a DC
- to represent the interactions between components
end note
note right of Link::DCLink
set at construction of the object and used to order the links
end note
note left of Link::NewLink
Must test if the parameters check the type constraints
and raise errors for the GUI if necessary
end note
LinkGraph*--"links"Link
@enduml

View File

@ -2,11 +2,12 @@
- [ ] In most of the components from 'models/' we have a method to add input and output to the model, however this linking of components is already done in oc-schedule when parsing the MxGraph. We need to determine if adding relations between components inside the objects themselves is necessary.
- When running in debug mode with a breakpoint inside the first line of computing.addLink it is only called once
- [ ]
## MxGraph
- [ ] The ConsumeMxGraphModel is way too long, it should be refactored and broken down into different sub-methods
- mxcell are put inside an <object> tag when the settings have been opened, whether values have been set or not. Maybe we could find a way to make mxgraph add these whenever we add a component to the graph.
- then identify the links only
- [ ] It is unclear what the inputs and the outputs are. It seems like they were implemented to link two components, but it seems redundant with the identification of links
- [ ] It is unclear what the inputs and the outputs are. It seems like they were implemented to link two components, but it seems redundant with the identification of links
- This has been potentially tackled with the creation of a class to handle links between components. The components do not handle their own connections with other components, this task is delegated to the Link and Workflow classes.

25
docs/linking_errors.md Normal file
View File

@ -0,0 +1,25 @@
# Handling errors during workflows' post
Every time a user modifies a workflow through oc-search's GUI, its representation is sent as XML to the oc-catalog API.
To ensure a correct execution of the workflow we must look for irregularities in the workflow's design. Of course an error can be the result of a workflow being currently built by the user, with the corrective action coming.
This document aims at laying down all the requirements that each component must respect and at showing whether they have been implemented in the code or not.
## Computing
- [x] A computing component must be paired with a datacenter component
## Data
- [x] A data component must be linked to at least one computing component
## Datacenter
- [x] A datacenter component must be linked to at least one computing component
## Storage
- [x] A storage component must have at least one target or be the source of another component

1
go.mod
View File

@ -5,6 +5,7 @@ go 1.15
require github.com/beego/beego/v2 v2.0.1
require (
github.com/Klathmon/StructToMap v0.0.0-20140724123129-3d0229e2dce7
github.com/antihax/optional v1.0.0
github.com/aws/aws-sdk-go v1.36.29 // indirect
github.com/beego/beego v1.12.12

2
go.sum
View File

@ -1,6 +1,8 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Klathmon/StructToMap v0.0.0-20140724123129-3d0229e2dce7 h1:n0MD6UkwbgGHtXsmfgVzC2+ZbHzIsScpbq9ZGI18074=
github.com/Klathmon/StructToMap v0.0.0-20140724123129-3d0229e2dce7/go.mod h1:xdrQDwHlKUmv8yiElMx6W0W10cLkqpeSEUUib8KGtv4=
github.com/Knetic/govaluate v3.0.0+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=

View File

@ -77,15 +77,15 @@ func (model ComputingModel) getName() string {
type ComputingObject struct {
ReferenceID primitive.ObjectID `json:"referenceID" description:"Computing model ID"`
Inputs []string `json:"inputs"`
Outputs []string `json:"outputs"`
Inputs []string `json:"inputs"`
Outputs []string `json:"outputs"`
Image string `json:"image,omitempty"`
Command string `json:"command,omitempty"`
Arguments []string `json:"arguments,omitempty"`
Environment []string `json:"environment,omitempty"`
Ports []string `json:"ports,omitempty"`
DataCenterID string `json:"datacenterID" description:"Datacenter where the computing will be executed"`
DataCenterID string `json:"datacenterID" description:"Datacenter where the computing will be executed"`
}
func (obj ComputingObject) getHost() *string {

55
models/links.go Normal file
View File

@ -0,0 +1,55 @@
package models
import (
"cloud.o-forge.io/core/oc-catalog/models/rtype"
)
// Link is a directed connection between two components of a workflow.
// Both ends are designated by the ID the component has inside the workflow.
type Link struct {
// ID primitive.ObjectID `json:"ID" bson:"_id" required:"true" example:"5099803df3f4948bd2f98391"`
// Source is the workflow ID of the component the link starts from.
Source string `json:"source" description:"id in the workflow of the source object"`
// Destination is the workflow ID of the component the link points to.
Destination string `json:"destination" description:"id in the workflow of the destination object"`
// DCLink is true when one end of the link is a datacenter.
DCLink bool `json:"dcLink" description:"is this a link with a datacenter"`
}
// NewLink builds the Link going from the component srcId to the component
// dstId of a workflow.
//
// src and dst are the resource objects behind srcId and dstId. They are only
// used to find out whether one end of the link is a datacenter. When that is
// the case the link is flagged as a DCLink and oriented so that the datacenter
// is always the Destination, whatever the direction of the arrow, in order to
// facilitate the logic applied to the links afterwards. If the component at
// the other end is a *ComputingObject, its DataCenterID is set to the ID of
// the datacenter.
//
// When src or dst is nil the type of the ends cannot be determined: the link
// is returned with the given orientation and DCLink left to false.
func NewLink(src ResourceObject, srcId string, dst ResourceObject, dstId string) (link Link) {
	link.Source = srcId
	link.Destination = dstId

	// Calling a method on a nil interface panics, so an unresolved end is
	// handled as a plain link instead of crashing the whole parsing.
	if src == nil || dst == nil {
		return link
	}

	srcIsDC := src.getRtype() == rtype.DATACENTER
	if !srcIsDC && dst.getRtype() != rtype.DATACENTER {
		return link
	}

	link.DCLink = true

	// linked is the end of the link that is not the datacenter.
	linked := src
	if srcIsDC {
		linked = dst
		// Make sure the datacenter is the destination.
		link.Source, link.Destination = dstId, srcId
	}

	// So far only computing components expect the ID of the DC in their
	// attributes. The checked assertion avoids a panic when the object is
	// held by value rather than by pointer.
	if linked.getRtype() == rtype.COMPUTING {
		if computing, ok := linked.(*ComputingObject); ok {
			computing.DataCenterID = link.Destination
		}
	}

	return link
}
// So far only computing components expect the ID of the DC in their attributes
// func (l *Link) AddLinkToDataCenter(component models.ComputingModel) {
// }

View File

@ -8,22 +8,29 @@ type MxGraphModel struct {
XMLName xml.Name `xml:"mxGraphModel"`
Root struct {
XMLName xml.Name `xml:"root"`
MxCell []MxCell `xml:"mxCell"`
XMLName xml.Name `xml:"root"`
MxCell []MxCell `xml:"mxCell"`
MxObject *[]MxObject `xml:"object"`
MxLink []MxLink
}
}
type MxCell struct {
XMLName xml.Name `xml:"mxCell"`
ID string `xml:"id,attr"`
Parent *string `xml:"parent,attr"`
RID *string `xml:"rID,attr"`
Source *string `xml:"source,attr"`
Target *string `xml:"target,attr"`
Rtype *string `xml:"rType,attr"`
XMLName xml.Name `xml:"mxCell"`
ID string `xml:"id,attr"`
RID *string `xml:"rID,attr"`
Rtype string `xml:"rType,attr"`
Parent *string `xml:"parent,attr"`
Edge *string `xml:"edge,attr"`
Source *string `xml:"source,attr"`
Target *string `xml:"target,attr"`
}
// MxLink is an arrow of the mxgraph reduced to its identifier and its two ends.
// Field order matters: createLinks builds it with a positional literal.
type MxLink struct {
// ID is the "id" attribute of the arrow.
ID string `xml:"id,attr"`
// Source is the "source" attribute of the arrow.
Source string `xml:"source,attr"`
// Target is the "target" attribute of the arrow.
Target string `xml:"target,attr"`
}
type MxObject struct {
XMLName xml.Name `xml:"object"`
@ -32,6 +39,39 @@ type MxObject struct {
MxCell MxCell `xml:"mxCell"`
}
// createLinks separates the arrows from the other cells of the graph.
//
// Structures and unmarshalling alone were not enough to tell links and cells
// containing components apart, so this is done once the model has been
// unmarshalled: every cell carrying an "edge" attribute is converted into an
// MxLink appended to Root.MxLink, and Root.MxCell is replaced by the list of
// the remaining cells. A source or target missing on an arrow ends up as an
// empty string in the MxLink.
func (g *MxGraphModel) createLinks() {
	var remaining []MxCell

	for _, cell := range g.Root.MxCell {
		if cell.Edge == nil {
			remaining = append(remaining, cell)
			continue
		}

		// Guarantees that Source and Target can be dereferenced below.
		cell.processLinks()
		g.Root.MxLink = append(g.Root.MxLink, MxLink{
			ID:     cell.ID,
			Source: *cell.Source,
			Target: *cell.Target,
		})
	}

	g.Root.MxCell = remaining
}
// processLinks makes sure both ends of the cell can be dereferenced: a nil
// Source or Target is replaced by a pointer to an empty string. Ends that are
// already set are left untouched.
func (cell *MxCell) processLinks() {
	empty := ""
	for _, end := range []**string{&cell.Source, &cell.Target} {
		if *end == nil {
			*end = &empty
		}
	}
}
// mxissue wraps a message string.
// NOTE(review): presumably describes a problem found while processing an
// mxgraph; its users are outside this hunk, confirm against them.
type mxissue struct {
// msg is the text carried by the issue.
msg string
}

View File

@ -52,16 +52,19 @@ const SchedulesDB = "schedules"
type Workflow struct {
// The key of the map is the ID of the object itself
Data map[string]DataObject `json:"data"`
Computing map[string]ComputingObject `json:"computing"`
Storage map[string]StorageObject `json:"storage"`
Datacenter map[string]DatacenterObject `json:"datacenter"` //TODO: Decide if there should be multiple objects of a datacenter
Data map[string]DataObject `json:"data"`
Computing map[string]ComputingObject `json:"computing"`
Storage map[string]StorageObject `json:"storage"`
Datacenter map[string]DatacenterObject `json:"datacenter"` //TODO: Decide if there should be multiple objects of a datacenter
Links map[string]Link `json:"link"`
Schedules WorkflowSchedule `json:"schedules"`
MxgraphXML string `description:"State of the mxgraph"`
}
// TODO : describe what use case this interface satisfies
type ResourceObject interface {
getHost() *string
getName() *string
@ -179,6 +182,13 @@ func (w *Workflow) AddObj(robj ResourceObject) *primitive.ObjectID {
return &outputID
}
// AddLinkToWorkflow stores link in the workflow under id, creating the Links
// map on first use. A link already stored under the same id is replaced.
func (w *Workflow) AddLinkToWorkflow(link Link, id string) {
	if w.Links != nil {
		w.Links[id] = link
		return
	}
	w.Links = map[string]Link{id: link}
}
func (w *Workflow) UpdateDB(userID, workflowName string) error {
_, err := services.MngoCollWorkspace.UpdateOne(services.MngoCtx,
@ -536,9 +546,8 @@ func ParseMxGraph(username, workflowName, xmlData string) (err error, mxissues [
return err, nil
}
// Move the attribute of the object's tags into the mxCell's for an easier processing
// currentWorkflow.extractMxCell(xmlModel)
xmlModel.createLinks()
targetWorkspaceWorkflow, err, mxissues := userWorkspace.ConsumeMxGraphModel(xmlModel)
if err != nil {
@ -586,7 +595,6 @@ func FindSliceInSlice(slice1 []string, slice2 []string) (int, int, bool) {
return -1, -1, false
}
// TODO : correct this method to suppport mxcells with settings
func (ws Workspace) ConsumeMxGraphModel(xmlmodel MxGraphModel) (returned_wf *Workflow, err error, issues []error) {
returned_wf = &Workflow{}
@ -597,21 +605,9 @@ func (ws Workspace) ConsumeMxGraphModel(xmlmodel MxGraphModel) (returned_wf *Wor
return xmlmodel.Root.MxCell[i].RID != nil
})
// For each cell of the xml graph,
// in the case cell has a rID retrieve its rType from the value of rID of the component in the worfklow
// retrieve the component's type
// create an object from the rType
// update the existing workflow with the new component
// or by defautlt : the cell represents an arrow
// if the source or the target of the arrow is a datacenter
// define which end of the arrow is the DC
// if the other other end of the arrow is a computing component
// create a computing object
// attach the DC to it
// update the workflow with the object : create the list of this type of component or update the list with the id of the component with the object
// Create the object and add it to the appropriate list
// for all the components with setting, which are identified
// by a MxObject tag in the xml
for _, object := range *xmlmodel.Root.MxObject{
resObj, err, mxissues := returned_wf.mxCellToComponent(object.MxCell,ws)
@ -669,93 +665,72 @@ func (ws Workspace) ConsumeMxGraphModel(xmlmodel MxGraphModel) (returned_wf *Wor
continue
}
if sourceObj.getRtype() == rtype.DATACENTER || targetObj.getRtype() == rtype.DATACENTER {
var datacenter, datacenterLinked *string
if sourceObj.getRtype() == rtype.DATACENTER {
datacenter = cell.Source
datacenterLinked = cell.Target
} else {
datacenter = cell.Target
datacenterLinked = cell.Source
}
switch returned_wf.GetResource(datacenterLinked).getRtype() {
case rtype.COMPUTING:
computingObj := returned_wf.GetResource(datacenterLinked).(*ComputingObject)
// We should always get a ID because we already registered resources and discarded which doesn't correspond to existent models
computingObj.DataCenterID = *datacenter
returned_wf.UpdateObj(computingObj, *datacenterLinked)
}
} else {
targetObj.addLink(INPUT, *cell.Source)
returned_wf.UpdateObj(targetObj, *cell.Target) // save back
// If we have a relationship of:
// Source ----> Target
//
// The Source will be in the INPUTs of the Target.
// But we also must make sure that the Target will be in the OUTPUTs of the Source
sourceObj.addLink(OUTPUT, *cell.Target)
returned_wf.UpdateObj(sourceObj, *cell.Source)
}
// Not root nor resource. Should be only links
// If is a invalid link, we can't save it in the DB
// We should always get a ID because we already registered resources and discarded which doesn't correspond to existent models
// save back
// If we have a relationship of:
// Source ----> Target
//
// The Source will be in the INPUTs of the Target.
// But we also must make sure that the Target will be in the OUTPUTs of the Source
}
}
dcslist := make(map[string]bool)
dataslist := make(map[string]bool)
// datalist := make(map[string]bool)
issues = returned_wf.CreateLinks(xmlmodel.Root.MxLink, issues)
issues = returned_wf.CheckLinks(issues)
// dcslist := make(map[string]bool)
// dataslist := make(map[string]bool)
// // datalist := make(map[string]bool)
// Test wether the computing components are linked with a DC
for _, comp := range returned_wf.Computing {
if comp.DataCenterID == "" {
issues = append(issues, errors.New("Computing "+*comp.getName()+" without a Datacenter"))
} else {
// If doesn't exist in the list, means is new element to register as used
dcslist[comp.DataCenterID] = true
// // Test wether the computing components are linked with a DC
// for _, comp := range returned_wf.Computing {
// if comp.DataCenterID == "" {
// issues = append(issues, errors.New("Computing "+*comp.getName()+" without a Datacenter"))
// } else {
// // If doesn't exist in the list, means is new element to register as used
// dcslist[comp.DataCenterID] = true
}
// }
for _, dcin := range comp.Inputs {
switch returned_wf.GetResource(&dcin).getRtype() {
case rtype.DATA:
dataslist[dcin] = true
}
}
// for _, dcin := range comp.Inputs {
// switch returned_wf.GetResource(&dcin).getRtype() {
// case rtype.DATA:
// dataslist[dcin] = true
// }
// }
for _, dcout := range comp.Outputs {
switch returned_wf.GetResource(&dcout).getRtype() {
case rtype.DATA:
dataslist[dcout] = true
}
}
// for _, dcout := range comp.Outputs {
// switch returned_wf.GetResource(&dcout).getRtype() {
// case rtype.DATA:
// dataslist[dcout] = true
// }
// }
}
// }
for _, storage_component := range returned_wf.Storage {
if storage_component.Inputs == nil && storage_component.Outputs == nil {
issues = append(issues, errors.New("Storage "+*storage_component.getName()+" without compatible inputs and outputs"))
}
}
// for _, storage_component := range returned_wf.Storage {
// if storage_component.Inputs == nil && storage_component.Outputs == nil {
// issues = append(issues, errors.New("Storage "+*storage_component.getName()+" without compatible inputs and outputs"))
// }
// }
for dcID, dc_component := range returned_wf.Datacenter {
// if rID doesn't exist in the list, it means that it's not used
if _, ok := dcslist[dcID]; !ok {
issues = append(issues, errors.New("DC "+*dc_component.getName()+" not atached to any Computing"))
}
}
// for dcID, dc_component := range returned_wf.Datacenter {
// // if rID doesn't exist in the list, it means that it's not used
// if _, ok := dcslist[dcID]; !ok {
// issues = append(issues, errors.New("DC "+*dc_component.getName()+" not attached to any Computing"))
// }
// }
for dcID, data_component := range returned_wf.Data {
// if rID doesn't exist in the list, it means that it's not used
if _, ok := dataslist[dcID]; !ok {
issues = append(issues, errors.New("Data "+*data_component.getName()+" not atached to any Computing"))
}
}
// for dcID, data_component := range returned_wf.Data {
// // if rID doesn't exist in the list, it means that it's not used
// if _, ok := dataslist[dcID]; !ok {
// issues = append(issues, errors.New("Data "+*data_component.getName()+" not attached to any Computing"))
// }
// }
//////////////////////////////////////////////////////////
// //
@ -858,6 +833,116 @@ func (ws Workspace) ConsumeMxGraphModel(xmlmodel MxGraphModel) (returned_wf *Wor
return
}
// CreateLinks registers in the workflow the links described by the arrows of
// the graph and returns issues extended with one error per unusable arrow.
//
// An arrow is unusable when its source and/or its target is missing, or when
// one of its ends does not resolve to a component of the workflow. Usable
// arrows are converted with NewLink and stored under the ID of the arrow.
func (w *Workflow) CreateLinks(links []MxLink, issues []error) []error {
	for _, link := range links {
		if len(link.Source) == 0 || len(link.Target) == 0 {
			issues = append(issues, w.processLinkErrors(link))
			continue
		}

		sourceObj := w.GetResource(&link.Source)
		targetObj := w.GetResource(&link.Target)
		// An end that is not a registered component cannot be typed by
		// NewLink; report it instead of calling methods on a nil object.
		if sourceObj == nil || targetObj == nil {
			issues = append(issues, errors.New("Arrow "+link.ID+" is connected to an unknown component"))
			continue
		}

		linkObject := NewLink(sourceObj, link.Source, targetObj, link.Target)

		if linkObject.DCLink {
			// NewLink may have set the DataCenterID of the component linked to
			// the datacenter. The workflow stores its components by value, so
			// the object has to be saved back for the change to be kept.
			// linkObject.Source is always the end that is not the datacenter.
			linked := sourceObj
			if linkObject.Source != link.Source {
				linked = targetObj
			}
			w.UpdateObj(linked, linkObject.Source)
		}

		w.AddLinkToWorkflow(linkObject, link.ID)
	}

	return issues
}
// processLinkErrors builds the error describing an arrow that misses its
// source, its target, or both. The component still attached to the arrow is
// designated by its name, or by its raw ID when the component or its name
// cannot be resolved.
//
// It is meant to be called only for arrows with at least one empty end.
func (w *Workflow) processLinkErrors(link MxLink) (issue error) {
	// nameOf never dereferences a nil object or a nil name, so an arrow
	// attached to an unknown component is reported instead of panicking.
	nameOf := func(id string) string {
		obj := w.GetResource(&id)
		if obj == nil {
			return id
		}
		if name := obj.getName(); name != nil {
			return *name
		}
		return id
	}

	switch {
	case len(link.Source) == 0 && len(link.Target) == 0:
		issue = errors.New("Arrow " + link.ID + " is alone")
	case len(link.Source) == 0:
		issue = errors.New("Arrow (" + link.ID + ") to " + nameOf(link.Target) + " without parent")
	default:
		issue = errors.New("Arrow " + link.ID + " from " + nameOf(link.Source) + " without target")
	}

	return issue
}
// CheckLinks verifies that the components of the workflow are linked as
// required and returns issues extended with one error per faulty component:
//   - a storage must be the source or the destination of a link;
//   - a data must be linked to a computing;
//   - a datacenter must be linked to a computing;
//   - a computing must be linked to a datacenter.
func (w *Workflow) CheckLinks(issues []error) []error {
	report := func(msg string) {
		issues = append(issues, errors.New(msg))
	}

	// Storage components need at least one valid link.
	for id, storage := range w.Storage {
		if w.IsComponentSrc(id) || w.IsComponentDst(id) {
			continue
		}
		report("Storage " + *storage.getName() + " without compatible inputs and outputs")
	}

	// Data components need a computing component.
	for id, data := range w.Data {
		if w.HasLinkageToComputing(id) {
			continue
		}
		report("Data " + *data.getName() + " not attached to any Computing")
	}

	// Datacenters need a computing component.
	for id, dc := range w.Datacenter {
		if w.HasLinkageToComputing(id) {
			continue
		}
		report("Datacenter " + *dc.getName() + " not attached to any Computing")
	}

	// Computing components need a datacenter.
	for id, comp := range w.Computing {
		if w.HasLinkageToDC(id) {
			continue
		}
		report("Computing " + *comp.getName() + " not attached to any datacenter")
	}

	return issues
}
// IsComponentSrc reports whether the component id is the source of at least
// one link of the workflow. An empty id never matches.
func (w *Workflow) IsComponentSrc(id string) bool {
	if id == "" {
		return false
	}
	for _, candidate := range w.Links {
		if candidate.Source == id {
			return true
		}
	}
	return false
}
// IsComponentDst reports whether the component id is the destination of at
// least one link of the workflow. An empty id never matches, and links
// without a source are ignored.
func (w *Workflow) IsComponentDst(id string) bool {
	// Without this guard a link with an empty destination would make the
	// empty id look like a linked component.
	if id == "" {
		return false
	}
	for _, link := range w.Links {
		if link.Destination == id && link.Source != "" {
			return true
		}
	}
	return false
}
// HasLinkageToComputing reports whether at least one link of the workflow
// connects the component id to a computing component, in either direction.
//
// Both ends are looked up on the same link: a component linked to something
// that is not a computing does not qualify just because a computing component
// is linked elsewhere in the workflow.
func (w *Workflow) HasLinkageToComputing(id string) bool {
	if id == "" {
		return false
	}
	for _, link := range w.Links {
		// other is the end of the link opposite to id.
		var other string
		switch id {
		case link.Source:
			other = link.Destination
		case link.Destination:
			other = link.Source
		default:
			continue
		}
		if _, isComputing := w.Computing[other]; isComputing {
			return true
		}
	}
	return false
}
// HasLinkageToDC reports whether the component id is the source of at least
// one link flagged as a datacenter link.
func (w *Workflow) HasLinkageToDC(id string) bool {
	for _, candidate := range w.Links {
		if !candidate.DCLink || candidate.Source != id {
			continue
		}
		return true
	}
	return false
}
func sumExecutionReqs(exeqReq ...ExecutionRequirementsModel) (ret ExecutionRequirementsModel) {
for _, v := range exeqReq {
ret.CPUs += v.CPUs

Binary file not shown.

After

Width:  |  Height:  |  Size: 345 KiB