WatchDog Kube
This commit is contained in:
@@ -5,24 +5,57 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"oc-datacenter/infrastructure/minio"
|
||||
"oc-datacenter/infrastructure/storage"
|
||||
"sync"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/models/workflow_execution"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
)
|
||||
|
||||
// roleWaiters maps executionID → channel expecting the role-assignment message from OC discovery.
|
||||
var roleWaiters sync.Map
|
||||
|
||||
// teardownInfraForExecution handles infrastructure cleanup when a workflow terminates.
|
||||
// oc-datacenter is responsible only for infra here — booking/execution state
|
||||
// is managed by oc-scheduler.
|
||||
func teardownInfraForExecution(executionID string, executionsID string) {
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
myself, err := oclib.GetMySelf()
|
||||
if err != nil || myself == nil {
|
||||
return
|
||||
}
|
||||
selfPeerID := myself.GetID()
|
||||
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
res, _, loadErr := workflow_execution.NewAccessor(adminReq).LoadOne(executionID)
|
||||
if loadErr != nil || res == nil {
|
||||
logger.Warn().Msgf("teardownInfraForExecution: execution %s not found", executionID)
|
||||
return
|
||||
}
|
||||
exec := res.(*workflow_execution.WorkflowExecution)
|
||||
|
||||
ctx := context.Background()
|
||||
teardownAdmiraltyIfRemote(exec, selfPeerID)
|
||||
teardownMinioForExecution(ctx, executionsID, selfPeerID)
|
||||
teardownPVCForExecution(ctx, executionsID, selfPeerID)
|
||||
}
|
||||
|
||||
// ArgoKubeEvent carries the peer-routing metadata for a resource provisioning event.
|
||||
//
|
||||
// When MinioID is non-empty the event concerns Minio credential provisioning;
|
||||
// otherwise it concerns Admiralty kubeconfig provisioning.
|
||||
// When MinioID is non-empty and Local is false, the event concerns Minio credential provisioning.
|
||||
// When Local is true, the event concerns local PVC provisioning.
|
||||
// Otherwise it concerns Admiralty kubeconfig provisioning.
|
||||
type ArgoKubeEvent struct {
|
||||
ExecutionsID string `json:"executions_id"`
|
||||
DestPeerID string `json:"dest_peer_id"`
|
||||
Type tools.DataType `json:"data_type"`
|
||||
SourcePeerID string `json:"source_peer_id"`
|
||||
MinioID string `json:"minio_id,omitempty"`
|
||||
// Local signals that this STORAGE_RESOURCE event is for a local PVC (not Minio).
|
||||
Local bool `json:"local,omitempty"`
|
||||
StorageName string `json:"storage_name,omitempty"`
|
||||
// OriginID is the peer that initiated the request; the PB_CONSIDERS
|
||||
// response is routed back to this peer once provisioning completes.
|
||||
OriginID string `json:"origin_id,omitempty"`
|
||||
@@ -34,7 +67,7 @@ func ListenNATS() {
|
||||
tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
|
||||
// ─── ARGO_KUBE_EVENT ────────────────────────────────────────────────────────
|
||||
// Triggered by oc-discovery to notify this peer of a provisioning task.
|
||||
// Dispatches to Admiralty or Minio based on whether MinioID is set.
|
||||
// Dispatches to Admiralty, Minio, or local PVC based on event fields.
|
||||
tools.ARGO_KUBE_EVENT: func(resp tools.NATSResponse) {
|
||||
argo := &ArgoKubeEvent{}
|
||||
if err := json.Unmarshal(resp.Payload, argo); err != nil {
|
||||
@@ -42,50 +75,87 @@ func ListenNATS() {
|
||||
}
|
||||
|
||||
if argo.Type == tools.STORAGE_RESOURCE {
|
||||
fmt.Println("DETECT STORAGE ARGO_KUBE_EVENT")
|
||||
// ── Minio credential provisioning ──────────────────────────────
|
||||
setter := minio.NewMinioSetter(argo.ExecutionsID, argo.MinioID)
|
||||
if argo.SourcePeerID == argo.DestPeerID {
|
||||
fmt.Println("CONFIG MYSELF")
|
||||
err := CreateNamespace(argo.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
// Same peer: source creates credentials and immediately stores them.
|
||||
go setter.InitializeAsSource(context.Background(), argo.SourcePeerID, argo.DestPeerID, argo.OriginID)
|
||||
} else {
|
||||
// Different peers: publish Phase-1 PB_MINIO_CONFIG (Access == "")
|
||||
// so oc-discovery routes the role-assignment to the Minio host.
|
||||
phase1 := minio.MinioCredentialEvent{
|
||||
if argo.Local {
|
||||
fmt.Println("DETECT LOCAL PVC ARGO_KUBE_EVENT")
|
||||
// ── Local PVC provisioning ──────────────────────────────────
|
||||
setter := storage.NewPVCSetter(argo.ExecutionsID, argo.MinioID)
|
||||
event := storage.PVCProvisionEvent{
|
||||
ExecutionsID: argo.ExecutionsID,
|
||||
MinioID: argo.MinioID,
|
||||
StorageID: argo.MinioID,
|
||||
StorageName: argo.StorageName,
|
||||
SourcePeerID: argo.SourcePeerID,
|
||||
DestPeerID: argo.DestPeerID,
|
||||
OriginID: argo.OriginID,
|
||||
}
|
||||
if b, err := json.Marshal(phase1); err == nil {
|
||||
if b2, err := json.Marshal(&tools.PropalgationMessage{
|
||||
Payload: b,
|
||||
Action: tools.PB_MINIO_CONFIG,
|
||||
}); err == nil {
|
||||
fmt.Println("CONFIG THEM")
|
||||
go tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-datacenter",
|
||||
Datatype: -1,
|
||||
User: resp.User,
|
||||
Method: int(tools.PROPALGATION_EVENT),
|
||||
Payload: b2,
|
||||
})
|
||||
if argo.SourcePeerID == argo.DestPeerID {
|
||||
fmt.Println("CONFIG PVC MYSELF")
|
||||
err := CreateNamespace(argo.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
go setter.InitializeAsSource(context.Background(), event, true)
|
||||
} else {
|
||||
// Cross-peer: route to dest peer via PB_PVC_CONFIG.
|
||||
if b, err := json.Marshal(event); err == nil {
|
||||
if b2, err := json.Marshal(&tools.PropalgationMessage{
|
||||
Payload: b,
|
||||
Action: tools.PB_PVC_CONFIG,
|
||||
}); err == nil {
|
||||
fmt.Println("CONFIG PVC THEM")
|
||||
go tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-datacenter",
|
||||
Datatype: -1,
|
||||
User: resp.User,
|
||||
Method: int(tools.PROPALGATION_EVENT),
|
||||
Payload: b2,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fmt.Println("DETECT STORAGE ARGO_KUBE_EVENT")
|
||||
// ── Minio credential provisioning ──────────────────────────────
|
||||
setter := minio.NewMinioSetter(argo.ExecutionsID, argo.MinioID)
|
||||
if argo.SourcePeerID == argo.DestPeerID {
|
||||
fmt.Println("CONFIG MYSELF")
|
||||
err := CreateNamespace(argo.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
go setter.InitializeAsSource(context.Background(), argo.SourcePeerID, argo.DestPeerID, argo.OriginID, true)
|
||||
} else {
|
||||
// Different peers: publish Phase-1 PB_MINIO_CONFIG (Access == "")
|
||||
// so oc-discovery routes the role-assignment to the Minio host.
|
||||
phase1 := minio.MinioCredentialEvent{
|
||||
ExecutionsID: argo.ExecutionsID,
|
||||
MinioID: argo.MinioID,
|
||||
SourcePeerID: argo.SourcePeerID,
|
||||
DestPeerID: argo.DestPeerID,
|
||||
OriginID: argo.OriginID,
|
||||
}
|
||||
if b, err := json.Marshal(phase1); err == nil {
|
||||
if b2, err := json.Marshal(&tools.PropalgationMessage{
|
||||
Payload: b,
|
||||
Action: tools.PB_MINIO_CONFIG,
|
||||
}); err == nil {
|
||||
fmt.Println("CONFIG THEM")
|
||||
go tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-datacenter",
|
||||
Datatype: -1,
|
||||
User: resp.User,
|
||||
Method: int(tools.PROPALGATION_EVENT),
|
||||
Payload: b2,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fmt.Println("DETECT COMPUTE ARGO_KUBE_EVENT")
|
||||
// ── Admiralty kubeconfig provisioning (existing behaviour) ──────
|
||||
fmt.Println(argo.SourcePeerID, argo.DestPeerID)
|
||||
if argo.SourcePeerID == argo.DestPeerID {
|
||||
fmt.Println("CONFIG MYSELF")
|
||||
err := CreateNamespace(argo.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
go NewAdmiraltySetter(argo.ExecutionsID).InitializeAsSource(
|
||||
context.Background(), argo.SourcePeerID, argo.DestPeerID, argo.OriginID)
|
||||
context.Background(), argo.SourcePeerID, argo.DestPeerID, argo.OriginID, true)
|
||||
} else if b, err := json.Marshal(argo); err == nil {
|
||||
if b2, err := json.Marshal(&tools.PropalgationMessage{
|
||||
Payload: b,
|
||||
@@ -113,14 +183,16 @@ func ListenNATS() {
|
||||
if err := json.Unmarshal(resp.Payload, &kubeconfigEvent); err == nil {
|
||||
if kubeconfigEvent.Kubeconfig != "" {
|
||||
// Phase 2: kubeconfig present → this peer is the TARGET (scheduler).
|
||||
fmt.Println("CreateAdmiraltyTarget")
|
||||
NewAdmiraltySetter(kubeconfigEvent.ExecutionsID).InitializeAsTarget(
|
||||
context.Background(), kubeconfigEvent)
|
||||
context.Background(), kubeconfigEvent, false)
|
||||
} else {
|
||||
err := CreateNamespace(kubeconfigEvent.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
// Phase 1: no kubeconfig → this peer is the SOURCE (compute).
|
||||
fmt.Println("CreateAdmiraltySource")
|
||||
NewAdmiraltySetter(kubeconfigEvent.ExecutionsID).InitializeAsSource(
|
||||
context.Background(), kubeconfigEvent.SourcePeerID, kubeconfigEvent.DestPeerID, kubeconfigEvent.OriginID)
|
||||
context.Background(), kubeconfigEvent.SourcePeerID, kubeconfigEvent.DestPeerID, kubeconfigEvent.OriginID, false)
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -134,27 +206,59 @@ func ListenNATS() {
|
||||
if minioEvent.Access != "" {
|
||||
// Phase 2: credentials present → this peer is the TARGET (compute).
|
||||
minio.NewMinioSetter(minioEvent.ExecutionsID, minioEvent.MinioID).InitializeAsTarget(
|
||||
context.Background(), minioEvent)
|
||||
context.Background(), minioEvent, false)
|
||||
} else {
|
||||
err := CreateNamespace(minioEvent.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
// Phase 1: no credentials → this peer is the SOURCE (Minio host).
|
||||
minio.NewMinioSetter(minioEvent.ExecutionsID, minioEvent.MinioID).InitializeAsSource(
|
||||
context.Background(), minioEvent.SourcePeerID, minioEvent.DestPeerID, minioEvent.OriginID)
|
||||
context.Background(), minioEvent.SourcePeerID, minioEvent.DestPeerID, minioEvent.OriginID, false)
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
// ─── PVC_CONFIG_EVENT ────────────────────────────────────────────────────────
|
||||
// Forwarded by oc-discovery for cross-peer local PVC provisioning.
|
||||
// The dest peer creates the PVC in its own cluster.
|
||||
tools.PVC_CONFIG_EVENT: func(resp tools.NATSResponse) {
|
||||
event := storage.PVCProvisionEvent{}
|
||||
if err := json.Unmarshal(resp.Payload, &event); err == nil {
|
||||
err := CreateNamespace(event.ExecutionsID)
|
||||
fmt.Println("NS", err)
|
||||
storage.NewPVCSetter(event.ExecutionsID, event.StorageID).InitializeAsSource(
|
||||
context.Background(), event, false)
|
||||
}
|
||||
},
|
||||
|
||||
// ─── WORKFLOW_DONE_EVENT ─────────────────────────────────────────────────────
|
||||
// Emitted by oc-monitord when the top-level Argo workflow reaches a terminal
|
||||
// phase. oc-datacenter is responsible only for infrastructure teardown here:
|
||||
// booking/execution state management is handled entirely by oc-scheduler.
|
||||
tools.WORKFLOW_DONE_EVENT: func(resp tools.NATSResponse) {
|
||||
var evt tools.WorkflowLifecycleEvent
|
||||
if err := json.Unmarshal(resp.Payload, &evt); err != nil || evt.ExecutionsID == "" {
|
||||
return
|
||||
}
|
||||
go teardownInfraForExecution(evt.ExecutionID, evt.ExecutionsID)
|
||||
},
|
||||
|
||||
// ─── REMOVE_RESOURCE ────────────────────────────────────────────────────────
|
||||
// Routed by oc-discovery via ProtocolDeleteResource for datacenter teardown.
|
||||
// Only STORAGE_RESOURCE and COMPUTE_RESOURCE deletions are handled here.
|
||||
tools.REMOVE_RESOURCE: func(resp tools.NATSResponse) {
|
||||
switch resp.Datatype {
|
||||
case tools.STORAGE_RESOURCE:
|
||||
deleteEvent := minio.MinioDeleteEvent{}
|
||||
if err := json.Unmarshal(resp.Payload, &deleteEvent); err == nil && deleteEvent.ExecutionsID != "" {
|
||||
go minio.NewMinioSetter(deleteEvent.ExecutionsID, deleteEvent.MinioID).
|
||||
TeardownAsSource(context.Background(), deleteEvent)
|
||||
// Try PVC delete first (Local=true), fall back to Minio.
|
||||
pvcEvent := storage.PVCDeleteEvent{}
|
||||
if err := json.Unmarshal(resp.Payload, &pvcEvent); err == nil && pvcEvent.ExecutionsID != "" && pvcEvent.StorageName != "" {
|
||||
go storage.NewPVCSetter(pvcEvent.ExecutionsID, pvcEvent.StorageID).
|
||||
TeardownAsSource(context.Background(), pvcEvent)
|
||||
} else {
|
||||
deleteEvent := minio.MinioDeleteEvent{}
|
||||
if err := json.Unmarshal(resp.Payload, &deleteEvent); err == nil && deleteEvent.ExecutionsID != "" {
|
||||
go minio.NewMinioSetter(deleteEvent.ExecutionsID, deleteEvent.MinioID).
|
||||
TeardownAsSource(context.Background(), deleteEvent)
|
||||
}
|
||||
}
|
||||
case tools.COMPUTE_RESOURCE:
|
||||
argo := &ArgoKubeEvent{}
|
||||
|
||||
Reference in New Issue
Block a user