Simplify but Complete Catalog
This commit is contained in:
582
infrastructure/docker_scraper.go
Normal file
582
infrastructure/docker_scraper.go
Normal file
@@ -0,0 +1,582 @@
|
||||
package infrastructure
|
||||
|
||||
// docker_scraper.go — Seeds the catalog with official Docker Hub images as
|
||||
// ProcessingResources at API startup, then refreshes on a configurable interval.
|
||||
//
|
||||
// Each image version (tag) becomes one *peerless* ProcessingInstance:
|
||||
// - CreatorID = "" (no owning peer)
|
||||
// - Partnerships = nil (no partnerships)
|
||||
// - Origin.Ref = "docker.io/<img>:<tag>" (non-empty registry ref)
|
||||
//
|
||||
// This satisfies ResourceInstance.IsPeerless() and makes every instance freely
|
||||
// accessible to all peers without any pricing negotiation.
|
||||
//
|
||||
// Environment variables (all optional):
|
||||
//
|
||||
// DOCKER_SCRAPER_ENABLED true | false (default: true)
|
||||
// DOCKER_SCRAPER_IMAGES comma-separated list of images to track.
|
||||
// Format: "name" for official library images,
|
||||
// "org/name" for user/org images.
|
||||
// Default: a curated set of popular official images.
|
||||
// DOCKER_SCRAPER_MAX_TAGS max tags to import per image (default: 10)
|
||||
// DOCKER_SCRAPER_INTERVAL_H refresh interval in hours (default: 24)
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"cloud.o-forge.io/core/oc-lib/dbs"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/enum"
|
||||
"cloud.o-forge.io/core/oc-lib/models/common/models"
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// ─── Configuration ────────────────────────────────────────────────────────────
|
||||
|
||||
// DockerImageSpec identifies one Docker Hub repository to scrape.
|
||||
// Namespace is "library" for official images, or the org/user name otherwise.
|
||||
type DockerImageSpec struct {
	// Namespace is the Docker Hub namespace: "library" for official images,
	// otherwise the owning org/user name.
	Namespace string
	// Name is the repository name within the namespace (e.g. "ubuntu").
	Name string
}
|
||||
|
||||
// scraperConfig holds all runtime parameters for the Docker Hub scraper.
// Values are populated by scraperConfigFromEnv; see the file header for the
// corresponding environment variables.
type scraperConfig struct {
	Enabled       bool              // master switch (DOCKER_SCRAPER_ENABLED)
	Images        []DockerImageSpec // repositories to scrape (DOCKER_SCRAPER_IMAGES)
	MaxTags       int               // max tags imported per image (DOCKER_SCRAPER_MAX_TAGS)
	IntervalHours int               // refresh period in hours (DOCKER_SCRAPER_INTERVAL_H)
}
|
||||
|
||||
// defaultImages is the baseline catalog seeded when DOCKER_SCRAPER_IMAGES is not set.
|
||||
var defaultImages = []DockerImageSpec{
|
||||
{Namespace: "library", Name: "ubuntu"},
|
||||
{Namespace: "library", Name: "debian"},
|
||||
{Namespace: "library", Name: "alpine"},
|
||||
{Namespace: "library", Name: "python"},
|
||||
{Namespace: "library", Name: "golang"},
|
||||
{Namespace: "library", Name: "node"},
|
||||
{Namespace: "library", Name: "nginx"},
|
||||
{Namespace: "library", Name: "postgres"},
|
||||
{Namespace: "library", Name: "redis"},
|
||||
{Namespace: "library", Name: "mysql"},
|
||||
{Namespace: "library", Name: "redmine"},
|
||||
{Namespace: "library", Name: "ruby"},
|
||||
{Namespace: "library", Name: "rabbitmq"},
|
||||
{Namespace: "library", Name: "nextcloud"},
|
||||
{Namespace: "library", Name: "php"},
|
||||
{Namespace: "library", Name: "wordpress"},
|
||||
{Namespace: "library", Name: "fluentd"},
|
||||
{Namespace: "library", Name: "gradle"},
|
||||
{Namespace: "library", Name: "mongo"},
|
||||
{Namespace: "library", Name: "clickhouse"},
|
||||
{Namespace: "library", Name: "mariadb"},
|
||||
{Namespace: "library", Name: "eclipse-temurin"},
|
||||
{Namespace: "library", Name: "sonarqube"},
|
||||
{Namespace: "library", Name: "neo4j"},
|
||||
{Namespace: "library", Name: "rust"},
|
||||
{Namespace: "library", Name: "oraclelinux"},
|
||||
{Namespace: "library", Name: "openjdk"},
|
||||
{Namespace: "library", Name: "traefik"},
|
||||
{Namespace: "library", Name: "ghost"},
|
||||
{Namespace: "library", Name: "docker"},
|
||||
{Namespace: "library", Name: "websphere-liberty"},
|
||||
{Namespace: "library", Name: "open-liberty"},
|
||||
{Namespace: "library", Name: "storm"},
|
||||
{Namespace: "library", Name: "swift"},
|
||||
{Namespace: "library", Name: "rocket.chat"},
|
||||
{Namespace: "library", Name: "odoo"},
|
||||
{Namespace: "library", Name: "busybox"},
|
||||
{Namespace: "library", Name: "nats"},
|
||||
{Namespace: "library", Name: "mageia"},
|
||||
{Namespace: "library", Name: "tomcat"},
|
||||
{Namespace: "library", Name: "perl"},
|
||||
{Namespace: "library", Name: "xwiki"},
|
||||
{Namespace: "library", Name: "cassandra"},
|
||||
{Namespace: "library", Name: "varnish"},
|
||||
{Namespace: "library", Name: "ibm-semeru-runtimes"},
|
||||
{Namespace: "library", Name: "archlinux"},
|
||||
{Namespace: "library", Name: "clojure"},
|
||||
{Namespace: "library", Name: "maven"},
|
||||
{Namespace: "library", Name: "buildpack-deps"},
|
||||
{Namespace: "library", Name: "solr"},
|
||||
{Namespace: "library", Name: "groovy"},
|
||||
{Namespace: "library", Name: "phpmyadmin"},
|
||||
{Namespace: "library", Name: "hylang"},
|
||||
{Namespace: "library", Name: "joomla"},
|
||||
{Namespace: "library", Name: "matomo"},
|
||||
{Namespace: "library", Name: "drupal"},
|
||||
{Namespace: "library", Name: "yourls"},
|
||||
{Namespace: "library", Name: "haproxy"},
|
||||
{Namespace: "library", Name: "elixir"},
|
||||
{Namespace: "library", Name: "geonetwork"},
|
||||
{Namespace: "library", Name: "convertigo"},
|
||||
{Namespace: "library", Name: "erlang"},
|
||||
{Namespace: "library", Name: "azul-zulu"},
|
||||
{Namespace: "library", Name: "kibana"},
|
||||
{Namespace: "library", Name: "percona"},
|
||||
{Namespace: "library", Name: "logstash"},
|
||||
{Namespace: "library", Name: "elasticsearch"},
|
||||
{Namespace: "library", Name: "krakend"},
|
||||
{Namespace: "library", Name: "postfixadmin"},
|
||||
{Namespace: "library", Name: "monica"},
|
||||
{Namespace: "library", Name: "friendica"},
|
||||
{Namespace: "library", Name: "sapmachine"},
|
||||
{Namespace: "library", Name: "dart"},
|
||||
{Namespace: "library", Name: "spiped"},
|
||||
{Namespace: "library", Name: "amazoncorreto"},
|
||||
{Namespace: "library", Name: "zookeeper"},
|
||||
{Namespace: "library", Name: "julia"},
|
||||
{Namespace: "library", Name: "gcc"},
|
||||
{Namespace: "library", Name: "ibmjava"},
|
||||
{Namespace: "library", Name: "mediawiki"},
|
||||
{Namespace: "library", Name: "couchbase"},
|
||||
{Namespace: "library", Name: "jetty"},
|
||||
{Namespace: "library", Name: "sparl"},
|
||||
{Namespace: "library", Name: "tomee"},
|
||||
{Namespace: "library", Name: "kapacitor"},
|
||||
{Namespace: "library", Name: "ros"},
|
||||
{Namespace: "library", Name: "silverpeas"},
|
||||
{Namespace: "library", Name: "jruby"},
|
||||
{Namespace: "library", Name: "neurodebian"},
|
||||
{Namespace: "library", Name: "flink"},
|
||||
{Namespace: "library", Name: "pypy"},
|
||||
{Namespace: "library", Name: "orientdb"},
|
||||
{Namespace: "library", Name: "liquidbase"},
|
||||
{Namespace: "library", Name: "haxe"},
|
||||
{Namespace: "library", Name: "r-base"},
|
||||
{Namespace: "library", Name: "lighstreamer"},
|
||||
{Namespace: "library", Name: "kong"},
|
||||
{Namespace: "library", Name: "aerospike"},
|
||||
{Namespace: "library", Name: "influxdb"},
|
||||
{Namespace: "library", Name: "irssi"},
|
||||
{Namespace: "library", Name: "rakudo-star"},
|
||||
{Namespace: "library", Name: "satosa"},
|
||||
{Namespace: "library", Name: "rethinkdb"},
|
||||
{Namespace: "library", Name: "chronograf"},
|
||||
{Namespace: "library", Name: "memcached"},
|
||||
{Namespace: "library", Name: "backdrop"},
|
||||
{Namespace: "library", Name: "telegraf"},
|
||||
{Namespace: "library", Name: "httpd"},
|
||||
{Namespace: "library", Name: "haskell"},
|
||||
{Namespace: "library", Name: "emqx"},
|
||||
{Namespace: "library", Name: "swipl"},
|
||||
{Namespace: "library", Name: "couchdb"},
|
||||
{Namespace: "library", Name: "hitch"},
|
||||
{Namespace: "library", Name: "composer"},
|
||||
{Namespace: "library", Name: "adminer"},
|
||||
{Namespace: "library", Name: "amazonlinux"},
|
||||
{Namespace: "library", Name: "bash"},
|
||||
{Namespace: "library", Name: "caddy"},
|
||||
{Namespace: "library", Name: "arangodb"},
|
||||
{Namespace: "library", Name: "bonita"},
|
||||
{Namespace: "library", Name: "photon"},
|
||||
{Namespace: "library", Name: "almalinux"},
|
||||
{Namespace: "library", Name: "teamspeak"},
|
||||
{Namespace: "library", Name: "fedora"},
|
||||
{Namespace: "library", Name: "eclipse-mosquitto"},
|
||||
{Namespace: "library", Name: "registry"},
|
||||
{Namespace: "library", Name: "eggdrop"},
|
||||
{Namespace: "library", Name: "znc"},
|
||||
{Namespace: "library", Name: "api-firewall"},
|
||||
{Namespace: "library", Name: "alt"},
|
||||
{Namespace: "library", Name: "unit"},
|
||||
{Namespace: "library", Name: "clearlinux"},
|
||||
{Namespace: "library", Name: "gazebo"},
|
||||
{Namespace: "library", Name: "mongo-express"},
|
||||
{Namespace: "library", Name: "plone"},
|
||||
{Namespace: "library", Name: "cirros"},
|
||||
{Namespace: "library", Name: "mono"},
|
||||
{Namespace: "library", Name: "nats-streaming"},
|
||||
{Namespace: "library", Name: "sl"},
|
||||
{Namespace: "library", Name: "rockylinux"},
|
||||
{Namespace: "library", Name: "notary"},
|
||||
{Namespace: "library", Name: "vault"},
|
||||
{Namespace: "library", Name: "jobber"},
|
||||
{Namespace: "library", Name: "consul"},
|
||||
{Namespace: "library", Name: "php-zendserver"},
|
||||
{Namespace: "library", Name: "centos"},
|
||||
{Namespace: "library", Name: "express-gateway"},
|
||||
{Namespace: "library", Name: "clefos"},
|
||||
{Namespace: "library", Name: "adoptopenjdk"},
|
||||
{Namespace: "library", Name: "thrift"},
|
||||
{Namespace: "library", Name: "rapidoid"},
|
||||
{Namespace: "library", Name: "kaazing-gateway"},
|
||||
{Namespace: "library", Name: "nuxeo"},
|
||||
{Namespace: "library", Name: "neo4j"},
|
||||
{Namespace: "library", Name: "fsharp"},
|
||||
{Namespace: "library", Name: "sourcemage"},
|
||||
{Namespace: "library", Name: "swarm"},
|
||||
{Namespace: "library", Name: "euleros"},
|
||||
{Namespace: "library", Name: "crux"},
|
||||
{Namespace: "library", Name: "sentry"},
|
||||
{Namespace: "library", Name: "known"},
|
||||
{Namespace: "library", Name: "opensuse"},
|
||||
{Namespace: "library", Name: "owncloud"},
|
||||
{Namespace: "library", Name: "piwik"},
|
||||
{Namespace: "library", Name: "jenkins"},
|
||||
{Namespace: "library", Name: "celery"},
|
||||
{Namespace: "library", Name: "iojs"},
|
||||
{Namespace: "library", Name: "java"},
|
||||
{Namespace: "library", Name: "rails"},
|
||||
{Namespace: "library", Name: "django"},
|
||||
{Namespace: "library", Name: "glassfish"},
|
||||
{Namespace: "library", Name: "hipache"},
|
||||
{Namespace: "library", Name: "ubuntu-upstart"},
|
||||
{Namespace: "library", Name: "ubuntu-debootstrap"},
|
||||
{Namespace: "library", Name: "docker-dev"},
|
||||
{Namespace: "library", Name: "scratch"},
|
||||
}
|
||||
|
||||
// scraperConfigFromEnv reads scraper configuration from environment variables
|
||||
// and returns a populated scraperConfig with sensible defaults.
|
||||
func scraperConfigFromEnv() scraperConfig {
|
||||
cfg := scraperConfig{
|
||||
Enabled: true,
|
||||
MaxTags: 10,
|
||||
IntervalHours: 24,
|
||||
Images: defaultImages,
|
||||
}
|
||||
|
||||
if v := os.Getenv("DOCKER_SCRAPER_ENABLED"); v == "false" {
|
||||
cfg.Enabled = false
|
||||
}
|
||||
if v := os.Getenv("DOCKER_SCRAPER_MAX_TAGS"); v != "" {
|
||||
if n, err := strconv.Atoi(v); err == nil && n > 0 {
|
||||
cfg.MaxTags = n
|
||||
}
|
||||
}
|
||||
if v := os.Getenv("DOCKER_SCRAPER_INTERVAL_H"); v != "" {
|
||||
if h, err := strconv.Atoi(v); err == nil && h > 0 {
|
||||
cfg.IntervalHours = h
|
||||
}
|
||||
}
|
||||
if v := os.Getenv("DOCKER_SCRAPER_IMAGES"); v != "" {
|
||||
var specs []DockerImageSpec
|
||||
for _, raw := range strings.Split(v, ",") {
|
||||
raw = strings.TrimSpace(raw)
|
||||
if raw == "" {
|
||||
continue
|
||||
}
|
||||
if parts := strings.SplitN(raw, "/", 2); len(parts) == 2 {
|
||||
specs = append(specs, DockerImageSpec{Namespace: parts[0], Name: parts[1]})
|
||||
} else {
|
||||
specs = append(specs, DockerImageSpec{Namespace: "library", Name: raw})
|
||||
}
|
||||
}
|
||||
if len(specs) > 0 {
|
||||
cfg.Images = specs
|
||||
}
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
// ─── Docker Hub API types ──────────────────────────────────────────────────────
|
||||
|
||||
// hubRepoInfo is the subset of the Docker Hub repository endpoint
// (/v2/repositories/<ns>/<name>/) response that the scraper consumes.
type hubRepoInfo struct {
	Description     string `json:"description"`      // short one-line summary
	FullDescription string `json:"full_description"` // full Markdown README (also mined for a logo URL)
}
|
||||
|
||||
// hubTagImage describes one platform-specific manifest entry inside a tag,
// as returned by the Docker Hub tags endpoint.
type hubTagImage struct {
	Architecture string `json:"architecture"` // e.g. "amd64"
	OS           string `json:"os"`           // e.g. "linux"
	Digest       string `json:"digest"`       // manifest content digest
}
|
||||
|
||||
// hubTag is one repository tag as returned by the Docker Hub tags endpoint.
type hubTag struct {
	Name        string        `json:"name"`         // tag name, e.g. "latest"
	FullSize    int64         `json:"full_size"`    // size in bytes as reported by Docker Hub
	LastUpdated string        `json:"last_updated"` // timestamp string (not parsed by the scraper)
	Images      []hubTagImage `json:"images"`       // per-platform manifests
}
|
||||
|
||||
// hubTagsResponse is the paginated envelope of the Docker Hub tags endpoint.
// Only the first page is fetched (page_size = MaxTags); Count is decoded but
// not read anywhere in this file.
type hubTagsResponse struct {
	Count   int      `json:"count"`
	Results []hubTag `json:"results"`
}
|
||||
|
||||
// reMarkdownImage captures the URL of the first Markdown image tag,
// i.e. the "https://…" part of an "![alt](https://…)" construct.
var reMarkdownImage = regexp.MustCompile(`!\[[^\]]*\]\((https?://[^)]+)\)`)

// extractLogoURL scans a Markdown document and returns the URL of the first
// embedded image, or the empty string when no image is present.
func extractLogoURL(markdown string) string {
	match := reMarkdownImage.FindStringSubmatch(markdown)
	if len(match) != 2 {
		return ""
	}
	return match[1]
}
|
||||
|
||||
// fetchJSON performs a GET request to url and decodes the JSON body into out.
// Non-200 responses and decode failures are returned as errors.
//
// A 30-second timeout guards the whole request so a hung Docker Hub endpoint
// cannot stall the scrape cycle forever (the original used http.Get, which
// never times out). The zero Transport keeps the default pooled transport,
// so connection reuse is preserved.
func fetchJSON(url string, out interface{}) error {
	client := http.Client{Timeout: 30 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
	}
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	return json.Unmarshal(body, out)
}
|
||||
|
||||
// ─── Entry point ──────────────────────────────────────────────────────────────
|
||||
|
||||
// StartDockerScraper starts the background Docker Hub scraper goroutine.
|
||||
// It runs a full scrape immediately at startup, then repeats on the configured
|
||||
// interval. This function blocks forever and is designed to be called via
|
||||
// `go infrastructure.StartDockerScraper()` from main().
|
||||
func StartDockerScraper() {
|
||||
cfg := scraperConfigFromEnv()
|
||||
if !cfg.Enabled {
|
||||
fmt.Println("[docker-scraper] disabled (DOCKER_SCRAPER_ENABLED=false)")
|
||||
return
|
||||
}
|
||||
fmt.Printf("[docker-scraper] started — images=%d maxTags=%d interval=%dh\n",
|
||||
len(cfg.Images), cfg.MaxTags, cfg.IntervalHours)
|
||||
|
||||
runScrape(cfg)
|
||||
|
||||
ticker := time.NewTicker(time.Duration(cfg.IntervalHours) * time.Hour)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
runScrape(cfg)
|
||||
}
|
||||
}
|
||||
|
||||
// runScrape executes one full scrape cycle for all configured images.
|
||||
func runScrape(cfg scraperConfig) {
|
||||
fmt.Printf("[docker-scraper] cycle started at %s\n", time.Now().Format(time.RFC3339))
|
||||
for _, spec := range cfg.Images {
|
||||
if err := scrapeImage(spec, cfg.MaxTags); err != nil {
|
||||
fmt.Printf("[docker-scraper] %s/%s: %v\n", spec.Namespace, spec.Name, err)
|
||||
}
|
||||
}
|
||||
fmt.Printf("[docker-scraper] cycle done at %s\n", time.Now().Format(time.RFC3339))
|
||||
}
|
||||
|
||||
// ─── Per-image scraping ────────────────────────────────────────────────────────
|
||||
|
||||
// scrapeImage fetches Docker Hub metadata for one repository, then either creates
|
||||
// a new ProcessingResource in the catalog or extends the existing one with any
|
||||
// missing tag-instances.
|
||||
func scrapeImage(spec DockerImageSpec, maxTags int) error {
|
||||
// ── Fetch image metadata ──────────────────────────────────────────────────
|
||||
var info hubRepoInfo
|
||||
repoURL := fmt.Sprintf("https://hub.docker.com/v2/repositories/%s/%s/",
|
||||
spec.Namespace, spec.Name)
|
||||
if err := fetchJSON(repoURL, &info); err != nil {
|
||||
return fmt.Errorf("fetch repo info: %w", err)
|
||||
}
|
||||
|
||||
// ── Fetch tags ────────────────────────────────────────────────────────────
|
||||
tagsURL := fmt.Sprintf(
|
||||
"https://hub.docker.com/v2/repositories/%s/%s/tags?page_size=%d&ordering=last_updated",
|
||||
spec.Namespace, spec.Name, maxTags)
|
||||
var tagsResp hubTagsResponse
|
||||
if err := fetchJSON(tagsURL, &tagsResp); err != nil {
|
||||
return fmt.Errorf("fetch tags: %w", err)
|
||||
}
|
||||
if len(tagsResp.Results) == 0 {
|
||||
return nil // nothing to upsert
|
||||
}
|
||||
|
||||
adminReq := &tools.APIRequest{Admin: true}
|
||||
accessor := (&resources.ProcessingResource{}).GetAccessor(adminReq)
|
||||
|
||||
resourceName := spec.resourceName()
|
||||
existing := findProcessingResourceByName(accessor, resourceName)
|
||||
|
||||
if existing == nil {
|
||||
return createDockerProcessingResource(accessor, spec, resourceName, info, tagsResp.Results)
|
||||
}
|
||||
return syncDockerInstances(accessor, existing, spec, tagsResp.Results)
|
||||
}
|
||||
|
||||
// resourceName returns the canonical catalog name for a DockerImageSpec.
|
||||
// Official (library) images use just the image name; others use "org/image".
|
||||
func (s DockerImageSpec) resourceName() string {
|
||||
if s.Namespace == "library" {
|
||||
return s.Name
|
||||
}
|
||||
return s.Namespace + "/" + s.Name
|
||||
}
|
||||
|
||||
// dockerRef builds the canonical pull reference for an image+tag pair.
|
||||
func dockerRef(spec DockerImageSpec, tag string) string {
|
||||
if spec.Namespace == "library" {
|
||||
return "docker.io/" + spec.Name + ":" + tag
|
||||
}
|
||||
return "docker.io/" + spec.Namespace + "/" + spec.Name + ":" + tag
|
||||
}
|
||||
|
||||
// ─── DB helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
// findProcessingResourceByName loads ProcessingResources (both draft and
// published) and returns the first whose name matches exactly, or nil when
// none does.
//
// The query ORs two name paths ("abstractresource.abstractobject.name" and
// "abstractobject.name" — presumably covering two document layouts; TODO
// confirm which one is current). Because the operator is LIKE, an exact
// GetName() comparison is re-applied in Go to discard partial matches.
func findProcessingResourceByName(accessor utils.Accessor, name string) *resources.ProcessingResource {
	filters := &dbs.Filters{
		Or: map[string][]dbs.Filter{
			"abstractresource.abstractobject.name": {{
				Operator: dbs.LIKE.String(),
				Value:    name,
			}},
			"abstractobject.name": {{
				Operator: dbs.LIKE.String(),
				Value:    name,
			}},
		},
	}
	// Search published resources first, then drafts. Search errors are
	// deliberately ignored: a failed lookup simply reads as "not found",
	// which makes the caller create a fresh resource.
	for _, draft := range []bool{false, true} {
		results, _, _ := accessor.Search(filters, "", draft)
		for _, r := range results {
			if pr, ok := r.(*resources.ProcessingResource); ok && pr.GetName() == name {
				return pr
			}
		}
	}
	return nil
}
|
||||
|
||||
// createDockerProcessingResource stores a brand-new ProcessingResource with one
// peerless instance per Docker Hub tag, then publishes it (IsDraft = false).
//
// Parameters:
//   - accessor: admin-scoped accessor used for both store and publish
//   - spec:     the Docker Hub repository being imported
//   - name:     canonical catalog name (see DockerImageSpec.resourceName)
//   - info:     repository descriptions fetched from Docker Hub
//   - tags:     tag list; each entry becomes one instance
func createDockerProcessingResource(
	accessor utils.Accessor,
	spec DockerImageSpec,
	name string,
	info hubRepoInfo,
	tags []hubTag,
) error {
	resource := &resources.ProcessingResource{
		AbstractInstanciatedResource: resources.AbstractInstanciatedResource[*resources.ProcessingInstance]{
			AbstractResource: resources.AbstractResource{
				AbstractObject: utils.AbstractObject{
					UUID: uuid.New().String(),
					Name: name,
				},
				Description:      info.FullDescription,
				ShortDescription: info.Description,
				// Logo: first image embedded in the README, "" when none.
				Logo: extractLogoURL(info.FullDescription),
				Owners: []utils.Owner{
					// NOTE(review): this Logo points at an icon *page*, not an
					// image file — confirm consumers can render it.
					{Name: "https://hub.docker.com/", Logo: "https://icones8.fr/icon/Wln8Z3PcXanx/logo-docker"},
				},
			},
		},
		Infrastructure: enum.DOCKER,
		OpenSource:     true,
		IsService:      false,
	}

	// One peerless instance per Docker Hub tag.
	for i := range tags {
		resource.AddInstances(buildPeerlessInstance(spec, tags[i]))
	}

	// StoreOne goes through GenericStoreOne which calls AbstractResource.StoreDraftDefault()
	// setting IsDraft=true. We then publish with a raw update.
	stored, _, err := accessor.StoreOne(resource)
	if err != nil {
		return fmt.Errorf("store %q: %w", name, err)
	}
	pr := stored.(*resources.ProcessingResource)
	pr.IsDraft = false
	if _, _, err := utils.GenericRawUpdateOne(pr, pr.GetID(), accessor); err != nil {
		return fmt.Errorf("publish %q: %w", name, err)
	}
	fmt.Printf("[docker-scraper] created %q with %d instances\n", name, len(tags))
	return nil
}
|
||||
|
||||
// syncDockerInstances adds to an existing ProcessingResource any tag-instances
|
||||
// that are not yet present (identified by Origin.Ref). Already-present tags
|
||||
// are left untouched to preserve any manually enriched metadata.
|
||||
func syncDockerInstances(
|
||||
accessor utils.Accessor,
|
||||
resource *resources.ProcessingResource,
|
||||
spec DockerImageSpec,
|
||||
tags []hubTag,
|
||||
) error {
|
||||
existing := map[string]bool{}
|
||||
for _, inst := range resource.Instances {
|
||||
existing[inst.GetOrigin().Ref] = true
|
||||
}
|
||||
|
||||
added := 0
|
||||
for i := range tags {
|
||||
ref := dockerRef(spec, tags[i].Name)
|
||||
if existing[ref] {
|
||||
continue
|
||||
}
|
||||
resource.AddInstances(buildPeerlessInstance(spec, tags[i]))
|
||||
added++
|
||||
}
|
||||
if added == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if _, _, err := utils.GenericRawUpdateOne(resource, resource.GetID(), accessor); err != nil {
|
||||
return fmt.Errorf("sync instances for %q: %w", resource.GetName(), err)
|
||||
}
|
||||
fmt.Printf("[docker-scraper] added %d new instances to %q\n", added, resource.GetName())
|
||||
return nil
|
||||
}
|
||||
|
||||
// ─── Instance builder ─────────────────────────────────────────────────────────
|
||||
|
||||
// buildPeerlessInstance creates a ProcessingInstance for one Docker Hub tag
// that satisfies ResourceInstance.IsPeerless():
//
//	CreatorID    = ""  (zero value — no owning peer)
//	Partnerships = nil (zero value — no partnerships)
//	Origin.Ref   != "" (set to the canonical docker pull reference)
//
// ProcessingInstance.StoreDraftDefault() enforces this invariant on write.
func buildPeerlessInstance(spec DockerImageSpec, tag hubTag) *resources.ProcessingInstance {
	ref := dockerRef(spec, tag.Name)

	// Architecture hint from the first manifest entry (if any); multi-arch
	// tags list several manifests but only the first is recorded here.
	arch := ""
	if len(tag.Images) > 0 {
		arch = tag.Images[0].Architecture
	}

	return &resources.ProcessingInstance{
		ResourceInstance: resources.ResourceInstance[*resources.ResourcePartnerShip[*resources.ProcessingResourcePricingProfile]]{
			AbstractObject: utils.AbstractObject{
				UUID: uuid.New().String(),
				Name: tag.Name,
				// CreatorID intentionally left empty — required for IsPeerless()
			},
			Origin: resources.OriginMeta{
				Type:     resources.OriginPublic,
				Ref:      ref,
				License:  "",   // filled in per-image if known (e.g. MIT, Apache-2.0)
				Verified: true, // official Docker Hub images are considered verified
			},
			// Env / Inputs / Outputs left empty — can be enriched manually or by
			// future scrapers that read image labels / Docker Hub documentation.
		},
		Access: &resources.ProcessingResourceAccess{
			Container: &models.Container{
				Image: ref,
				// Command, Args, Volumes left empty — image defaults apply.
				// Env carries only the architecture hint gathered above.
				Env: map[string]string{
					"ARCH": arch,
				},
			},
		},
	}
}
|
||||
@@ -2,12 +2,13 @@ package infrastructure
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sync"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"cloud.o-forge.io/core/oc-lib/models/booking"
|
||||
"cloud.o-forge.io/core/oc-lib/models/resources"
|
||||
"cloud.o-forge.io/core/oc-lib/models/utils"
|
||||
"cloud.o-forge.io/core/oc-lib/tools"
|
||||
)
|
||||
|
||||
@@ -20,14 +21,16 @@ var ressourceCols = []oclib.LibDataEnum{
|
||||
}
|
||||
|
||||
var SearchMu sync.RWMutex
|
||||
var SearchStream = map[string]chan resources.ResourceInterface{}
|
||||
var SearchStream = map[string]chan []byte{}
|
||||
var SearchStreamSeen = map[string][]string{}
|
||||
|
||||
func EmitNATS(user string, groups []string, message tools.PropalgationMessage) {
|
||||
b, _ := json.Marshal(message)
|
||||
switch message.Action {
|
||||
case tools.PB_SEARCH:
|
||||
SearchMu.Lock()
|
||||
SearchStream[user] = make(chan resources.ResourceInterface, 128)
|
||||
SearchStream[user] = make(chan []byte, 128)
|
||||
SearchStreamSeen[user] = make([]string, 128)
|
||||
SearchMu.Unlock()
|
||||
tools.NewNATSCaller().SetNATSPub(tools.PROPALGATION_EVENT, tools.NATSResponse{
|
||||
FromApp: "oc-catalog",
|
||||
@@ -65,13 +68,123 @@ func ListenNATS() {
|
||||
return
|
||||
}
|
||||
p, err := resources.ToResource(int(resp.Datatype), resp.Payload)
|
||||
if err == nil {
|
||||
fmt.Println("SearchStream", p)
|
||||
SearchMu.Lock()
|
||||
fmt.Println(SearchStream, resp.User)
|
||||
SearchStream[resp.User] <- p // TODO when do we update it in our catalog ?*
|
||||
SearchMu.Unlock()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
// Exclude resources already present in the local catalog
|
||||
if check := oclib.NewRequestAdmin(oclib.LibDataEnum(resp.Datatype), nil).LoadOne(p.GetID()); check.Data == nil {
|
||||
p.SetNotInCatalog(true)
|
||||
return
|
||||
}
|
||||
wrapped, merr := json.Marshal(map[string]interface{}{
|
||||
"dtype": p.GetType(),
|
||||
"data": p,
|
||||
})
|
||||
if merr != nil {
|
||||
return
|
||||
}
|
||||
SearchMu.Lock()
|
||||
if SearchStreamSeen[resp.User] != nil && slices.Contains(SearchStreamSeen[resp.User], p.GetID()) {
|
||||
SearchStream[resp.User] <- wrapped // TODO when do we update it in our catalog ?
|
||||
}
|
||||
if SearchStreamSeen[resp.User] == nil {
|
||||
SearchStreamSeen[resp.User] = []string{}
|
||||
}
|
||||
SearchStreamSeen[resp.User] = append(SearchStreamSeen[resp.User], p.GetID())
|
||||
SearchMu.Unlock()
|
||||
},
|
||||
|
||||
// ── WORKFLOW_STEP_DONE_EVENT ─────────────────────────────────────────
|
||||
// Real-time update: one booking just completed → update its resource instance.
|
||||
tools.WORKFLOW_STEP_DONE_EVENT: func(resp tools.NATSResponse) {
|
||||
var evt tools.WorkflowLifecycleEvent
|
||||
if err := json.Unmarshal(resp.Payload, &evt); err != nil || evt.BookingID == "" {
|
||||
return
|
||||
}
|
||||
updateInstanceFromStep(tools.StepMetric{
|
||||
BookingID: evt.BookingID,
|
||||
State: evt.State,
|
||||
RealStart: evt.RealStart,
|
||||
RealEnd: evt.RealEnd,
|
||||
})
|
||||
},
|
||||
|
||||
// ── WORKFLOW_DONE_EVENT ──────────────────────────────────────────────
|
||||
// Recap: apply all steps in case STEP_DONE events were missed while
|
||||
// oc-catalog was down. Processing is idempotent (same duration wins
|
||||
// when times are identical; running average converges anyway).
|
||||
tools.WORKFLOW_DONE_EVENT: func(resp tools.NATSResponse) {
|
||||
var evt tools.WorkflowLifecycleEvent
|
||||
if err := json.Unmarshal(resp.Payload, &evt); err != nil {
|
||||
return
|
||||
}
|
||||
for _, step := range evt.Steps {
|
||||
updateInstanceFromStep(step)
|
||||
}
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// updateInstanceFromStep loads the booking identified by step.BookingID, then
// updates the AverageDuration of the resource instance only if this peer owns
// the resource (creator_id == self PeerID). Idempotent: safe to call twice.
//
// Silently returns on any lookup failure or ownership mismatch — the event
// is best-effort and a missed update only delays the running average.
func updateInstanceFromStep(step tools.StepMetric) {
	// Without both real timestamps there is no duration to record.
	if step.RealStart == nil || step.RealEnd == nil {
		return
	}
	actualS := step.RealEnd.Sub(*step.RealStart).Seconds()
	if actualS <= 0 {
		// Zero or negative span: clock skew or a bogus event — ignore.
		return
	}

	adminReq := &tools.APIRequest{Admin: true}

	// Resolve resource info from the booking.
	bkRes, _, err := booking.NewAccessor(adminReq).LoadOne(step.BookingID)
	if err != nil || bkRes == nil {
		return
	}
	bk := bkRes.(*booking.Booking)

	// Only update resources this peer owns.
	self, selfErr := oclib.GetMySelf()
	if selfErr != nil || self == nil {
		return
	}

	// The two cases below are structurally identical; only the concrete
	// resource type differs. NOTE(review): errors from GenericRawUpdateOne
	// are silently dropped in both branches — confirm best-effort persistence
	// is intended.
	switch bk.ResourceType {
	case tools.COMPUTE_RESOURCE, tools.LIVE_DATACENTER:
		res, _, err := (&resources.ComputeResource{}).GetAccessor(adminReq).LoadOne(bk.ResourceID)
		if err != nil || res == nil {
			return
		}
		compute := res.(*resources.ComputeResource)
		if compute.GetCreatorID() != self.PeerID {
			return
		}
		// Fold the measured duration into the matching instance only.
		for _, inst := range compute.Instances {
			if inst.GetID() == bk.InstanceID {
				inst.UpdateAverageDuration(actualS)
				break
			}
		}
		utils.GenericRawUpdateOne(compute, compute.GetID(), compute.GetAccessor(adminReq))

	case tools.STORAGE_RESOURCE, tools.LIVE_STORAGE:
		res, _, err := (&resources.StorageResource{}).GetAccessor(adminReq).LoadOne(bk.ResourceID)
		if err != nil || res == nil {
			return
		}
		storage := res.(*resources.StorageResource)
		if storage.GetCreatorID() != self.PeerID {
			return
		}
		// Fold the measured duration into the matching instance only.
		for _, inst := range storage.Instances {
			if inst.GetID() == bk.InstanceID {
				inst.UpdateAverageDuration(actualS)
				break
			}
		}
		utils.GenericRawUpdateOne(storage, storage.GetID(), storage.GetAccessor(adminReq))
	}
}
|
||||
|
||||
Reference in New Issue
Block a user