Native Indexer Mode
This commit is contained in:
364
daemons/node/common/native_stream.go
Normal file
364
daemons/node/common/native_stream.go
Normal file
@@ -0,0 +1,364 @@
|
||||
package common
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"math/rand"
|
||||
"oc-discovery/conf"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
"github.com/libp2p/go-libp2p/core/host"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
const (
	// ProtocolNativeSubscription is the stream protocol on which an indexer
	// sends its IndexerRegistration to a native (see RegisterWithNative).
	ProtocolNativeSubscription = "/opencloud/native/subscribe/1.0"
	// ProtocolNativeGetIndexers is the stream protocol on which a client asks a
	// native for a pool of indexers (GetIndexersRequest / GetIndexersResponse).
	ProtocolNativeGetIndexers = "/opencloud/native/indexers/1.0"
	// ProtocolNativeConsensus is used by nodes/indexers to cross-validate an indexer
	// pool against all configured native peers.
	ProtocolNativeConsensus = "/opencloud/native/consensus/1.0"
	// RecommendedHeartbeatInterval is the period at which RegisterWithNative is
	// expected to be called again.
	RecommendedHeartbeatInterval = 60 * time.Second

	// TopicIndexerRegistry is the PubSub topic used by native indexers to gossip
	// newly registered indexer PeerIDs to neighbouring natives.
	TopicIndexerRegistry = "oc-indexer-registry"

	// consensusQueryTimeout is the per-native timeout for a consensus query.
	consensusQueryTimeout = 3 * time.Second
	// consensusCollectTimeout is the total wait for all native responses.
	consensusCollectTimeout = 4 * time.Second
)
|
||||
|
||||
// ConsensusRequest is sent by a node/indexer to a native to validate a candidate
// indexer list. The native replies with what it trusts and what it suggests instead.
type ConsensusRequest struct {
	// Candidates holds the indexer addresses submitted for validation.
	Candidates []string `json:"candidates"`
}
|
||||
|
||||
// ConsensusResponse is returned by a native during a consensus challenge.
type ConsensusResponse struct {
	// Trusted lists the candidates the native considers alive. Each entry
	// counts as one vote from that native in clientSideConsensus.
	Trusted []string `json:"trusted"`
	// Suggestions lists extras the native knows and trusts but that were not
	// in the candidate list.
	Suggestions []string `json:"suggestions,omitempty"`
}
|
||||
|
||||
// IndexerRegistration is sent by an indexer to a native to signal its alive state.
// Only Addr is required; PeerID is derived from it if omitted.
type IndexerRegistration struct {
	// PeerID is the registering indexer's peer ID in string form.
	PeerID string `json:"peer_id,omitempty"`
	// Addr is the indexer's multiaddr including its /p2p/<peer id> suffix.
	Addr string `json:"addr"`
}
|
||||
|
||||
// GetIndexersRequest asks a native for a pool of live indexers.
type GetIndexersRequest struct {
	// Count is the number of indexers requested; ConnectToNatives sets it to
	// its maxIndexer argument.
	Count int `json:"count"`
}
|
||||
|
||||
// GetIndexersResponse is returned by the native with live indexer multiaddrs.
type GetIndexersResponse struct {
	// Indexers holds the indexer addresses proposed by the native.
	Indexers []string `json:"indexers"`
	// IsSelfFallback, when true, makes ConnectToNatives store Indexers directly
	// and skip the consensus rounds.
	IsSelfFallback bool `json:"is_self_fallback,omitempty"`
}
|
||||
|
||||
var StaticNatives = map[string]*pp.AddrInfo{}
|
||||
var StreamNativeMu sync.RWMutex
|
||||
var StreamNatives ProtocolStream = ProtocolStream{}
|
||||
|
||||
// ConnectToNatives is the client-side entry point for nodes/indexers that have
|
||||
// NativeIndexerAddresses configured. It:
|
||||
// 1. Connects (long-lived heartbeat) to all configured natives.
|
||||
// 2. Fetches an initial indexer pool from the FIRST responsive native.
|
||||
// 3. Challenges that pool to ALL natives (consensus round 1).
|
||||
// 4. If the confirmed list is short, samples native suggestions and re-challenges (round 2).
|
||||
// 5. Populates StaticIndexers with majority-confirmed indexers.
|
||||
func ConnectToNatives(h host.Host, minIndexer int, maxIndexer int, myPID pp.ID) error {
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
// Parse in config order: the first entry is the primary pool source.
|
||||
orderedAddrs := []string{}
|
||||
for _, addr := range strings.Split(conf.GetConfig().NativeIndexerAddresses, ",") {
|
||||
addr = strings.TrimSpace(addr)
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
ad, err := pp.AddrInfoFromString(addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("ConnectToNatives: invalid addr")
|
||||
continue
|
||||
}
|
||||
StaticNatives[addr] = ad
|
||||
orderedAddrs = append(orderedAddrs, addr)
|
||||
}
|
||||
if len(StaticNatives) == 0 {
|
||||
return errors.New("no valid native addresses configured")
|
||||
}
|
||||
|
||||
// Long-lived heartbeat connections to keep the native mesh active.
|
||||
SendHeartbeat(context.Background(), ProtocolHeartbeat,
|
||||
conf.GetConfig().Name, h, StreamNatives, StaticNatives, 20*time.Second)
|
||||
|
||||
// Step 1: get an initial pool from the FIRST responsive native (in config order).
|
||||
var candidates []string
|
||||
var isFallback bool
|
||||
for _, addr := range orderedAddrs {
|
||||
ad := StaticNatives[addr]
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if err := h.Connect(ctx, *ad); err != nil {
|
||||
cancel()
|
||||
continue
|
||||
}
|
||||
s, err := h.NewStream(ctx, ad.ID, ProtocolNativeGetIndexers)
|
||||
cancel()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
req := GetIndexersRequest{Count: maxIndexer}
|
||||
if encErr := json.NewEncoder(s).Encode(req); encErr != nil {
|
||||
s.Close()
|
||||
continue
|
||||
}
|
||||
var resp GetIndexersResponse
|
||||
if decErr := json.NewDecoder(s).Decode(&resp); decErr != nil {
|
||||
s.Close()
|
||||
continue
|
||||
}
|
||||
s.Close()
|
||||
candidates = resp.Indexers
|
||||
isFallback = resp.IsSelfFallback
|
||||
break // first responsive native only
|
||||
}
|
||||
|
||||
if len(candidates) == 0 {
|
||||
if minIndexer > 0 {
|
||||
return errors.New("ConnectToNatives: no indexers available from any native")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// If the native is already the fallback indexer, use it directly — no consensus needed.
|
||||
if isFallback {
|
||||
for _, addr := range candidates {
|
||||
ad, err := pp.AddrInfoFromString(addr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
StaticIndexers[addr] = ad
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Step 2: challenge the pool to ALL configured natives and score by majority vote.
|
||||
confirmed, suggestions := clientSideConsensus(h, candidates)
|
||||
|
||||
// Step 3: if we still have gaps, sample from suggestions and re-challenge.
|
||||
if len(confirmed) < maxIndexer && len(suggestions) > 0 {
|
||||
rand.Shuffle(len(suggestions), func(i, j int) { suggestions[i], suggestions[j] = suggestions[j], suggestions[i] })
|
||||
gap := maxIndexer - len(confirmed)
|
||||
if gap > len(suggestions) {
|
||||
gap = len(suggestions)
|
||||
}
|
||||
confirmed2, _ := clientSideConsensus(h, append(confirmed, suggestions[:gap]...))
|
||||
if len(confirmed2) > 0 {
|
||||
confirmed = confirmed2
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: populate StaticIndexers with confirmed addresses.
|
||||
for _, addr := range confirmed {
|
||||
ad, err := pp.AddrInfoFromString(addr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
StaticIndexers[addr] = ad
|
||||
}
|
||||
|
||||
if minIndexer > 0 && len(StaticIndexers) < minIndexer {
|
||||
return errors.New("not enough majority-confirmed indexers available")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// clientSideConsensus challenges a candidate list to ALL configured native peers
|
||||
// in parallel. Each native replies with the candidates it trusts plus extras it
|
||||
// recommends. An indexer is confirmed when strictly more than 50% of responding
|
||||
// natives trust it. The remaining addresses from native suggestions are returned
|
||||
// as suggestions for a possible second round.
|
||||
func clientSideConsensus(h host.Host, candidates []string) (confirmed []string, suggestions []string) {
|
||||
if len(candidates) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
StreamNativeMu.RLock()
|
||||
peers := make([]*pp.AddrInfo, 0, len(StaticNatives))
|
||||
for _, ad := range StaticNatives {
|
||||
peers = append(peers, ad)
|
||||
}
|
||||
StreamNativeMu.RUnlock()
|
||||
|
||||
if len(peers) == 0 {
|
||||
// No natives to challenge: trust candidates as-is.
|
||||
return candidates, nil
|
||||
}
|
||||
|
||||
type nativeResult struct {
|
||||
trusted []string
|
||||
suggestions []string
|
||||
responded bool
|
||||
}
|
||||
ch := make(chan nativeResult, len(peers))
|
||||
|
||||
for _, ad := range peers {
|
||||
go func(ad *pp.AddrInfo) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), consensusQueryTimeout)
|
||||
defer cancel()
|
||||
if err := h.Connect(ctx, *ad); err != nil {
|
||||
ch <- nativeResult{}
|
||||
return
|
||||
}
|
||||
s, err := h.NewStream(ctx, ad.ID, ProtocolNativeConsensus)
|
||||
if err != nil {
|
||||
ch <- nativeResult{}
|
||||
return
|
||||
}
|
||||
defer s.Close()
|
||||
if err := json.NewEncoder(s).Encode(ConsensusRequest{Candidates: candidates}); err != nil {
|
||||
ch <- nativeResult{}
|
||||
return
|
||||
}
|
||||
var resp ConsensusResponse
|
||||
if err := json.NewDecoder(s).Decode(&resp); err != nil {
|
||||
ch <- nativeResult{}
|
||||
return
|
||||
}
|
||||
ch <- nativeResult{trusted: resp.Trusted, suggestions: resp.Suggestions, responded: true}
|
||||
}(ad)
|
||||
}
|
||||
|
||||
// Collect responses up to consensusCollectTimeout.
|
||||
timer := time.NewTimer(consensusCollectTimeout)
|
||||
defer timer.Stop()
|
||||
|
||||
trustedCounts := map[string]int{}
|
||||
suggestionPool := map[string]struct{}{}
|
||||
total := 0 // counts only natives that actually responded
|
||||
collected := 0
|
||||
|
||||
collect:
|
||||
for collected < len(peers) {
|
||||
select {
|
||||
case r := <-ch:
|
||||
collected++
|
||||
if !r.responded {
|
||||
continue // timeout / error: skip, do not count as vote
|
||||
}
|
||||
total++
|
||||
seen := map[string]struct{}{}
|
||||
for _, addr := range r.trusted {
|
||||
if _, already := seen[addr]; !already {
|
||||
trustedCounts[addr]++
|
||||
seen[addr] = struct{}{}
|
||||
}
|
||||
}
|
||||
for _, addr := range r.suggestions {
|
||||
suggestionPool[addr] = struct{}{}
|
||||
}
|
||||
case <-timer.C:
|
||||
break collect
|
||||
}
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
// No native responded: fall back to trusting the candidates as-is.
|
||||
return candidates, nil
|
||||
}
|
||||
|
||||
confirmedSet := map[string]struct{}{}
|
||||
for addr, count := range trustedCounts {
|
||||
if count*2 > total { // strictly >50%
|
||||
confirmed = append(confirmed, addr)
|
||||
confirmedSet[addr] = struct{}{}
|
||||
}
|
||||
}
|
||||
for addr := range suggestionPool {
|
||||
if _, ok := confirmedSet[addr]; !ok {
|
||||
suggestions = append(suggestions, addr)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// ProtocolIndexerHeartbeat is the protocol ID EnsureNativePeers passes to
// SendHeartbeat for the heartbeat towards the configured natives.
const ProtocolIndexerHeartbeat = "/opencloud/heartbeat/indexer/1.0"
|
||||
|
||||
// RegisterWithNative sends a one-shot registration to each configured native indexer.
|
||||
// Should be called periodically every RecommendedHeartbeatInterval.
|
||||
func RegisterWithNative(h host.Host, nativeAddressesStr string) {
|
||||
logger := oclib.GetLogger()
|
||||
myAddr := ""
|
||||
if len(h.Addrs()) > 0 {
|
||||
myAddr = h.Addrs()[0].String() + "/p2p/" + h.ID().String()
|
||||
}
|
||||
reg := IndexerRegistration{
|
||||
PeerID: h.ID().String(),
|
||||
Addr: myAddr,
|
||||
}
|
||||
for _, addr := range strings.Split(nativeAddressesStr, ",") {
|
||||
addr = strings.TrimSpace(addr)
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
ad, err := pp.AddrInfoFromString(addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("RegisterWithNative: invalid addr")
|
||||
continue
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if err := h.Connect(ctx, *ad); err != nil {
|
||||
cancel()
|
||||
continue
|
||||
}
|
||||
s, err := h.NewStream(ctx, ad.ID, ProtocolNativeSubscription)
|
||||
cancel()
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("RegisterWithNative: stream open failed")
|
||||
continue
|
||||
}
|
||||
if err := json.NewEncoder(s).Encode(reg); err != nil {
|
||||
logger.Err(err).Msg("RegisterWithNative: encode failed")
|
||||
}
|
||||
s.Close()
|
||||
}
|
||||
}
|
||||
|
||||
// EnsureNativePeers populates StaticNatives from config and starts heartbeat
|
||||
// connections to other natives. Safe to call multiple times; heartbeat is only
|
||||
// started once (when StaticNatives transitions from empty to non-empty).
|
||||
func EnsureNativePeers(h host.Host) {
|
||||
nativeAddrs := conf.GetConfig().NativeIndexerAddresses
|
||||
if nativeAddrs == "" {
|
||||
return
|
||||
}
|
||||
StreamNativeMu.Lock()
|
||||
wasEmpty := len(StaticNatives) == 0
|
||||
for _, addr := range strings.Split(nativeAddrs, ",") {
|
||||
addr = strings.TrimSpace(addr)
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
ad, err := pp.AddrInfoFromString(addr)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
StaticNatives[addr] = ad
|
||||
}
|
||||
StreamNativeMu.Unlock()
|
||||
|
||||
if wasEmpty && len(StaticNatives) > 0 {
|
||||
SendHeartbeat(context.Background(), ProtocolIndexerHeartbeat,
|
||||
conf.GetConfig().Name, h, StreamNatives, StaticNatives, 20*time.Second)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user