Full Flow : Catalog + Peer
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"slices"
|
||||
"strings"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||||
"github.com/libp2p/go-libp2p/core/crypto"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
@@ -36,10 +38,15 @@ const (
|
||||
)
|
||||
|
||||
// liveIndexerEntry tracks a registered indexer in the native's in-memory cache and DHT.
|
||||
// PubKey and Signature are forwarded from the IndexerRegistration so the DHT validator
|
||||
// can verify that the entry was produced by the peer owning the declared PeerID.
|
||||
type liveIndexerEntry struct {
|
||||
PeerID string `json:"peer_id"`
|
||||
Addr string `json:"addr"`
|
||||
ExpiresAt time.Time `json:"expires_at"`
|
||||
PeerID string `json:"peer_id"`
|
||||
Addr string `json:"addr"`
|
||||
ExpiresAt time.Time `json:"expires_at"`
|
||||
RegTimestamp int64 `json:"reg_ts,omitempty"` // Timestamp from the original IndexerRegistration
|
||||
PubKey []byte `json:"pub_key,omitempty"`
|
||||
Signature []byte `json:"sig,omitempty"`
|
||||
}
|
||||
|
||||
// NativeState holds runtime state specific to native indexer operation.
|
||||
@@ -53,13 +60,18 @@ type NativeState struct {
|
||||
// including entries written by other natives.
|
||||
knownPeerIDs map[string]string
|
||||
knownMu sync.RWMutex
|
||||
|
||||
// cancel stops background goroutines (runOffloadLoop, refreshIndexersFromDHT)
|
||||
// when the native shuts down.
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
func newNativeState() *NativeState {
|
||||
func newNativeState(cancel context.CancelFunc) *NativeState {
|
||||
return &NativeState{
|
||||
liveIndexers: map[string]*liveIndexerEntry{},
|
||||
responsiblePeers: map[pp.ID]struct{}{},
|
||||
knownPeerIDs: map[string]string{},
|
||||
cancel: cancel,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +89,18 @@ func (v IndexerRecordValidator) Validate(_ string, value []byte) error {
|
||||
if e.ExpiresAt.Before(time.Now().UTC()) {
|
||||
return errors.New("expired indexer record")
|
||||
}
|
||||
// Verify self-signature when present — rejects entries forged by a
|
||||
// compromised native that does not control the declared PeerID.
|
||||
if len(e.Signature) > 0 && len(e.PubKey) > 0 {
|
||||
pub, err := crypto.UnmarshalPublicKey(e.PubKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("indexer entry: invalid public key: %w", err)
|
||||
}
|
||||
payload := []byte(fmt.Sprintf("%s|%s|%d", e.PeerID, e.Addr, e.RegTimestamp))
|
||||
if ok, err := pub.Verify(payload, e.Signature); err != nil || !ok {
|
||||
return errors.New("indexer entry: invalid signature")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -99,9 +123,11 @@ func (v IndexerRecordValidator) Select(_ string, values [][]byte) (int, error) {
|
||||
// InitNative registers native-specific stream handlers and starts background loops.
|
||||
// Must be called after DHT is initialized.
|
||||
func (ix *IndexerService) InitNative() {
|
||||
ix.Native = newNativeState()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ix.Native = newNativeState(cancel)
|
||||
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat) // specific heartbeat for Indexer.
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeSubscription, ix.handleNativeSubscription)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeUnsubscribe, ix.handleNativeUnsubscribe)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeGetIndexers, ix.handleNativeGetIndexers)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeConsensus, ix.handleNativeConsensus)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeGetPeers, ix.handleNativeGetPeers)
|
||||
@@ -109,8 +135,8 @@ func (ix *IndexerService) InitNative() {
|
||||
ix.subscribeIndexerRegistry()
|
||||
// Ensure long connections to other configured natives (native-to-native mesh).
|
||||
common.EnsureNativePeers(ix.Host)
|
||||
go ix.runOffloadLoop()
|
||||
go ix.refreshIndexersFromDHT()
|
||||
go ix.runOffloadLoop(ctx)
|
||||
go ix.refreshIndexersFromDHT(ctx)
|
||||
}
|
||||
|
||||
// subscribeIndexerRegistry joins the PubSub topic used by natives to gossip newly
|
||||
@@ -118,14 +144,40 @@ func (ix *IndexerService) InitNative() {
|
||||
func (ix *IndexerService) subscribeIndexerRegistry() {
|
||||
logger := oclib.GetLogger()
|
||||
ix.PS.RegisterTopicValidator(common.TopicIndexerRegistry, func(_ context.Context, _ pp.ID, msg *pubsub.Message) bool {
|
||||
// Reject empty or syntactically invalid multiaddrs before they reach the
|
||||
// message loop. A compromised native could otherwise gossip arbitrary data.
|
||||
addr := string(msg.Data)
|
||||
if addr == "" {
|
||||
// Parse as a signed IndexerRegistration.
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.Unmarshal(msg.Data, &reg); err != nil {
|
||||
return false
|
||||
}
|
||||
_, err := pp.AddrInfoFromString(addr)
|
||||
return err == nil
|
||||
if reg.Addr == "" {
|
||||
return false
|
||||
}
|
||||
if _, err := pp.AddrInfoFromString(reg.Addr); err != nil {
|
||||
return false
|
||||
}
|
||||
// Verify the self-signature when present (rejects forged gossip from a
|
||||
// compromised native that does not control the announced PeerID).
|
||||
if ok, _ := reg.Verify(); !ok {
|
||||
return false
|
||||
}
|
||||
// Accept only messages from known native peers or from this host itself.
|
||||
// This prevents external PSK participants from injecting registry entries.
|
||||
from := msg.GetFrom()
|
||||
if from == ix.Host.ID() {
|
||||
return true
|
||||
}
|
||||
common.StreamNativeMu.RLock()
|
||||
_, knownNative := common.StaticNatives[from.String()]
|
||||
if !knownNative {
|
||||
for _, ad := range common.StaticNatives {
|
||||
if ad.ID == from {
|
||||
knownNative = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
return knownNative
|
||||
})
|
||||
topic, err := ix.PS.Join(common.TopicIndexerRegistry)
|
||||
if err != nil {
|
||||
@@ -147,18 +199,18 @@ func (ix *IndexerService) subscribeIndexerRegistry() {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
addr := string(msg.Data)
|
||||
if addr == "" {
|
||||
// The gossip payload is a JSON-encoded IndexerRegistration (signed).
|
||||
var gossipReg common.IndexerRegistration
|
||||
if jsonErr := json.Unmarshal(msg.Data, &gossipReg); jsonErr != nil {
|
||||
continue
|
||||
}
|
||||
if peer, err := pp.AddrInfoFromString(addr); err == nil {
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[peer.ID.String()] = addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
if gossipReg.Addr == "" || gossipReg.PeerID == "" {
|
||||
continue
|
||||
}
|
||||
// A neighbouring native registered this PeerID; add to known set for DHT refresh.
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[gossipReg.PeerID] = gossipReg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -171,86 +223,172 @@ func (ix *IndexerService) handleNativeSubscription(s network.Stream) {
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
logger.Info().Msg("Subscription")
|
||||
for {
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.NewDecoder(s).Decode(&reg); err != nil {
|
||||
logger.Err(err).Msg("native subscription: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
logger.Info().Msg("Subscription " + reg.Addr)
|
||||
|
||||
if reg.Addr == "" {
|
||||
logger.Error().Msg("native subscription: missing addr")
|
||||
return
|
||||
}
|
||||
if reg.PeerID == "" {
|
||||
ad, err := pp.AddrInfoFromString(reg.Addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: invalid addr")
|
||||
return
|
||||
}
|
||||
reg.PeerID = ad.ID.String()
|
||||
}
|
||||
|
||||
// Reject registrations with an invalid self-signature.
|
||||
if ok, err := reg.Verify(); !ok {
|
||||
logger.Warn().Str("peer", reg.PeerID).Err(err).Msg("native subscription: invalid signature, rejecting")
|
||||
return
|
||||
}
|
||||
|
||||
// Build entry with a fresh TTL — must happen before the cache write so the
|
||||
// TTL window is not consumed by DHT retries.
|
||||
entry := &liveIndexerEntry{
|
||||
PeerID: reg.PeerID,
|
||||
Addr: reg.Addr,
|
||||
ExpiresAt: time.Now().UTC().Add(IndexerTTL),
|
||||
RegTimestamp: reg.Timestamp,
|
||||
PubKey: reg.PubKey,
|
||||
Signature: reg.Signature,
|
||||
}
|
||||
|
||||
// Verify that the declared address is actually reachable before admitting
|
||||
// the registration. This async dial runs in the background; the indexer is
|
||||
// tentatively admitted immediately (so heartbeats don't get stuck) but is
|
||||
// evicted from the cache if the dial fails within 5 s.
|
||||
go func(e *liveIndexerEntry) {
|
||||
ad, err := pp.AddrInfoFromString(e.Addr)
|
||||
if err != nil {
|
||||
logger.Warn().Str("addr", e.Addr).Msg("native subscription: invalid addr during validation, rejecting")
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
|
||||
delete(ix.Native.liveIndexers, e.PeerID)
|
||||
}
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
return
|
||||
}
|
||||
dialCtx, dialCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer dialCancel()
|
||||
if err := ix.Host.Connect(dialCtx, *ad); err != nil {
|
||||
logger.Warn().Str("addr", e.Addr).Err(err).Msg("native subscription: declared address unreachable, rejecting")
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
|
||||
delete(ix.Native.liveIndexers, e.PeerID)
|
||||
}
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
}
|
||||
}(entry)
|
||||
|
||||
// Update local cache and known set immediately so concurrent GetIndexers calls
|
||||
// can already see this indexer without waiting for the DHT write to complete.
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
_, isRenewal := ix.Native.liveIndexers[reg.PeerID]
|
||||
ix.Native.liveIndexers[reg.PeerID] = entry
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[reg.PeerID] = reg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
// Gossip the signed registration to neighbouring natives.
|
||||
// The payload is JSON-encoded so the receiver can verify the self-signature.
|
||||
ix.PubsubMu.RLock()
|
||||
topic := ix.LongLivedPubSubs[common.TopicIndexerRegistry]
|
||||
ix.PubsubMu.RUnlock()
|
||||
if topic != nil {
|
||||
if gossipData, marshalErr := json.Marshal(reg); marshalErr == nil {
|
||||
if err := topic.Publish(context.Background(), gossipData); err != nil {
|
||||
logger.Err(err).Msg("native subscription: registry gossip publish")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isRenewal {
|
||||
// logger.Debug().Str("peer", reg.PeerID).Msg("native: indexer TTL renewed : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
} else {
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer registered : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
}
|
||||
|
||||
// Persist in DHT asynchronously with bounded retry.
|
||||
// Max retry window = IndexerTTL (90 s) — retrying past entry expiry is pointless.
|
||||
// Backoff: 10 s → 20 s → 40 s, then repeats at 40 s until deadline.
|
||||
key := ix.genIndexerKey(reg.PeerID)
|
||||
data, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: marshal entry")
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
deadline := time.Now().Add(IndexerTTL)
|
||||
backoff := 10 * time.Second
|
||||
for {
|
||||
if time.Now().After(deadline) {
|
||||
logger.Warn().Str("key", key).Msg("native subscription: DHT put abandoned, entry TTL exceeded")
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
err := ix.DHT.PutValue(ctx, key, data)
|
||||
cancel()
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
logger.Err(err).Msg("native subscription: DHT put " + key)
|
||||
if !strings.Contains(err.Error(), "failed to find any peer in table") {
|
||||
return // non-retryable error
|
||||
}
|
||||
remaining := time.Until(deadline)
|
||||
if backoff > remaining {
|
||||
backoff = remaining
|
||||
}
|
||||
if backoff <= 0 {
|
||||
return
|
||||
}
|
||||
time.Sleep(backoff)
|
||||
if backoff < 40*time.Second {
|
||||
backoff *= 2
|
||||
}
|
||||
}
|
||||
}()
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// handleNativeUnsubscribe removes a departing indexer from the local cache and
|
||||
// known set immediately, without waiting for TTL expiry.
|
||||
func (ix *IndexerService) handleNativeUnsubscribe(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.NewDecoder(s).Decode(&reg); err != nil {
|
||||
logger.Err(err).Msg("native subscription: decode")
|
||||
return
|
||||
}
|
||||
logger.Info().Msg("Subscription " + reg.Addr)
|
||||
|
||||
if reg.Addr == "" {
|
||||
logger.Error().Msg("native subscription: missing addr")
|
||||
logger.Err(err).Msg("native unsubscribe: decode")
|
||||
return
|
||||
}
|
||||
if reg.PeerID == "" {
|
||||
ad, err := pp.AddrInfoFromString(reg.Addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: invalid addr")
|
||||
return
|
||||
}
|
||||
reg.PeerID = ad.ID.String()
|
||||
}
|
||||
|
||||
// Build entry with a fresh TTL — must happen before the cache write so the 66s
|
||||
// window is not consumed by DHT retries.
|
||||
entry := &liveIndexerEntry{
|
||||
PeerID: reg.PeerID,
|
||||
Addr: reg.Addr,
|
||||
ExpiresAt: time.Now().UTC().Add(IndexerTTL),
|
||||
}
|
||||
|
||||
// Update local cache and known set immediately so concurrent GetIndexers calls
|
||||
// can already see this indexer without waiting for the DHT write to complete.
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
_, isRenewal := ix.Native.liveIndexers[reg.PeerID]
|
||||
ix.Native.liveIndexers[reg.PeerID] = entry
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[reg.PeerID] = reg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
// Gossip PeerID to neighbouring natives so they discover it via DHT.
|
||||
ix.PubsubMu.RLock()
|
||||
topic := ix.LongLivedPubSubs[common.TopicIndexerRegistry]
|
||||
ix.PubsubMu.RUnlock()
|
||||
if topic != nil {
|
||||
if err := topic.Publish(context.Background(), []byte(reg.Addr)); err != nil {
|
||||
logger.Err(err).Msg("native subscription: registry gossip publish")
|
||||
}
|
||||
}
|
||||
|
||||
if isRenewal {
|
||||
logger.Debug().Str("peer", reg.PeerID).Msg("native: indexer TTL renewed : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
} else {
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer registered : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
}
|
||||
|
||||
// Persist in DHT asynchronously — retries must not block the handler or consume
|
||||
// the local cache TTL.
|
||||
key := ix.genIndexerKey(reg.PeerID)
|
||||
data, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: marshal entry")
|
||||
logger.Warn().Msg("native unsubscribe: missing peer_id")
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx, key, data); err != nil {
|
||||
cancel()
|
||||
logger.Err(err).Msg("native subscription: DHT put " + key)
|
||||
if strings.Contains(err.Error(), "failed to find any peer in table") {
|
||||
time.Sleep(10 * time.Second)
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}()
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
delete(ix.Native.liveIndexers, reg.PeerID)
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
ix.Native.knownMu.Lock()
|
||||
delete(ix.Native.knownPeerIDs, reg.PeerID)
|
||||
ix.Native.knownMu.Unlock()
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer explicitly unregistered")
|
||||
}
|
||||
|
||||
// handleNativeGetIndexers returns this native's own list of reachable indexers.
|
||||
@@ -260,39 +398,47 @@ func (ix *IndexerService) handleNativeSubscription(s network.Stream) {
|
||||
func (ix *IndexerService) handleNativeGetIndexers(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
for {
|
||||
var req common.GetIndexersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 3
|
||||
}
|
||||
callerPeerID := s.Conn().RemotePeer().String()
|
||||
reachable := ix.reachableLiveIndexers(req.Count, callerPeerID)
|
||||
var resp common.GetIndexersResponse
|
||||
|
||||
var req common.GetIndexersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: decode")
|
||||
return
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 3
|
||||
}
|
||||
callerPeerID := s.Conn().RemotePeer().String()
|
||||
reachable := ix.reachableLiveIndexers(req.Count, callerPeerID)
|
||||
var resp common.GetIndexersResponse
|
||||
|
||||
if len(reachable) == 0 {
|
||||
// No live indexers reachable — try to self-delegate.
|
||||
if ix.selfDelegate(s.Conn().RemotePeer(), &resp) {
|
||||
logger.Info().Str("peer", callerPeerID).Msg("native: no indexers, acting as fallback for node")
|
||||
if len(reachable) == 0 {
|
||||
// No live indexers reachable — try to self-delegate.
|
||||
if ix.selfDelegate(s.Conn().RemotePeer(), &resp) {
|
||||
logger.Info().Str("peer", callerPeerID).Msg("native: no indexers, acting as fallback for node")
|
||||
} else {
|
||||
// Fallback pool saturated: return empty so the caller retries another
|
||||
// native instead of piling more load onto this one.
|
||||
logger.Warn().Str("peer", callerPeerID).Int("pool", maxFallbackPeers).Msg(
|
||||
"native: fallback pool saturated, refusing self-delegation")
|
||||
}
|
||||
} else {
|
||||
// Fallback pool saturated: return empty so the caller retries another
|
||||
// native instead of piling more load onto this one.
|
||||
logger.Warn().Str("peer", callerPeerID).Int("pool", maxFallbackPeers).Msg(
|
||||
"native: fallback pool saturated, refusing self-delegation")
|
||||
rand.Shuffle(len(reachable), func(i, j int) { reachable[i], reachable[j] = reachable[j], reachable[i] })
|
||||
if req.Count > len(reachable) {
|
||||
req.Count = len(reachable)
|
||||
}
|
||||
resp.Indexers = reachable[:req.Count]
|
||||
}
|
||||
} else {
|
||||
rand.Shuffle(len(reachable), func(i, j int) { reachable[i], reachable[j] = reachable[j], reachable[i] })
|
||||
if req.Count > len(reachable) {
|
||||
req.Count = len(reachable)
|
||||
}
|
||||
resp.Indexers = reachable[:req.Count]
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: encode response")
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -303,39 +449,47 @@ func (ix *IndexerService) handleNativeGetIndexers(s network.Stream) {
|
||||
func (ix *IndexerService) handleNativeConsensus(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req common.ConsensusRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native consensus: decode")
|
||||
return
|
||||
}
|
||||
|
||||
myList := ix.reachableLiveIndexers(-1, s.Conn().RemotePeer().String())
|
||||
mySet := make(map[string]struct{}, len(myList))
|
||||
for _, addr := range myList {
|
||||
mySet[addr] = struct{}{}
|
||||
}
|
||||
|
||||
trusted := []string{}
|
||||
candidateSet := make(map[string]struct{}, len(req.Candidates))
|
||||
for _, addr := range req.Candidates {
|
||||
candidateSet[addr] = struct{}{}
|
||||
if _, ok := mySet[addr]; ok {
|
||||
trusted = append(trusted, addr) // candidate we also confirm as reachable
|
||||
for {
|
||||
var req common.ConsensusRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native consensus: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Extras we trust but that the requester didn't include → suggestions.
|
||||
suggestions := []string{}
|
||||
for _, addr := range myList {
|
||||
if _, inCandidates := candidateSet[addr]; !inCandidates {
|
||||
suggestions = append(suggestions, addr)
|
||||
myList := ix.reachableLiveIndexers(-1, s.Conn().RemotePeer().String())
|
||||
mySet := make(map[string]struct{}, len(myList))
|
||||
for _, addr := range myList {
|
||||
mySet[addr] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
resp := common.ConsensusResponse{Trusted: trusted, Suggestions: suggestions}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native consensus: encode response")
|
||||
trusted := []string{}
|
||||
candidateSet := make(map[string]struct{}, len(req.Candidates))
|
||||
for _, addr := range req.Candidates {
|
||||
candidateSet[addr] = struct{}{}
|
||||
if _, ok := mySet[addr]; ok {
|
||||
trusted = append(trusted, addr) // candidate we also confirm as reachable
|
||||
}
|
||||
}
|
||||
|
||||
// Extras we trust but that the requester didn't include → suggestions.
|
||||
suggestions := []string{}
|
||||
for _, addr := range myList {
|
||||
if _, inCandidates := candidateSet[addr]; !inCandidates {
|
||||
suggestions = append(suggestions, addr)
|
||||
}
|
||||
}
|
||||
|
||||
resp := common.ConsensusResponse{Trusted: trusted, Suggestions: suggestions}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native consensus: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -406,11 +560,16 @@ func (ix *IndexerService) reachableLiveIndexers(count int, from ...string) []str
|
||||
// refreshIndexersFromDHT runs in background and queries the shared DHT for every known
|
||||
// indexer PeerID whose local cache entry is missing or expired. This supplements the
|
||||
// local cache with entries written by neighbouring natives.
|
||||
func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
func (ix *IndexerService) refreshIndexersFromDHT(ctx context.Context) {
|
||||
t := time.NewTicker(dhtRefreshInterval)
|
||||
defer t.Stop()
|
||||
logger := oclib.GetLogger()
|
||||
for range t.C {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
ix.Native.knownMu.RLock()
|
||||
peerIDs := make([]string, 0, len(ix.Native.knownPeerIDs))
|
||||
for pid := range ix.Native.knownPeerIDs {
|
||||
@@ -427,10 +586,10 @@ func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
continue // still fresh in local cache
|
||||
}
|
||||
key := ix.genIndexerKey(pid)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
ch, err := ix.DHT.SearchValue(ctx, key)
|
||||
dhtCtx, dhtCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
ch, err := ix.DHT.SearchValue(dhtCtx, key)
|
||||
if err != nil {
|
||||
cancel()
|
||||
dhtCancel()
|
||||
continue
|
||||
}
|
||||
var best *liveIndexerEntry
|
||||
@@ -445,7 +604,7 @@ func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
}
|
||||
}
|
||||
}
|
||||
cancel()
|
||||
dhtCancel()
|
||||
if best != nil {
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
ix.Native.liveIndexers[best.PeerID] = best
|
||||
@@ -468,11 +627,16 @@ func (ix *IndexerService) genIndexerKey(peerID string) string {
|
||||
|
||||
// runOffloadLoop periodically checks if real indexers are available and releases
|
||||
// responsible peers so they can reconnect to actual indexers on their next attempt.
|
||||
func (ix *IndexerService) runOffloadLoop() {
|
||||
func (ix *IndexerService) runOffloadLoop(ctx context.Context) {
|
||||
t := time.NewTicker(offloadInterval)
|
||||
defer t.Stop()
|
||||
logger := oclib.GetLogger()
|
||||
for range t.C {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
fmt.Println("runOffloadLoop", ix.Native.responsiblePeers)
|
||||
ix.Native.responsibleMu.RLock()
|
||||
count := len(ix.Native.responsiblePeers)
|
||||
@@ -540,38 +704,46 @@ func (ix *IndexerService) runOffloadLoop() {
|
||||
func (ix *IndexerService) handleNativeGetPeers(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req common.GetNativePeersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get peers: decode")
|
||||
return
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 1
|
||||
}
|
||||
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
common.StreamNativeMu.RLock()
|
||||
candidates := make([]string, 0, len(common.StaticNatives))
|
||||
for addr := range common.StaticNatives {
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
candidates = append(candidates, addr)
|
||||
for {
|
||||
var req common.GetNativePeersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get peers: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 1
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
|
||||
rand.Shuffle(len(candidates), func(i, j int) { candidates[i], candidates[j] = candidates[j], candidates[i] })
|
||||
if req.Count > len(candidates) {
|
||||
req.Count = len(candidates)
|
||||
}
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
resp := common.GetNativePeersResponse{Peers: candidates[:req.Count]}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get peers: encode response")
|
||||
common.StreamNativeMu.RLock()
|
||||
candidates := make([]string, 0, len(common.StaticNatives))
|
||||
for addr := range common.StaticNatives {
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
candidates = append(candidates, addr)
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
|
||||
rand.Shuffle(len(candidates), func(i, j int) { candidates[i], candidates[j] = candidates[j], candidates[i] })
|
||||
if req.Count > len(candidates) {
|
||||
req.Count = len(candidates)
|
||||
}
|
||||
|
||||
resp := common.GetNativePeersResponse{Peers: candidates[:req.Count]}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get peers: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user