Full Flow : Catalog + Peer
This commit is contained in:
@@ -5,6 +5,8 @@ import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"oc-discovery/conf"
|
||||
"oc-discovery/daemons/node/common"
|
||||
"strings"
|
||||
@@ -83,30 +85,17 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
|
||||
NATSAddress: pr.NATSAddress,
|
||||
WalletAddress: pr.WalletAddress,
|
||||
}
|
||||
b, err := json.Marshal(p)
|
||||
if err != nil {
|
||||
return pp.SELF == p.Relation, nil, err
|
||||
}
|
||||
|
||||
if time.Now().UTC().After(pr.ExpiryDate) {
|
||||
return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline")
|
||||
}
|
||||
go tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
|
||||
FromApp: "oc-discovery",
|
||||
Datatype: tools.PEER,
|
||||
Method: int(tools.CREATE_RESOURCE),
|
||||
SearchAttr: "peer_id",
|
||||
Payload: b,
|
||||
})
|
||||
|
||||
return pp.SELF == p.Relation, p, nil
|
||||
}
|
||||
|
||||
type GetValue struct {
|
||||
Key string `json:"key"`
|
||||
PeerID peer.ID `json:"peer_id"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Search bool `json:"search,omitempty"`
|
||||
Key string `json:"key"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Search bool `json:"search,omitempty"`
|
||||
}
|
||||
|
||||
type GetResponse struct {
|
||||
@@ -132,29 +121,33 @@ func (ix *IndexerService) initNodeHandler() {
|
||||
// Each heartbeat from a node carries a freshly signed PeerRecord.
|
||||
// Republish it to the DHT so the record never expires as long as the node
|
||||
// is alive — no separate publish stream needed from the node side.
|
||||
ix.AfterHeartbeat = func(pid peer.ID) {
|
||||
ctx1, cancel1 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel1()
|
||||
res, err := ix.DHT.GetValue(ctx1, ix.genPIDKey(pid.String()))
|
||||
if err != nil {
|
||||
logger.Warn().Err(err)
|
||||
return
|
||||
}
|
||||
did := string(res)
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel2()
|
||||
res, err = ix.DHT.GetValue(ctx2, ix.genKey(did))
|
||||
if err != nil {
|
||||
logger.Warn().Err(err)
|
||||
return
|
||||
}
|
||||
ix.AfterHeartbeat = func(hb *common.Heartbeat) {
|
||||
// Priority 1: use the fresh signed PeerRecord embedded in the heartbeat.
|
||||
// Each heartbeat tick, the node re-signs with ExpiryDate = now+2min, so
|
||||
// this record is always fresh. Fetching from DHT would give a stale expiry.
|
||||
var rec PeerRecord
|
||||
if err := json.Unmarshal(res, &rec); err != nil {
|
||||
logger.Warn().Err(err).Str("peer", pid.String()).Msg("indexer: heartbeat record unmarshal failed")
|
||||
return
|
||||
if len(hb.Record) > 0 {
|
||||
if err := json.Unmarshal(hb.Record, &rec); err != nil {
|
||||
logger.Warn().Err(err).Msg("indexer: heartbeat embedded record unmarshal failed")
|
||||
return
|
||||
}
|
||||
} else {
|
||||
// Fallback: node didn't embed a record yet (first heartbeat before claimInfo).
|
||||
// Fetch from DHT using the DID resolved by HandleHeartbeat.
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
res, err := ix.DHT.GetValue(ctx2, ix.genKey(hb.DID))
|
||||
cancel2()
|
||||
if err != nil {
|
||||
logger.Warn().Err(err).Str("did", hb.DID).Msg("indexer: DHT fetch for refresh failed")
|
||||
return
|
||||
}
|
||||
if err := json.Unmarshal(res, &rec); err != nil {
|
||||
logger.Warn().Err(err).Str("did", hb.DID).Msg("indexer: heartbeat record unmarshal failed")
|
||||
return
|
||||
}
|
||||
}
|
||||
if _, err := rec.Verify(); err != nil {
|
||||
logger.Warn().Err(err).Str("peer", pid.String()).Msg("indexer: heartbeat record signature invalid")
|
||||
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid")
|
||||
return
|
||||
}
|
||||
data, err := json.Marshal(rec)
|
||||
@@ -162,12 +155,14 @@ func (ix *IndexerService) initNodeHandler() {
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
logger.Info().Msg("REFRESH PutValue " + ix.genKey(rec.DID))
|
||||
if err := ix.DHT.PutValue(ctx, ix.genKey(rec.DID), data); err != nil {
|
||||
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: DHT refresh failed")
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
ix.publishNameEvent(NameIndexAdd, rec.Name, rec.PeerID, rec.DID)
|
||||
if rec.Name != "" {
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
ix.DHT.PutValue(ctx2, ix.genNameKey(rec.Name), []byte(rec.DID))
|
||||
@@ -188,132 +183,151 @@ func (ix *IndexerService) initNodeHandler() {
|
||||
func (ix *IndexerService) handleNodePublish(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
for {
|
||||
var rec PeerRecord
|
||||
if err := json.NewDecoder(s).Decode(&rec); err != nil {
|
||||
logger.Err(err)
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if _, err := rec.Verify(); err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
if rec.PeerID == "" || rec.ExpiryDate.Before(time.Now().UTC()) {
|
||||
logger.Err(errors.New(rec.PeerID + " is expired."))
|
||||
return
|
||||
}
|
||||
pid, err := peer.Decode(rec.PeerID)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var rec PeerRecord
|
||||
if err := json.NewDecoder(s).Decode(&rec); err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
if _, err := rec.Verify(); err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
if rec.PeerID == "" || rec.ExpiryDate.Before(time.Now().UTC()) {
|
||||
logger.Err(errors.New(rec.PeerID + " is expired."))
|
||||
return
|
||||
}
|
||||
pid, err := peer.Decode(rec.PeerID)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
ix.StreamMU.Lock()
|
||||
defer ix.StreamMU.Unlock()
|
||||
if ix.StreamRecords[common.ProtocolHeartbeat] == nil {
|
||||
ix.StreamRecords[common.ProtocolHeartbeat] = map[peer.ID]*common.StreamRecord[PeerRecord]{}
|
||||
}
|
||||
streams := ix.StreamRecords[common.ProtocolHeartbeat]
|
||||
if srec, ok := streams[pid]; ok {
|
||||
srec.DID = rec.DID
|
||||
srec.Record = rec
|
||||
srec.HeartbeatStream.UptimeTracker.LastSeen = time.Now().UTC()
|
||||
}
|
||||
|
||||
ix.StreamMU.Lock()
|
||||
defer ix.StreamMU.Unlock()
|
||||
if ix.StreamRecords[common.ProtocolHeartbeat] == nil {
|
||||
ix.StreamRecords[common.ProtocolHeartbeat] = map[peer.ID]*common.StreamRecord[PeerRecord]{}
|
||||
}
|
||||
streams := ix.StreamRecords[common.ProtocolHeartbeat]
|
||||
if srec, ok := streams[pid]; ok {
|
||||
srec.DID = rec.DID
|
||||
srec.Record = rec
|
||||
srec.HeartbeatStream.UptimeTracker.LastSeen = time.Now().UTC()
|
||||
}
|
||||
|
||||
key := ix.genKey(rec.DID)
|
||||
data, err := json.Marshal(rec)
|
||||
if err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx, key, data); err != nil {
|
||||
logger.Err(err)
|
||||
key := ix.genKey(rec.DID)
|
||||
data, err := json.Marshal(rec)
|
||||
if err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx, key, data); err != nil {
|
||||
logger.Err(err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
|
||||
// Secondary index: /name/<name> → DID, so peers can resolve by human-readable name.
|
||||
if rec.Name != "" {
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx2, ix.genNameKey(rec.Name), []byte(rec.DID)); err != nil {
|
||||
logger.Err(err).Str("name", rec.Name).Msg("indexer: failed to write name index")
|
||||
fmt.Println("publishNameEvent")
|
||||
ix.publishNameEvent(NameIndexAdd, rec.Name, rec.PeerID, rec.DID)
|
||||
|
||||
// Secondary index: /name/<name> → DID, so peers can resolve by human-readable name.
|
||||
if rec.Name != "" {
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx2, ix.genNameKey(rec.Name), []byte(rec.DID)); err != nil {
|
||||
logger.Err(err).Str("name", rec.Name).Msg("indexer: failed to write name index")
|
||||
}
|
||||
cancel2()
|
||||
}
|
||||
cancel2()
|
||||
}
|
||||
// Secondary index: /pid/<peerID> → DID, so peers can resolve by libp2p PeerID.
|
||||
if rec.PeerID != "" {
|
||||
ctx3, cancel3 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx3, ix.genPIDKey(rec.PeerID), []byte(rec.DID)); err != nil {
|
||||
logger.Err(err).Str("pid", rec.PeerID).Msg("indexer: failed to write pid index")
|
||||
// Secondary index: /pid/<peerID> → DID, so peers can resolve by libp2p PeerID.
|
||||
if rec.PeerID != "" {
|
||||
ctx3, cancel3 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx3, ix.genPIDKey(rec.PeerID), []byte(rec.DID)); err != nil {
|
||||
logger.Err(err).Str("pid", rec.PeerID).Msg("indexer: failed to write pid index")
|
||||
}
|
||||
cancel3()
|
||||
}
|
||||
cancel3()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
||||
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req GetValue
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err)
|
||||
return
|
||||
}
|
||||
|
||||
resp := GetResponse{Found: false, Records: map[string]PeerRecord{}}
|
||||
|
||||
keys := []string{}
|
||||
// Name substring search — scan in-memory connected nodes first, then DHT exact match.
|
||||
if req.Name != "" {
|
||||
if req.Search {
|
||||
for _, did := range ix.LookupNameIndex(strings.ToLower(req.Name)) {
|
||||
keys = append(keys, did)
|
||||
for {
|
||||
var req GetValue
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
// 2. DHT exact-name lookup: covers nodes that published but aren't currently connected.
|
||||
nameCtx, nameCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if ch, err := ix.DHT.SearchValue(nameCtx, ix.genNameKey(req.Name)); err == nil {
|
||||
for did := range ch {
|
||||
keys = append(keys, string(did))
|
||||
break
|
||||
logger.Err(err)
|
||||
continue
|
||||
}
|
||||
|
||||
resp := GetResponse{Found: false, Records: map[string]PeerRecord{}}
|
||||
|
||||
fmt.Println("handleNodeGet", req.Search, req.Name)
|
||||
keys := []string{}
|
||||
// Name substring search — scan in-memory connected nodes first, then DHT exact match.
|
||||
if req.Name != "" {
|
||||
if req.Search {
|
||||
for _, did := range ix.LookupNameIndex(strings.ToLower(req.Name)) {
|
||||
keys = append(keys, did)
|
||||
}
|
||||
}
|
||||
nameCancel()
|
||||
}
|
||||
} else if req.PeerID != "" {
|
||||
pidCtx, pidCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if did, err := ix.DHT.GetValue(pidCtx, ix.genPIDKey(req.PeerID.String())); err == nil {
|
||||
keys = append(keys, string(did))
|
||||
}
|
||||
pidCancel()
|
||||
} else {
|
||||
keys = append(keys, req.Key)
|
||||
}
|
||||
|
||||
// DHT record fetch by DID key (covers exact-name and PeerID paths).
|
||||
if len(keys) > 0 {
|
||||
for _, k := range keys {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
c, err := ix.DHT.GetValue(ctx, ix.genKey(k))
|
||||
cancel()
|
||||
if err == nil {
|
||||
var rec PeerRecord
|
||||
if json.Unmarshal(c, &rec) == nil {
|
||||
// Filter by PeerID only when one was explicitly specified.
|
||||
if req.PeerID == "" || rec.PeerID == req.PeerID.String() {
|
||||
resp.Records[rec.PeerID] = rec
|
||||
} else {
|
||||
// 2. DHT exact-name lookup: covers nodes that published but aren't currently connected.
|
||||
nameCtx, nameCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if ch, err := ix.DHT.SearchValue(nameCtx, ix.genNameKey(req.Name)); err == nil {
|
||||
for did := range ch {
|
||||
keys = append(keys, string(did))
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if req.Name == "" && req.PeerID == "" {
|
||||
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + req.Key)
|
||||
nameCancel()
|
||||
}
|
||||
} else if req.PeerID != "" {
|
||||
pidCtx, pidCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
if did, err := ix.DHT.GetValue(pidCtx, ix.genPIDKey(req.PeerID)); err == nil {
|
||||
keys = append(keys, string(did))
|
||||
}
|
||||
pidCancel()
|
||||
} else {
|
||||
keys = append(keys, req.Key)
|
||||
}
|
||||
|
||||
// DHT record fetch by DID key (covers exact-name and PeerID paths).
|
||||
if len(keys) > 0 {
|
||||
for _, k := range keys {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
fmt.Println("TRY TO CATCH DID", ix.genKey(k))
|
||||
c, err := ix.DHT.GetValue(ctx, ix.genKey(k))
|
||||
cancel()
|
||||
fmt.Println("TRY TO CATCH DID ERR", ix.genKey(k), c, err)
|
||||
if err == nil {
|
||||
var rec PeerRecord
|
||||
if json.Unmarshal(c, &rec) == nil {
|
||||
fmt.Println("CATCH DID ERR", ix.genKey(k), rec)
|
||||
resp.Records[rec.PeerID] = rec
|
||||
}
|
||||
} else if req.Name == "" && req.PeerID == "" {
|
||||
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + req.Key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resp.Found = len(resp.Records) > 0
|
||||
_ = json.NewEncoder(s).Encode(resp)
|
||||
resp.Found = len(resp.Records) > 0
|
||||
_ = json.NewEncoder(s).Encode(resp)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// handleGetNatives returns this indexer's configured native addresses,
|
||||
@@ -321,30 +335,38 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
||||
func (ix *IndexerService) handleGetNatives(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req common.GetIndexerNativesRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("indexer get natives: decode")
|
||||
return
|
||||
}
|
||||
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
resp := common.GetIndexerNativesResponse{}
|
||||
for _, addr := range strings.Split(conf.GetConfig().NativeIndexerAddresses, ",") {
|
||||
addr = strings.TrimSpace(addr)
|
||||
if addr == "" {
|
||||
for {
|
||||
var req common.GetIndexerNativesRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("indexer get natives: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
resp.Natives = append(resp.Natives, addr)
|
||||
}
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("indexer get natives: encode response")
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
resp := common.GetIndexerNativesResponse{}
|
||||
for _, addr := range strings.Split(conf.GetConfig().NativeIndexerAddresses, ",") {
|
||||
addr = strings.TrimSpace(addr)
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
resp.Natives = append(resp.Natives, addr)
|
||||
}
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("indexer get natives: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package indexer
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -158,11 +159,13 @@ func (ix *IndexerService) LookupNameIndex(needle string) map[string]string {
|
||||
ix.nameIndex.indexMu.RLock()
|
||||
defer ix.nameIndex.indexMu.RUnlock()
|
||||
for name, peers := range ix.nameIndex.index {
|
||||
fmt.Println(strings.Contains(strings.ToLower(name), needleLow), needleLow, strings.ToLower(name))
|
||||
if strings.Contains(strings.ToLower(name), needleLow) {
|
||||
for peerID, did := range peers {
|
||||
result[peerID] = did
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Println("RESULT", result)
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"slices"
|
||||
"strings"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||||
"github.com/libp2p/go-libp2p/core/crypto"
|
||||
"github.com/libp2p/go-libp2p/core/network"
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
@@ -36,10 +38,15 @@ const (
|
||||
)
|
||||
|
||||
// liveIndexerEntry tracks a registered indexer in the native's in-memory cache and DHT.
|
||||
// PubKey and Signature are forwarded from the IndexerRegistration so the DHT validator
|
||||
// can verify that the entry was produced by the peer owning the declared PeerID.
|
||||
type liveIndexerEntry struct {
|
||||
PeerID string `json:"peer_id"`
|
||||
Addr string `json:"addr"`
|
||||
ExpiresAt time.Time `json:"expires_at"`
|
||||
PeerID string `json:"peer_id"`
|
||||
Addr string `json:"addr"`
|
||||
ExpiresAt time.Time `json:"expires_at"`
|
||||
RegTimestamp int64 `json:"reg_ts,omitempty"` // Timestamp from the original IndexerRegistration
|
||||
PubKey []byte `json:"pub_key,omitempty"`
|
||||
Signature []byte `json:"sig,omitempty"`
|
||||
}
|
||||
|
||||
// NativeState holds runtime state specific to native indexer operation.
|
||||
@@ -53,13 +60,18 @@ type NativeState struct {
|
||||
// including entries written by other natives.
|
||||
knownPeerIDs map[string]string
|
||||
knownMu sync.RWMutex
|
||||
|
||||
// cancel stops background goroutines (runOffloadLoop, refreshIndexersFromDHT)
|
||||
// when the native shuts down.
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
func newNativeState() *NativeState {
|
||||
func newNativeState(cancel context.CancelFunc) *NativeState {
|
||||
return &NativeState{
|
||||
liveIndexers: map[string]*liveIndexerEntry{},
|
||||
responsiblePeers: map[pp.ID]struct{}{},
|
||||
knownPeerIDs: map[string]string{},
|
||||
cancel: cancel,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -77,6 +89,18 @@ func (v IndexerRecordValidator) Validate(_ string, value []byte) error {
|
||||
if e.ExpiresAt.Before(time.Now().UTC()) {
|
||||
return errors.New("expired indexer record")
|
||||
}
|
||||
// Verify self-signature when present — rejects entries forged by a
|
||||
// compromised native that does not control the declared PeerID.
|
||||
if len(e.Signature) > 0 && len(e.PubKey) > 0 {
|
||||
pub, err := crypto.UnmarshalPublicKey(e.PubKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("indexer entry: invalid public key: %w", err)
|
||||
}
|
||||
payload := []byte(fmt.Sprintf("%s|%s|%d", e.PeerID, e.Addr, e.RegTimestamp))
|
||||
if ok, err := pub.Verify(payload, e.Signature); err != nil || !ok {
|
||||
return errors.New("indexer entry: invalid signature")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -99,9 +123,11 @@ func (v IndexerRecordValidator) Select(_ string, values [][]byte) (int, error) {
|
||||
// InitNative registers native-specific stream handlers and starts background loops.
|
||||
// Must be called after DHT is initialized.
|
||||
func (ix *IndexerService) InitNative() {
|
||||
ix.Native = newNativeState()
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
ix.Native = newNativeState(cancel)
|
||||
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat) // specific heartbeat for Indexer.
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeSubscription, ix.handleNativeSubscription)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeUnsubscribe, ix.handleNativeUnsubscribe)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeGetIndexers, ix.handleNativeGetIndexers)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeConsensus, ix.handleNativeConsensus)
|
||||
ix.Host.SetStreamHandler(common.ProtocolNativeGetPeers, ix.handleNativeGetPeers)
|
||||
@@ -109,8 +135,8 @@ func (ix *IndexerService) InitNative() {
|
||||
ix.subscribeIndexerRegistry()
|
||||
// Ensure long connections to other configured natives (native-to-native mesh).
|
||||
common.EnsureNativePeers(ix.Host)
|
||||
go ix.runOffloadLoop()
|
||||
go ix.refreshIndexersFromDHT()
|
||||
go ix.runOffloadLoop(ctx)
|
||||
go ix.refreshIndexersFromDHT(ctx)
|
||||
}
|
||||
|
||||
// subscribeIndexerRegistry joins the PubSub topic used by natives to gossip newly
|
||||
@@ -118,14 +144,40 @@ func (ix *IndexerService) InitNative() {
|
||||
func (ix *IndexerService) subscribeIndexerRegistry() {
|
||||
logger := oclib.GetLogger()
|
||||
ix.PS.RegisterTopicValidator(common.TopicIndexerRegistry, func(_ context.Context, _ pp.ID, msg *pubsub.Message) bool {
|
||||
// Reject empty or syntactically invalid multiaddrs before they reach the
|
||||
// message loop. A compromised native could otherwise gossip arbitrary data.
|
||||
addr := string(msg.Data)
|
||||
if addr == "" {
|
||||
// Parse as a signed IndexerRegistration.
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.Unmarshal(msg.Data, &reg); err != nil {
|
||||
return false
|
||||
}
|
||||
_, err := pp.AddrInfoFromString(addr)
|
||||
return err == nil
|
||||
if reg.Addr == "" {
|
||||
return false
|
||||
}
|
||||
if _, err := pp.AddrInfoFromString(reg.Addr); err != nil {
|
||||
return false
|
||||
}
|
||||
// Verify the self-signature when present (rejects forged gossip from a
|
||||
// compromised native that does not control the announced PeerID).
|
||||
if ok, _ := reg.Verify(); !ok {
|
||||
return false
|
||||
}
|
||||
// Accept only messages from known native peers or from this host itself.
|
||||
// This prevents external PSK participants from injecting registry entries.
|
||||
from := msg.GetFrom()
|
||||
if from == ix.Host.ID() {
|
||||
return true
|
||||
}
|
||||
common.StreamNativeMu.RLock()
|
||||
_, knownNative := common.StaticNatives[from.String()]
|
||||
if !knownNative {
|
||||
for _, ad := range common.StaticNatives {
|
||||
if ad.ID == from {
|
||||
knownNative = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
return knownNative
|
||||
})
|
||||
topic, err := ix.PS.Join(common.TopicIndexerRegistry)
|
||||
if err != nil {
|
||||
@@ -147,18 +199,18 @@ func (ix *IndexerService) subscribeIndexerRegistry() {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
addr := string(msg.Data)
|
||||
if addr == "" {
|
||||
// The gossip payload is a JSON-encoded IndexerRegistration (signed).
|
||||
var gossipReg common.IndexerRegistration
|
||||
if jsonErr := json.Unmarshal(msg.Data, &gossipReg); jsonErr != nil {
|
||||
continue
|
||||
}
|
||||
if peer, err := pp.AddrInfoFromString(addr); err == nil {
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[peer.ID.String()] = addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
if gossipReg.Addr == "" || gossipReg.PeerID == "" {
|
||||
continue
|
||||
}
|
||||
// A neighbouring native registered this PeerID; add to known set for DHT refresh.
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[gossipReg.PeerID] = gossipReg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
@@ -171,86 +223,172 @@ func (ix *IndexerService) handleNativeSubscription(s network.Stream) {
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
logger.Info().Msg("Subscription")
|
||||
for {
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.NewDecoder(s).Decode(&reg); err != nil {
|
||||
logger.Err(err).Msg("native subscription: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
logger.Info().Msg("Subscription " + reg.Addr)
|
||||
|
||||
if reg.Addr == "" {
|
||||
logger.Error().Msg("native subscription: missing addr")
|
||||
return
|
||||
}
|
||||
if reg.PeerID == "" {
|
||||
ad, err := pp.AddrInfoFromString(reg.Addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: invalid addr")
|
||||
return
|
||||
}
|
||||
reg.PeerID = ad.ID.String()
|
||||
}
|
||||
|
||||
// Reject registrations with an invalid self-signature.
|
||||
if ok, err := reg.Verify(); !ok {
|
||||
logger.Warn().Str("peer", reg.PeerID).Err(err).Msg("native subscription: invalid signature, rejecting")
|
||||
return
|
||||
}
|
||||
|
||||
// Build entry with a fresh TTL — must happen before the cache write so the
|
||||
// TTL window is not consumed by DHT retries.
|
||||
entry := &liveIndexerEntry{
|
||||
PeerID: reg.PeerID,
|
||||
Addr: reg.Addr,
|
||||
ExpiresAt: time.Now().UTC().Add(IndexerTTL),
|
||||
RegTimestamp: reg.Timestamp,
|
||||
PubKey: reg.PubKey,
|
||||
Signature: reg.Signature,
|
||||
}
|
||||
|
||||
// Verify that the declared address is actually reachable before admitting
|
||||
// the registration. This async dial runs in the background; the indexer is
|
||||
// tentatively admitted immediately (so heartbeats don't get stuck) but is
|
||||
// evicted from the cache if the dial fails within 5 s.
|
||||
go func(e *liveIndexerEntry) {
|
||||
ad, err := pp.AddrInfoFromString(e.Addr)
|
||||
if err != nil {
|
||||
logger.Warn().Str("addr", e.Addr).Msg("native subscription: invalid addr during validation, rejecting")
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
|
||||
delete(ix.Native.liveIndexers, e.PeerID)
|
||||
}
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
return
|
||||
}
|
||||
dialCtx, dialCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer dialCancel()
|
||||
if err := ix.Host.Connect(dialCtx, *ad); err != nil {
|
||||
logger.Warn().Str("addr", e.Addr).Err(err).Msg("native subscription: declared address unreachable, rejecting")
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
if cur := ix.Native.liveIndexers[e.PeerID]; cur == e {
|
||||
delete(ix.Native.liveIndexers, e.PeerID)
|
||||
}
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
}
|
||||
}(entry)
|
||||
|
||||
// Update local cache and known set immediately so concurrent GetIndexers calls
|
||||
// can already see this indexer without waiting for the DHT write to complete.
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
_, isRenewal := ix.Native.liveIndexers[reg.PeerID]
|
||||
ix.Native.liveIndexers[reg.PeerID] = entry
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[reg.PeerID] = reg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
// Gossip the signed registration to neighbouring natives.
|
||||
// The payload is JSON-encoded so the receiver can verify the self-signature.
|
||||
ix.PubsubMu.RLock()
|
||||
topic := ix.LongLivedPubSubs[common.TopicIndexerRegistry]
|
||||
ix.PubsubMu.RUnlock()
|
||||
if topic != nil {
|
||||
if gossipData, marshalErr := json.Marshal(reg); marshalErr == nil {
|
||||
if err := topic.Publish(context.Background(), gossipData); err != nil {
|
||||
logger.Err(err).Msg("native subscription: registry gossip publish")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isRenewal {
|
||||
// logger.Debug().Str("peer", reg.PeerID).Msg("native: indexer TTL renewed : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
} else {
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer registered : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
}
|
||||
|
||||
// Persist in DHT asynchronously with bounded retry.
|
||||
// Max retry window = IndexerTTL (90 s) — retrying past entry expiry is pointless.
|
||||
// Backoff: 10 s → 20 s → 40 s, then repeats at 40 s until deadline.
|
||||
key := ix.genIndexerKey(reg.PeerID)
|
||||
data, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: marshal entry")
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
deadline := time.Now().Add(IndexerTTL)
|
||||
backoff := 10 * time.Second
|
||||
for {
|
||||
if time.Now().After(deadline) {
|
||||
logger.Warn().Str("key", key).Msg("native subscription: DHT put abandoned, entry TTL exceeded")
|
||||
return
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
err := ix.DHT.PutValue(ctx, key, data)
|
||||
cancel()
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
logger.Err(err).Msg("native subscription: DHT put " + key)
|
||||
if !strings.Contains(err.Error(), "failed to find any peer in table") {
|
||||
return // non-retryable error
|
||||
}
|
||||
remaining := time.Until(deadline)
|
||||
if backoff > remaining {
|
||||
backoff = remaining
|
||||
}
|
||||
if backoff <= 0 {
|
||||
return
|
||||
}
|
||||
time.Sleep(backoff)
|
||||
if backoff < 40*time.Second {
|
||||
backoff *= 2
|
||||
}
|
||||
}
|
||||
}()
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// handleNativeUnsubscribe removes a departing indexer from the local cache and
|
||||
// known set immediately, without waiting for TTL expiry.
|
||||
func (ix *IndexerService) handleNativeUnsubscribe(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
var reg common.IndexerRegistration
|
||||
if err := json.NewDecoder(s).Decode(&reg); err != nil {
|
||||
logger.Err(err).Msg("native subscription: decode")
|
||||
return
|
||||
}
|
||||
logger.Info().Msg("Subscription " + reg.Addr)
|
||||
|
||||
if reg.Addr == "" {
|
||||
logger.Error().Msg("native subscription: missing addr")
|
||||
logger.Err(err).Msg("native unsubscribe: decode")
|
||||
return
|
||||
}
|
||||
if reg.PeerID == "" {
|
||||
ad, err := pp.AddrInfoFromString(reg.Addr)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: invalid addr")
|
||||
return
|
||||
}
|
||||
reg.PeerID = ad.ID.String()
|
||||
}
|
||||
|
||||
// Build entry with a fresh TTL — must happen before the cache write so the 66s
|
||||
// window is not consumed by DHT retries.
|
||||
entry := &liveIndexerEntry{
|
||||
PeerID: reg.PeerID,
|
||||
Addr: reg.Addr,
|
||||
ExpiresAt: time.Now().UTC().Add(IndexerTTL),
|
||||
}
|
||||
|
||||
// Update local cache and known set immediately so concurrent GetIndexers calls
|
||||
// can already see this indexer without waiting for the DHT write to complete.
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
_, isRenewal := ix.Native.liveIndexers[reg.PeerID]
|
||||
ix.Native.liveIndexers[reg.PeerID] = entry
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
|
||||
ix.Native.knownMu.Lock()
|
||||
ix.Native.knownPeerIDs[reg.PeerID] = reg.Addr
|
||||
ix.Native.knownMu.Unlock()
|
||||
|
||||
// Gossip PeerID to neighbouring natives so they discover it via DHT.
|
||||
ix.PubsubMu.RLock()
|
||||
topic := ix.LongLivedPubSubs[common.TopicIndexerRegistry]
|
||||
ix.PubsubMu.RUnlock()
|
||||
if topic != nil {
|
||||
if err := topic.Publish(context.Background(), []byte(reg.Addr)); err != nil {
|
||||
logger.Err(err).Msg("native subscription: registry gossip publish")
|
||||
}
|
||||
}
|
||||
|
||||
if isRenewal {
|
||||
logger.Debug().Str("peer", reg.PeerID).Msg("native: indexer TTL renewed : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
} else {
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer registered : " + fmt.Sprintf("%v", len(ix.Native.liveIndexers)))
|
||||
}
|
||||
|
||||
// Persist in DHT asynchronously — retries must not block the handler or consume
|
||||
// the local cache TTL.
|
||||
key := ix.genIndexerKey(reg.PeerID)
|
||||
data, err := json.Marshal(entry)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("native subscription: marshal entry")
|
||||
logger.Warn().Msg("native unsubscribe: missing peer_id")
|
||||
return
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx, key, data); err != nil {
|
||||
cancel()
|
||||
logger.Err(err).Msg("native subscription: DHT put " + key)
|
||||
if strings.Contains(err.Error(), "failed to find any peer in table") {
|
||||
time.Sleep(10 * time.Second)
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
}()
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
delete(ix.Native.liveIndexers, reg.PeerID)
|
||||
ix.Native.liveIndexersMu.Unlock()
|
||||
ix.Native.knownMu.Lock()
|
||||
delete(ix.Native.knownPeerIDs, reg.PeerID)
|
||||
ix.Native.knownMu.Unlock()
|
||||
logger.Info().Str("peer", reg.PeerID).Msg("native: indexer explicitly unregistered")
|
||||
}
|
||||
|
||||
// handleNativeGetIndexers returns this native's own list of reachable indexers.
|
||||
@@ -260,39 +398,47 @@ func (ix *IndexerService) handleNativeSubscription(s network.Stream) {
|
||||
func (ix *IndexerService) handleNativeGetIndexers(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
for {
|
||||
var req common.GetIndexersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 3
|
||||
}
|
||||
callerPeerID := s.Conn().RemotePeer().String()
|
||||
reachable := ix.reachableLiveIndexers(req.Count, callerPeerID)
|
||||
var resp common.GetIndexersResponse
|
||||
|
||||
var req common.GetIndexersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: decode")
|
||||
return
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 3
|
||||
}
|
||||
callerPeerID := s.Conn().RemotePeer().String()
|
||||
reachable := ix.reachableLiveIndexers(req.Count, callerPeerID)
|
||||
var resp common.GetIndexersResponse
|
||||
|
||||
if len(reachable) == 0 {
|
||||
// No live indexers reachable — try to self-delegate.
|
||||
if ix.selfDelegate(s.Conn().RemotePeer(), &resp) {
|
||||
logger.Info().Str("peer", callerPeerID).Msg("native: no indexers, acting as fallback for node")
|
||||
if len(reachable) == 0 {
|
||||
// No live indexers reachable — try to self-delegate.
|
||||
if ix.selfDelegate(s.Conn().RemotePeer(), &resp) {
|
||||
logger.Info().Str("peer", callerPeerID).Msg("native: no indexers, acting as fallback for node")
|
||||
} else {
|
||||
// Fallback pool saturated: return empty so the caller retries another
|
||||
// native instead of piling more load onto this one.
|
||||
logger.Warn().Str("peer", callerPeerID).Int("pool", maxFallbackPeers).Msg(
|
||||
"native: fallback pool saturated, refusing self-delegation")
|
||||
}
|
||||
} else {
|
||||
// Fallback pool saturated: return empty so the caller retries another
|
||||
// native instead of piling more load onto this one.
|
||||
logger.Warn().Str("peer", callerPeerID).Int("pool", maxFallbackPeers).Msg(
|
||||
"native: fallback pool saturated, refusing self-delegation")
|
||||
rand.Shuffle(len(reachable), func(i, j int) { reachable[i], reachable[j] = reachable[j], reachable[i] })
|
||||
if req.Count > len(reachable) {
|
||||
req.Count = len(reachable)
|
||||
}
|
||||
resp.Indexers = reachable[:req.Count]
|
||||
}
|
||||
} else {
|
||||
rand.Shuffle(len(reachable), func(i, j int) { reachable[i], reachable[j] = reachable[j], reachable[i] })
|
||||
if req.Count > len(reachable) {
|
||||
req.Count = len(reachable)
|
||||
}
|
||||
resp.Indexers = reachable[:req.Count]
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: encode response")
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get indexers: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -303,39 +449,47 @@ func (ix *IndexerService) handleNativeGetIndexers(s network.Stream) {
|
||||
func (ix *IndexerService) handleNativeConsensus(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req common.ConsensusRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native consensus: decode")
|
||||
return
|
||||
}
|
||||
|
||||
myList := ix.reachableLiveIndexers(-1, s.Conn().RemotePeer().String())
|
||||
mySet := make(map[string]struct{}, len(myList))
|
||||
for _, addr := range myList {
|
||||
mySet[addr] = struct{}{}
|
||||
}
|
||||
|
||||
trusted := []string{}
|
||||
candidateSet := make(map[string]struct{}, len(req.Candidates))
|
||||
for _, addr := range req.Candidates {
|
||||
candidateSet[addr] = struct{}{}
|
||||
if _, ok := mySet[addr]; ok {
|
||||
trusted = append(trusted, addr) // candidate we also confirm as reachable
|
||||
for {
|
||||
var req common.ConsensusRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native consensus: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Extras we trust but that the requester didn't include → suggestions.
|
||||
suggestions := []string{}
|
||||
for _, addr := range myList {
|
||||
if _, inCandidates := candidateSet[addr]; !inCandidates {
|
||||
suggestions = append(suggestions, addr)
|
||||
myList := ix.reachableLiveIndexers(-1, s.Conn().RemotePeer().String())
|
||||
mySet := make(map[string]struct{}, len(myList))
|
||||
for _, addr := range myList {
|
||||
mySet[addr] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
resp := common.ConsensusResponse{Trusted: trusted, Suggestions: suggestions}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native consensus: encode response")
|
||||
trusted := []string{}
|
||||
candidateSet := make(map[string]struct{}, len(req.Candidates))
|
||||
for _, addr := range req.Candidates {
|
||||
candidateSet[addr] = struct{}{}
|
||||
if _, ok := mySet[addr]; ok {
|
||||
trusted = append(trusted, addr) // candidate we also confirm as reachable
|
||||
}
|
||||
}
|
||||
|
||||
// Extras we trust but that the requester didn't include → suggestions.
|
||||
suggestions := []string{}
|
||||
for _, addr := range myList {
|
||||
if _, inCandidates := candidateSet[addr]; !inCandidates {
|
||||
suggestions = append(suggestions, addr)
|
||||
}
|
||||
}
|
||||
|
||||
resp := common.ConsensusResponse{Trusted: trusted, Suggestions: suggestions}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native consensus: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -406,11 +560,16 @@ func (ix *IndexerService) reachableLiveIndexers(count int, from ...string) []str
|
||||
// refreshIndexersFromDHT runs in background and queries the shared DHT for every known
|
||||
// indexer PeerID whose local cache entry is missing or expired. This supplements the
|
||||
// local cache with entries written by neighbouring natives.
|
||||
func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
func (ix *IndexerService) refreshIndexersFromDHT(ctx context.Context) {
|
||||
t := time.NewTicker(dhtRefreshInterval)
|
||||
defer t.Stop()
|
||||
logger := oclib.GetLogger()
|
||||
for range t.C {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
ix.Native.knownMu.RLock()
|
||||
peerIDs := make([]string, 0, len(ix.Native.knownPeerIDs))
|
||||
for pid := range ix.Native.knownPeerIDs {
|
||||
@@ -427,10 +586,10 @@ func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
continue // still fresh in local cache
|
||||
}
|
||||
key := ix.genIndexerKey(pid)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
ch, err := ix.DHT.SearchValue(ctx, key)
|
||||
dhtCtx, dhtCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
ch, err := ix.DHT.SearchValue(dhtCtx, key)
|
||||
if err != nil {
|
||||
cancel()
|
||||
dhtCancel()
|
||||
continue
|
||||
}
|
||||
var best *liveIndexerEntry
|
||||
@@ -445,7 +604,7 @@ func (ix *IndexerService) refreshIndexersFromDHT() {
|
||||
}
|
||||
}
|
||||
}
|
||||
cancel()
|
||||
dhtCancel()
|
||||
if best != nil {
|
||||
ix.Native.liveIndexersMu.Lock()
|
||||
ix.Native.liveIndexers[best.PeerID] = best
|
||||
@@ -468,11 +627,16 @@ func (ix *IndexerService) genIndexerKey(peerID string) string {
|
||||
|
||||
// runOffloadLoop periodically checks if real indexers are available and releases
|
||||
// responsible peers so they can reconnect to actual indexers on their next attempt.
|
||||
func (ix *IndexerService) runOffloadLoop() {
|
||||
func (ix *IndexerService) runOffloadLoop(ctx context.Context) {
|
||||
t := time.NewTicker(offloadInterval)
|
||||
defer t.Stop()
|
||||
logger := oclib.GetLogger()
|
||||
for range t.C {
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-t.C:
|
||||
}
|
||||
fmt.Println("runOffloadLoop", ix.Native.responsiblePeers)
|
||||
ix.Native.responsibleMu.RLock()
|
||||
count := len(ix.Native.responsiblePeers)
|
||||
@@ -540,38 +704,46 @@ func (ix *IndexerService) runOffloadLoop() {
|
||||
func (ix *IndexerService) handleNativeGetPeers(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
|
||||
var req common.GetNativePeersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get peers: decode")
|
||||
return
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 1
|
||||
}
|
||||
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
common.StreamNativeMu.RLock()
|
||||
candidates := make([]string, 0, len(common.StaticNatives))
|
||||
for addr := range common.StaticNatives {
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
candidates = append(candidates, addr)
|
||||
for {
|
||||
var req common.GetNativePeersRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
logger.Err(err).Msg("native get peers: decode")
|
||||
if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) ||
|
||||
strings.Contains(err.Error(), "reset") ||
|
||||
strings.Contains(err.Error(), "closed") ||
|
||||
strings.Contains(err.Error(), "too many connections") {
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
if req.Count <= 0 {
|
||||
req.Count = 1
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
|
||||
rand.Shuffle(len(candidates), func(i, j int) { candidates[i], candidates[j] = candidates[j], candidates[i] })
|
||||
if req.Count > len(candidates) {
|
||||
req.Count = len(candidates)
|
||||
}
|
||||
excludeSet := make(map[string]struct{}, len(req.Exclude))
|
||||
for _, e := range req.Exclude {
|
||||
excludeSet[e] = struct{}{}
|
||||
}
|
||||
|
||||
resp := common.GetNativePeersResponse{Peers: candidates[:req.Count]}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get peers: encode response")
|
||||
common.StreamNativeMu.RLock()
|
||||
candidates := make([]string, 0, len(common.StaticNatives))
|
||||
for addr := range common.StaticNatives {
|
||||
if _, excluded := excludeSet[addr]; !excluded {
|
||||
candidates = append(candidates, addr)
|
||||
}
|
||||
}
|
||||
common.StreamNativeMu.RUnlock()
|
||||
|
||||
rand.Shuffle(len(candidates), func(i, j int) { candidates[i], candidates[j] = candidates[j], candidates[i] })
|
||||
if req.Count > len(candidates) {
|
||||
req.Count = len(candidates)
|
||||
}
|
||||
|
||||
resp := common.GetNativePeersResponse{Peers: candidates[:req.Count]}
|
||||
if err := json.NewEncoder(s).Encode(resp); err != nil {
|
||||
logger.Err(err).Msg("native get peers: encode response")
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"oc-discovery/conf"
|
||||
"oc-discovery/daemons/node/common"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
@@ -60,9 +61,19 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
||||
}
|
||||
}
|
||||
|
||||
if ix.DHT, err = dht.New(
|
||||
context.Background(),
|
||||
ix.Host,
|
||||
// Parse bootstrap peers from configured native/indexer addresses so that the
|
||||
// DHT can find its routing table entries even in a fresh deployment.
|
||||
var bootstrapPeers []pp.AddrInfo
|
||||
for _, addrStr := range strings.Split(conf.GetConfig().NativeIndexerAddresses+","+conf.GetConfig().IndexerAddresses, ",") {
|
||||
addrStr = strings.TrimSpace(addrStr)
|
||||
if addrStr == "" {
|
||||
continue
|
||||
}
|
||||
if ad, err := pp.AddrInfoFromString(addrStr); err == nil {
|
||||
bootstrapPeers = append(bootstrapPeers, *ad)
|
||||
}
|
||||
}
|
||||
dhtOpts := []dht.Option{
|
||||
dht.Mode(dht.ModeServer),
|
||||
dht.ProtocolPrefix("oc"), // 🔥 réseau privé
|
||||
dht.Validator(record.NamespacedValidator{
|
||||
@@ -71,7 +82,11 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
||||
"name": DefaultValidator{},
|
||||
"pid": DefaultValidator{},
|
||||
}),
|
||||
); err != nil {
|
||||
}
|
||||
if len(bootstrapPeers) > 0 {
|
||||
dhtOpts = append(dhtOpts, dht.BootstrapPeers(bootstrapPeers...))
|
||||
}
|
||||
if ix.DHT, err = dht.New(context.Background(), ix.Host, dhtOpts...); err != nil {
|
||||
logger.Info().Msg(err.Error())
|
||||
return nil
|
||||
}
|
||||
@@ -90,6 +105,16 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int, isNative boo
|
||||
}
|
||||
|
||||
func (ix *IndexerService) Close() {
|
||||
if ix.Native != nil && ix.Native.cancel != nil {
|
||||
ix.Native.cancel()
|
||||
}
|
||||
// Explicitly deregister from natives on clean shutdown so they evict this
|
||||
// indexer immediately rather than waiting for TTL expiry (~90 s).
|
||||
if !ix.IsNative {
|
||||
if nativeAddrs := conf.GetConfig().NativeIndexerAddresses; nativeAddrs != "" {
|
||||
common.UnregisterFromNative(ix.Host, nativeAddrs)
|
||||
}
|
||||
}
|
||||
ix.DHT.Close()
|
||||
ix.PS.UnregisterTopicValidator(common.TopicPubSubSearch)
|
||||
if ix.nameIndex != nil {
|
||||
|
||||
Reference in New Issue
Block a user