oc-discovery -> conf
This commit is contained in:
@@ -6,6 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"oc-discovery/conf"
|
||||
"oc-discovery/daemons/node/common"
|
||||
|
||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
@@ -13,62 +14,14 @@ import (
|
||||
// ── defaults ──────────────────────────────────────────────────────────────────
|
||||
|
||||
// Fallback limits and fixed tracker parameters.
//
// The default* values are passed to cfgOr as the fallback for configuration
// fields that are unset or non-positive. defaultConnWindowSecs is expressed in
// seconds (it is multiplied by time.Second where it is used).
//
// Each name is declared exactly once; the duplicated entries that came from
// the old and new sides of the diff have been merged.
const (
	defaultMaxConnPerWindow = 20
	defaultConnWindowSecs   = 30
	defaultMaxHBPerMinute   = 5
	defaultMaxPublishPerMin = 10
	defaultMaxGetPerMin     = 50
	strikeThreshold         = 3
	banDuration             = 10 * time.Minute
	behaviorWindowDur       = 60 * time.Second
)
|
||||
|
||||
// cfgOr returns v when it is strictly positive and def otherwise, so a
// configuration value left at zero (or set to a negative number) falls back to
// its default.
func cfgOr(v, def int) int {
	if v <= 0 {
		return def
	}
	return v
}
|
||||
|
||||
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
|
||||
|
||||
// ConnectionRateGuard limits the number of NEW incoming connections accepted
// within a sliding time window. It protects public indexers against coordinated
// registration floods (Sybil bursts).
//
// The struct embeds a mutex and must therefore be used through a pointer.
type ConnectionRateGuard struct {
	// mu guards window.
	mu sync.Mutex
	// window holds the acceptance time of every connection still inside the
	// sliding window, oldest first.
	window []time.Time
	// maxInWindow is the number of acceptances allowed per window.
	maxInWindow int
	// windowDur is the length of the sliding window.
	windowDur time.Duration
}
|
||||
|
||||
func newConnectionRateGuard() *ConnectionRateGuard {
|
||||
cfg := conf.GetConfig()
|
||||
return &ConnectionRateGuard{
|
||||
maxInWindow: cfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
|
||||
windowDur: time.Duration(cfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// Allow returns true if a new connection may be accepted.
|
||||
// The internal window is pruned on each call so memory stays bounded.
|
||||
func (g *ConnectionRateGuard) Allow() bool {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
now := time.Now()
|
||||
cutoff := now.Add(-g.windowDur)
|
||||
i := 0
|
||||
for i < len(g.window) && g.window[i].Before(cutoff) {
|
||||
i++
|
||||
}
|
||||
g.window = g.window[i:]
|
||||
if len(g.window) >= g.maxInWindow {
|
||||
return false
|
||||
}
|
||||
g.window = append(g.window, now)
|
||||
return true
|
||||
}
|
||||
|
||||
// ── per-node state ────────────────────────────────────────────────────────────
|
||||
|
||||
type nodeBehavior struct {
|
||||
@@ -130,9 +83,9 @@ func newNodeBehaviorTracker() *NodeBehaviorTracker {
|
||||
cfg := conf.GetConfig()
|
||||
return &NodeBehaviorTracker{
|
||||
nodes: make(map[pp.ID]*nodeBehavior),
|
||||
maxHB: cfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
|
||||
maxPub: cfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
|
||||
maxGet: cfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
|
||||
maxHB: common.CfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
|
||||
maxPub: common.CfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
|
||||
maxGet: common.CfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,21 +21,37 @@ import (
|
||||
lpp "github.com/libp2p/go-libp2p/core/peer"
|
||||
)
|
||||
|
||||
// DefaultTTLSeconds is the default TTL for peer records when the publisher
// does not declare a custom TTL. Exported so the node package can reference it.
const DefaultTTLSeconds = 120

// maxTTLSeconds caps how far in the future a publisher can set their ExpiryDate.
const maxTTLSeconds = 86400 // 24h

// tombstoneTTL is how long a signed delete record stays alive in the DHT —
// long enough to propagate everywhere, short enough not to linger forever.
const tombstoneTTL = 10 * time.Minute
|
||||
|
||||
// PeerRecordPayload groups the identity and lifetime fields of a peer record.
// It is embedded in PeerRecord.
type PeerRecordPayload struct {
	Name       string    `json:"name"`
	DID        string    `json:"did"`
	PubKey     []byte    `json:"pub_key"`
	ExpiryDate time.Time `json:"expiry_date"`
	// TTLSeconds is the publisher's declared lifetime for this record in seconds.
	// 0 means "use the default (120 s)". Included in the signed payload so it
	// cannot be altered by an intermediary.
	TTLSeconds int `json:"ttl_seconds,omitempty"`
}
|
||||
|
||||
type PeerRecord struct {
|
||||
PeerRecordPayload
|
||||
PeerID string `json:"peer_id"`
|
||||
APIUrl string `json:"api_url"`
|
||||
StreamAddress string `json:"stream_address"`
|
||||
NATSAddress string `json:"nats_address"`
|
||||
WalletAddress string `json:"wallet_address"`
|
||||
Signature []byte `json:"signature"`
|
||||
PeerID string `json:"peer_id"`
|
||||
APIUrl string `json:"api_url"`
|
||||
StreamAddress string `json:"stream_address"`
|
||||
NATSAddress string `json:"nats_address"`
|
||||
WalletAddress string `json:"wallet_address"`
|
||||
Location *pp.PeerLocation `json:"location,omitempty"`
|
||||
Signature []byte `json:"signature"`
|
||||
}
|
||||
|
||||
func (p *PeerRecord) Sign() error {
|
||||
@@ -84,6 +100,7 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
|
||||
StreamAddress: pr.StreamAddress,
|
||||
NATSAddress: pr.NATSAddress,
|
||||
WalletAddress: pr.WalletAddress,
|
||||
Location: pr.Location,
|
||||
}
|
||||
if time.Now().UTC().After(pr.ExpiryDate) {
|
||||
return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline")
|
||||
@@ -91,6 +108,42 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
|
||||
return pp.SELF == p.Relation, p, nil
|
||||
}
|
||||
|
||||
// TombstonePayload is the signed body of a delete request: it is the exact
// structure that TombstoneRecord.Verify marshals and checks the signature
// against.
type TombstonePayload struct {
	DID       string    `json:"did"`
	PeerID    string    `json:"peer_id"`
	DeletedAt time.Time `json:"deleted_at"`
}
|
||||
|
||||
// TombstoneRecord is stored in the DHT at /node/{DID} to signal that a peer
|
||||
// has voluntarily left the network. The Tombstone bool field acts as a
|
||||
// discriminator so validators can distinguish it from a live PeerRecord.
|
||||
type TombstoneRecord struct {
|
||||
TombstonePayload
|
||||
PubKey []byte `json:"pub_key"`
|
||||
Tombstone bool `json:"tombstone"`
|
||||
Signature []byte `json:"signature"`
|
||||
}
|
||||
|
||||
func (ts *TombstoneRecord) Verify() (crypto.PubKey, error) {
|
||||
pubKey, err := crypto.UnmarshalPublicKey(ts.PubKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
payload, _ := json.Marshal(ts.TombstonePayload)
|
||||
if ok, _ := pubKey.Verify(payload, ts.Signature); !ok {
|
||||
return nil, errors.New("invalid tombstone signature")
|
||||
}
|
||||
return pubKey, nil
|
||||
}
|
||||
|
||||
// isTombstone returns true if data is a valid, well-formed TombstoneRecord.
|
||||
func isTombstone(data []byte) bool {
|
||||
var ts TombstoneRecord
|
||||
return json.Unmarshal(data, &ts) == nil && ts.Tombstone
|
||||
}
|
||||
|
||||
type GetValue struct {
|
||||
Key string `json:"key"`
|
||||
PeerID string `json:"peer_id,omitempty"`
|
||||
@@ -147,9 +200,9 @@ func (ix *IndexerService) isPeerKnown(pid lpp.ID) bool {
|
||||
return false
|
||||
}
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
|
||||
_, err = ix.DHT.GetValue(ctx2, ix.genKey(string(did)))
|
||||
val, err := ix.DHT.GetValue(ctx2, ix.genKey(string(did)))
|
||||
cancel2()
|
||||
return err == nil
|
||||
return err == nil && !isTombstone(val)
|
||||
}
|
||||
|
||||
func (ix *IndexerService) initNodeHandler() {
|
||||
@@ -188,6 +241,18 @@ func (ix *IndexerService) initNodeHandler() {
|
||||
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid")
|
||||
return
|
||||
}
|
||||
// Don't republish if a tombstone was recently stored for this DID:
|
||||
// the peer explicitly left and we must not re-animate their record.
|
||||
ix.deletedDIDsMu.Lock()
|
||||
if t, ok := ix.deletedDIDs[rec.DID]; ok {
|
||||
if time.Since(t) < tombstoneTTL {
|
||||
ix.deletedDIDsMu.Unlock()
|
||||
return
|
||||
}
|
||||
// tombstoneTTL elapsed — peer is allowed to re-register.
|
||||
delete(ix.deletedDIDs, rec.DID)
|
||||
}
|
||||
ix.deletedDIDsMu.Unlock()
|
||||
// Keep StreamRecord.Record in sync so BuildHeartbeatResponse always
|
||||
// sees a populated PeerRecord (Name, DID, etc.) regardless of whether
|
||||
// handleNodePublish ran before or after the heartbeat stream was opened.
|
||||
@@ -220,6 +285,8 @@ func (ix *IndexerService) initNodeHandler() {
|
||||
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
|
||||
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
|
||||
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
|
||||
ix.Host.SetStreamHandler(common.ProtocolDelete, ix.handleNodeDelete)
|
||||
ix.Host.SetStreamHandler(common.ProtocolIndirectProbe, ix.handleIndirectProbe)
|
||||
ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest)
|
||||
ix.initSearchHandlers()
|
||||
}
|
||||
@@ -383,12 +450,12 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
c, err := ix.DHT.GetValue(ctx, ix.genKey(key))
|
||||
cancel()
|
||||
if err == nil {
|
||||
if err == nil && !isTombstone(c) {
|
||||
var rec PeerRecord
|
||||
if json.Unmarshal(c, &rec) == nil {
|
||||
resp.Records[rec.PeerID] = rec
|
||||
}
|
||||
} else {
|
||||
} else if err != nil {
|
||||
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key)
|
||||
}
|
||||
}
|
||||
@@ -399,3 +466,121 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
||||
}
|
||||
}
|
||||
|
||||
// handleNodeDelete processes a signed delete (tombstone) request from a peer.
|
||||
// It verifies that the request is:
|
||||
// - marked as a tombstone
|
||||
// - recent (within 5 minutes, preventing replay attacks)
|
||||
// - sent by the actual peer whose record is being deleted (PeerID == remotePeer)
|
||||
// - signed by the matching private key
|
||||
//
|
||||
// On success it stores the tombstone in the DHT, evicts the peer from the local
|
||||
// stream records, and marks the DID in deletedDIDs so AfterHeartbeat cannot
|
||||
// accidentally republish the record during the tombstoneTTL window.
|
||||
func (ix *IndexerService) handleNodeDelete(s network.Stream) {
|
||||
defer s.Close()
|
||||
logger := oclib.GetLogger()
|
||||
remotePeer := s.Conn().RemotePeer()
|
||||
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||
|
||||
var ts TombstoneRecord
|
||||
if err := json.NewDecoder(s).Decode(&ts); err != nil || !ts.Tombstone {
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
if ts.PeerID == "" || ts.DID == "" {
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
if time.Since(ts.DeletedAt) > 5*time.Minute {
|
||||
logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] stale tombstone rejected")
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
if ts.PeerID != remotePeer.String() {
|
||||
logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] tombstone PeerID mismatch")
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
if _, err := ts.Verify(); err != nil {
|
||||
logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("[delete] invalid tombstone signature")
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
|
||||
// Mark DID as deleted in-memory before writing to DHT so AfterHeartbeat
|
||||
// cannot win a race and republish the live record on top of the tombstone.
|
||||
ix.deletedDIDsMu.Lock()
|
||||
ix.deletedDIDs[ts.DID] = ts.DeletedAt
|
||||
ix.deletedDIDsMu.Unlock()
|
||||
|
||||
data, _ := json.Marshal(ts)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx, ix.genKey(ts.DID), data); err != nil {
|
||||
logger.Warn().Err(err).Str("did", ts.DID).Msg("[delete] DHT write tombstone failed")
|
||||
}
|
||||
cancel()
|
||||
|
||||
// Invalidate the /pid/ secondary index so isPeerKnown returns false quickly.
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(ts.PeerID), []byte("")); err != nil {
|
||||
logger.Warn().Err(err).Str("pid", ts.PeerID).Msg("[delete] DHT clear pid failed")
|
||||
}
|
||||
cancel2()
|
||||
|
||||
// Evict from active stream records.
|
||||
if pid, err := lpp.Decode(ts.PeerID); err == nil {
|
||||
ix.StreamMU.Lock()
|
||||
delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
|
||||
ix.StreamMU.Unlock()
|
||||
}
|
||||
|
||||
logger.Info().Str("did", ts.DID).Str("peer", ts.PeerID).Msg("[delete] tombstone stored, peer evicted")
|
||||
}
|
||||
|
||||
// handleIndirectProbe is the SWIM inter-indexer probe handler.
|
||||
// A node opens this stream toward a live indexer to ask: "can you reach peer X?"
|
||||
// The indexer attempts a ProtocolBandwidthProbe to X and reports back.
|
||||
// This is the only protocol that indexers use to communicate with each other;
|
||||
// no persistent inter-indexer connections are maintained.
|
||||
func (ix *IndexerService) handleIndirectProbe(s network.Stream) {
|
||||
defer s.Close()
|
||||
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||
|
||||
var req common.IndirectProbeRequest
|
||||
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||
s.Reset()
|
||||
return
|
||||
}
|
||||
|
||||
respond := func(reachable bool, latencyMs int64) {
|
||||
json.NewEncoder(s).Encode(common.IndirectProbeResponse{
|
||||
Reachable: reachable,
|
||||
LatencyMs: latencyMs,
|
||||
})
|
||||
}
|
||||
|
||||
// Connect to target if not already connected.
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
||||
defer cancel()
|
||||
if ix.Host.Network().Connectedness(req.Target.ID) != network.Connected {
|
||||
if err := ix.Host.Connect(ctx, req.Target); err != nil {
|
||||
respond(false, 0)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Open a bandwidth probe stream — already registered on all nodes/indexers.
|
||||
start := time.Now()
|
||||
ps, err := ix.Host.NewStream(ctx, req.Target.ID, common.ProtocolBandwidthProbe)
|
||||
if err != nil {
|
||||
respond(false, 0)
|
||||
return
|
||||
}
|
||||
defer ps.Reset()
|
||||
ps.SetDeadline(time.Now().Add(3 * time.Second))
|
||||
ps.Write([]byte("ping"))
|
||||
buf := make([]byte, 4)
|
||||
_, err = ps.Read(buf)
|
||||
latency := time.Since(start).Milliseconds()
|
||||
respond(err == nil, latency)
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ import (
|
||||
"oc-discovery/daemons/node/common"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
@@ -30,9 +31,9 @@ type dhtCacheEntry struct {
|
||||
// SuggestMigrate to a small batch at a time; peers that don't migrate within
|
||||
// offloadGracePeriod are moved to alreadyTried so a new batch can be picked.
|
||||
type offloadState struct {
|
||||
inBatch map[pp.ID]time.Time // peer → time added to current batch
|
||||
inBatch map[pp.ID]time.Time // peer → time added to current batch
|
||||
alreadyTried map[pp.ID]struct{} // peers proposed to that didn't migrate
|
||||
mu sync.Mutex
|
||||
mu sync.Mutex
|
||||
}
|
||||
|
||||
const (
|
||||
@@ -64,9 +65,20 @@ type IndexerService struct {
|
||||
pendingSearchesMu sync.Mutex
|
||||
// behavior tracks per-node compliance (heartbeat rate, publish/get volume,
|
||||
// identity consistency, signature failures).
|
||||
behavior *NodeBehaviorTracker
|
||||
behavior *NodeBehaviorTracker
|
||||
// connGuard limits new-connection bursts to protect public indexers.
|
||||
connGuard *ConnectionRateGuard
|
||||
// deletedDIDs tracks recently tombstoned DIDs to prevent AfterHeartbeat
|
||||
// from republishing records that were explicitly deleted by the peer.
|
||||
// Entries are cleared automatically after tombstoneTTL.
|
||||
deletedDIDs map[string]time.Time
|
||||
deletedDIDsMu sync.RWMutex
|
||||
// SWIM incarnation: incremented when a connecting node signals suspicion via
|
||||
// SuspectedIncarnation. The new value is broadcast back so nodes can clear
|
||||
// their suspect state (refutation mechanism).
|
||||
incarnation atomic.Uint64
|
||||
// eventQueue holds SWIM membership events to be piggybacked on responses
|
||||
// (infection-style dissemination toward connected nodes).
|
||||
eventQueue *common.MembershipEventQueue
|
||||
}
|
||||
|
||||
// NewIndexerService creates an IndexerService.
|
||||
@@ -81,7 +93,8 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
referencedNodes: map[pp.ID]PeerRecord{},
|
||||
pendingSearches: map[string]chan []common.SearchHit{},
|
||||
behavior: newNodeBehaviorTracker(),
|
||||
connGuard: newConnectionRateGuard(),
|
||||
deletedDIDs: make(map[string]time.Time),
|
||||
eventQueue: &common.MembershipEventQueue{},
|
||||
}
|
||||
if ps == nil {
|
||||
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
|
||||
@@ -96,6 +109,21 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2)
|
||||
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
|
||||
go ix.SubscribeToSearch(ix.PS, nil)
|
||||
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
||||
/*if ix.behavior.IsBanned(remotePeer) {
|
||||
return errors.New("peer is banned")
|
||||
}*/
|
||||
if isNew {
|
||||
// DB blacklist check: blocks reconnection after EvictPeer + blacklist.
|
||||
/*if !ix.isPeerKnown(remotePeer) {
|
||||
return errors.New("peer is blacklisted or unknown")
|
||||
}*/
|
||||
if !ix.ConnGuard.Allow() {
|
||||
return errors.New("connection rate limit exceeded, retry later")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
|
||||
@@ -106,16 +134,7 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
|
||||
// AllowInbound: fired once per stream open, before any heartbeat is decoded.
|
||||
// 1. Reject peers that are currently banned (behavioral strikes).
|
||||
// 2. For genuinely new connections, apply the burst guard.
|
||||
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
||||
if ix.behavior.IsBanned(remotePeer) {
|
||||
return errors.New("peer is banned")
|
||||
}
|
||||
if isNew && !ix.connGuard.Allow() {
|
||||
return errors.New("connection rate limit exceeded, retry later")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
// 2. For genuinely new connections, check the DB blacklist and apply the burst guard.
|
||||
|
||||
// ValidateHeartbeat: fired on every heartbeat tick for an established stream.
|
||||
// Checks heartbeat cadence — rejects if the node is sending too fast.
|
||||
@@ -162,7 +181,11 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
|
||||
// Build and send a HeartbeatResponse after each received node heartbeat.
|
||||
// Raw metrics only — no pre-cooked score. Node computes the score itself.
|
||||
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *common.HeartbeatResponse {
|
||||
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, hb *common.Heartbeat) *common.HeartbeatResponse {
|
||||
logger := oclib.GetLogger()
|
||||
need, challenges, challengeDID, referent, rawRecord :=
|
||||
hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record
|
||||
|
||||
ix.StreamMU.RLock()
|
||||
peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
||||
// Collect lastSeen per active peer for challenge responses.
|
||||
@@ -197,6 +220,31 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
// Update referent designation: node marks its best-scored indexer with Referent=true.
|
||||
ix.updateReferent(remotePeer, remotePeerRecord, referent)
|
||||
|
||||
// SWIM refutation: if the node signals our current incarnation as suspected,
|
||||
// increment it and broadcast an alive event so other nodes can clear suspicion.
|
||||
inc := ix.incarnation.Load()
|
||||
if hb.SuspectedIncarnation != nil && *hb.SuspectedIncarnation == inc {
|
||||
inc = ix.incarnation.Add(1)
|
||||
logger.Info().
|
||||
Str("suspected_by", remotePeer.String()).
|
||||
Uint64("new_incarnation", inc).
|
||||
Msg("[swim] refuting suspicion — incarnation incremented")
|
||||
ix.eventQueue.Add(common.MemberEvent{
|
||||
Type: common.MemberAlive,
|
||||
PeerID: ix.Host.ID().String(),
|
||||
Incarnation: inc,
|
||||
HopsLeft: common.InitialEventHops,
|
||||
})
|
||||
}
|
||||
|
||||
// Relay incoming SWIM events from the node into our event queue so they
|
||||
// propagate to other connected nodes (infection-style forwarding).
|
||||
for _, ev := range hb.MembershipEvents {
|
||||
if ev.HopsLeft > 0 {
|
||||
ix.eventQueue.Add(ev)
|
||||
}
|
||||
}
|
||||
|
||||
maxN := ix.MaxNodesConn()
|
||||
fillRate := 0.0
|
||||
if maxN > 0 {
|
||||
@@ -356,6 +404,10 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
||||
}()
|
||||
}
|
||||
|
||||
// Attach SWIM incarnation and piggybacked membership events.
|
||||
resp.Incarnation = ix.incarnation.Load()
|
||||
resp.MembershipEvents = ix.eventQueue.Drain(5)
|
||||
|
||||
return resp
|
||||
}
|
||||
|
||||
@@ -489,6 +541,23 @@ func (ix *IndexerService) startDHTProvide(fillRateFn func() float64) {
|
||||
}()
|
||||
}
|
||||
|
||||
// EvictPeer immediately closes the heartbeat stream of a peer and removes it
|
||||
// from the active stream records. Used when a peer is auto-blacklisted.
|
||||
func (ix *IndexerService) EvictPeer(peerID string) {
|
||||
pid, err := pp.Decode(peerID)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
ix.StreamMU.Lock()
|
||||
defer ix.StreamMU.Unlock()
|
||||
if rec, ok := ix.StreamRecords[common.ProtocolHeartbeat][pid]; ok {
|
||||
if rec.HeartbeatStream != nil && rec.HeartbeatStream.Stream != nil {
|
||||
rec.HeartbeatStream.Stream.Reset()
|
||||
}
|
||||
delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
|
||||
}
|
||||
}
|
||||
|
||||
func (ix *IndexerService) Close() {
|
||||
if ix.dhtProvideCancel != nil {
|
||||
ix.dhtProvideCancel()
|
||||
|
||||
@@ -19,6 +19,21 @@ func (v DefaultValidator) Select(key string, values [][]byte) (int, error) {
|
||||
// PeerRecordValidator provides the Validate and Select methods applied to
// values that are either a live PeerRecord or a TombstoneRecord. It carries no
// state.
type PeerRecordValidator struct{}
|
||||
|
||||
func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
||||
// Accept valid tombstones — deletion must be storable so it can propagate
|
||||
// and win over stale live records on other DHT nodes via Select().
|
||||
var ts TombstoneRecord
|
||||
if err := json.Unmarshal(value, &ts); err == nil && ts.Tombstone {
|
||||
if ts.PeerID == "" || ts.DID == "" {
|
||||
return errors.New("tombstone: missing fields")
|
||||
}
|
||||
if time.Since(ts.DeletedAt) > tombstoneTTL {
|
||||
return errors.New("tombstone: expired")
|
||||
}
|
||||
if _, err := ts.Verify(); err != nil {
|
||||
return errors.New("tombstone: " + err.Error())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var rec PeerRecord
|
||||
if err := json.Unmarshal(value, &rec); err != nil {
|
||||
@@ -35,6 +50,12 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
||||
return errors.New("record expired")
|
||||
}
|
||||
|
||||
// TTL cap: publisher cannot set an expiry further than maxTTLSeconds in
|
||||
// the future. Prevents abuse (e.g. records designed to linger for years).
|
||||
if rec.ExpiryDate.After(time.Now().UTC().Add(maxTTLSeconds * time.Second)) {
|
||||
return errors.New("TTL exceeds maximum allowed")
|
||||
}
|
||||
|
||||
// Signature verification
|
||||
if _, err := rec.Verify(); err != nil {
|
||||
return errors.New("invalid signature")
|
||||
@@ -44,6 +65,14 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
||||
}
|
||||
|
||||
func (v PeerRecordValidator) Select(key string, values [][]byte) (int, error) {
|
||||
// Tombstone always wins: a signed delete supersedes any live record,
|
||||
// even if the live record has a later ExpiryDate.
|
||||
for i, val := range values {
|
||||
var ts TombstoneRecord
|
||||
if err := json.Unmarshal(val, &ts); err == nil && ts.Tombstone {
|
||||
return i, nil
|
||||
}
|
||||
}
|
||||
|
||||
var newest time.Time
|
||||
index := 0
|
||||
|
||||
Reference in New Issue
Block a user