oc-discovery -> conf
This commit is contained in:
@@ -13,6 +13,26 @@ import (
|
||||
oclib "cloud.o-forge.io/core/oc-lib"
|
||||
)
|
||||
|
||||
// MemberEventType classifies a SWIM membership event.
type MemberEventType string

// Membership event kinds. The string values are what gets serialized in the
// "type" field of a MemberEvent.
const (
	// MemberAlive reports a peer as alive.
	MemberAlive = MemberEventType("alive")
	// MemberSuspect reports a peer as suspected.
	MemberSuspect = MemberEventType("suspect")
	// MemberDead reports a peer as dead.
	MemberDead = MemberEventType("dead")
)
|
||||
|
||||
// MemberEvent is a SWIM membership event piggybacked on heartbeats (infection-style).
|
||||
// HopsLeft starts at InitialEventHops and is decremented on each retransmission.
|
||||
// Receivers discard events whose HopsLeft reaches 0 instead of forwarding them further.
|
||||
// Deduplication by (PeerID, Incarnation): higher incarnation or higher-priority type wins.
|
||||
type MemberEvent struct {
|
||||
Type MemberEventType `json:"type"`
|
||||
PeerID string `json:"peer_id"`
|
||||
Incarnation uint64 `json:"incarnation"`
|
||||
HopsLeft int `json:"hops_left"`
|
||||
}
|
||||
|
||||
type Heartbeat struct {
|
||||
Name string `json:"name"`
|
||||
Stream *Stream `json:"stream"`
|
||||
@@ -39,6 +59,13 @@ type Heartbeat struct {
|
||||
// Only one indexer per node receives Referent=true at a time (the best-scored one).
|
||||
// The indexer stores the node in its referencedNodes for distributed search.
|
||||
Referent bool `json:"referent,omitempty"`
|
||||
// SuspectedIncarnation is set when this node currently suspects the target indexer.
|
||||
// If the value matches the indexer's own incarnation, the indexer increments its
|
||||
// incarnation and replies with the new value — this is the SWIM refutation signal.
|
||||
SuspectedIncarnation *uint64 `json:"suspected_incarnation,omitempty"`
|
||||
// MembershipEvents carries SWIM events piggybacked on this heartbeat.
|
||||
// Events are forwarded infection-style until HopsLeft reaches 0.
|
||||
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
|
||||
}
|
||||
|
||||
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
|
||||
@@ -104,6 +131,13 @@ type HeartbeatResponse struct {
|
||||
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
|
||||
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
|
||||
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
|
||||
// Incarnation is this indexer's current SWIM incarnation number.
|
||||
// It is incremented whenever the indexer refutes a suspicion signal.
|
||||
// The node tracks this to detect explicit refutations and to clear suspect state.
|
||||
Incarnation uint64 `json:"incarnation,omitempty"`
|
||||
// MembershipEvents carries SWIM events piggybacked on this response.
|
||||
// The node should forward them to its other indexers (infection-style).
|
||||
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
|
||||
}
|
||||
|
||||
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
|
||||
|
||||
@@ -24,6 +24,11 @@ var TimeWatcher time.Time
|
||||
// retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
|
||||
var retryRunning atomic.Bool
|
||||
|
||||
// suspectTimeout is the maximum time a peer can stay in suspect state before
|
||||
// being declared dead and evicted. Aligned with 3 heartbeat intervals so the
|
||||
// peer has at least 3 chances to respond or refute the suspicion signal.
|
||||
const suspectTimeout = 3 * RecommendedHeartbeatInterval
|
||||
|
||||
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
|
||||
TimeWatcher = time.Now().UTC()
|
||||
logger := oclib.GetLogger()
|
||||
@@ -304,6 +309,11 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
if recFn != nil {
|
||||
baseHB.Record = recFn()
|
||||
}
|
||||
// Piggyback SWIM membership events on every outgoing heartbeat batch.
|
||||
// All peers in the pool receive the same events this tick.
|
||||
if isIndexerHB {
|
||||
baseHB.MembershipEvents = NodeEventQueue.Drain(5)
|
||||
}
|
||||
// Determine the referent indexer: highest-scored one receives Referent=true
|
||||
// so it stores us in its referencedNodes for distributed search.
|
||||
var referentAddr string
|
||||
@@ -323,6 +333,13 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
|
||||
hb.Referent = true
|
||||
}
|
||||
// SWIM: signal suspicion so the peer can refute by incrementing incarnation.
|
||||
if isIndexerHB {
|
||||
if score := directory.GetScore(ai.Addr); score != nil && !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||
inc := score.UptimeTracker.LastKnownIncarnation
|
||||
hb.SuspectedIncarnation = &inc
|
||||
}
|
||||
}
|
||||
// Ensure an IndexerScore entry exists for this peer.
|
||||
var score *Score
|
||||
if isIndexerHB {
|
||||
@@ -378,6 +395,40 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
score.UptimeTracker.RecordHeartbeat()
|
||||
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
|
||||
|
||||
// SWIM: clear suspect state on any successful direct heartbeat.
|
||||
// The peer proved it is reachable; if it also incremented its incarnation
|
||||
// that is an explicit refutation — log it distinctly.
|
||||
if !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||
wasExplicitRefutation := resp != nil &&
|
||||
resp.Incarnation > 0 &&
|
||||
resp.Incarnation > score.UptimeTracker.LastKnownIncarnation
|
||||
if wasExplicitRefutation {
|
||||
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||
Uint64("old_incarnation", score.UptimeTracker.LastKnownIncarnation).
|
||||
Uint64("new_incarnation", resp.Incarnation).
|
||||
Msg("[swim] explicit refutation: incarnation incremented, suspicion cleared")
|
||||
} else {
|
||||
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||
Msg("[swim] suspect cleared — peer responded to direct probe")
|
||||
}
|
||||
score.UptimeTracker.SuspectedAt = time.Time{}
|
||||
// Propagate alive event so other nodes can clear their own suspect state.
|
||||
inc := score.UptimeTracker.LastKnownIncarnation
|
||||
if resp != nil && resp.Incarnation > 0 {
|
||||
inc = resp.Incarnation
|
||||
}
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberAlive,
|
||||
PeerID: ai.Info.ID.String(),
|
||||
Incarnation: inc,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
}
|
||||
// Always update last known incarnation.
|
||||
if resp != nil && resp.Incarnation > score.UptimeTracker.LastKnownIncarnation {
|
||||
score.UptimeTracker.LastKnownIncarnation = resp.Incarnation
|
||||
}
|
||||
|
||||
maxRTT := BaseRoundTrip * 10
|
||||
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||
if latencyScore < 0 {
|
||||
@@ -458,6 +509,15 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
score.witnessConsistent++
|
||||
}
|
||||
}
|
||||
|
||||
// SWIM infection: process membership events piggybacked on this response.
|
||||
// Events with HopsLeft > 0 are re-queued for forwarding to other indexers.
|
||||
for _, ev := range resp.MembershipEvents {
|
||||
if ev.HopsLeft > 0 {
|
||||
NodeEventQueue.Add(ev)
|
||||
}
|
||||
applyMemberEvent(ev, directory)
|
||||
}
|
||||
}
|
||||
|
||||
score.Score = score.ComputeNodeSideScore(latencyScore)
|
||||
@@ -530,6 +590,59 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
||||
}()
|
||||
}
|
||||
|
||||
// runIndirectProbe asks up to k live indexers (voters) to probe target via
|
||||
// ProtocolBandwidthProbe and returns true if the majority report reachable.
|
||||
// This is the SWIM explicit indirect ping — called only on heartbeat failure.
|
||||
func runIndirectProbe(h host.Host, target pp.AddrInfo, voters []Entry, k int) bool {
|
||||
if k > len(voters) {
|
||||
k = len(voters)
|
||||
}
|
||||
if k == 0 {
|
||||
return false
|
||||
}
|
||||
shuffled := make([]Entry, len(voters))
|
||||
copy(shuffled, voters)
|
||||
rand.Shuffle(len(shuffled), func(i, j int) { shuffled[i], shuffled[j] = shuffled[j], shuffled[i] })
|
||||
shuffled = shuffled[:k]
|
||||
|
||||
type result struct{ reachable bool }
|
||||
ch := make(chan result, k)
|
||||
for _, voter := range shuffled {
|
||||
if voter.Info == nil {
|
||||
ch <- result{false}
|
||||
continue
|
||||
}
|
||||
go func(v pp.AddrInfo) {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
|
||||
defer cancel()
|
||||
s, err := h.NewStream(ctx, v.ID, ProtocolIndirectProbe)
|
||||
if err != nil {
|
||||
ch <- result{false}
|
||||
return
|
||||
}
|
||||
s.SetDeadline(time.Now().Add(8 * time.Second))
|
||||
defer s.Close()
|
||||
if err := json.NewEncoder(s).Encode(IndirectProbeRequest{Target: target}); err != nil {
|
||||
ch <- result{false}
|
||||
return
|
||||
}
|
||||
var resp IndirectProbeResponse
|
||||
if err := json.NewDecoder(s).Decode(&resp); err != nil {
|
||||
ch <- result{false}
|
||||
return
|
||||
}
|
||||
ch <- result{resp.Reachable}
|
||||
}(*voter.Info)
|
||||
}
|
||||
reachable := 0
|
||||
for range k {
|
||||
if (<-ch).reachable {
|
||||
reachable++
|
||||
}
|
||||
}
|
||||
return reachable > k/2
|
||||
}
|
||||
|
||||
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
|
||||
logger := oclib.GetLogger()
|
||||
@@ -545,22 +658,96 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
|
||||
return
|
||||
}
|
||||
// Indirect probing via other alive indexers:
|
||||
// If other indexers in the pool are still responding, they act as implicit
|
||||
// third-party witnesses confirming our connectivity is fine — the failed
|
||||
// indexer is genuinely dead, evict immediately.
|
||||
// If this is the last indexer, there is no third party. Retry up to 3 times
|
||||
// (consecutive failures tracked in UptimeTracker) before declaring it dead.
|
||||
if len(directory.GetAddrs()) <= 1 {
|
||||
score.UptimeTracker.ConsecutiveFails++
|
||||
if score.UptimeTracker.ConsecutiveFails < 3 {
|
||||
|
||||
voters := directory.GetAddrs()
|
||||
if len(voters) <= 1 {
|
||||
// Last indexer: no peer available to proxy a probe.
|
||||
// Enter suspect state on first failure; evict only after suspectTimeout.
|
||||
if score.UptimeTracker.SuspectedAt.IsZero() {
|
||||
score.UptimeTracker.SuspectedAt = time.Now().UTC()
|
||||
score.UptimeTracker.ConsecutiveFails++
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberSuspect,
|
||||
PeerID: info.ID.String(),
|
||||
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Int("attempt", score.UptimeTracker.ConsecutiveFails).
|
||||
Msg("[indirect] last indexer failed, retrying before eviction")
|
||||
Msg("[swim] last indexer suspect — waiting for refutation or timeout")
|
||||
return
|
||||
}
|
||||
if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
|
||||
Msg("[swim] last indexer still failing, holding in suspect state")
|
||||
return
|
||||
}
|
||||
// suspectTimeout exceeded with no refutation — declare dead.
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Msg("[indirect] last indexer failed 3 times consecutively, evicting")
|
||||
Msg("[swim] last indexer suspect timeout exceeded, evicting")
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberDead,
|
||||
PeerID: info.ID.String(),
|
||||
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
} else if score.UptimeTracker.SuspectedAt.IsZero() {
|
||||
// First miss with other live indexers available:
|
||||
// enter suspect state and run an indirect probe asynchronously.
|
||||
score.UptimeTracker.SuspectedAt = time.Now().UTC()
|
||||
score.UptimeTracker.ConsecutiveFails++
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberSuspect,
|
||||
PeerID: info.ID.String(),
|
||||
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
probeTarget := *info
|
||||
go func() {
|
||||
alive := runIndirectProbe(h, probeTarget, voters, 2)
|
||||
if alive {
|
||||
// Other indexers confirm the target is reachable → our direct
|
||||
// link may be temporarily broken. Keep suspected; the next
|
||||
// heartbeat tick will retry the direct probe.
|
||||
logger.Warn().Str("peer", probeTarget.ID.String()).
|
||||
Msg("[swim] indirect probe: target reachable by peers, keeping (suspected)")
|
||||
} else {
|
||||
// Majority of probes also failed → the indexer is genuinely dead.
|
||||
logger.Warn().Str("peer", probeTarget.ID.String()).
|
||||
Msg("[swim] indirect probe: target unreachable, evicting")
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberDead,
|
||||
PeerID: probeTarget.ID.String(),
|
||||
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
consensusVoters := evictPeer(directory, addr, probeTarget.ID, proto)
|
||||
need := max(maxPool-len(consensusVoters), 1)
|
||||
if len(consensusVoters) > 0 {
|
||||
TriggerConsensus(h, consensusVoters, need)
|
||||
} else {
|
||||
replenishIndexersFromDHT(h, need)
|
||||
}
|
||||
}
|
||||
}()
|
||||
return // decision deferred to probe goroutine
|
||||
} else if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
|
||||
// Still within suspect window — the next tick's SuspectedIncarnation
|
||||
// in the heartbeat may trigger a refutation. Keep retrying.
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
|
||||
Msg("[swim] suspected peer still failing, waiting for refutation or timeout")
|
||||
return
|
||||
} else {
|
||||
// suspectTimeout exceeded — declare dead and fall through to eviction.
|
||||
logger.Warn().Str("peer", info.ID.String()).
|
||||
Msg("[swim] suspect timeout exceeded, evicting")
|
||||
NodeEventQueue.Add(MemberEvent{
|
||||
Type: MemberDead,
|
||||
PeerID: info.ID.String(),
|
||||
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||
HopsLeft: InitialEventHops,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -587,3 +774,34 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// applyMemberEvent applies an incoming SWIM membership event to the local directory.
|
||||
// Only MemberAlive events with a higher incarnation can clear an existing suspect state;
|
||||
// MemberSuspect / MemberDead from gossip are informational — we do not act on them
|
||||
// unilaterally since the node has its own direct-probe evidence.
|
||||
func applyMemberEvent(ev MemberEvent, directory *Directory) {
|
||||
if ev.Type != MemberAlive {
|
||||
return
|
||||
}
|
||||
logger := oclib.GetLogger()
|
||||
for _, ai := range directory.GetAddrs() {
|
||||
if ai.Info == nil || ai.Info.ID.String() != ev.PeerID {
|
||||
continue
|
||||
}
|
||||
score := directory.GetScore(ai.Addr)
|
||||
if score == nil || score.UptimeTracker == nil {
|
||||
return
|
||||
}
|
||||
if ev.Incarnation > score.UptimeTracker.LastKnownIncarnation {
|
||||
score.UptimeTracker.LastKnownIncarnation = ev.Incarnation
|
||||
if !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||
score.UptimeTracker.SuspectedAt = time.Time{}
|
||||
score.UptimeTracker.ConsecutiveFails = 0
|
||||
logger.Info().Str("peer", ev.PeerID).
|
||||
Uint64("incarnation", ev.Incarnation).
|
||||
Msg("[swim] alive event via gossip cleared suspicion")
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
@@ -146,6 +146,22 @@ func (s *LongLivedPubSubService) SubscribeToSearch(ps *pubsub.PubSub, f *func(co
|
||||
if f != nil {
|
||||
return SubscribeEvents(s, context.Background(), TopicPubSubSearch, -1, *f)
|
||||
}
|
||||
// Even when no handler is needed (e.g. strict indexers), we must call
|
||||
// topic.Subscribe() so that this peer sends a SUBSCRIBE control message
|
||||
// to connected peers and joins the GossipSub mesh as a forwarder.
|
||||
// Without this, messages cannot be relayed through indexers between nodes.
|
||||
topic := s.LongLivedPubSubs[TopicPubSubSearch]
|
||||
sub, err := topic.Subscribe()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
go func() {
|
||||
for {
|
||||
if _, err := sub.Next(context.Background()); err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -161,26 +177,27 @@ func SubscribeEvents[T interface{}](s *LongLivedPubSubService,
|
||||
return err
|
||||
}
|
||||
// launch loop waiting for results.
|
||||
go waitResults(s, ctx, sub, proto, timeout, f)
|
||||
go waitResults(topic, s, ctx, sub, proto, timeout, f)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) {
|
||||
func waitResults[T interface{}](topic *pubsub.Topic, s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) {
|
||||
defer ctx.Done()
|
||||
for {
|
||||
s.PubsubMu.Lock() // check safely if cache is actually notified subscribed to topic
|
||||
if s.LongLivedPubSubs[proto] == nil { // if not kill the loop.
|
||||
s.PubsubMu.Unlock()
|
||||
break
|
||||
s.LongLivedPubSubs[proto] = topic
|
||||
}
|
||||
s.PubsubMu.Unlock()
|
||||
|
||||
// if still subscribed -> wait for new message
|
||||
var cancel context.CancelFunc
|
||||
if timeout != -1 {
|
||||
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
|
||||
defer cancel()
|
||||
}
|
||||
|
||||
msg, err := sub.Next(ctx)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.DeadlineExceeded) {
|
||||
@@ -197,5 +214,6 @@ func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context,
|
||||
continue
|
||||
}
|
||||
f(ctx, evt, fmt.Sprintf("%v", proto))
|
||||
fmt.Println("DEADLOCK ?")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,7 +21,12 @@ type UptimeTracker struct {
|
||||
FirstSeen time.Time
|
||||
LastSeen time.Time
|
||||
TotalOnline time.Duration
|
||||
ConsecutiveFails int // incremented on each heartbeat failure; reset to 0 on success
|
||||
ConsecutiveFails int // kept for compatibility / logging; primary eviction uses SuspectedAt
|
||||
SuspectedAt time.Time // SWIM: non-zero when this peer is in suspect state
|
||||
// LastKnownIncarnation is the last incarnation number received from this peer.
|
||||
// When a peer sees itself suspected (SuspectedIncarnation in heartbeat) it
|
||||
// increments its incarnation and the node clears the suspect state on receipt.
|
||||
LastKnownIncarnation uint64
|
||||
}
|
||||
|
||||
// RecordHeartbeat accumulates online time gap-aware: only counts the interval if
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"oc-discovery/conf"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -22,6 +23,8 @@ type LongLivedStreamRecordedService[T interface{}] struct {
|
||||
StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T]
|
||||
StreamMU sync.RWMutex
|
||||
maxNodesConn int
|
||||
ConnGuard *ConnectionRateGuard
|
||||
|
||||
// AllowInbound, when set, is called once at stream open before any heartbeat
|
||||
// is decoded. remotePeer is the connecting peer; isNew is true when no
|
||||
// StreamRecord exists yet (first-ever connection). Return a non-nil error
|
||||
@@ -39,13 +42,9 @@ type LongLivedStreamRecordedService[T interface{}] struct {
|
||||
AfterDelete func(pid pp.ID, name string, did string)
|
||||
// BuildHeartbeatResponse, when set, is called after each successfully decoded
|
||||
// heartbeat to build the response sent back to the node.
|
||||
// remotePeer is the peer that sent the heartbeat (used for offload routing).
|
||||
// need is how many more indexers the node wants (from hb.Need).
|
||||
// referent is true when the node designated this indexer as its search referent.
|
||||
// rawRecord is the fresh signed PeerRecord embedded in the heartbeat (hb.Record),
|
||||
// passed directly so the handler does not race with AfterHeartbeat goroutine
|
||||
// updating StreamRecord.Record.
|
||||
BuildHeartbeatResponse func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *HeartbeatResponse
|
||||
// remotePeer is the connecting peer. hb is the full decoded heartbeat, including
|
||||
// SWIM fields (SuspectedIncarnation, MembershipEvents) and record/challenge data.
|
||||
BuildHeartbeatResponse func(remotePeer pp.ID, hb *Heartbeat) *HeartbeatResponse
|
||||
}
|
||||
|
||||
func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int {
|
||||
@@ -57,6 +56,7 @@ func NewStreamRecordedService[T interface{}](h host.Host, maxNodesConn int) *Lon
|
||||
LongLivedPubSubService: NewLongLivedPubSubService(h),
|
||||
StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{},
|
||||
maxNodesConn: maxNodesConn,
|
||||
ConnGuard: newConnectionRateGuard(),
|
||||
}
|
||||
go service.StartGC(30 * time.Second)
|
||||
// Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned
|
||||
@@ -247,7 +247,7 @@ func (ix *LongLivedStreamRecordedService[T]) HandleHeartbeat(s network.Stream) {
|
||||
}
|
||||
// Send response back to the node (bidirectional heartbeat).
|
||||
if ix.BuildHeartbeatResponse != nil {
|
||||
if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record); resp != nil {
|
||||
if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb); resp != nil {
|
||||
s.SetWriteDeadline(time.Now().Add(3 * time.Second))
|
||||
json.NewEncoder(s).Encode(resp)
|
||||
s.SetWriteDeadline(time.Time{})
|
||||
@@ -303,3 +303,52 @@ func CheckHeartbeat(h host.Host, s network.Stream, dec *json.Decoder, streams ma
|
||||
return &pid, &hb, err
|
||||
}
|
||||
}
|
||||
|
||||
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
|
||||
|
||||
// Defaults applied by newConnectionRateGuard when the configuration does not
// provide a positive value.
const (
	// defaultMaxConnPerWindow is the default number of new connections
	// accepted per window.
	defaultMaxConnPerWindow = 20
	// defaultConnWindowSecs is the default window length, in seconds.
	defaultConnWindowSecs = 30
)

// ConnectionRateGuard limits the number of NEW incoming connections accepted
// within a sliding time window. It protects public indexers against coordinated
// registration floods (Sybil bursts).
type ConnectionRateGuard struct {
	mu          sync.Mutex    // guards window
	window      []time.Time   // acceptance timestamps still inside the window, oldest first
	maxInWindow int           // maximum accepted connections per window
	windowDur   time.Duration // length of the sliding window
}
|
||||
|
||||
func newConnectionRateGuard() *ConnectionRateGuard {
|
||||
cfg := conf.GetConfig()
|
||||
return &ConnectionRateGuard{
|
||||
maxInWindow: CfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
|
||||
windowDur: time.Duration(CfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// Allow returns true if a new connection may be accepted.
|
||||
// The internal window is pruned on each call so memory stays bounded.
|
||||
func (g *ConnectionRateGuard) Allow() bool {
|
||||
g.mu.Lock()
|
||||
defer g.mu.Unlock()
|
||||
now := time.Now()
|
||||
cutoff := now.Add(-g.windowDur)
|
||||
i := 0
|
||||
for i < len(g.window) && g.window[i].Before(cutoff) {
|
||||
i++
|
||||
}
|
||||
g.window = g.window[i:]
|
||||
if len(g.window) >= g.maxInWindow {
|
||||
return false
|
||||
}
|
||||
g.window = append(g.window, now)
|
||||
return true
|
||||
}
|
||||
// CfgOr returns v when it is strictly positive and def otherwise. It is used to
// fall back to a default when a configuration value is unset (zero) or
// negative.
func CfgOr(v, def int) int {
	if v <= 0 {
		return def
	}
	return v
}
|
||||
|
||||
@@ -14,11 +14,110 @@ import (
|
||||
"github.com/libp2p/go-libp2p/core/protocol"
|
||||
)
|
||||
|
||||
// InitialEventHops is the starting hop count for SWIM membership events.
|
||||
// floor(log2(typical max-pool)) + 1 gives O(log n) propagation rounds.
|
||||
const InitialEventHops = 4
|
||||
|
||||
const maxMemberEventQueue = 50
|
||||
|
||||
// MembershipEventQueue holds SWIM membership events to be piggybacked on
|
||||
// outgoing heartbeats (infection-style dissemination). Bounded at
|
||||
// maxMemberEventQueue entries; events are deduplicated by PeerID.
|
||||
type MembershipEventQueue struct {
|
||||
mu sync.Mutex
|
||||
events []MemberEvent
|
||||
}
|
||||
|
||||
// memberEventPriority maps event types to an integer so higher-severity
|
||||
// events override lower-severity ones for the same PeerID.
|
||||
func memberEventPriority(t MemberEventType) int {
|
||||
switch t {
|
||||
case MemberDead:
|
||||
return 3
|
||||
case MemberSuspect:
|
||||
return 2
|
||||
case MemberAlive:
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Add inserts or updates a membership event.
|
||||
// An incoming event replaces the existing entry for the same PeerID when:
|
||||
// - its Incarnation is higher, OR
|
||||
// - the Incarnation is equal but the event type is higher-severity.
|
||||
func (q *MembershipEventQueue) Add(e MemberEvent) {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
for i, ex := range q.events {
|
||||
if ex.PeerID == e.PeerID {
|
||||
if e.Incarnation > ex.Incarnation ||
|
||||
(e.Incarnation == ex.Incarnation && memberEventPriority(e.Type) > memberEventPriority(ex.Type)) {
|
||||
q.events[i] = e
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(q.events) >= maxMemberEventQueue {
|
||||
q.events = q.events[1:] // drop oldest
|
||||
}
|
||||
q.events = append(q.events, e)
|
||||
}
|
||||
|
||||
// Drain returns up to max events ready for transmission.
|
||||
// HopsLeft is decremented on each call; events that reach 0 are removed from
|
||||
// the queue (they have already propagated enough rounds).
|
||||
func (q *MembershipEventQueue) Drain(max int) []MemberEvent {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
if len(q.events) == 0 {
|
||||
return nil
|
||||
}
|
||||
out := make([]MemberEvent, 0, max)
|
||||
kept := q.events[:0]
|
||||
for _, e := range q.events {
|
||||
if len(out) < max {
|
||||
e.HopsLeft--
|
||||
out = append(out, e)
|
||||
if e.HopsLeft > 0 {
|
||||
kept = append(kept, e)
|
||||
}
|
||||
// HopsLeft reached 0: event has propagated enough, drop from queue.
|
||||
} else {
|
||||
kept = append(kept, e)
|
||||
}
|
||||
}
|
||||
q.events = kept
|
||||
return out
|
||||
}
|
||||
|
||||
// NodeEventQueue is the global SWIM event queue for the node side.
|
||||
// Events are added on suspect/dead detection and drained into outgoing heartbeats.
|
||||
var NodeEventQueue = &MembershipEventQueue{}
|
||||
|
||||
const (
|
||||
ProtocolPublish = "/opencloud/record/publish/1.0"
|
||||
ProtocolGet = "/opencloud/record/get/1.0"
|
||||
ProtocolDelete = "/opencloud/record/delete/1.0"
|
||||
// ProtocolIndirectProbe is opened by a node toward a live indexer to ask it
|
||||
// to actively probe a suspected indexer on the node's behalf (SWIM indirect ping).
|
||||
// It is the only inter-indexer protocol — indexers do not maintain persistent
|
||||
// connections to each other; this stream is one-shot and short-lived.
|
||||
ProtocolIndirectProbe = "/opencloud/indexer/probe/1.0"
|
||||
)
|
||||
|
||||
// IndirectProbeRequest is sent by a node over ProtocolIndirectProbe.
|
||||
// The receiving indexer must attempt to reach Target and report back.
|
||||
type IndirectProbeRequest struct {
|
||||
Target pp.AddrInfo `json:"target"`
|
||||
}
|
||||
|
||||
// IndirectProbeResponse is the reply from the probing indexer.
|
||||
type IndirectProbeResponse struct {
|
||||
Reachable bool `json:"reachable"`
|
||||
LatencyMs int64 `json:"latency_ms,omitempty"`
|
||||
}
|
||||
|
||||
const ProtocolHeartbeat = "/opencloud/heartbeat/1.0"
|
||||
|
||||
// ProtocolWitnessQuery is opened by a node to ask a peer what it thinks of a given indexer.
|
||||
|
||||
Reference in New Issue
Block a user