oc-discovery -> conf

This commit is contained in:
mr
2026-04-08 10:04:41 +02:00
parent 46dee0a6cb
commit 29b26d366e
21 changed files with 1934 additions and 119 deletions

View File

@@ -24,6 +24,11 @@ var TimeWatcher time.Time
// retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
var retryRunning atomic.Bool
// suspectTimeout is the maximum time a peer can stay in suspect state before
// being declared dead and evicted. Aligned with 3 heartbeat intervals so the
// peer has at least 3 chances to respond or refute the suspicion signal.
const suspectTimeout = 3 * RecommendedHeartbeatInterval
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
TimeWatcher = time.Now().UTC()
logger := oclib.GetLogger()
@@ -304,6 +309,11 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
if recFn != nil {
baseHB.Record = recFn()
}
// Piggyback SWIM membership events on every outgoing heartbeat batch.
// All peers in the pool receive the same events this tick.
if isIndexerHB {
baseHB.MembershipEvents = NodeEventQueue.Drain(5)
}
// Determine the referent indexer: highest-scored one receives Referent=true
// so it stores us in its referencedNodes for distributed search.
var referentAddr string
@@ -323,6 +333,13 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
hb.Referent = true
}
// SWIM: signal suspicion so the peer can refute by incrementing incarnation.
if isIndexerHB {
if score := directory.GetScore(ai.Addr); score != nil && !score.UptimeTracker.SuspectedAt.IsZero() {
inc := score.UptimeTracker.LastKnownIncarnation
hb.SuspectedIncarnation = &inc
}
}
// Ensure an IndexerScore entry exists for this peer.
var score *Score
if isIndexerHB {
@@ -378,6 +395,40 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
score.UptimeTracker.RecordHeartbeat()
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
// SWIM: clear suspect state on any successful direct heartbeat.
// The peer proved it is reachable; if it also incremented its incarnation
// that is an explicit refutation — log it distinctly.
if !score.UptimeTracker.SuspectedAt.IsZero() {
wasExplicitRefutation := resp != nil &&
resp.Incarnation > 0 &&
resp.Incarnation > score.UptimeTracker.LastKnownIncarnation
if wasExplicitRefutation {
logger.Info().Str("peer", ai.Info.ID.String()).
Uint64("old_incarnation", score.UptimeTracker.LastKnownIncarnation).
Uint64("new_incarnation", resp.Incarnation).
Msg("[swim] explicit refutation: incarnation incremented, suspicion cleared")
} else {
logger.Info().Str("peer", ai.Info.ID.String()).
Msg("[swim] suspect cleared — peer responded to direct probe")
}
score.UptimeTracker.SuspectedAt = time.Time{}
// Propagate alive event so other nodes can clear their own suspect state.
inc := score.UptimeTracker.LastKnownIncarnation
if resp != nil && resp.Incarnation > 0 {
inc = resp.Incarnation
}
NodeEventQueue.Add(MemberEvent{
Type: MemberAlive,
PeerID: ai.Info.ID.String(),
Incarnation: inc,
HopsLeft: InitialEventHops,
})
}
// Always update last known incarnation.
if resp != nil && resp.Incarnation > score.UptimeTracker.LastKnownIncarnation {
score.UptimeTracker.LastKnownIncarnation = resp.Incarnation
}
maxRTT := BaseRoundTrip * 10
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
if latencyScore < 0 {
@@ -458,6 +509,15 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
score.witnessConsistent++
}
}
// SWIM infection: process membership events piggybacked on this response.
// Events with HopsLeft > 0 are re-queued for forwarding to other indexers.
for _, ev := range resp.MembershipEvents {
if ev.HopsLeft > 0 {
NodeEventQueue.Add(ev)
}
applyMemberEvent(ev, directory)
}
}
score.Score = score.ComputeNodeSideScore(latencyScore)
@@ -530,6 +590,59 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
}()
}
// runIndirectProbe asks up to k randomly chosen live indexers (voters) to
// probe target on our behalf via ProtocolIndirectProbe and reports whether a
// strict majority of the k probes found the target reachable.
// This is the SWIM explicit indirect ping — called only on heartbeat failure,
// to distinguish "target is dead" from "our direct link to it is broken".
//
// Each voter is contacted concurrently with an 8-second budget; a voter with
// no AddrInfo, a dial error, or an encode/decode error counts as an
// "unreachable" vote. With zero voters the result is false.
func runIndirectProbe(h host.Host, target pp.AddrInfo, voters []Entry, k int) bool {
	if k > len(voters) {
		k = len(voters)
	}
	if k == 0 {
		return false
	}
	// Select k voters uniformly at random without mutating the caller's slice.
	shuffled := make([]Entry, len(voters))
	copy(shuffled, voters)
	rand.Shuffle(len(shuffled), func(i, j int) { shuffled[i], shuffled[j] = shuffled[j], shuffled[i] })
	shuffled = shuffled[:k]
	// Buffered to k so every sender completes without blocking: no goroutine
	// can leak even if this function has already tallied and returned.
	votes := make(chan bool, k)
	for _, voter := range shuffled {
		if voter.Info == nil {
			votes <- false
			continue
		}
		go func(v pp.AddrInfo) {
			ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
			defer cancel()
			s, err := h.NewStream(ctx, v.ID, ProtocolIndirectProbe)
			if err != nil {
				votes <- false
				return
			}
			defer s.Close()
			// Bound both the request write and the response read; a deadline
			// failure just means the 8s context timeout is the only guard.
			_ = s.SetDeadline(time.Now().Add(8 * time.Second))
			if err := json.NewEncoder(s).Encode(IndirectProbeRequest{Target: target}); err != nil {
				votes <- false
				return
			}
			var resp IndirectProbeResponse
			if err := json.NewDecoder(s).Decode(&resp); err != nil {
				votes <- false
				return
			}
			votes <- resp.Reachable
		}(*voter.Info)
	}
	reachable := 0
	for range k {
		if <-votes {
			reachable++
		}
	}
	// Strict majority of the k probes (note: with k == 2 both must agree).
	return reachable > k/2
}
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
logger := oclib.GetLogger()
@@ -545,22 +658,96 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
return
}
// Indirect probing via other alive indexers:
// If other indexers in the pool are still responding, they act as implicit
// third-party witnesses confirming our connectivity is fine — the failed
// indexer is genuinely dead, evict immediately.
// If this is the last indexer, there is no third party. Retry up to 3 times
// (consecutive failures tracked in UptimeTracker) before declaring it dead.
if len(directory.GetAddrs()) <= 1 {
score.UptimeTracker.ConsecutiveFails++
if score.UptimeTracker.ConsecutiveFails < 3 {
voters := directory.GetAddrs()
if len(voters) <= 1 {
// Last indexer: no peer available to proxy a probe.
// Enter suspect state on first failure; evict only after suspectTimeout.
if score.UptimeTracker.SuspectedAt.IsZero() {
score.UptimeTracker.SuspectedAt = time.Now().UTC()
score.UptimeTracker.ConsecutiveFails++
NodeEventQueue.Add(MemberEvent{
Type: MemberSuspect,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
logger.Warn().Str("peer", info.ID.String()).
Int("attempt", score.UptimeTracker.ConsecutiveFails).
Msg("[indirect] last indexer failed, retrying before eviction")
Msg("[swim] last indexer suspect — waiting for refutation or timeout")
return
}
if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
logger.Warn().Str("peer", info.ID.String()).
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
Msg("[swim] last indexer still failing, holding in suspect state")
return
}
// suspectTimeout exceeded with no refutation — declare dead.
logger.Warn().Str("peer", info.ID.String()).
Msg("[indirect] last indexer failed 3 times consecutively, evicting")
Msg("[swim] last indexer suspect timeout exceeded, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
} else if score.UptimeTracker.SuspectedAt.IsZero() {
// First miss with other live indexers available:
// enter suspect state and run an indirect probe asynchronously.
score.UptimeTracker.SuspectedAt = time.Now().UTC()
score.UptimeTracker.ConsecutiveFails++
NodeEventQueue.Add(MemberEvent{
Type: MemberSuspect,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
probeTarget := *info
go func() {
alive := runIndirectProbe(h, probeTarget, voters, 2)
if alive {
// Other indexers confirm the target is reachable → our direct
// link may be temporarily broken. Keep suspected; the next
// heartbeat tick will retry the direct probe.
logger.Warn().Str("peer", probeTarget.ID.String()).
Msg("[swim] indirect probe: target reachable by peers, keeping (suspected)")
} else {
// Majority of probes also failed → the indexer is genuinely dead.
logger.Warn().Str("peer", probeTarget.ID.String()).
Msg("[swim] indirect probe: target unreachable, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: probeTarget.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
consensusVoters := evictPeer(directory, addr, probeTarget.ID, proto)
need := max(maxPool-len(consensusVoters), 1)
if len(consensusVoters) > 0 {
TriggerConsensus(h, consensusVoters, need)
} else {
replenishIndexersFromDHT(h, need)
}
}
}()
return // decision deferred to probe goroutine
} else if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
// Still within suspect window — the next tick's SuspectedIncarnation
// in the heartbeat may trigger a refutation. Keep retrying.
logger.Warn().Str("peer", info.ID.String()).
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
Msg("[swim] suspected peer still failing, waiting for refutation or timeout")
return
} else {
// suspectTimeout exceeded — declare dead and fall through to eviction.
logger.Warn().Str("peer", info.ID.String()).
Msg("[swim] suspect timeout exceeded, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
}
}
}
@@ -587,3 +774,34 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
}
}
}
// applyMemberEvent folds one gossiped SWIM membership event into the local
// directory. Only MemberAlive events are acted on: an alive event carrying a
// strictly higher incarnation than the last one we recorded bumps the stored
// incarnation and, if the peer was locally suspected, lifts that suspicion.
// MemberSuspect / MemberDead gossip is deliberately ignored — this node keeps
// its own direct-probe evidence and never evicts a peer purely on hearsay.
func applyMemberEvent(ev MemberEvent, directory *Directory) {
	if ev.Type != MemberAlive {
		return
	}
	logger := oclib.GetLogger()
	for _, entry := range directory.GetAddrs() {
		if entry.Info == nil || entry.Info.ID.String() != ev.PeerID {
			continue
		}
		// First directory entry matching the gossiped peer ID.
		score := directory.GetScore(entry.Addr)
		if score == nil || score.UptimeTracker == nil {
			return
		}
		tracker := score.UptimeTracker
		if ev.Incarnation <= tracker.LastKnownIncarnation {
			return // stale or duplicate gossip — nothing new to learn
		}
		tracker.LastKnownIncarnation = ev.Incarnation
		if tracker.SuspectedAt.IsZero() {
			return // not suspected locally; incarnation update is all we needed
		}
		tracker.SuspectedAt = time.Time{}
		tracker.ConsecutiveFails = 0
		logger.Info().Str("peer", ev.PeerID).
			Uint64("incarnation", ev.Incarnation).
			Msg("[swim] alive event via gossip cleared suspicion")
		return
	}
}