oc-discovery -> conf
This commit is contained in:
@@ -30,6 +30,14 @@ type Config struct {
|
|||||||
MaxHBPerMinute int // default 5
|
MaxHBPerMinute int // default 5
|
||||||
MaxPublishPerMinute int // default 10
|
MaxPublishPerMinute int // default 10
|
||||||
MaxGetPerMinute int // default 50
|
MaxGetPerMinute int // default 50
|
||||||
|
|
||||||
|
// LocationGranularity controls how precisely this node discloses its position.
|
||||||
|
// 0 = opt-out (no location published)
|
||||||
|
// 1 = continent (±15°)
|
||||||
|
// 2 = country (±3°) — default
|
||||||
|
// 3 = region (±0.5°)
|
||||||
|
// 4 = city (±0.05°)
|
||||||
|
LocationGranularity int // default 2
|
||||||
}
|
}
|
||||||
|
|
||||||
var instance *Config
|
var instance *Config
|
||||||
|
|||||||
@@ -13,6 +13,26 @@ import (
|
|||||||
oclib "cloud.o-forge.io/core/oc-lib"
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// MemberEventType classifies a SWIM membership event. Its string value is
// what is serialized in the "type" field of a MemberEvent.
type MemberEventType string

// Membership states that can be reported about a peer.
const (
	// MemberAlive reports the peer as alive.
	MemberAlive MemberEventType = "alive"
	// MemberSuspect reports the peer as suspected (a direct heartbeat failed).
	MemberSuspect MemberEventType = "suspect"
	// MemberDead reports the peer as dead.
	MemberDead MemberEventType = "dead"
)
|
||||||
|
|
||||||
|
// MemberEvent is a SWIM membership event piggybacked on heartbeats (infection-style).
|
||||||
|
// HopsLeft starts at InitialEventHops and is decremented on each retransmission.
|
||||||
|
// Receivers discard events whose HopsLeft reaches 0 instead of forwarding them further.
|
||||||
|
// Deduplication by (PeerID, Incarnation): higher incarnation or higher-priority type wins.
|
||||||
|
type MemberEvent struct {
|
||||||
|
Type MemberEventType `json:"type"`
|
||||||
|
PeerID string `json:"peer_id"`
|
||||||
|
Incarnation uint64 `json:"incarnation"`
|
||||||
|
HopsLeft int `json:"hops_left"`
|
||||||
|
}
|
||||||
|
|
||||||
type Heartbeat struct {
|
type Heartbeat struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
Stream *Stream `json:"stream"`
|
Stream *Stream `json:"stream"`
|
||||||
@@ -39,6 +59,13 @@ type Heartbeat struct {
|
|||||||
// Only one indexer per node receives Referent=true at a time (the best-scored one).
|
// Only one indexer per node receives Referent=true at a time (the best-scored one).
|
||||||
// The indexer stores the node in its referencedNodes for distributed search.
|
// The indexer stores the node in its referencedNodes for distributed search.
|
||||||
Referent bool `json:"referent,omitempty"`
|
Referent bool `json:"referent,omitempty"`
|
||||||
|
// SuspectedIncarnation is set when this node currently suspects the target indexer.
|
||||||
|
// If the value matches the indexer's own incarnation, the indexer increments its
|
||||||
|
// incarnation and replies with the new value — this is the SWIM refutation signal.
|
||||||
|
SuspectedIncarnation *uint64 `json:"suspected_incarnation,omitempty"`
|
||||||
|
// MembershipEvents carries SWIM events piggybacked on this heartbeat.
|
||||||
|
// Events are forwarded infection-style until HopsLeft reaches 0.
|
||||||
|
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
|
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
|
||||||
@@ -104,6 +131,13 @@ type HeartbeatResponse struct {
|
|||||||
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
|
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
|
||||||
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
|
// Non-seeds: node removes this indexer immediately if it has enough alternatives.
|
||||||
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
|
SuggestMigrate bool `json:"suggest_migrate,omitempty"`
|
||||||
|
// Incarnation is this indexer's current SWIM incarnation number.
|
||||||
|
// It is incremented whenever the indexer refutes a suspicion signal.
|
||||||
|
// The node tracks this to detect explicit refutations and to clear suspect state.
|
||||||
|
Incarnation uint64 `json:"incarnation,omitempty"`
|
||||||
|
// MembershipEvents carries SWIM events piggybacked on this response.
|
||||||
|
// The node should forward them to its other indexers (infection-style).
|
||||||
|
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
|
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.
|
||||||
|
|||||||
@@ -24,6 +24,11 @@ var TimeWatcher time.Time
|
|||||||
// retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
|
// retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
|
||||||
var retryRunning atomic.Bool
|
var retryRunning atomic.Bool
|
||||||
|
|
||||||
|
// suspectTimeout is the maximum time a peer can stay in suspect state before
|
||||||
|
// being declared dead and evicted. Aligned with 3 heartbeat intervals so the
|
||||||
|
// peer has at least 3 chances to respond or refute the suspicion signal.
|
||||||
|
const suspectTimeout = 3 * RecommendedHeartbeatInterval
|
||||||
|
|
||||||
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
|
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
|
||||||
TimeWatcher = time.Now().UTC()
|
TimeWatcher = time.Now().UTC()
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
@@ -304,6 +309,11 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
|||||||
if recFn != nil {
|
if recFn != nil {
|
||||||
baseHB.Record = recFn()
|
baseHB.Record = recFn()
|
||||||
}
|
}
|
||||||
|
// Piggyback SWIM membership events on every outgoing heartbeat batch.
|
||||||
|
// All peers in the pool receive the same events this tick.
|
||||||
|
if isIndexerHB {
|
||||||
|
baseHB.MembershipEvents = NodeEventQueue.Drain(5)
|
||||||
|
}
|
||||||
// Determine the referent indexer: highest-scored one receives Referent=true
|
// Determine the referent indexer: highest-scored one receives Referent=true
|
||||||
// so it stores us in its referencedNodes for distributed search.
|
// so it stores us in its referencedNodes for distributed search.
|
||||||
var referentAddr string
|
var referentAddr string
|
||||||
@@ -323,6 +333,13 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
|||||||
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
|
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
|
||||||
hb.Referent = true
|
hb.Referent = true
|
||||||
}
|
}
|
||||||
|
// SWIM: signal suspicion so the peer can refute by incrementing incarnation.
|
||||||
|
if isIndexerHB {
|
||||||
|
if score := directory.GetScore(ai.Addr); score != nil && !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||||
|
inc := score.UptimeTracker.LastKnownIncarnation
|
||||||
|
hb.SuspectedIncarnation = &inc
|
||||||
|
}
|
||||||
|
}
|
||||||
// Ensure an IndexerScore entry exists for this peer.
|
// Ensure an IndexerScore entry exists for this peer.
|
||||||
var score *Score
|
var score *Score
|
||||||
if isIndexerHB {
|
if isIndexerHB {
|
||||||
@@ -378,6 +395,40 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
|||||||
score.UptimeTracker.RecordHeartbeat()
|
score.UptimeTracker.RecordHeartbeat()
|
||||||
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
|
score.UptimeTracker.ConsecutiveFails = 0 // reset on success
|
||||||
|
|
||||||
|
// SWIM: clear suspect state on any successful direct heartbeat.
|
||||||
|
// The peer proved it is reachable; if it also incremented its incarnation
|
||||||
|
// that is an explicit refutation — log it distinctly.
|
||||||
|
if !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||||
|
wasExplicitRefutation := resp != nil &&
|
||||||
|
resp.Incarnation > 0 &&
|
||||||
|
resp.Incarnation > score.UptimeTracker.LastKnownIncarnation
|
||||||
|
if wasExplicitRefutation {
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||||
|
Uint64("old_incarnation", score.UptimeTracker.LastKnownIncarnation).
|
||||||
|
Uint64("new_incarnation", resp.Incarnation).
|
||||||
|
Msg("[swim] explicit refutation: incarnation incremented, suspicion cleared")
|
||||||
|
} else {
|
||||||
|
logger.Info().Str("peer", ai.Info.ID.String()).
|
||||||
|
Msg("[swim] suspect cleared — peer responded to direct probe")
|
||||||
|
}
|
||||||
|
score.UptimeTracker.SuspectedAt = time.Time{}
|
||||||
|
// Propagate alive event so other nodes can clear their own suspect state.
|
||||||
|
inc := score.UptimeTracker.LastKnownIncarnation
|
||||||
|
if resp != nil && resp.Incarnation > 0 {
|
||||||
|
inc = resp.Incarnation
|
||||||
|
}
|
||||||
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberAlive,
|
||||||
|
PeerID: ai.Info.ID.String(),
|
||||||
|
Incarnation: inc,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
// Always update last known incarnation.
|
||||||
|
if resp != nil && resp.Incarnation > score.UptimeTracker.LastKnownIncarnation {
|
||||||
|
score.UptimeTracker.LastKnownIncarnation = resp.Incarnation
|
||||||
|
}
|
||||||
|
|
||||||
maxRTT := BaseRoundTrip * 10
|
maxRTT := BaseRoundTrip * 10
|
||||||
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
|
||||||
if latencyScore < 0 {
|
if latencyScore < 0 {
|
||||||
@@ -458,6 +509,15 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
|||||||
score.witnessConsistent++
|
score.witnessConsistent++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SWIM infection: process membership events piggybacked on this response.
|
||||||
|
// Events with HopsLeft > 0 are re-queued for forwarding to other indexers.
|
||||||
|
for _, ev := range resp.MembershipEvents {
|
||||||
|
if ev.HopsLeft > 0 {
|
||||||
|
NodeEventQueue.Add(ev)
|
||||||
|
}
|
||||||
|
applyMemberEvent(ev, directory)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
score.Score = score.ComputeNodeSideScore(latencyScore)
|
score.Score = score.ComputeNodeSideScore(latencyScore)
|
||||||
@@ -530,6 +590,59 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// runIndirectProbe asks up to k live indexers (voters) to probe target via
|
||||||
|
// ProtocolBandwidthProbe and returns true if the majority report reachable.
|
||||||
|
// This is the SWIM explicit indirect ping — called only on heartbeat failure.
|
||||||
|
func runIndirectProbe(h host.Host, target pp.AddrInfo, voters []Entry, k int) bool {
|
||||||
|
if k > len(voters) {
|
||||||
|
k = len(voters)
|
||||||
|
}
|
||||||
|
if k == 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
shuffled := make([]Entry, len(voters))
|
||||||
|
copy(shuffled, voters)
|
||||||
|
rand.Shuffle(len(shuffled), func(i, j int) { shuffled[i], shuffled[j] = shuffled[j], shuffled[i] })
|
||||||
|
shuffled = shuffled[:k]
|
||||||
|
|
||||||
|
type result struct{ reachable bool }
|
||||||
|
ch := make(chan result, k)
|
||||||
|
for _, voter := range shuffled {
|
||||||
|
if voter.Info == nil {
|
||||||
|
ch <- result{false}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
go func(v pp.AddrInfo) {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
s, err := h.NewStream(ctx, v.ID, ProtocolIndirectProbe)
|
||||||
|
if err != nil {
|
||||||
|
ch <- result{false}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.SetDeadline(time.Now().Add(8 * time.Second))
|
||||||
|
defer s.Close()
|
||||||
|
if err := json.NewEncoder(s).Encode(IndirectProbeRequest{Target: target}); err != nil {
|
||||||
|
ch <- result{false}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var resp IndirectProbeResponse
|
||||||
|
if err := json.NewDecoder(s).Decode(&resp); err != nil {
|
||||||
|
ch <- result{false}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ch <- result{resp.Reachable}
|
||||||
|
}(*voter.Info)
|
||||||
|
}
|
||||||
|
reachable := 0
|
||||||
|
for range k {
|
||||||
|
if (<-ch).reachable {
|
||||||
|
reachable++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return reachable > k/2
|
||||||
|
}
|
||||||
|
|
||||||
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
||||||
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
|
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
|
||||||
logger := oclib.GetLogger()
|
logger := oclib.GetLogger()
|
||||||
@@ -545,22 +658,96 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
|||||||
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
|
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Indirect probing via other alive indexers:
|
|
||||||
// If other indexers in the pool are still responding, they act as implicit
|
voters := directory.GetAddrs()
|
||||||
// third-party witnesses confirming our connectivity is fine — the failed
|
if len(voters) <= 1 {
|
||||||
// indexer is genuinely dead, evict immediately.
|
// Last indexer: no peer available to proxy a probe.
|
||||||
// If this is the last indexer, there is no third party. Retry up to 3 times
|
// Enter suspect state on first failure; evict only after suspectTimeout.
|
||||||
// (consecutive failures tracked in UptimeTracker) before declaring it dead.
|
if score.UptimeTracker.SuspectedAt.IsZero() {
|
||||||
if len(directory.GetAddrs()) <= 1 {
|
score.UptimeTracker.SuspectedAt = time.Now().UTC()
|
||||||
score.UptimeTracker.ConsecutiveFails++
|
score.UptimeTracker.ConsecutiveFails++
|
||||||
if score.UptimeTracker.ConsecutiveFails < 3 {
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberSuspect,
|
||||||
|
PeerID: info.ID.String(),
|
||||||
|
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
logger.Warn().Str("peer", info.ID.String()).
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
Int("attempt", score.UptimeTracker.ConsecutiveFails).
|
Msg("[swim] last indexer suspect — waiting for refutation or timeout")
|
||||||
Msg("[indirect] last indexer failed, retrying before eviction")
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
|
||||||
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
|
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
|
||||||
|
Msg("[swim] last indexer still failing, holding in suspect state")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// suspectTimeout exceeded with no refutation — declare dead.
|
||||||
logger.Warn().Str("peer", info.ID.String()).
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
Msg("[indirect] last indexer failed 3 times consecutively, evicting")
|
Msg("[swim] last indexer suspect timeout exceeded, evicting")
|
||||||
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberDead,
|
||||||
|
PeerID: info.ID.String(),
|
||||||
|
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
|
} else if score.UptimeTracker.SuspectedAt.IsZero() {
|
||||||
|
// First miss with other live indexers available:
|
||||||
|
// enter suspect state and run an indirect probe asynchronously.
|
||||||
|
score.UptimeTracker.SuspectedAt = time.Now().UTC()
|
||||||
|
score.UptimeTracker.ConsecutiveFails++
|
||||||
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberSuspect,
|
||||||
|
PeerID: info.ID.String(),
|
||||||
|
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
|
probeTarget := *info
|
||||||
|
go func() {
|
||||||
|
alive := runIndirectProbe(h, probeTarget, voters, 2)
|
||||||
|
if alive {
|
||||||
|
// Other indexers confirm the target is reachable → our direct
|
||||||
|
// link may be temporarily broken. Keep suspected; the next
|
||||||
|
// heartbeat tick will retry the direct probe.
|
||||||
|
logger.Warn().Str("peer", probeTarget.ID.String()).
|
||||||
|
Msg("[swim] indirect probe: target reachable by peers, keeping (suspected)")
|
||||||
|
} else {
|
||||||
|
// Majority of probes also failed → the indexer is genuinely dead.
|
||||||
|
logger.Warn().Str("peer", probeTarget.ID.String()).
|
||||||
|
Msg("[swim] indirect probe: target unreachable, evicting")
|
||||||
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberDead,
|
||||||
|
PeerID: probeTarget.ID.String(),
|
||||||
|
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
|
consensusVoters := evictPeer(directory, addr, probeTarget.ID, proto)
|
||||||
|
need := max(maxPool-len(consensusVoters), 1)
|
||||||
|
if len(consensusVoters) > 0 {
|
||||||
|
TriggerConsensus(h, consensusVoters, need)
|
||||||
|
} else {
|
||||||
|
replenishIndexersFromDHT(h, need)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return // decision deferred to probe goroutine
|
||||||
|
} else if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
|
||||||
|
// Still within suspect window — the next tick's SuspectedIncarnation
|
||||||
|
// in the heartbeat may trigger a refutation. Keep retrying.
|
||||||
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
|
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
|
||||||
|
Msg("[swim] suspected peer still failing, waiting for refutation or timeout")
|
||||||
|
return
|
||||||
|
} else {
|
||||||
|
// suspectTimeout exceeded — declare dead and fall through to eviction.
|
||||||
|
logger.Warn().Str("peer", info.ID.String()).
|
||||||
|
Msg("[swim] suspect timeout exceeded, evicting")
|
||||||
|
NodeEventQueue.Add(MemberEvent{
|
||||||
|
Type: MemberDead,
|
||||||
|
PeerID: info.ID.String(),
|
||||||
|
Incarnation: score.UptimeTracker.LastKnownIncarnation,
|
||||||
|
HopsLeft: InitialEventHops,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -587,3 +774,34 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// applyMemberEvent applies an incoming SWIM membership event to the local directory.
|
||||||
|
// Only MemberAlive events with a higher incarnation can clear an existing suspect state;
|
||||||
|
// MemberSuspect / MemberDead from gossip are informational — we do not act on them
|
||||||
|
// unilaterally since the node has its own direct-probe evidence.
|
||||||
|
func applyMemberEvent(ev MemberEvent, directory *Directory) {
|
||||||
|
if ev.Type != MemberAlive {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
for _, ai := range directory.GetAddrs() {
|
||||||
|
if ai.Info == nil || ai.Info.ID.String() != ev.PeerID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
score := directory.GetScore(ai.Addr)
|
||||||
|
if score == nil || score.UptimeTracker == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if ev.Incarnation > score.UptimeTracker.LastKnownIncarnation {
|
||||||
|
score.UptimeTracker.LastKnownIncarnation = ev.Incarnation
|
||||||
|
if !score.UptimeTracker.SuspectedAt.IsZero() {
|
||||||
|
score.UptimeTracker.SuspectedAt = time.Time{}
|
||||||
|
score.UptimeTracker.ConsecutiveFails = 0
|
||||||
|
logger.Info().Str("peer", ev.PeerID).
|
||||||
|
Uint64("incarnation", ev.Incarnation).
|
||||||
|
Msg("[swim] alive event via gossip cleared suspicion")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -146,6 +146,22 @@ func (s *LongLivedPubSubService) SubscribeToSearch(ps *pubsub.PubSub, f *func(co
|
|||||||
if f != nil {
|
if f != nil {
|
||||||
return SubscribeEvents(s, context.Background(), TopicPubSubSearch, -1, *f)
|
return SubscribeEvents(s, context.Background(), TopicPubSubSearch, -1, *f)
|
||||||
}
|
}
|
||||||
|
// Even when no handler is needed (e.g. strict indexers), we must call
|
||||||
|
// topic.Subscribe() so that this peer sends a SUBSCRIBE control message
|
||||||
|
// to connected peers and joins the GossipSub mesh as a forwarder.
|
||||||
|
// Without this, messages cannot be relayed through indexers between nodes.
|
||||||
|
topic := s.LongLivedPubSubs[TopicPubSubSearch]
|
||||||
|
sub, err := topic.Subscribe()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
for {
|
||||||
|
if _, err := sub.Next(context.Background()); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -161,26 +177,27 @@ func SubscribeEvents[T interface{}](s *LongLivedPubSubService,
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
// launch loop waiting for results.
|
// launch loop waiting for results.
|
||||||
go waitResults(s, ctx, sub, proto, timeout, f)
|
go waitResults(topic, s, ctx, sub, proto, timeout, f)
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) {
|
func waitResults[T interface{}](topic *pubsub.Topic, s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) {
|
||||||
defer ctx.Done()
|
defer ctx.Done()
|
||||||
for {
|
for {
|
||||||
s.PubsubMu.Lock() // check safely if cache is actually notified subscribed to topic
|
s.PubsubMu.Lock() // check safely if cache is actually notified subscribed to topic
|
||||||
if s.LongLivedPubSubs[proto] == nil { // if not kill the loop.
|
if s.LongLivedPubSubs[proto] == nil { // if not kill the loop.
|
||||||
s.PubsubMu.Unlock()
|
s.LongLivedPubSubs[proto] = topic
|
||||||
break
|
|
||||||
}
|
}
|
||||||
s.PubsubMu.Unlock()
|
s.PubsubMu.Unlock()
|
||||||
|
|
||||||
// if still subscribed -> wait for new message
|
// if still subscribed -> wait for new message
|
||||||
var cancel context.CancelFunc
|
var cancel context.CancelFunc
|
||||||
if timeout != -1 {
|
if timeout != -1 {
|
||||||
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
|
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
}
|
}
|
||||||
|
|
||||||
msg, err := sub.Next(ctx)
|
msg, err := sub.Next(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, context.DeadlineExceeded) {
|
if errors.Is(err, context.DeadlineExceeded) {
|
||||||
@@ -197,5 +214,6 @@ func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context,
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
f(ctx, evt, fmt.Sprintf("%v", proto))
|
f(ctx, evt, fmt.Sprintf("%v", proto))
|
||||||
|
fmt.Println("DEADLOCK ?")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,7 +21,12 @@ type UptimeTracker struct {
|
|||||||
FirstSeen time.Time
|
FirstSeen time.Time
|
||||||
LastSeen time.Time
|
LastSeen time.Time
|
||||||
TotalOnline time.Duration
|
TotalOnline time.Duration
|
||||||
ConsecutiveFails int // incremented on each heartbeat failure; reset to 0 on success
|
ConsecutiveFails int // kept for compatibility / logging; primary eviction uses SuspectedAt
|
||||||
|
SuspectedAt time.Time // SWIM: non-zero when this peer is in suspect state
|
||||||
|
// LastKnownIncarnation is the last incarnation number received from this peer.
|
||||||
|
// When a peer sees itself suspected (SuspectedIncarnation in heartbeat) it
|
||||||
|
// increments its incarnation and the node clears the suspect state on receipt.
|
||||||
|
LastKnownIncarnation uint64
|
||||||
}
|
}
|
||||||
|
|
||||||
// RecordHeartbeat accumulates online time gap-aware: only counts the interval if
|
// RecordHeartbeat accumulates online time gap-aware: only counts the interval if
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"oc-discovery/conf"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -22,6 +23,8 @@ type LongLivedStreamRecordedService[T interface{}] struct {
|
|||||||
StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T]
|
StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T]
|
||||||
StreamMU sync.RWMutex
|
StreamMU sync.RWMutex
|
||||||
maxNodesConn int
|
maxNodesConn int
|
||||||
|
ConnGuard *ConnectionRateGuard
|
||||||
|
|
||||||
// AllowInbound, when set, is called once at stream open before any heartbeat
|
// AllowInbound, when set, is called once at stream open before any heartbeat
|
||||||
// is decoded. remotePeer is the connecting peer; isNew is true when no
|
// is decoded. remotePeer is the connecting peer; isNew is true when no
|
||||||
// StreamRecord exists yet (first-ever connection). Return a non-nil error
|
// StreamRecord exists yet (first-ever connection). Return a non-nil error
|
||||||
@@ -39,13 +42,9 @@ type LongLivedStreamRecordedService[T interface{}] struct {
|
|||||||
AfterDelete func(pid pp.ID, name string, did string)
|
AfterDelete func(pid pp.ID, name string, did string)
|
||||||
// BuildHeartbeatResponse, when set, is called after each successfully decoded
|
// BuildHeartbeatResponse, when set, is called after each successfully decoded
|
||||||
// heartbeat to build the response sent back to the node.
|
// heartbeat to build the response sent back to the node.
|
||||||
// remotePeer is the peer that sent the heartbeat (used for offload routing).
|
// remotePeer is the connecting peer. hb is the full decoded heartbeat, including
|
||||||
// need is how many more indexers the node wants (from hb.Need).
|
// SWIM fields (SuspectedIncarnation, MembershipEvents) and record/challenge data.
|
||||||
// referent is true when the node designated this indexer as its search referent.
|
BuildHeartbeatResponse func(remotePeer pp.ID, hb *Heartbeat) *HeartbeatResponse
|
||||||
// rawRecord is the fresh signed PeerRecord embedded in the heartbeat (hb.Record),
|
|
||||||
// passed directly so the handler does not race with AfterHeartbeat goroutine
|
|
||||||
// updating StreamRecord.Record.
|
|
||||||
BuildHeartbeatResponse func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *HeartbeatResponse
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int {
|
func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int {
|
||||||
@@ -57,6 +56,7 @@ func NewStreamRecordedService[T interface{}](h host.Host, maxNodesConn int) *Lon
|
|||||||
LongLivedPubSubService: NewLongLivedPubSubService(h),
|
LongLivedPubSubService: NewLongLivedPubSubService(h),
|
||||||
StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{},
|
StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{},
|
||||||
maxNodesConn: maxNodesConn,
|
maxNodesConn: maxNodesConn,
|
||||||
|
ConnGuard: newConnectionRateGuard(),
|
||||||
}
|
}
|
||||||
go service.StartGC(30 * time.Second)
|
go service.StartGC(30 * time.Second)
|
||||||
// Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned
|
// Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned
|
||||||
@@ -247,7 +247,7 @@ func (ix *LongLivedStreamRecordedService[T]) HandleHeartbeat(s network.Stream) {
|
|||||||
}
|
}
|
||||||
// Send response back to the node (bidirectional heartbeat).
|
// Send response back to the node (bidirectional heartbeat).
|
||||||
if ix.BuildHeartbeatResponse != nil {
|
if ix.BuildHeartbeatResponse != nil {
|
||||||
if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record); resp != nil {
|
if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb); resp != nil {
|
||||||
s.SetWriteDeadline(time.Now().Add(3 * time.Second))
|
s.SetWriteDeadline(time.Now().Add(3 * time.Second))
|
||||||
json.NewEncoder(s).Encode(resp)
|
json.NewEncoder(s).Encode(resp)
|
||||||
s.SetWriteDeadline(time.Time{})
|
s.SetWriteDeadline(time.Time{})
|
||||||
@@ -303,3 +303,52 @@ func CheckHeartbeat(h host.Host, s network.Stream, dec *json.Decoder, streams ma
|
|||||||
return &pid, &hb, err
|
return &pid, &hb, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// Fallback limits used by newConnectionRateGuard when the configuration does
// not provide a positive value.
const (
	defaultMaxConnPerWindow = 20
	defaultConnWindowSecs   = 30
)

// ConnectionRateGuard limits how many NEW incoming connections are accepted
// within a sliding time window. It is meant to protect public indexers against
// coordinated registration floods (Sybil bursts).
type ConnectionRateGuard struct {
	// mu guards window.
	mu sync.Mutex
	// window holds the acceptance times still inside the sliding window,
	// oldest first.
	window []time.Time
	// maxInWindow is the number of acceptances allowed per window.
	maxInWindow int
	// windowDur is the length of the sliding window.
	windowDur time.Duration
}
|
||||||
|
|
||||||
|
func newConnectionRateGuard() *ConnectionRateGuard {
|
||||||
|
cfg := conf.GetConfig()
|
||||||
|
return &ConnectionRateGuard{
|
||||||
|
maxInWindow: CfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
|
||||||
|
windowDur: time.Duration(CfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allow returns true if a new connection may be accepted.
|
||||||
|
// The internal window is pruned on each call so memory stays bounded.
|
||||||
|
func (g *ConnectionRateGuard) Allow() bool {
|
||||||
|
g.mu.Lock()
|
||||||
|
defer g.mu.Unlock()
|
||||||
|
now := time.Now()
|
||||||
|
cutoff := now.Add(-g.windowDur)
|
||||||
|
i := 0
|
||||||
|
for i < len(g.window) && g.window[i].Before(cutoff) {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
g.window = g.window[i:]
|
||||||
|
if len(g.window) >= g.maxInWindow {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
g.window = append(g.window, now)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// CfgOr returns v when it is strictly positive and def otherwise. It is used
// to fall back to a default when a configuration value is unset (zero) or
// negative.
func CfgOr(v, def int) int {
	if v <= 0 {
		return def
	}
	return v
}
|
||||||
|
|||||||
@@ -14,11 +14,110 @@ import (
|
|||||||
"github.com/libp2p/go-libp2p/core/protocol"
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
	// InitialEventHops is the hop budget given to a freshly created SWIM
	// membership event. The value follows floor(log2(typical max-pool)) + 1,
	// which gives O(log n) propagation rounds.
	InitialEventHops = 4

	// maxMemberEventQueue is the maximum number of events a
	// MembershipEventQueue holds; adding beyond it drops the oldest entry.
	maxMemberEventQueue = 50
)
|
||||||
|
|
||||||
|
// MembershipEventQueue holds SWIM membership events to be piggybacked on
|
||||||
|
// outgoing heartbeats (infection-style dissemination). Bounded at
|
||||||
|
// maxMemberEventQueue entries; events are deduplicated by PeerID.
|
||||||
|
type MembershipEventQueue struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
events []MemberEvent
|
||||||
|
}
|
||||||
|
|
||||||
|
// memberEventPriority maps event types to an integer so higher-severity
|
||||||
|
// events override lower-severity ones for the same PeerID.
|
||||||
|
func memberEventPriority(t MemberEventType) int {
|
||||||
|
switch t {
|
||||||
|
case MemberDead:
|
||||||
|
return 3
|
||||||
|
case MemberSuspect:
|
||||||
|
return 2
|
||||||
|
case MemberAlive:
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add inserts or updates a membership event.
|
||||||
|
// An incoming event replaces the existing entry for the same PeerID when:
|
||||||
|
// - its Incarnation is higher, OR
|
||||||
|
// - the Incarnation is equal but the event type is higher-severity.
|
||||||
|
func (q *MembershipEventQueue) Add(e MemberEvent) {
|
||||||
|
q.mu.Lock()
|
||||||
|
defer q.mu.Unlock()
|
||||||
|
for i, ex := range q.events {
|
||||||
|
if ex.PeerID == e.PeerID {
|
||||||
|
if e.Incarnation > ex.Incarnation ||
|
||||||
|
(e.Incarnation == ex.Incarnation && memberEventPriority(e.Type) > memberEventPriority(ex.Type)) {
|
||||||
|
q.events[i] = e
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(q.events) >= maxMemberEventQueue {
|
||||||
|
q.events = q.events[1:] // drop oldest
|
||||||
|
}
|
||||||
|
q.events = append(q.events, e)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Drain returns up to max events ready for transmission.
|
||||||
|
// HopsLeft is decremented on each call; events that reach 0 are removed from
|
||||||
|
// the queue (they have already propagated enough rounds).
|
||||||
|
func (q *MembershipEventQueue) Drain(max int) []MemberEvent {
|
||||||
|
q.mu.Lock()
|
||||||
|
defer q.mu.Unlock()
|
||||||
|
if len(q.events) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
out := make([]MemberEvent, 0, max)
|
||||||
|
kept := q.events[:0]
|
||||||
|
for _, e := range q.events {
|
||||||
|
if len(out) < max {
|
||||||
|
e.HopsLeft--
|
||||||
|
out = append(out, e)
|
||||||
|
if e.HopsLeft > 0 {
|
||||||
|
kept = append(kept, e)
|
||||||
|
}
|
||||||
|
// HopsLeft reached 0: event has propagated enough, drop from queue.
|
||||||
|
} else {
|
||||||
|
kept = append(kept, e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
q.events = kept
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// NodeEventQueue is the global SWIM event queue for the node side.
|
||||||
|
// Events are added on suspect/dead detection and drained into outgoing heartbeats.
|
||||||
|
var NodeEventQueue = &MembershipEventQueue{}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
ProtocolPublish = "/opencloud/record/publish/1.0"
|
ProtocolPublish = "/opencloud/record/publish/1.0"
|
||||||
ProtocolGet = "/opencloud/record/get/1.0"
|
ProtocolGet = "/opencloud/record/get/1.0"
|
||||||
|
ProtocolDelete = "/opencloud/record/delete/1.0"
|
||||||
|
// ProtocolIndirectProbe is opened by a node toward a live indexer to ask it
|
||||||
|
// to actively probe a suspected indexer on the node's behalf (SWIM indirect ping).
|
||||||
|
// It is the only inter-indexer protocol — indexers do not maintain persistent
|
||||||
|
// connections to each other; this stream is one-shot and short-lived.
|
||||||
|
ProtocolIndirectProbe = "/opencloud/indexer/probe/1.0"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// IndirectProbeRequest is sent by a node over ProtocolIndirectProbe.
|
||||||
|
// The receiving indexer must attempt to reach Target and report back.
|
||||||
|
type IndirectProbeRequest struct {
|
||||||
|
Target pp.AddrInfo `json:"target"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// IndirectProbeResponse is the answer sent back by the probing indexer.
type IndirectProbeResponse struct {
	// Reachable reports whether the probe of the target succeeded.
	Reachable bool `json:"reachable"`
	// LatencyMs is the measured probe duration in milliseconds. It is left
	// out of the JSON encoding when zero.
	LatencyMs int64 `json:"latency_ms,omitempty"`
}
|
||||||
|
|
||||||
const ProtocolHeartbeat = "/opencloud/heartbeat/1.0"
|
const ProtocolHeartbeat = "/opencloud/heartbeat/1.0"
|
||||||
|
|
||||||
// ProtocolWitnessQuery is opened by a node to ask a peer what it thinks of a given indexer.
|
// ProtocolWitnessQuery is opened by a node to ask a peer what it thinks of a given indexer.
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"oc-discovery/conf"
|
"oc-discovery/conf"
|
||||||
|
"oc-discovery/daemons/node/common"
|
||||||
|
|
||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
)
|
)
|
||||||
@@ -13,62 +14,14 @@ import (
|
|||||||
// ── defaults ──────────────────────────────────────────────────────────────────
|
// ── defaults ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
const (
|
const (
|
||||||
defaultMaxConnPerWindow = 20
|
defaultMaxHBPerMinute = 5
|
||||||
defaultConnWindowSecs = 30
|
defaultMaxPublishPerMin = 10
|
||||||
defaultMaxHBPerMinute = 5
|
defaultMaxGetPerMin = 50
|
||||||
defaultMaxPublishPerMin = 10
|
strikeThreshold = 3
|
||||||
defaultMaxGetPerMin = 50
|
banDuration = 10 * time.Minute
|
||||||
strikeThreshold = 3
|
behaviorWindowDur = 60 * time.Second
|
||||||
banDuration = 10 * time.Minute
|
|
||||||
behaviorWindowDur = 60 * time.Second
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// cfgOr returns v when it is strictly positive, and def in every other case.
func cfgOr(v, def int) int {
	if v <= 0 {
		return def
	}
	return v
}
|
|
||||||
|
|
||||||
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
// ConnectionRateGuard caps how many NEW incoming connections are accepted
// inside a sliding time window. Its purpose is to shield public indexers from
// coordinated registration floods (Sybil bursts).
type ConnectionRateGuard struct {
	mu          sync.Mutex    // guards window
	window      []time.Time   // timestamps of accepted connections
	maxInWindow int           // acceptance limit per window
	windowDur   time.Duration // length of the sliding window
}
|
|
||||||
|
|
||||||
func newConnectionRateGuard() *ConnectionRateGuard {
|
|
||||||
cfg := conf.GetConfig()
|
|
||||||
return &ConnectionRateGuard{
|
|
||||||
maxInWindow: cfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
|
|
||||||
windowDur: time.Duration(cfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allow returns true if a new connection may be accepted.
|
|
||||||
// The internal window is pruned on each call so memory stays bounded.
|
|
||||||
func (g *ConnectionRateGuard) Allow() bool {
|
|
||||||
g.mu.Lock()
|
|
||||||
defer g.mu.Unlock()
|
|
||||||
now := time.Now()
|
|
||||||
cutoff := now.Add(-g.windowDur)
|
|
||||||
i := 0
|
|
||||||
for i < len(g.window) && g.window[i].Before(cutoff) {
|
|
||||||
i++
|
|
||||||
}
|
|
||||||
g.window = g.window[i:]
|
|
||||||
if len(g.window) >= g.maxInWindow {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
g.window = append(g.window, now)
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
// ── per-node state ────────────────────────────────────────────────────────────
|
// ── per-node state ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
type nodeBehavior struct {
|
type nodeBehavior struct {
|
||||||
@@ -130,9 +83,9 @@ func newNodeBehaviorTracker() *NodeBehaviorTracker {
|
|||||||
cfg := conf.GetConfig()
|
cfg := conf.GetConfig()
|
||||||
return &NodeBehaviorTracker{
|
return &NodeBehaviorTracker{
|
||||||
nodes: make(map[pp.ID]*nodeBehavior),
|
nodes: make(map[pp.ID]*nodeBehavior),
|
||||||
maxHB: cfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
|
maxHB: common.CfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
|
||||||
maxPub: cfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
|
maxPub: common.CfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
|
||||||
maxGet: cfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
|
maxGet: common.CfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -21,21 +21,37 @@ import (
|
|||||||
lpp "github.com/libp2p/go-libp2p/core/peer"
|
lpp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
	// DefaultTTLSeconds is the TTL applied to a peer record whose publisher
	// did not declare one. It is exported so the node package can use it.
	DefaultTTLSeconds = 120

	// maxTTLSeconds bounds how far into the future a publisher may place
	// its ExpiryDate.
	maxTTLSeconds = 86400 // 24h

	// tombstoneTTL is the lifetime of a signed delete record in the DHT:
	// long enough to propagate everywhere, short enough not to linger.
	tombstoneTTL = 10 * time.Minute
)
|
||||||
|
|
||||||
type PeerRecordPayload struct {
|
type PeerRecordPayload struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
DID string `json:"did"`
|
DID string `json:"did"`
|
||||||
PubKey []byte `json:"pub_key"`
|
PubKey []byte `json:"pub_key"`
|
||||||
ExpiryDate time.Time `json:"expiry_date"`
|
ExpiryDate time.Time `json:"expiry_date"`
|
||||||
|
// TTLSeconds is the publisher's declared lifetime for this record in seconds.
|
||||||
|
// 0 means "use the default (120 s)". Included in the signed payload so it
|
||||||
|
// cannot be altered by an intermediary.
|
||||||
|
TTLSeconds int `json:"ttl_seconds,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type PeerRecord struct {
|
type PeerRecord struct {
|
||||||
PeerRecordPayload
|
PeerRecordPayload
|
||||||
PeerID string `json:"peer_id"`
|
PeerID string `json:"peer_id"`
|
||||||
APIUrl string `json:"api_url"`
|
APIUrl string `json:"api_url"`
|
||||||
StreamAddress string `json:"stream_address"`
|
StreamAddress string `json:"stream_address"`
|
||||||
NATSAddress string `json:"nats_address"`
|
NATSAddress string `json:"nats_address"`
|
||||||
WalletAddress string `json:"wallet_address"`
|
WalletAddress string `json:"wallet_address"`
|
||||||
Signature []byte `json:"signature"`
|
Location *pp.PeerLocation `json:"location,omitempty"`
|
||||||
|
Signature []byte `json:"signature"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *PeerRecord) Sign() error {
|
func (p *PeerRecord) Sign() error {
|
||||||
@@ -84,6 +100,7 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
|
|||||||
StreamAddress: pr.StreamAddress,
|
StreamAddress: pr.StreamAddress,
|
||||||
NATSAddress: pr.NATSAddress,
|
NATSAddress: pr.NATSAddress,
|
||||||
WalletAddress: pr.WalletAddress,
|
WalletAddress: pr.WalletAddress,
|
||||||
|
Location: pr.Location,
|
||||||
}
|
}
|
||||||
if time.Now().UTC().After(pr.ExpiryDate) {
|
if time.Now().UTC().After(pr.ExpiryDate) {
|
||||||
return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline")
|
return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline")
|
||||||
@@ -91,6 +108,42 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
|
|||||||
return pp.SELF == p.Relation, p, nil
|
return pp.SELF == p.Relation, p, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TombstonePayload is the part of a delete request that gets signed.
// Its JSON encoding is the exact byte sequence the signature covers, so field
// order and tags must stay stable.
type TombstonePayload struct {
	// DID is the identifier of the record being deleted.
	DID string `json:"did"`
	// PeerID is the peer that issues the deletion.
	PeerID string `json:"peer_id"`
	// DeletedAt is the time of deletion as declared by the issuer.
	DeletedAt time.Time `json:"deleted_at"`
}
|
||||||
|
|
||||||
|
// TombstoneRecord is stored in the DHT at /node/{DID} to signal that a peer
|
||||||
|
// has voluntarily left the network. The Tombstone bool field acts as a
|
||||||
|
// discriminator so validators can distinguish it from a live PeerRecord.
|
||||||
|
type TombstoneRecord struct {
|
||||||
|
TombstonePayload
|
||||||
|
PubKey []byte `json:"pub_key"`
|
||||||
|
Tombstone bool `json:"tombstone"`
|
||||||
|
Signature []byte `json:"signature"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ts *TombstoneRecord) Verify() (crypto.PubKey, error) {
|
||||||
|
pubKey, err := crypto.UnmarshalPublicKey(ts.PubKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
payload, _ := json.Marshal(ts.TombstonePayload)
|
||||||
|
if ok, _ := pubKey.Verify(payload, ts.Signature); !ok {
|
||||||
|
return nil, errors.New("invalid tombstone signature")
|
||||||
|
}
|
||||||
|
return pubKey, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isTombstone returns true if data is a valid, well-formed TombstoneRecord.
|
||||||
|
func isTombstone(data []byte) bool {
|
||||||
|
var ts TombstoneRecord
|
||||||
|
return json.Unmarshal(data, &ts) == nil && ts.Tombstone
|
||||||
|
}
|
||||||
|
|
||||||
type GetValue struct {
|
type GetValue struct {
|
||||||
Key string `json:"key"`
|
Key string `json:"key"`
|
||||||
PeerID string `json:"peer_id,omitempty"`
|
PeerID string `json:"peer_id,omitempty"`
|
||||||
@@ -147,9 +200,9 @@ func (ix *IndexerService) isPeerKnown(pid lpp.ID) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
|
ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
|
||||||
_, err = ix.DHT.GetValue(ctx2, ix.genKey(string(did)))
|
val, err := ix.DHT.GetValue(ctx2, ix.genKey(string(did)))
|
||||||
cancel2()
|
cancel2()
|
||||||
return err == nil
|
return err == nil && !isTombstone(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ix *IndexerService) initNodeHandler() {
|
func (ix *IndexerService) initNodeHandler() {
|
||||||
@@ -188,6 +241,18 @@ func (ix *IndexerService) initNodeHandler() {
|
|||||||
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid")
|
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
// Don't republish if a tombstone was recently stored for this DID:
|
||||||
|
// the peer explicitly left and we must not re-animate their record.
|
||||||
|
ix.deletedDIDsMu.Lock()
|
||||||
|
if t, ok := ix.deletedDIDs[rec.DID]; ok {
|
||||||
|
if time.Since(t) < tombstoneTTL {
|
||||||
|
ix.deletedDIDsMu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// tombstoneTTL elapsed — peer is allowed to re-register.
|
||||||
|
delete(ix.deletedDIDs, rec.DID)
|
||||||
|
}
|
||||||
|
ix.deletedDIDsMu.Unlock()
|
||||||
// Keep StreamRecord.Record in sync so BuildHeartbeatResponse always
|
// Keep StreamRecord.Record in sync so BuildHeartbeatResponse always
|
||||||
// sees a populated PeerRecord (Name, DID, etc.) regardless of whether
|
// sees a populated PeerRecord (Name, DID, etc.) regardless of whether
|
||||||
// handleNodePublish ran before or after the heartbeat stream was opened.
|
// handleNodePublish ran before or after the heartbeat stream was opened.
|
||||||
@@ -220,6 +285,8 @@ func (ix *IndexerService) initNodeHandler() {
|
|||||||
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
|
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
|
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
|
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
|
||||||
|
ix.Host.SetStreamHandler(common.ProtocolDelete, ix.handleNodeDelete)
|
||||||
|
ix.Host.SetStreamHandler(common.ProtocolIndirectProbe, ix.handleIndirectProbe)
|
||||||
ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest)
|
ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest)
|
||||||
ix.initSearchHandlers()
|
ix.initSearchHandlers()
|
||||||
}
|
}
|
||||||
@@ -383,12 +450,12 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
|||||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
c, err := ix.DHT.GetValue(ctx, ix.genKey(key))
|
c, err := ix.DHT.GetValue(ctx, ix.genKey(key))
|
||||||
cancel()
|
cancel()
|
||||||
if err == nil {
|
if err == nil && !isTombstone(c) {
|
||||||
var rec PeerRecord
|
var rec PeerRecord
|
||||||
if json.Unmarshal(c, &rec) == nil {
|
if json.Unmarshal(c, &rec) == nil {
|
||||||
resp.Records[rec.PeerID] = rec
|
resp.Records[rec.PeerID] = rec
|
||||||
}
|
}
|
||||||
} else {
|
} else if err != nil {
|
||||||
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key)
|
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -399,3 +466,121 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handleNodeDelete processes a signed delete (tombstone) request from a peer.
|
||||||
|
// It verifies that the request is:
|
||||||
|
// - marked as a tombstone
|
||||||
|
// - recent (within 5 minutes, preventing replay attacks)
|
||||||
|
// - sent by the actual peer whose record is being deleted (PeerID == remotePeer)
|
||||||
|
// - signed by the matching private key
|
||||||
|
//
|
||||||
|
// On success it stores the tombstone in the DHT, evicts the peer from the local
|
||||||
|
// stream records, and marks the DID in deletedDIDs so AfterHeartbeat cannot
|
||||||
|
// accidentally republish the record during the tombstoneTTL window.
|
||||||
|
func (ix *IndexerService) handleNodeDelete(s network.Stream) {
|
||||||
|
defer s.Close()
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
remotePeer := s.Conn().RemotePeer()
|
||||||
|
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||||
|
|
||||||
|
var ts TombstoneRecord
|
||||||
|
if err := json.NewDecoder(s).Decode(&ts); err != nil || !ts.Tombstone {
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if ts.PeerID == "" || ts.DID == "" {
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if time.Since(ts.DeletedAt) > 5*time.Minute {
|
||||||
|
logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] stale tombstone rejected")
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if ts.PeerID != remotePeer.String() {
|
||||||
|
logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] tombstone PeerID mismatch")
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, err := ts.Verify(); err != nil {
|
||||||
|
logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("[delete] invalid tombstone signature")
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark DID as deleted in-memory before writing to DHT so AfterHeartbeat
|
||||||
|
// cannot win a race and republish the live record on top of the tombstone.
|
||||||
|
ix.deletedDIDsMu.Lock()
|
||||||
|
ix.deletedDIDs[ts.DID] = ts.DeletedAt
|
||||||
|
ix.deletedDIDsMu.Unlock()
|
||||||
|
|
||||||
|
data, _ := json.Marshal(ts)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
if err := ix.DHT.PutValue(ctx, ix.genKey(ts.DID), data); err != nil {
|
||||||
|
logger.Warn().Err(err).Str("did", ts.DID).Msg("[delete] DHT write tombstone failed")
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
// Invalidate the /pid/ secondary index so isPeerKnown returns false quickly.
|
||||||
|
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(ts.PeerID), []byte("")); err != nil {
|
||||||
|
logger.Warn().Err(err).Str("pid", ts.PeerID).Msg("[delete] DHT clear pid failed")
|
||||||
|
}
|
||||||
|
cancel2()
|
||||||
|
|
||||||
|
// Evict from active stream records.
|
||||||
|
if pid, err := lpp.Decode(ts.PeerID); err == nil {
|
||||||
|
ix.StreamMU.Lock()
|
||||||
|
delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
|
||||||
|
ix.StreamMU.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Info().Str("did", ts.DID).Str("peer", ts.PeerID).Msg("[delete] tombstone stored, peer evicted")
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleIndirectProbe is the SWIM inter-indexer probe handler.
|
||||||
|
// A node opens this stream toward a live indexer to ask: "can you reach peer X?"
|
||||||
|
// The indexer attempts a ProtocolBandwidthProbe to X and reports back.
|
||||||
|
// This is the only protocol that indexers use to communicate with each other;
|
||||||
|
// no persistent inter-indexer connections are maintained.
|
||||||
|
func (ix *IndexerService) handleIndirectProbe(s network.Stream) {
|
||||||
|
defer s.Close()
|
||||||
|
s.SetDeadline(time.Now().Add(10 * time.Second))
|
||||||
|
|
||||||
|
var req common.IndirectProbeRequest
|
||||||
|
if err := json.NewDecoder(s).Decode(&req); err != nil {
|
||||||
|
s.Reset()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
respond := func(reachable bool, latencyMs int64) {
|
||||||
|
json.NewEncoder(s).Encode(common.IndirectProbeResponse{
|
||||||
|
Reachable: reachable,
|
||||||
|
LatencyMs: latencyMs,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connect to target if not already connected.
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
if ix.Host.Network().Connectedness(req.Target.ID) != network.Connected {
|
||||||
|
if err := ix.Host.Connect(ctx, req.Target); err != nil {
|
||||||
|
respond(false, 0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open a bandwidth probe stream — already registered on all nodes/indexers.
|
||||||
|
start := time.Now()
|
||||||
|
ps, err := ix.Host.NewStream(ctx, req.Target.ID, common.ProtocolBandwidthProbe)
|
||||||
|
if err != nil {
|
||||||
|
respond(false, 0)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
defer ps.Reset()
|
||||||
|
ps.SetDeadline(time.Now().Add(3 * time.Second))
|
||||||
|
ps.Write([]byte("ping"))
|
||||||
|
buf := make([]byte, 4)
|
||||||
|
_, err = ps.Read(buf)
|
||||||
|
latency := time.Since(start).Milliseconds()
|
||||||
|
respond(err == nil, latency)
|
||||||
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import (
|
|||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
oclib "cloud.o-forge.io/core/oc-lib"
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
@@ -30,9 +31,9 @@ type dhtCacheEntry struct {
|
|||||||
// SuggestMigrate to a small batch at a time; peers that don't migrate within
|
// SuggestMigrate to a small batch at a time; peers that don't migrate within
|
||||||
// offloadGracePeriod are moved to alreadyTried so a new batch can be picked.
|
// offloadGracePeriod are moved to alreadyTried so a new batch can be picked.
|
||||||
type offloadState struct {
|
type offloadState struct {
|
||||||
inBatch map[pp.ID]time.Time // peer → time added to current batch
|
inBatch map[pp.ID]time.Time // peer → time added to current batch
|
||||||
alreadyTried map[pp.ID]struct{} // peers proposed to that didn't migrate
|
alreadyTried map[pp.ID]struct{} // peers proposed to that didn't migrate
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -64,9 +65,20 @@ type IndexerService struct {
|
|||||||
pendingSearchesMu sync.Mutex
|
pendingSearchesMu sync.Mutex
|
||||||
// behavior tracks per-node compliance (heartbeat rate, publish/get volume,
|
// behavior tracks per-node compliance (heartbeat rate, publish/get volume,
|
||||||
// identity consistency, signature failures).
|
// identity consistency, signature failures).
|
||||||
behavior *NodeBehaviorTracker
|
behavior *NodeBehaviorTracker
|
||||||
// connGuard limits new-connection bursts to protect public indexers.
|
// connGuard limits new-connection bursts to protect public indexers.
|
||||||
connGuard *ConnectionRateGuard
|
// deletedDIDs tracks recently tombstoned DIDs to prevent AfterHeartbeat
|
||||||
|
// from republishing records that were explicitly deleted by the peer.
|
||||||
|
// Entries are not removed by a timer: an entry is dropped lazily once tombstoneTTL has elapsed (e.g. when a heartbeat for that DID is next processed).
|
||||||
|
deletedDIDs map[string]time.Time
|
||||||
|
deletedDIDsMu sync.RWMutex
|
||||||
|
// SWIM incarnation: incremented when a connecting node signals suspicion via
|
||||||
|
// SuspectedIncarnation. The new value is broadcast back so nodes can clear
|
||||||
|
// their suspect state (refutation mechanism).
|
||||||
|
incarnation atomic.Uint64
|
||||||
|
// eventQueue holds SWIM membership events to be piggybacked on responses
|
||||||
|
// (infection-style dissemination toward connected nodes).
|
||||||
|
eventQueue *common.MembershipEventQueue
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewIndexerService creates an IndexerService.
|
// NewIndexerService creates an IndexerService.
|
||||||
@@ -81,7 +93,8 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
referencedNodes: map[pp.ID]PeerRecord{},
|
referencedNodes: map[pp.ID]PeerRecord{},
|
||||||
pendingSearches: map[string]chan []common.SearchHit{},
|
pendingSearches: map[string]chan []common.SearchHit{},
|
||||||
behavior: newNodeBehaviorTracker(),
|
behavior: newNodeBehaviorTracker(),
|
||||||
connGuard: newConnectionRateGuard(),
|
deletedDIDs: make(map[string]time.Time),
|
||||||
|
eventQueue: &common.MembershipEventQueue{},
|
||||||
}
|
}
|
||||||
if ps == nil {
|
if ps == nil {
|
||||||
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
|
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
|
||||||
@@ -96,6 +109,21 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2)
|
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2)
|
||||||
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
|
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
|
||||||
go ix.SubscribeToSearch(ix.PS, nil)
|
go ix.SubscribeToSearch(ix.PS, nil)
|
||||||
|
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
||||||
|
/*if ix.behavior.IsBanned(remotePeer) {
|
||||||
|
return errors.New("peer is banned")
|
||||||
|
}*/
|
||||||
|
if isNew {
|
||||||
|
// DB blacklist check: blocks reconnection after EvictPeer + blacklist.
|
||||||
|
/*if !ix.isPeerKnown(remotePeer) {
|
||||||
|
return errors.New("peer is blacklisted or unknown")
|
||||||
|
}*/
|
||||||
|
if !ix.ConnGuard.Allow() {
|
||||||
|
return errors.New("connection rate limit exceeded, retry later")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
|
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
|
||||||
@@ -106,16 +134,7 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
|
|
||||||
// AllowInbound: fired once per stream open, before any heartbeat is decoded.
|
// AllowInbound: fired once per stream open, before any heartbeat is decoded.
|
||||||
// 1. Reject peers that are currently banned (behavioral strikes).
|
// 1. Reject peers that are currently banned (behavioral strikes).
|
||||||
// 2. For genuinely new connections, apply the burst guard.
|
// 2. For genuinely new connections, check the DB blacklist and apply the burst guard.
|
||||||
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
|
||||||
if ix.behavior.IsBanned(remotePeer) {
|
|
||||||
return errors.New("peer is banned")
|
|
||||||
}
|
|
||||||
if isNew && !ix.connGuard.Allow() {
|
|
||||||
return errors.New("connection rate limit exceeded, retry later")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// ValidateHeartbeat: fired on every heartbeat tick for an established stream.
|
// ValidateHeartbeat: fired on every heartbeat tick for an established stream.
|
||||||
// Checks heartbeat cadence — rejects if the node is sending too fast.
|
// Checks heartbeat cadence — rejects if the node is sending too fast.
|
||||||
@@ -162,7 +181,11 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
|
|
||||||
// Build and send a HeartbeatResponse after each received node heartbeat.
|
// Build and send a HeartbeatResponse after each received node heartbeat.
|
||||||
// Raw metrics only — no pre-cooked score. Node computes the score itself.
|
// Raw metrics only — no pre-cooked score. Node computes the score itself.
|
||||||
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *common.HeartbeatResponse {
|
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, hb *common.Heartbeat) *common.HeartbeatResponse {
|
||||||
|
logger := oclib.GetLogger()
|
||||||
|
need, challenges, challengeDID, referent, rawRecord :=
|
||||||
|
hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record
|
||||||
|
|
||||||
ix.StreamMU.RLock()
|
ix.StreamMU.RLock()
|
||||||
peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat])
|
||||||
// Collect lastSeen per active peer for challenge responses.
|
// Collect lastSeen per active peer for challenge responses.
|
||||||
@@ -197,6 +220,31 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
// Update referent designation: node marks its best-scored indexer with Referent=true.
|
// Update referent designation: node marks its best-scored indexer with Referent=true.
|
||||||
ix.updateReferent(remotePeer, remotePeerRecord, referent)
|
ix.updateReferent(remotePeer, remotePeerRecord, referent)
|
||||||
|
|
||||||
|
// SWIM refutation: if the node signals our current incarnation as suspected,
|
||||||
|
// increment it and broadcast an alive event so other nodes can clear suspicion.
|
||||||
|
inc := ix.incarnation.Load()
|
||||||
|
if hb.SuspectedIncarnation != nil && *hb.SuspectedIncarnation == inc {
|
||||||
|
inc = ix.incarnation.Add(1)
|
||||||
|
logger.Info().
|
||||||
|
Str("suspected_by", remotePeer.String()).
|
||||||
|
Uint64("new_incarnation", inc).
|
||||||
|
Msg("[swim] refuting suspicion — incarnation incremented")
|
||||||
|
ix.eventQueue.Add(common.MemberEvent{
|
||||||
|
Type: common.MemberAlive,
|
||||||
|
PeerID: ix.Host.ID().String(),
|
||||||
|
Incarnation: inc,
|
||||||
|
HopsLeft: common.InitialEventHops,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Relay incoming SWIM events from the node into our event queue so they
|
||||||
|
// propagate to other connected nodes (infection-style forwarding).
|
||||||
|
for _, ev := range hb.MembershipEvents {
|
||||||
|
if ev.HopsLeft > 0 {
|
||||||
|
ix.eventQueue.Add(ev)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
maxN := ix.MaxNodesConn()
|
maxN := ix.MaxNodesConn()
|
||||||
fillRate := 0.0
|
fillRate := 0.0
|
||||||
if maxN > 0 {
|
if maxN > 0 {
|
||||||
@@ -356,6 +404,10 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Attach SWIM incarnation and piggybacked membership events.
|
||||||
|
resp.Incarnation = ix.incarnation.Load()
|
||||||
|
resp.MembershipEvents = ix.eventQueue.Drain(5)
|
||||||
|
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -489,6 +541,23 @@ func (ix *IndexerService) startDHTProvide(fillRateFn func() float64) {
|
|||||||
}()
|
}()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// EvictPeer immediately closes the heartbeat stream of a peer and removes it
|
||||||
|
// from the active stream records. Used when a peer is auto-blacklisted.
|
||||||
|
func (ix *IndexerService) EvictPeer(peerID string) {
|
||||||
|
pid, err := pp.Decode(peerID)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ix.StreamMU.Lock()
|
||||||
|
defer ix.StreamMU.Unlock()
|
||||||
|
if rec, ok := ix.StreamRecords[common.ProtocolHeartbeat][pid]; ok {
|
||||||
|
if rec.HeartbeatStream != nil && rec.HeartbeatStream.Stream != nil {
|
||||||
|
rec.HeartbeatStream.Stream.Reset()
|
||||||
|
}
|
||||||
|
delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (ix *IndexerService) Close() {
|
func (ix *IndexerService) Close() {
|
||||||
if ix.dhtProvideCancel != nil {
|
if ix.dhtProvideCancel != nil {
|
||||||
ix.dhtProvideCancel()
|
ix.dhtProvideCancel()
|
||||||
|
|||||||
@@ -19,6 +19,21 @@ func (v DefaultValidator) Select(key string, values [][]byte) (int, error) {
|
|||||||
type PeerRecordValidator struct{}
|
type PeerRecordValidator struct{}
|
||||||
|
|
||||||
func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
||||||
|
// Accept valid tombstones — deletion must be storable so it can propagate
|
||||||
|
// and win over stale live records on other DHT nodes via Select().
|
||||||
|
var ts TombstoneRecord
|
||||||
|
if err := json.Unmarshal(value, &ts); err == nil && ts.Tombstone {
|
||||||
|
if ts.PeerID == "" || ts.DID == "" {
|
||||||
|
return errors.New("tombstone: missing fields")
|
||||||
|
}
|
||||||
|
if time.Since(ts.DeletedAt) > tombstoneTTL {
|
||||||
|
return errors.New("tombstone: expired")
|
||||||
|
}
|
||||||
|
if _, err := ts.Verify(); err != nil {
|
||||||
|
return errors.New("tombstone: " + err.Error())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
var rec PeerRecord
|
var rec PeerRecord
|
||||||
if err := json.Unmarshal(value, &rec); err != nil {
|
if err := json.Unmarshal(value, &rec); err != nil {
|
||||||
@@ -35,6 +50,12 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
|||||||
return errors.New("record expired")
|
return errors.New("record expired")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TTL cap: publisher cannot set an expiry further than maxTTLSeconds in
|
||||||
|
// the future. Prevents abuse (e.g. records designed to linger for years).
|
||||||
|
if rec.ExpiryDate.After(time.Now().UTC().Add(maxTTLSeconds * time.Second)) {
|
||||||
|
return errors.New("TTL exceeds maximum allowed")
|
||||||
|
}
|
||||||
|
|
||||||
// Signature verification
|
// Signature verification
|
||||||
if _, err := rec.Verify(); err != nil {
|
if _, err := rec.Verify(); err != nil {
|
||||||
return errors.New("invalid signature")
|
return errors.New("invalid signature")
|
||||||
@@ -44,6 +65,14 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (v PeerRecordValidator) Select(key string, values [][]byte) (int, error) {
|
func (v PeerRecordValidator) Select(key string, values [][]byte) (int, error) {
|
||||||
|
// Tombstone always wins: a signed delete supersedes any live record,
|
||||||
|
// even if the live record has a later ExpiryDate.
|
||||||
|
for i, val := range values {
|
||||||
|
var ts TombstoneRecord
|
||||||
|
if err := json.Unmarshal(val, &ts); err == nil && ts.Tombstone {
|
||||||
|
return i, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var newest time.Time
|
var newest time.Time
|
||||||
index := 0
|
index := 0
|
||||||
|
|||||||
99
daemons/node/location/location.go
Normal file
99
daemons/node/location/location.go
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
// Package location resolves the geographic position of this node via IP
|
||||||
|
// geolocation and applies a privacy-preserving random offset proportional
|
||||||
|
// to the chosen granularity level before publishing the result.
|
||||||
|
package location
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"math/rand"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
peer "cloud.o-forge.io/core/oc-lib/models/peer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// fuzzRadius maps a granularity level to the largest random offset, in
// degrees, that may be applied on each axis (latitude first, longitude second).
//
//	1 → continent  ±15°   lat / ±20°   lng
//	2 → country    ±3°    lat / ±4°    lng  (default)
//	3 → region     ±0.5°  lat / ±0.7°  lng
//	4 → city       ±0.05° lat / ±0.07° lng
//
// Any other level (including 0, which Geolocate handles as opt-out before
// calling this function) yields the country radii.
func fuzzRadius(granularity int) (latR, lngR float64) {
	// levels[i] holds the {lat, lng} radii for granularity i+1.
	levels := [...][2]float64{
		{15.0, 20.0}, // continent
		{3.0, 4.0},   // country
		{0.5, 0.7},   // region
		{0.05, 0.07}, // city
	}
	if granularity < 1 || granularity > len(levels) {
		return 3.0, 4.0
	}
	radii := levels[granularity-1]
	return radii[0], radii[1]
}
|
||||||
|
|
||||||
|
// clamp restricts v to the closed interval [lo, hi]: values below lo become
// lo, values above hi become hi, anything else is returned unchanged.
//
// The bounds are deliberately not named min/max so that they do not shadow
// the built-in functions of the same name.
func clamp(v, lo, hi float64) float64 {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
|
||||||
|
|
||||||
|
// ipAPIResponse is the subset of fields returned by ip-api.com/json.
|
||||||
|
type ipAPIResponse struct {
|
||||||
|
Status string `json:"status"`
|
||||||
|
Lat float64 `json:"lat"`
|
||||||
|
Lon float64 `json:"lon"`
|
||||||
|
Country string `json:"country"`
|
||||||
|
Region string `json:"regionName"`
|
||||||
|
City string `json:"city"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Geolocate resolves the current public IP location via ip-api.com (free,
|
||||||
|
// no key required for non-commercial use), then fuzzes the result according
|
||||||
|
// to granularity.
|
||||||
|
//
|
||||||
|
// Returns nil if granularity == 0 (opt-out) or if the lookup fails.
|
||||||
|
func Geolocate(granularity int) *peer.PeerLocation {
|
||||||
|
if granularity == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
client := &http.Client{Timeout: 5 * time.Second}
|
||||||
|
resp, err := client.Get("http://ip-api.com/json?fields=status,lat,lon,country,regionName,city")
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
var result ipAPIResponse
|
||||||
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil || result.Status != "success" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
latR, lngR := fuzzRadius(granularity)
|
||||||
|
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||||
|
|
||||||
|
fuzzedLat := result.Lat + (rng.Float64()*2-1)*latR
|
||||||
|
fuzzedLng := result.Lon + (rng.Float64()*2-1)*lngR
|
||||||
|
|
||||||
|
fuzzedLat = clamp(fuzzedLat, -85.0, 85.0)
|
||||||
|
fuzzedLng = clamp(fuzzedLng, -180.0, 180.0)
|
||||||
|
|
||||||
|
fmt.Printf("[location] granularity=%d raw=(%.4f,%.4f) fuzzed=(%.4f,%.4f)\n",
|
||||||
|
granularity, result.Lat, result.Lon, fuzzedLat, fuzzedLng)
|
||||||
|
|
||||||
|
return &peer.PeerLocation{
|
||||||
|
Latitude: fuzzedLat,
|
||||||
|
Longitude: fuzzedLng,
|
||||||
|
Granularity: granularity,
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -10,6 +10,7 @@ import (
|
|||||||
|
|
||||||
oclib "cloud.o-forge.io/core/oc-lib"
|
oclib "cloud.o-forge.io/core/oc-lib"
|
||||||
"cloud.o-forge.io/core/oc-lib/config"
|
"cloud.o-forge.io/core/oc-lib/config"
|
||||||
|
pp_model "cloud.o-forge.io/core/oc-lib/models/peer"
|
||||||
"cloud.o-forge.io/core/oc-lib/tools"
|
"cloud.o-forge.io/core/oc-lib/tools"
|
||||||
pp "github.com/libp2p/go-libp2p/core/peer"
|
pp "github.com/libp2p/go-libp2p/core/peer"
|
||||||
"github.com/libp2p/go-libp2p/core/protocol"
|
"github.com/libp2p/go-libp2p/core/protocol"
|
||||||
@@ -25,8 +26,10 @@ type executionConsidersPayload struct {
|
|||||||
|
|
||||||
func ListenNATS(n *Node) {
|
func ListenNATS(n *Node) {
|
||||||
tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
|
tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
|
||||||
|
tools.PEER_BEHAVIOR_EVENT: func(resp tools.NATSResponse) { //nolint:typecheck
|
||||||
|
handlePeerBehaviorEvent(n, resp)
|
||||||
|
},
|
||||||
tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) {
|
tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) {
|
||||||
fmt.Println("PROPALGATION")
|
|
||||||
if resp.FromApp == config.GetAppName() {
|
if resp.FromApp == config.GetAppName() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -41,7 +44,6 @@ func ListenNATS(n *Node) {
|
|||||||
dtt := tools.DataType(propalgation.DataType)
|
dtt := tools.DataType(propalgation.DataType)
|
||||||
dt = &dtt
|
dt = &dtt
|
||||||
}
|
}
|
||||||
fmt.Println("PROPALGATION ACT", propalgation.DataType, propalgation.Action, propalgation.Action == tools.PB_CREATE, err)
|
|
||||||
if err == nil {
|
if err == nil {
|
||||||
switch propalgation.Action {
|
switch propalgation.Action {
|
||||||
case tools.PB_ADMIRALTY_CONFIG, tools.PB_MINIO_CONFIG:
|
case tools.PB_ADMIRALTY_CONFIG, tools.PB_MINIO_CONFIG:
|
||||||
@@ -116,6 +118,7 @@ func ListenNATS(n *Node) {
|
|||||||
}
|
}
|
||||||
n.StreamService.Mu.Unlock()
|
n.StreamService.Mu.Unlock()
|
||||||
} else {
|
} else {
|
||||||
|
fmt.Println("REACH PLANNER")
|
||||||
n.StreamService.PublishCommon(nil, resp.User, resp.Groups, fmt.Sprintf("%v", m["peer_id"]), stream.ProtocolSendPlanner, b)
|
n.StreamService.PublishCommon(nil, resp.User, resp.Groups, fmt.Sprintf("%v", m["peer_id"]), stream.ProtocolSendPlanner, b)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -158,6 +161,8 @@ func ListenNATS(n *Node) {
|
|||||||
} else {
|
} else {
|
||||||
m := map[string]interface{}{}
|
m := map[string]interface{}{}
|
||||||
if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
|
if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
|
||||||
|
fmt.Println("PB_SEARCH CATA", m)
|
||||||
|
|
||||||
n.PubSubService.SearchPublishEvent(
|
n.PubSubService.SearchPublishEvent(
|
||||||
context.Background(),
|
context.Background(),
|
||||||
dt,
|
dt,
|
||||||
@@ -172,3 +177,66 @@ func ListenNATS(n *Node) {
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handlePeerBehaviorEvent applies a PeerBehaviorReport received from a trusted
|
||||||
|
// service (oc-scheduler, oc-datacenter, …). It:
|
||||||
|
// 1. Loads the target peer from the local DB.
|
||||||
|
// 2. Deducts the trust penalty and appends a BehaviorWarning.
|
||||||
|
// 3. Auto-blacklists and evicts the peer stream when TrustScore ≤ threshold.
|
||||||
|
//
|
||||||
|
// oc-discovery does NOT re-emit a PROPALGATION_EVENT: propagation is strictly
|
||||||
|
// inbound (oc-catalog → oc-discovery). The blacklist takes effect locally at
|
||||||
|
// the next isPeerKnown() call, and immediately via EvictPeer().
|
||||||
|
func handlePeerBehaviorEvent(n *Node, resp tools.NATSResponse) {
|
||||||
|
var report tools.PeerBehaviorReport
|
||||||
|
if err := json.Unmarshal(resp.Payload, &report); err != nil {
|
||||||
|
fmt.Println("handlePeerBehaviorEvent: unmarshal error:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if report.TargetPeerID == "" {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||||
|
data := access.LoadOne(report.TargetPeerID)
|
||||||
|
if data.Data == nil {
|
||||||
|
fmt.Println("handlePeerBehaviorEvent: peer not found:", report.TargetPeerID)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
p := data.ToPeer()
|
||||||
|
if p == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Self-protection: never penalise ourselves.
|
||||||
|
if self, err := oclib.GetMySelf(); err == nil && self != nil && self.GetID() == p.GetID() {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
shouldBlacklist := p.ApplyBehaviorReport(report)
|
||||||
|
if shouldBlacklist && p.Relation != pp_model.BLACKLIST {
|
||||||
|
p.Relation = pp_model.BLACKLIST
|
||||||
|
fmt.Printf("handlePeerBehaviorEvent: auto-blacklisting peer %s — reason: %s\n",
|
||||||
|
p.PeerID, p.BlacklistReason)
|
||||||
|
// Immediately evict any active stream so the peer can no longer heartbeat.
|
||||||
|
if n.IndexerService != nil {
|
||||||
|
n.IndexerService.EvictPeer(p.PeerID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Persist updated trust score + relation locally.
|
||||||
|
if updated := access.UpdateOne(p.Serialize(p), p.GetID()); updated.Err != "" {
|
||||||
|
fmt.Println("handlePeerBehaviorEvent: could not update peer:", updated.Err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Notify oc-peer (and any other local NATS consumer) of the updated peer record
|
||||||
|
// via CREATE_RESOURCE so they can synchronise their own state.
|
||||||
|
if b, err := json.Marshal(p.Serialize(p)); err == nil {
|
||||||
|
tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
|
||||||
|
FromApp: "oc-discovery",
|
||||||
|
Datatype: tools.PEER,
|
||||||
|
Method: int(tools.CREATE_RESOURCE),
|
||||||
|
Payload: b,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import (
|
|||||||
"oc-discovery/conf"
|
"oc-discovery/conf"
|
||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
"oc-discovery/daemons/node/indexer"
|
"oc-discovery/daemons/node/indexer"
|
||||||
|
"oc-discovery/daemons/node/location"
|
||||||
"oc-discovery/daemons/node/pubsub"
|
"oc-discovery/daemons/node/pubsub"
|
||||||
"oc-discovery/daemons/node/stream"
|
"oc-discovery/daemons/node/stream"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -108,7 +109,11 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
fresh := *node.peerRecord
|
fresh := *node.peerRecord
|
||||||
fresh.PeerRecordPayload.ExpiryDate = time.Now().UTC().Add(2 * time.Minute)
|
ttl := time.Duration(fresh.TTLSeconds) * time.Second
|
||||||
|
if ttl <= 0 {
|
||||||
|
ttl = indexer.DefaultTTLSeconds * time.Second
|
||||||
|
}
|
||||||
|
fresh.PeerRecordPayload.ExpiryDate = time.Now().UTC().Add(ttl)
|
||||||
payload, _ := json.Marshal(fresh.PeerRecordPayload)
|
payload, _ := json.Marshal(fresh.PeerRecordPayload)
|
||||||
fresh.Signature, err = priv.Sign(payload)
|
fresh.Signature, err = priv.Sign(payload)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -170,10 +175,24 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
|||||||
if err != nil || evt.From == node.PeerID.String() {
|
if err != nil || evt.From == node.PeerID.String() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
fmt.Println("PUBSUB SendResponse bef peerrece")
|
||||||
if p, err := node.GetPeerRecord(ctx, evt.From); err == nil && len(p) > 0 && m["search"] != nil {
|
if p, err := node.GetPeerRecord(ctx, evt.From); err == nil && len(p) > 0 && m["search"] != nil {
|
||||||
|
fmt.Println("PUBSUB SendResponse af peerrece", m)
|
||||||
node.StreamService.SendResponse(p[0], &evt, fmt.Sprintf("%v", m["search"]))
|
node.StreamService.SendResponse(p[0], &evt, fmt.Sprintf("%v", m["search"]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
node.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
|
||||||
|
if isNew {
|
||||||
|
// DB blacklist check: blocks reconnection after EvictPeer + blacklist.
|
||||||
|
if !node.isPeerKnown(remotePeer) {
|
||||||
|
return errors.New("peer is blacklisted or unknown")
|
||||||
|
}
|
||||||
|
if !node.ConnGuard.Allow() {
|
||||||
|
return errors.New("connection rate limit exceeded, retry later")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
logger.Info().Msg("subscribe to decentralized search flow...")
|
logger.Info().Msg("subscribe to decentralized search flow...")
|
||||||
go node.SubscribeToSearch(node.PS, &f)
|
go node.SubscribeToSearch(node.PS, &f)
|
||||||
logger.Info().Msg("connect to NATS")
|
logger.Info().Msg("connect to NATS")
|
||||||
@@ -187,6 +206,39 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
|
|||||||
return node, nil
|
return node, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isPeerKnown is the stream-level gate: returns true if pid is allowed.
|
||||||
|
// Check order (fast → slow):
|
||||||
|
// 1. In-memory stream records — currently heartbeating to this indexer.
|
||||||
|
// 2. Local DB by peer_id — known peer, blacklist enforced here.
|
||||||
|
// 3. DHT /pid/{peerID} → /node/{DID} — registered on any indexer.
|
||||||
|
//
|
||||||
|
// ProtocolHeartbeat and ProtocolPublish handlers do NOT call this — they are
|
||||||
|
// the streams through which a node first makes itself known.
|
||||||
|
func (d *Node) isPeerKnown(pid pp.ID) bool {
|
||||||
|
// 1. Fast path: active heartbeat session.
|
||||||
|
d.StreamMU.RLock()
|
||||||
|
_, active := d.StreamRecords[common.ProtocolHeartbeat][pid]
|
||||||
|
d.StreamMU.RUnlock()
|
||||||
|
if active {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
// 2. Local DB: known peer (handles blacklist).
|
||||||
|
access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
|
||||||
|
results := access.Search(&dbs.Filters{
|
||||||
|
And: map[string][]dbs.Filter{
|
||||||
|
"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
|
||||||
|
},
|
||||||
|
}, pid.String(), false)
|
||||||
|
for _, item := range results.Data {
|
||||||
|
p, ok := item.(*peer.Peer)
|
||||||
|
if !ok || p.PeerID != pid.String() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return p.Relation != peer.BLACKLIST
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
func (d *Node) Close() {
|
func (d *Node) Close() {
|
||||||
if d.isIndexer && d.IndexerService != nil {
|
if d.isIndexer && d.IndexerService != nil {
|
||||||
d.IndexerService.Close()
|
d.IndexerService.Close()
|
||||||
@@ -211,11 +263,16 @@ func (d *Node) publishPeerRecord(
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
stream := common.Indexers.Streams.GetPerID(common.ProtocolPublish, ad.Info.ID)
|
stream := common.Indexers.Streams.GetPerID(common.ProtocolPublish, ad.Info.ID)
|
||||||
|
ttl := time.Duration(rec.TTLSeconds) * time.Second
|
||||||
|
if ttl <= 0 {
|
||||||
|
ttl = indexer.DefaultTTLSeconds * time.Second
|
||||||
|
}
|
||||||
base := indexer.PeerRecordPayload{
|
base := indexer.PeerRecordPayload{
|
||||||
Name: rec.Name,
|
Name: rec.Name,
|
||||||
DID: rec.DID,
|
DID: rec.DID,
|
||||||
PubKey: rec.PubKey,
|
PubKey: rec.PubKey,
|
||||||
ExpiryDate: time.Now().UTC().Add(2 * time.Minute),
|
TTLSeconds: rec.TTLSeconds,
|
||||||
|
ExpiryDate: time.Now().UTC().Add(ttl),
|
||||||
}
|
}
|
||||||
payload, _ := json.Marshal(base)
|
payload, _ := json.Marshal(base)
|
||||||
rec.PeerRecordPayload = base
|
rec.PeerRecordPayload = base
|
||||||
@@ -377,13 +434,12 @@ func (d *Node) claimInfo(
|
|||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
expiry := now.Add(150 * time.Second)
|
|
||||||
|
|
||||||
pRec := indexer.PeerRecordPayload{
|
pRec := indexer.PeerRecordPayload{
|
||||||
Name: name,
|
Name: name,
|
||||||
DID: did, // REAL PEER ID
|
DID: did, // REAL PEER ID
|
||||||
PubKey: pubBytes,
|
PubKey: pubBytes,
|
||||||
ExpiryDate: expiry,
|
TTLSeconds: indexer.DefaultTTLSeconds,
|
||||||
|
ExpiryDate: now.Add(indexer.DefaultTTLSeconds * time.Second),
|
||||||
}
|
}
|
||||||
d.PeerID = d.Host.ID()
|
d.PeerID = d.Host.ID()
|
||||||
payload, _ := json.Marshal(pRec)
|
payload, _ := json.Marshal(pRec)
|
||||||
@@ -400,6 +456,7 @@ func (d *Node) claimInfo(
|
|||||||
rec.StreamAddress = "/ip4/" + conf.GetConfig().Hostname + "/tcp/" + fmt.Sprintf("%v", conf.GetConfig().NodeEndpointPort) + "/p2p/" + rec.PeerID
|
rec.StreamAddress = "/ip4/" + conf.GetConfig().Hostname + "/tcp/" + fmt.Sprintf("%v", conf.GetConfig().NodeEndpointPort) + "/p2p/" + rec.PeerID
|
||||||
rec.NATSAddress = oclib.GetConfig().NATSUrl
|
rec.NATSAddress = oclib.GetConfig().NATSUrl
|
||||||
rec.WalletAddress = "my-wallet"
|
rec.WalletAddress = "my-wallet"
|
||||||
|
rec.Location = location.Geolocate(conf.GetConfig().LocationGranularity)
|
||||||
|
|
||||||
if err := d.publishPeerRecord(rec); err != nil {
|
if err := d.publishPeerRecord(rec); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -424,6 +481,55 @@ func (d *Node) claimInfo(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeleteRecord broadcasts a signed tombstone to all connected indexers, signalling
|
||||||
|
// that this node is voluntarily leaving the network.
|
||||||
|
// Each indexer verifies the signature, stores the tombstone in the DHT (replacing
|
||||||
|
// the live record), and evicts the peer from its active pool.
|
||||||
|
// After a successful call, d.peerRecord is set to nil.
|
||||||
|
func (d *Node) DeleteRecord() error {
|
||||||
|
if d.peerRecord == nil {
|
||||||
|
return errors.New("no peer record to delete")
|
||||||
|
}
|
||||||
|
priv, err := tools.LoadKeyFromFilePrivate()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
pubBytes, err := crypto.MarshalPublicKey(priv.GetPublic())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
tp := indexer.TombstonePayload{
|
||||||
|
DID: d.peerRecord.DID,
|
||||||
|
PeerID: d.PeerID.String(),
|
||||||
|
DeletedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
payloadBytes, _ := json.Marshal(tp)
|
||||||
|
sig, err := priv.Sign(payloadBytes)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
ts := &indexer.TombstoneRecord{
|
||||||
|
TombstonePayload: tp,
|
||||||
|
PubKey: pubBytes,
|
||||||
|
Tombstone: true,
|
||||||
|
Signature: sig,
|
||||||
|
}
|
||||||
|
data, _ := json.Marshal(ts)
|
||||||
|
for _, ad := range common.Indexers.GetAddrs() {
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
s, err := d.Host.NewStream(ctx, ad.Info.ID, common.ProtocolDelete)
|
||||||
|
cancel()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
s.SetDeadline(time.Now().Add(5 * time.Second))
|
||||||
|
s.Write(data)
|
||||||
|
s.Close()
|
||||||
|
}
|
||||||
|
d.peerRecord = nil
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO:
|
TODO:
|
||||||
- Le booking est un flow neuf décentralisé :
|
- Le booking est un flow neuf décentralisé :
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"oc-discovery/conf"
|
"oc-discovery/conf"
|
||||||
"oc-discovery/daemons/node/common"
|
"oc-discovery/daemons/node/common"
|
||||||
"oc-discovery/daemons/node/stream"
|
"oc-discovery/daemons/node/stream"
|
||||||
@@ -46,6 +47,7 @@ func (ps *PubSubService) SearchPublishEvent(
|
|||||||
// The returned composite key is used as User in the GossipSub event so that
|
// The returned composite key is used as User in the GossipSub event so that
|
||||||
// remote peers echo it back unchanged, allowing IsActive to validate results.
|
// remote peers echo it back unchanged, allowing IsActive to validate results.
|
||||||
searchKey := ps.StreamService.ResourceSearches.Register(user, cancel, idleTimeout)
|
searchKey := ps.StreamService.ResourceSearches.Register(user, cancel, idleTimeout)
|
||||||
|
fmt.Println("PUBLISH ON PUBSUB", common.TopicPubSubSearch, searchKey)
|
||||||
return ps.publishEvent(searchCtx, dt, tools.PB_SEARCH, common.TopicPubSubSearch, searchKey, b)
|
return ps.publishEvent(searchCtx, dt, tools.PB_SEARCH, common.TopicPubSubSearch, searchKey, b)
|
||||||
default:
|
default:
|
||||||
return errors.New("no type of research found")
|
return errors.New("no type of research found")
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ func (ps *StreamService) handleEvent(protocol string, evt *common.Event) error {
|
|||||||
}
|
}
|
||||||
}*/
|
}*/
|
||||||
if protocol == ProtocolSendPlanner {
|
if protocol == ProtocolSendPlanner {
|
||||||
|
fmt.Println("sendPlanner", evt)
|
||||||
if err := ps.sendPlanner(evt); err != nil {
|
if err := ps.sendPlanner(evt); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func (ps *StreamService) PublishesCommon(dt *tools.DataType, user string, groups
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups []string, toPeerID string, proto protocol.ID, resource []byte) (*common.Stream, error) {
|
func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups []string, toPeerID string, proto protocol.ID, resource []byte) (*common.Stream, error) {
|
||||||
fmt.Println("PublishCommon")
|
fmt.Println("PublishCommon", toPeerID)
|
||||||
if toPeerID == ps.Key.String() {
|
if toPeerID == ps.Key.String() {
|
||||||
fmt.Println("Can't send to ourself !")
|
fmt.Println("Can't send to ourself !")
|
||||||
return nil, errors.New("Can't send to ourself !")
|
return nil, errors.New("Can't send to ourself !")
|
||||||
@@ -127,6 +127,7 @@ func (s *StreamService) write(
|
|||||||
}
|
}
|
||||||
// should create a very temp stream
|
// should create a very temp stream
|
||||||
if s.Streams, err = common.TempStream(s.Host, *peerID, proto, did, s.Streams, pts, &s.Mu); err != nil {
|
if s.Streams, err = common.TempStream(s.Host, *peerID, proto, did, s.Streams, pts, &s.Mu); err != nil {
|
||||||
|
fmt.Println("TempStream", err)
|
||||||
return nil, errors.New("no stream available for protocol " + fmt.Sprintf("%v", proto) + " from PID " + peerID.ID.String())
|
return nil, errors.New("no stream available for protocol " + fmt.Sprintf("%v", proto) + " from PID " + peerID.ID.String())
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -3,7 +3,7 @@ module oc-discovery
|
|||||||
go 1.25.0
|
go 1.25.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee
|
||||||
github.com/ipfs/go-cid v0.6.0
|
github.com/ipfs/go-cid v0.6.0
|
||||||
github.com/libp2p/go-libp2p v0.47.0
|
github.com/libp2p/go-libp2p v0.47.0
|
||||||
github.com/libp2p/go-libp2p-record v0.3.1
|
github.com/libp2p/go-libp2p-record v0.3.1
|
||||||
|
|||||||
6
go.sum
6
go.sum
@@ -8,6 +8,12 @@ cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2 h1:DuB6SDThFVJVQ
|
|||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406 h1:FN1EtRWn228JprAbnY5K863Fzj+SzMqQtKRtwvECbLw=
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406 h1:FN1EtRWn228JprAbnY5K863Fzj+SzMqQtKRtwvECbLw=
|
||||||
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260325092016-4580200e8057 h1:pR+lZzcCWZ0kke2r2xXa7OpdbLpPW3gZSWZ8gGHh274=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260325092016-4580200e8057/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260331144112-c0722483b86c h1:wTIridvhud8zwMsMkwxgrQ+j+6UAo2IHDr3N80AA6zc=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260331144112-c0722483b86c/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee h1:iJ1kgMbBOBIHwS4jHOVB5zFqOd7J9ZlweQBuchnmvT0=
|
||||||
|
cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
|
||||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||||
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
|
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
|
||||||
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
|
||||||
|
|||||||
1
main.go
1
main.go
@@ -35,6 +35,7 @@ func main() {
|
|||||||
|
|
||||||
conf.GetConfig().MinIndexer = o.GetIntDefault("MIN_INDEXER", 1)
|
conf.GetConfig().MinIndexer = o.GetIntDefault("MIN_INDEXER", 1)
|
||||||
conf.GetConfig().MaxIndexer = o.GetIntDefault("MAX_INDEXER", 5)
|
conf.GetConfig().MaxIndexer = o.GetIntDefault("MAX_INDEXER", 5)
|
||||||
|
conf.GetConfig().LocationGranularity = o.GetIntDefault("LOCATION_GRANULARITY", 2)
|
||||||
|
|
||||||
ctx, stop := signal.NotifyContext(
|
ctx, stop := signal.NotifyContext(
|
||||||
context.Background(),
|
context.Background(),
|
||||||
|
|||||||
Reference in New Issue
Block a user