oc-discovery -> conf

This commit is contained in:
mr
2026-04-08 10:04:41 +02:00
parent 46dee0a6cb
commit 29b26d366e
21 changed files with 1934 additions and 119 deletions

View File

@@ -30,6 +30,14 @@ type Config struct {
MaxHBPerMinute int // default 5 MaxHBPerMinute int // default 5
MaxPublishPerMinute int // default 10 MaxPublishPerMinute int // default 10
MaxGetPerMinute int // default 50 MaxGetPerMinute int // default 50
// LocationGranularity controls how precisely this node discloses its position.
// 0 = opt-out (no location published)
// 1 = continent (±15°)
// 2 = country (±3°) — default
// 3 = region (±0.5°)
// 4 = city (±0.05°)
LocationGranularity int // default 2
} }
var instance *Config var instance *Config

View File

@@ -13,6 +13,26 @@ import (
oclib "cloud.o-forge.io/core/oc-lib" oclib "cloud.o-forge.io/core/oc-lib"
) )
// MemberEventType is the SWIM membership event classification.
type MemberEventType string
const (
// MemberAlive announces that a previously suspected peer is reachable again.
MemberAlive MemberEventType = "alive"
// MemberSuspect announces that a peer failed a direct probe and may be down.
MemberSuspect MemberEventType = "suspect"
// MemberDead announces that a peer exceeded the suspect timeout and was evicted.
MemberDead MemberEventType = "dead"
)
// MemberEvent is a SWIM membership event piggybacked on heartbeats (infection-style).
// HopsLeft starts at InitialEventHops and is decremented on each retransmission.
// Receivers discard events whose HopsLeft reaches 0 instead of forwarding them further.
// Deduplication by (PeerID, Incarnation): higher incarnation or higher-priority type wins.
type MemberEvent struct {
// Type classifies the event (alive / suspect / dead).
Type MemberEventType `json:"type"`
// PeerID is the string form of the subject peer's libp2p ID.
PeerID string `json:"peer_id"`
// Incarnation is the subject peer's SWIM incarnation number at event time.
Incarnation uint64 `json:"incarnation"`
// HopsLeft is the remaining number of gossip retransmission rounds.
HopsLeft int `json:"hops_left"`
}
type Heartbeat struct { type Heartbeat struct {
Name string `json:"name"` Name string `json:"name"`
Stream *Stream `json:"stream"` Stream *Stream `json:"stream"`
@@ -39,6 +59,13 @@ type Heartbeat struct {
// Only one indexer per node receives Referent=true at a time (the best-scored one). // Only one indexer per node receives Referent=true at a time (the best-scored one).
// The indexer stores the node in its referencedNodes for distributed search. // The indexer stores the node in its referencedNodes for distributed search.
Referent bool `json:"referent,omitempty"` Referent bool `json:"referent,omitempty"`
// SuspectedIncarnation is set when this node currently suspects the target indexer.
// If the value matches the indexer's own incarnation, the indexer increments its
// incarnation and replies with the new value — this is the SWIM refutation signal.
SuspectedIncarnation *uint64 `json:"suspected_incarnation,omitempty"`
// MembershipEvents carries SWIM events piggybacked on this heartbeat.
// Events are forwarded infection-style until HopsLeft reaches 0.
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
} }
// SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer. // SearchPeerRequest is sent by a node to an indexer via ProtocolSearchPeer.
@@ -104,6 +131,13 @@ type HeartbeatResponse struct {
// Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives. // Seeds: node de-stickies this indexer once it has MinIndexer non-seed alternatives.
// Non-seeds: node removes this indexer immediately if it has enough alternatives. // Non-seeds: node removes this indexer immediately if it has enough alternatives.
SuggestMigrate bool `json:"suggest_migrate,omitempty"` SuggestMigrate bool `json:"suggest_migrate,omitempty"`
// Incarnation is this indexer's current SWIM incarnation number.
// It is incremented whenever the indexer refutes a suspicion signal.
// The node tracks this to detect explicit refutations and to clear suspect state.
Incarnation uint64 `json:"incarnation,omitempty"`
// MembershipEvents carries SWIM events piggybacked on this response.
// The node should forward them to its other indexers (infection-style).
MembershipEvents []MemberEvent `json:"membership_events,omitempty"`
} }
// ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer. // ComputeIndexerScore computes a composite quality score [0, 100] for the connecting peer.

View File

@@ -24,6 +24,11 @@ var TimeWatcher time.Time
// retryRunning guards against launching multiple retryUntilSeedResponds goroutines. // retryRunning guards against launching multiple retryUntilSeedResponds goroutines.
var retryRunning atomic.Bool var retryRunning atomic.Bool
// suspectTimeout is the maximum time a peer can stay in suspect state before
// being declared dead and evicted. Aligned with 3 heartbeat intervals so the
// peer has at least 3 chances to respond or refute the suspicion signal.
const suspectTimeout = 3 * RecommendedHeartbeatInterval
func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error { func ConnectToIndexers(h host.Host, minIndexer int, maxIndexer int, recordFn ...func() json.RawMessage) error {
TimeWatcher = time.Now().UTC() TimeWatcher = time.Now().UTC()
logger := oclib.GetLogger() logger := oclib.GetLogger()
@@ -304,6 +309,11 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
if recFn != nil { if recFn != nil {
baseHB.Record = recFn() baseHB.Record = recFn()
} }
// Piggyback SWIM membership events on every outgoing heartbeat batch.
// All peers in the pool receive the same events this tick.
if isIndexerHB {
baseHB.MembershipEvents = NodeEventQueue.Drain(5)
}
// Determine the referent indexer: highest-scored one receives Referent=true // Determine the referent indexer: highest-scored one receives Referent=true
// so it stores us in its referencedNodes for distributed search. // so it stores us in its referencedNodes for distributed search.
var referentAddr string var referentAddr string
@@ -323,6 +333,13 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr { if isIndexerHB && referentAddr != "" && ai.Addr == referentAddr {
hb.Referent = true hb.Referent = true
} }
// SWIM: signal suspicion so the peer can refute by incrementing incarnation.
if isIndexerHB {
if score := directory.GetScore(ai.Addr); score != nil && !score.UptimeTracker.SuspectedAt.IsZero() {
inc := score.UptimeTracker.LastKnownIncarnation
hb.SuspectedIncarnation = &inc
}
}
// Ensure an IndexerScore entry exists for this peer. // Ensure an IndexerScore entry exists for this peer.
var score *Score var score *Score
if isIndexerHB { if isIndexerHB {
@@ -378,6 +395,40 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
score.UptimeTracker.RecordHeartbeat() score.UptimeTracker.RecordHeartbeat()
score.UptimeTracker.ConsecutiveFails = 0 // reset on success score.UptimeTracker.ConsecutiveFails = 0 // reset on success
// SWIM: clear suspect state on any successful direct heartbeat.
// The peer proved it is reachable; if it also incremented its incarnation
// that is an explicit refutation — log it distinctly.
if !score.UptimeTracker.SuspectedAt.IsZero() {
wasExplicitRefutation := resp != nil &&
resp.Incarnation > 0 &&
resp.Incarnation > score.UptimeTracker.LastKnownIncarnation
if wasExplicitRefutation {
logger.Info().Str("peer", ai.Info.ID.String()).
Uint64("old_incarnation", score.UptimeTracker.LastKnownIncarnation).
Uint64("new_incarnation", resp.Incarnation).
Msg("[swim] explicit refutation: incarnation incremented, suspicion cleared")
} else {
logger.Info().Str("peer", ai.Info.ID.String()).
Msg("[swim] suspect cleared — peer responded to direct probe")
}
score.UptimeTracker.SuspectedAt = time.Time{}
// Propagate alive event so other nodes can clear their own suspect state.
inc := score.UptimeTracker.LastKnownIncarnation
if resp != nil && resp.Incarnation > 0 {
inc = resp.Incarnation
}
NodeEventQueue.Add(MemberEvent{
Type: MemberAlive,
PeerID: ai.Info.ID.String(),
Incarnation: inc,
HopsLeft: InitialEventHops,
})
}
// Always update last known incarnation.
if resp != nil && resp.Incarnation > score.UptimeTracker.LastKnownIncarnation {
score.UptimeTracker.LastKnownIncarnation = resp.Incarnation
}
maxRTT := BaseRoundTrip * 10 maxRTT := BaseRoundTrip * 10
latencyScore := 1.0 - float64(rtt)/float64(maxRTT) latencyScore := 1.0 - float64(rtt)/float64(maxRTT)
if latencyScore < 0 { if latencyScore < 0 {
@@ -458,6 +509,15 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
score.witnessConsistent++ score.witnessConsistent++
} }
} }
// SWIM infection: process membership events piggybacked on this response.
// Events with HopsLeft > 0 are re-queued for forwarding to other indexers.
for _, ev := range resp.MembershipEvents {
if ev.HopsLeft > 0 {
NodeEventQueue.Add(ev)
}
applyMemberEvent(ev, directory)
}
} }
score.Score = score.ComputeNodeSideScore(latencyScore) score.Score = score.ComputeNodeSideScore(latencyScore)
@@ -530,6 +590,59 @@ func SendHeartbeat(ctx context.Context, proto protocol.ID, name string, h host.H
}() }()
} }
// runIndirectProbe asks up to k live indexers (voters) to probe target via
// ProtocolBandwidthProbe and returns true if the majority report reachable.
// This is the SWIM explicit indirect ping — called only on heartbeat failure.
func runIndirectProbe(h host.Host, target pp.AddrInfo, voters []Entry, k int) bool {
	if k > len(voters) {
		k = len(voters)
	}
	if k == 0 {
		return false
	}
	// Pick k voters uniformly at random so repeated probes do not always
	// burden the same peers.
	pool := make([]Entry, len(voters))
	copy(pool, voters)
	rand.Shuffle(len(pool), func(a, b int) { pool[a], pool[b] = pool[b], pool[a] })
	// Each voter contributes exactly one vote; the channel is buffered so
	// goroutines never block and cannot leak.
	votes := make(chan bool, k)
	askVoter := func(v pp.AddrInfo) {
		ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second)
		defer cancel()
		s, err := h.NewStream(ctx, v.ID, ProtocolIndirectProbe)
		if err != nil {
			votes <- false
			return
		}
		defer s.Close()
		s.SetDeadline(time.Now().Add(8 * time.Second))
		if err := json.NewEncoder(s).Encode(IndirectProbeRequest{Target: target}); err != nil {
			votes <- false
			return
		}
		var reply IndirectProbeResponse
		if err := json.NewDecoder(s).Decode(&reply); err != nil {
			votes <- false
			return
		}
		votes <- reply.Reachable
	}
	for _, candidate := range pool[:k] {
		// A voter without dialable AddrInfo counts as an unreachable vote.
		if candidate.Info == nil {
			votes <- false
			continue
		}
		go askVoter(*candidate.Info)
	}
	yes := 0
	for i := 0; i < k; i++ {
		if <-votes {
			yes++
		}
	}
	// Strict majority of the k votes decides.
	return yes > k/2
}
func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory, func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) { addr string, info *pp.AddrInfo, isIndexerHB bool, maxPool int, err error) {
logger := oclib.GetLogger() logger := oclib.GetLogger()
@@ -545,22 +658,96 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error()) Msg("[pool] seed heartbeat failed — keeping in pool, ticker will retry " + err.Error())
return return
} }
// Indirect probing via other alive indexers:
// If other indexers in the pool are still responding, they act as implicit voters := directory.GetAddrs()
// third-party witnesses confirming our connectivity is fine — the failed if len(voters) <= 1 {
// indexer is genuinely dead, evict immediately. // Last indexer: no peer available to proxy a probe.
// If this is the last indexer, there is no third party. Retry up to 3 times // Enter suspect state on first failure; evict only after suspectTimeout.
// (consecutive failures tracked in UptimeTracker) before declaring it dead. if score.UptimeTracker.SuspectedAt.IsZero() {
if len(directory.GetAddrs()) <= 1 { score.UptimeTracker.SuspectedAt = time.Now().UTC()
score.UptimeTracker.ConsecutiveFails++ score.UptimeTracker.ConsecutiveFails++
if score.UptimeTracker.ConsecutiveFails < 3 { NodeEventQueue.Add(MemberEvent{
Type: MemberSuspect,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
logger.Warn().Str("peer", info.ID.String()). logger.Warn().Str("peer", info.ID.String()).
Int("attempt", score.UptimeTracker.ConsecutiveFails). Msg("[swim] last indexer suspect — waiting for refutation or timeout")
Msg("[indirect] last indexer failed, retrying before eviction")
return return
} }
if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
logger.Warn().Str("peer", info.ID.String()). logger.Warn().Str("peer", info.ID.String()).
Msg("[indirect] last indexer failed 3 times consecutively, evicting") Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
Msg("[swim] last indexer still failing, holding in suspect state")
return
}
// suspectTimeout exceeded with no refutation — declare dead.
logger.Warn().Str("peer", info.ID.String()).
Msg("[swim] last indexer suspect timeout exceeded, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
} else if score.UptimeTracker.SuspectedAt.IsZero() {
// First miss with other live indexers available:
// enter suspect state and run an indirect probe asynchronously.
score.UptimeTracker.SuspectedAt = time.Now().UTC()
score.UptimeTracker.ConsecutiveFails++
NodeEventQueue.Add(MemberEvent{
Type: MemberSuspect,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
probeTarget := *info
go func() {
alive := runIndirectProbe(h, probeTarget, voters, 2)
if alive {
// Other indexers confirm the target is reachable → our direct
// link may be temporarily broken. Keep suspected; the next
// heartbeat tick will retry the direct probe.
logger.Warn().Str("peer", probeTarget.ID.String()).
Msg("[swim] indirect probe: target reachable by peers, keeping (suspected)")
} else {
// Majority of probes also failed → the indexer is genuinely dead.
logger.Warn().Str("peer", probeTarget.ID.String()).
Msg("[swim] indirect probe: target unreachable, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: probeTarget.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
consensusVoters := evictPeer(directory, addr, probeTarget.ID, proto)
need := max(maxPool-len(consensusVoters), 1)
if len(consensusVoters) > 0 {
TriggerConsensus(h, consensusVoters, need)
} else {
replenishIndexersFromDHT(h, need)
}
}
}()
return // decision deferred to probe goroutine
} else if time.Since(score.UptimeTracker.SuspectedAt) < suspectTimeout {
// Still within suspect window — the next tick's SuspectedIncarnation
// in the heartbeat may trigger a refutation. Keep retrying.
logger.Warn().Str("peer", info.ID.String()).
Dur("suspected_for", time.Since(score.UptimeTracker.SuspectedAt)).
Msg("[swim] suspected peer still failing, waiting for refutation or timeout")
return
} else {
// suspectTimeout exceeded — declare dead and fall through to eviction.
logger.Warn().Str("peer", info.ID.String()).
Msg("[swim] suspect timeout exceeded, evicting")
NodeEventQueue.Add(MemberEvent{
Type: MemberDead,
PeerID: info.ID.String(),
Incarnation: score.UptimeTracker.LastKnownIncarnation,
HopsLeft: InitialEventHops,
})
} }
} }
} }
@@ -587,3 +774,34 @@ func HeartbeatFailure(h host.Host, proto protocol.ID, directory *Directory,
} }
} }
} }
// applyMemberEvent applies an incoming SWIM membership event to the local directory.
// Only MemberAlive events with a higher incarnation can clear an existing suspect state;
// MemberSuspect / MemberDead from gossip are informational — we do not act on them
// unilaterally since the node has its own direct-probe evidence.
func applyMemberEvent(ev MemberEvent, directory *Directory) {
	if ev.Type != MemberAlive {
		return
	}
	logger := oclib.GetLogger()
	for _, entry := range directory.GetAddrs() {
		if entry.Info == nil || entry.Info.ID.String() != ev.PeerID {
			continue
		}
		// Found the directory entry the event refers to.
		sc := directory.GetScore(entry.Addr)
		if sc == nil || sc.UptimeTracker == nil {
			return
		}
		// Stale or equal incarnation carries no new information.
		if ev.Incarnation <= sc.UptimeTracker.LastKnownIncarnation {
			return
		}
		sc.UptimeTracker.LastKnownIncarnation = ev.Incarnation
		if sc.UptimeTracker.SuspectedAt.IsZero() {
			return
		}
		// The gossiped incarnation outruns our suspicion: clear it.
		sc.UptimeTracker.SuspectedAt = time.Time{}
		sc.UptimeTracker.ConsecutiveFails = 0
		logger.Info().Str("peer", ev.PeerID).
			Uint64("incarnation", ev.Incarnation).
			Msg("[swim] alive event via gossip cleared suspicion")
		return
	}
}

View File

@@ -146,6 +146,22 @@ func (s *LongLivedPubSubService) SubscribeToSearch(ps *pubsub.PubSub, f *func(co
if f != nil { if f != nil {
return SubscribeEvents(s, context.Background(), TopicPubSubSearch, -1, *f) return SubscribeEvents(s, context.Background(), TopicPubSubSearch, -1, *f)
} }
// Even when no handler is needed (e.g. strict indexers), we must call
// topic.Subscribe() so that this peer sends a SUBSCRIBE control message
// to connected peers and joins the GossipSub mesh as a forwarder.
// Without this, messages cannot be relayed through indexers between nodes.
topic := s.LongLivedPubSubs[TopicPubSubSearch]
sub, err := topic.Subscribe()
if err != nil {
return err
}
go func() {
for {
if _, err := sub.Next(context.Background()); err != nil {
return
}
}
}()
return nil return nil
} }
@@ -161,26 +177,27 @@ func SubscribeEvents[T interface{}](s *LongLivedPubSubService,
return err return err
} }
// launch loop waiting for results. // launch loop waiting for results.
go waitResults(s, ctx, sub, proto, timeout, f) go waitResults(topic, s, ctx, sub, proto, timeout, f)
return nil return nil
} }
func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) { func waitResults[T interface{}](topic *pubsub.Topic, s *LongLivedPubSubService, ctx context.Context, sub *pubsub.Subscription, proto string, timeout int, f func(context.Context, T, string)) {
defer ctx.Done() defer ctx.Done()
for { for {
s.PubsubMu.Lock() // check safely if cache is actually notified subscribed to topic s.PubsubMu.Lock() // check safely if cache is actually notified subscribed to topic
if s.LongLivedPubSubs[proto] == nil { // if not kill the loop. if s.LongLivedPubSubs[proto] == nil { // if not kill the loop.
s.PubsubMu.Unlock() s.LongLivedPubSubs[proto] = topic
break
} }
s.PubsubMu.Unlock() s.PubsubMu.Unlock()
// if still subscribed -> wait for new message // if still subscribed -> wait for new message
var cancel context.CancelFunc var cancel context.CancelFunc
if timeout != -1 { if timeout != -1 {
ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second) ctx, cancel = context.WithTimeout(ctx, time.Duration(timeout)*time.Second)
defer cancel() defer cancel()
} }
msg, err := sub.Next(ctx) msg, err := sub.Next(ctx)
if err != nil { if err != nil {
if errors.Is(err, context.DeadlineExceeded) { if errors.Is(err, context.DeadlineExceeded) {
@@ -197,5 +214,6 @@ func waitResults[T interface{}](s *LongLivedPubSubService, ctx context.Context,
continue continue
} }
f(ctx, evt, fmt.Sprintf("%v", proto)) f(ctx, evt, fmt.Sprintf("%v", proto))
fmt.Println("DEADLOCK ?")
} }
} }

View File

@@ -21,7 +21,12 @@ type UptimeTracker struct {
FirstSeen time.Time FirstSeen time.Time
LastSeen time.Time LastSeen time.Time
TotalOnline time.Duration TotalOnline time.Duration
ConsecutiveFails int // incremented on each heartbeat failure; reset to 0 on success ConsecutiveFails int // kept for compatibility / logging; primary eviction uses SuspectedAt
SuspectedAt time.Time // SWIM: non-zero when this peer is in suspect state
// LastKnownIncarnation is the last incarnation number received from this peer.
// When a peer sees itself suspected (SuspectedIncarnation in heartbeat) it
// increments its incarnation and the node clears the suspect state on receipt.
LastKnownIncarnation uint64
} }
// RecordHeartbeat accumulates online time gap-aware: only counts the interval if // RecordHeartbeat accumulates online time gap-aware: only counts the interval if

View File

@@ -6,6 +6,7 @@ import (
"fmt" "fmt"
"io" "io"
"math/rand" "math/rand"
"oc-discovery/conf"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -22,6 +23,8 @@ type LongLivedStreamRecordedService[T interface{}] struct {
StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T] StreamRecords map[protocol.ID]map[pp.ID]*StreamRecord[T]
StreamMU sync.RWMutex StreamMU sync.RWMutex
maxNodesConn int maxNodesConn int
ConnGuard *ConnectionRateGuard
// AllowInbound, when set, is called once at stream open before any heartbeat // AllowInbound, when set, is called once at stream open before any heartbeat
// is decoded. remotePeer is the connecting peer; isNew is true when no // is decoded. remotePeer is the connecting peer; isNew is true when no
// StreamRecord exists yet (first-ever connection). Return a non-nil error // StreamRecord exists yet (first-ever connection). Return a non-nil error
@@ -39,13 +42,9 @@ type LongLivedStreamRecordedService[T interface{}] struct {
AfterDelete func(pid pp.ID, name string, did string) AfterDelete func(pid pp.ID, name string, did string)
// BuildHeartbeatResponse, when set, is called after each successfully decoded // BuildHeartbeatResponse, when set, is called after each successfully decoded
// heartbeat to build the response sent back to the node. // heartbeat to build the response sent back to the node.
// remotePeer is the peer that sent the heartbeat (used for offload routing). // remotePeer is the connecting peer. hb is the full decoded heartbeat, including
// need is how many more indexers the node wants (from hb.Need). // SWIM fields (SuspectedIncarnation, MembershipEvents) and record/challenge data.
// referent is true when the node designated this indexer as its search referent. BuildHeartbeatResponse func(remotePeer pp.ID, hb *Heartbeat) *HeartbeatResponse
// rawRecord is the fresh signed PeerRecord embedded in the heartbeat (hb.Record),
// passed directly so the handler does not race with AfterHeartbeat goroutine
// updating StreamRecord.Record.
BuildHeartbeatResponse func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *HeartbeatResponse
} }
func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int { func (ix *LongLivedStreamRecordedService[T]) MaxNodesConn() int {
@@ -57,6 +56,7 @@ func NewStreamRecordedService[T interface{}](h host.Host, maxNodesConn int) *Lon
LongLivedPubSubService: NewLongLivedPubSubService(h), LongLivedPubSubService: NewLongLivedPubSubService(h),
StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{}, StreamRecords: map[protocol.ID]map[pp.ID]*StreamRecord[T]{},
maxNodesConn: maxNodesConn, maxNodesConn: maxNodesConn,
ConnGuard: newConnectionRateGuard(),
} }
go service.StartGC(30 * time.Second) go service.StartGC(30 * time.Second)
// Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned // Garbage collection is needed on every Map of Long-Lived Stream... it may be a top level redesigned
@@ -247,7 +247,7 @@ func (ix *LongLivedStreamRecordedService[T]) HandleHeartbeat(s network.Stream) {
} }
// Send response back to the node (bidirectional heartbeat). // Send response back to the node (bidirectional heartbeat).
if ix.BuildHeartbeatResponse != nil { if ix.BuildHeartbeatResponse != nil {
if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record); resp != nil { if resp := ix.BuildHeartbeatResponse(s.Conn().RemotePeer(), hb); resp != nil {
s.SetWriteDeadline(time.Now().Add(3 * time.Second)) s.SetWriteDeadline(time.Now().Add(3 * time.Second))
json.NewEncoder(s).Encode(resp) json.NewEncoder(s).Encode(resp)
s.SetWriteDeadline(time.Time{}) s.SetWriteDeadline(time.Time{})
@@ -303,3 +303,52 @@ func CheckHeartbeat(h host.Host, s network.Stream, dec *json.Decoder, streams ma
return &pid, &hb, err return &pid, &hb, err
} }
} }
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
// ConnectionRateGuard limits the number of NEW incoming connections accepted
// within a sliding time window. It protects public indexers against coordinated
// registration floods (Sybil bursts).
// defaultMaxConnPerWindow is used when conf.MaxConnPerWindow is unset (<= 0).
const defaultMaxConnPerWindow = 20
// defaultConnWindowSecs is used when conf.ConnWindowSecs is unset (<= 0).
const defaultConnWindowSecs = 30
type ConnectionRateGuard struct {
// mu serializes Allow calls; window is mutated under it.
mu sync.Mutex
// window holds accept timestamps, oldest first (appends are monotonic).
window []time.Time
// maxInWindow is the maximum number of accepts within one window.
maxInWindow int
// windowDur is the sliding-window length.
windowDur time.Duration
}
// newConnectionRateGuard builds a guard from the runtime configuration,
// falling back to the package defaults for unset (non-positive) values.
func newConnectionRateGuard() *ConnectionRateGuard {
	c := conf.GetConfig()
	guard := &ConnectionRateGuard{}
	guard.maxInWindow = CfgOr(c.MaxConnPerWindow, defaultMaxConnPerWindow)
	guard.windowDur = time.Duration(CfgOr(c.ConnWindowSecs, defaultConnWindowSecs)) * time.Second
	return guard
}
// Allow returns true if a new connection may be accepted.
// The internal window is pruned on each call so memory stays bounded.
func (g *ConnectionRateGuard) Allow() bool {
	g.mu.Lock()
	defer g.mu.Unlock()
	now := time.Now()
	// Drop timestamps that have fallen out of the sliding window.
	oldest := now.Add(-g.windowDur)
	keepFrom := 0
	for keepFrom < len(g.window) && g.window[keepFrom].Before(oldest) {
		keepFrom++
	}
	g.window = g.window[keepFrom:]
	// Reject when the window is already full; otherwise record this accept.
	if len(g.window) >= g.maxInWindow {
		return false
	}
	g.window = append(g.window, now)
	return true
}
// CfgOr returns v when it is a positive (explicitly configured) value,
// otherwise the fallback def.
func CfgOr(v, def int) int {
	if v <= 0 {
		return def
	}
	return v
}

View File

@@ -14,11 +14,110 @@ import (
"github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/core/protocol"
) )
// InitialEventHops is the starting hop count for SWIM membership events.
// floor(log2(typical max-pool)) + 1 gives O(log n) propagation rounds.
const InitialEventHops = 4
// maxMemberEventQueue bounds the queue; when full, Add drops the oldest event.
const maxMemberEventQueue = 50
// MembershipEventQueue holds SWIM membership events to be piggybacked on
// outgoing heartbeats (infection-style dissemination). Bounded at
// maxMemberEventQueue entries; events are deduplicated by PeerID.
type MembershipEventQueue struct {
// mu guards events; Add and Drain both mutate under it.
mu sync.Mutex
// events holds at most one pending event per PeerID, oldest first.
events []MemberEvent
}
// memberEventPriority maps event types to an integer so higher-severity
// events override lower-severity ones for the same PeerID.
// Unknown types rank lowest (0).
func memberEventPriority(t MemberEventType) int {
	switch {
	case t == MemberDead:
		return 3
	case t == MemberSuspect:
		return 2
	case t == MemberAlive:
		return 1
	default:
		return 0
	}
}
// Add inserts or updates a membership event.
// An incoming event replaces the existing entry for the same PeerID when:
//   - its Incarnation is higher, OR
//   - the Incarnation is equal but the event type is higher-severity.
func (q *MembershipEventQueue) Add(e MemberEvent) {
	q.mu.Lock()
	defer q.mu.Unlock()
	// Dedup pass: the queue holds at most one event per peer.
	for idx := range q.events {
		current := q.events[idx]
		if current.PeerID != e.PeerID {
			continue
		}
		newerIncarnation := e.Incarnation > current.Incarnation
		sameIncarnationMoreSevere := e.Incarnation == current.Incarnation &&
			memberEventPriority(e.Type) > memberEventPriority(current.Type)
		if newerIncarnation || sameIncarnationMoreSevere {
			q.events[idx] = e
		}
		return
	}
	// No entry for this peer yet: append, evicting the oldest on overflow.
	if len(q.events) >= maxMemberEventQueue {
		q.events = q.events[1:]
	}
	q.events = append(q.events, e)
}
// Drain returns up to limit events ready for transmission.
// HopsLeft is decremented on each call; events that reach 0 are removed from
// the queue (they have already propagated enough rounds).
func (q *MembershipEventQueue) Drain(limit int) []MemberEvent {
	q.mu.Lock()
	defer q.mu.Unlock()
	if len(q.events) == 0 {
		return nil
	}
	out := make([]MemberEvent, 0, limit)
	// Filter in place: reuse the backing array for the surviving events.
	remaining := q.events[:0]
	for _, ev := range q.events {
		if len(out) >= limit {
			// Past the batch budget: leave the event untouched for next tick.
			remaining = append(remaining, ev)
			continue
		}
		ev.HopsLeft--
		out = append(out, ev)
		if ev.HopsLeft > 0 {
			remaining = append(remaining, ev)
		}
		// HopsLeft reached 0: the event has propagated enough — drop it.
	}
	q.events = remaining
	return out
}
// NodeEventQueue is the global SWIM event queue for the node side.
// Events are added on suspect/dead detection and drained into outgoing heartbeats.
// The zero value is ready to use: Add and Drain handle a nil events slice.
var NodeEventQueue = &MembershipEventQueue{}
const ( const (
ProtocolPublish = "/opencloud/record/publish/1.0" ProtocolPublish = "/opencloud/record/publish/1.0"
ProtocolGet = "/opencloud/record/get/1.0" ProtocolGet = "/opencloud/record/get/1.0"
ProtocolDelete = "/opencloud/record/delete/1.0"
// ProtocolIndirectProbe is opened by a node toward a live indexer to ask it
// to actively probe a suspected indexer on the node's behalf (SWIM indirect ping).
// It is the only inter-indexer protocol — indexers do not maintain persistent
// connections to each other; this stream is one-shot and short-lived.
ProtocolIndirectProbe = "/opencloud/indexer/probe/1.0"
) )
// IndirectProbeRequest is sent by a node over ProtocolIndirectProbe.
// The receiving indexer must attempt to reach Target and report back.
type IndirectProbeRequest struct {
// Target is the suspected peer the indexer should probe on the node's behalf.
Target pp.AddrInfo `json:"target"`
}
// IndirectProbeResponse is the reply from the probing indexer.
type IndirectProbeResponse struct {
// Reachable reports whether the indexer managed to contact Target.
Reachable bool `json:"reachable"`
// LatencyMs is the observed round-trip time in milliseconds, when measured.
LatencyMs int64 `json:"latency_ms,omitempty"`
}
const ProtocolHeartbeat = "/opencloud/heartbeat/1.0" const ProtocolHeartbeat = "/opencloud/heartbeat/1.0"
// ProtocolWitnessQuery is opened by a node to ask a peer what it thinks of a given indexer. // ProtocolWitnessQuery is opened by a node to ask a peer what it thinks of a given indexer.

View File

@@ -6,6 +6,7 @@ import (
"time" "time"
"oc-discovery/conf" "oc-discovery/conf"
"oc-discovery/daemons/node/common"
pp "github.com/libp2p/go-libp2p/core/peer" pp "github.com/libp2p/go-libp2p/core/peer"
) )
@@ -13,8 +14,6 @@ import (
// ── defaults ────────────────────────────────────────────────────────────────── // ── defaults ──────────────────────────────────────────────────────────────────
const ( const (
defaultMaxConnPerWindow = 20
defaultConnWindowSecs = 30
defaultMaxHBPerMinute = 5 defaultMaxHBPerMinute = 5
defaultMaxPublishPerMin = 10 defaultMaxPublishPerMin = 10
defaultMaxGetPerMin = 50 defaultMaxGetPerMin = 50
@@ -23,52 +22,6 @@ const (
behaviorWindowDur = 60 * time.Second behaviorWindowDur = 60 * time.Second
) )
func cfgOr(v, def int) int {
if v > 0 {
return v
}
return def
}
// ── ConnectionRateGuard ───────────────────────────────────────────────────────
// ConnectionRateGuard limits the number of NEW incoming connections accepted
// within a sliding time window. It protects public indexers against coordinated
// registration floods (Sybil bursts).
type ConnectionRateGuard struct {
mu sync.Mutex
window []time.Time
maxInWindow int
windowDur time.Duration
}
func newConnectionRateGuard() *ConnectionRateGuard {
cfg := conf.GetConfig()
return &ConnectionRateGuard{
maxInWindow: cfgOr(cfg.MaxConnPerWindow, defaultMaxConnPerWindow),
windowDur: time.Duration(cfgOr(cfg.ConnWindowSecs, defaultConnWindowSecs)) * time.Second,
}
}
// Allow returns true if a new connection may be accepted.
// The internal window is pruned on each call so memory stays bounded.
func (g *ConnectionRateGuard) Allow() bool {
g.mu.Lock()
defer g.mu.Unlock()
now := time.Now()
cutoff := now.Add(-g.windowDur)
i := 0
for i < len(g.window) && g.window[i].Before(cutoff) {
i++
}
g.window = g.window[i:]
if len(g.window) >= g.maxInWindow {
return false
}
g.window = append(g.window, now)
return true
}
// ── per-node state ──────────────────────────────────────────────────────────── // ── per-node state ────────────────────────────────────────────────────────────
type nodeBehavior struct { type nodeBehavior struct {
@@ -130,9 +83,9 @@ func newNodeBehaviorTracker() *NodeBehaviorTracker {
cfg := conf.GetConfig() cfg := conf.GetConfig()
return &NodeBehaviorTracker{ return &NodeBehaviorTracker{
nodes: make(map[pp.ID]*nodeBehavior), nodes: make(map[pp.ID]*nodeBehavior),
maxHB: cfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute), maxHB: common.CfgOr(cfg.MaxHBPerMinute, defaultMaxHBPerMinute),
maxPub: cfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin), maxPub: common.CfgOr(cfg.MaxPublishPerMinute, defaultMaxPublishPerMin),
maxGet: cfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin), maxGet: common.CfgOr(cfg.MaxGetPerMinute, defaultMaxGetPerMin),
} }
} }

View File

@@ -21,11 +21,26 @@ import (
lpp "github.com/libp2p/go-libp2p/core/peer" lpp "github.com/libp2p/go-libp2p/core/peer"
) )
// DefaultTTLSeconds is the default TTL for peer records when the publisher
// does not declare a custom TTL (TTLSeconds == 0 in PeerRecordPayload).
// Exported so the node package can reference it when signing fresh records.
const DefaultTTLSeconds = 120
// maxTTLSeconds caps how far in the future a publisher can set their ExpiryDate.
// Enforced by the DHT validator; prevents records designed to linger for years.
const maxTTLSeconds = 86400 // 24h
// tombstoneTTL is how long a signed delete record stays alive in the DHT —
// long enough to propagate everywhere, short enough not to linger forever.
// Also used as the window during which a deleted DID cannot be republished.
const tombstoneTTL = 10 * time.Minute
type PeerRecordPayload struct { type PeerRecordPayload struct {
Name string `json:"name"` Name string `json:"name"`
DID string `json:"did"` DID string `json:"did"`
PubKey []byte `json:"pub_key"` PubKey []byte `json:"pub_key"`
ExpiryDate time.Time `json:"expiry_date"` ExpiryDate time.Time `json:"expiry_date"`
// TTLSeconds is the publisher's declared lifetime for this record in seconds.
// 0 means "use the default (120 s)". Included in the signed payload so it
// cannot be altered by an intermediary.
TTLSeconds int `json:"ttl_seconds,omitempty"`
} }
type PeerRecord struct { type PeerRecord struct {
@@ -35,6 +50,7 @@ type PeerRecord struct {
StreamAddress string `json:"stream_address"` StreamAddress string `json:"stream_address"`
NATSAddress string `json:"nats_address"` NATSAddress string `json:"nats_address"`
WalletAddress string `json:"wallet_address"` WalletAddress string `json:"wallet_address"`
Location *pp.PeerLocation `json:"location,omitempty"`
Signature []byte `json:"signature"` Signature []byte `json:"signature"`
} }
@@ -84,6 +100,7 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
StreamAddress: pr.StreamAddress, StreamAddress: pr.StreamAddress,
NATSAddress: pr.NATSAddress, NATSAddress: pr.NATSAddress,
WalletAddress: pr.WalletAddress, WalletAddress: pr.WalletAddress,
Location: pr.Location,
} }
if time.Now().UTC().After(pr.ExpiryDate) { if time.Now().UTC().After(pr.ExpiryDate) {
return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline") return pp.SELF == p.Relation, nil, errors.New("peer " + key + " is offline")
@@ -91,6 +108,42 @@ func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKe
return pp.SELF == p.Relation, p, nil return pp.SELF == p.Relation, p, nil
} }
// TombstonePayload is the signed body of a delete request.
// Only the owner's private key can produce a valid signature over this payload.
// DeletedAt doubles as the replay-protection timestamp checked by handlers.
type TombstonePayload struct {
	DID       string    `json:"did"`        // DID whose DHT record is being deleted
	PeerID    string    `json:"peer_id"`    // libp2p peer ID of the requester
	DeletedAt time.Time `json:"deleted_at"` // UTC time of the delete request
}
// TombstoneRecord is stored in the DHT at /node/{DID} to signal that a peer
// has voluntarily left the network. The Tombstone bool field acts as a
// discriminator so validators can distinguish it from a live PeerRecord.
// Signature covers the JSON encoding of the embedded TombstonePayload only;
// PubKey is the marshalled key used to verify that signature.
type TombstoneRecord struct {
	TombstonePayload
	PubKey    []byte `json:"pub_key"`   // marshalled public key of the signer
	Tombstone bool   `json:"tombstone"` // always true; discriminator vs PeerRecord
	Signature []byte `json:"signature"` // signature over TombstonePayload JSON
}
// Verify checks the tombstone's signature against its embedded public key.
// On success it returns the parsed public key so callers can perform further
// identity checks (e.g. matching it against the peer's previously known key).
// Errors from key unmarshalling, payload marshalling, and signature
// verification are all surfaced instead of being silently dropped.
func (ts *TombstoneRecord) Verify() (crypto.PubKey, error) {
	pubKey, err := crypto.UnmarshalPublicKey(ts.PubKey)
	if err != nil {
		return nil, err
	}
	payload, err := json.Marshal(ts.TombstonePayload)
	if err != nil {
		return nil, err
	}
	ok, err := pubKey.Verify(payload, ts.Signature)
	if err != nil {
		return nil, err
	}
	if !ok {
		return nil, errors.New("invalid tombstone signature")
	}
	return pubKey, nil
}
// isTombstone reports whether data decodes as a well-formed TombstoneRecord
// whose discriminator flag is set.
func isTombstone(data []byte) bool {
	var ts TombstoneRecord
	if err := json.Unmarshal(data, &ts); err != nil {
		return false
	}
	return ts.Tombstone
}
type GetValue struct { type GetValue struct {
Key string `json:"key"` Key string `json:"key"`
PeerID string `json:"peer_id,omitempty"` PeerID string `json:"peer_id,omitempty"`
@@ -147,9 +200,9 @@ func (ix *IndexerService) isPeerKnown(pid lpp.ID) bool {
return false return false
} }
ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second) ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second)
_, err = ix.DHT.GetValue(ctx2, ix.genKey(string(did))) val, err := ix.DHT.GetValue(ctx2, ix.genKey(string(did)))
cancel2() cancel2()
return err == nil return err == nil && !isTombstone(val)
} }
func (ix *IndexerService) initNodeHandler() { func (ix *IndexerService) initNodeHandler() {
@@ -188,6 +241,18 @@ func (ix *IndexerService) initNodeHandler() {
logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid") logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid")
return return
} }
// Don't republish if a tombstone was recently stored for this DID:
// the peer explicitly left and we must not re-animate their record.
ix.deletedDIDsMu.Lock()
if t, ok := ix.deletedDIDs[rec.DID]; ok {
if time.Since(t) < tombstoneTTL {
ix.deletedDIDsMu.Unlock()
return
}
// tombstoneTTL elapsed — peer is allowed to re-register.
delete(ix.deletedDIDs, rec.DID)
}
ix.deletedDIDsMu.Unlock()
// Keep StreamRecord.Record in sync so BuildHeartbeatResponse always // Keep StreamRecord.Record in sync so BuildHeartbeatResponse always
// sees a populated PeerRecord (Name, DID, etc.) regardless of whether // sees a populated PeerRecord (Name, DID, etc.) regardless of whether
// handleNodePublish ran before or after the heartbeat stream was opened. // handleNodePublish ran before or after the heartbeat stream was opened.
@@ -220,6 +285,8 @@ func (ix *IndexerService) initNodeHandler() {
ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat) ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat)
ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish) ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish)
ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet) ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet)
ix.Host.SetStreamHandler(common.ProtocolDelete, ix.handleNodeDelete)
ix.Host.SetStreamHandler(common.ProtocolIndirectProbe, ix.handleIndirectProbe)
ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest) ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest)
ix.initSearchHandlers() ix.initSearchHandlers()
} }
@@ -383,12 +450,12 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
c, err := ix.DHT.GetValue(ctx, ix.genKey(key)) c, err := ix.DHT.GetValue(ctx, ix.genKey(key))
cancel() cancel()
if err == nil { if err == nil && !isTombstone(c) {
var rec PeerRecord var rec PeerRecord
if json.Unmarshal(c, &rec) == nil { if json.Unmarshal(c, &rec) == nil {
resp.Records[rec.PeerID] = rec resp.Records[rec.PeerID] = rec
} }
} else { } else if err != nil {
logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key) logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key)
} }
} }
@@ -399,3 +466,121 @@ func (ix *IndexerService) handleNodeGet(s network.Stream) {
} }
} }
// handleNodeDelete processes a signed delete (tombstone) request from a peer.
// It verifies that the request is:
//   - marked as a tombstone, with both PeerID and DID populated
//   - recent (DeletedAt within the last 5 minutes, preventing replay attacks)
//   - not timestamped in the future beyond a small clock-skew allowance —
//     a future DeletedAt would otherwise sail past the staleness check
//   - sent by the actual peer whose record is being deleted (PeerID == remotePeer)
//   - signed by the matching private key
//
// On success it stores the tombstone in the DHT, evicts the peer from the local
// stream records, and marks the DID in deletedDIDs so AfterHeartbeat cannot
// accidentally republish the record during the tombstoneTTL window.
//
// NOTE(review): Verify() only proves the signature matches the PubKey embedded
// in the tombstone itself; nothing here binds that key to the DID being
// deleted. Confirm the DHT validator enforces key↔DID binding, otherwise any
// peer could tombstone a foreign DID with a self-generated key.
func (ix *IndexerService) handleNodeDelete(s network.Stream) {
	defer s.Close()
	logger := oclib.GetLogger()
	remotePeer := s.Conn().RemotePeer()
	s.SetDeadline(time.Now().Add(10 * time.Second))

	var ts TombstoneRecord
	if err := json.NewDecoder(s).Decode(&ts); err != nil || !ts.Tombstone {
		s.Reset()
		return
	}
	if ts.PeerID == "" || ts.DID == "" {
		s.Reset()
		return
	}
	// Replay window, checked in both directions: too old OR timestamped in
	// the future (beyond 2 minutes of allowed clock skew) is rejected.
	now := time.Now()
	if now.Sub(ts.DeletedAt) > 5*time.Minute || ts.DeletedAt.After(now.Add(2*time.Minute)) {
		logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] stale tombstone rejected")
		s.Reset()
		return
	}
	if ts.PeerID != remotePeer.String() {
		logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] tombstone PeerID mismatch")
		s.Reset()
		return
	}
	if _, err := ts.Verify(); err != nil {
		logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("[delete] invalid tombstone signature")
		s.Reset()
		return
	}

	// Mark DID as deleted in-memory before writing to DHT so AfterHeartbeat
	// cannot win a race and republish the live record on top of the tombstone.
	ix.deletedDIDsMu.Lock()
	ix.deletedDIDs[ts.DID] = ts.DeletedAt
	ix.deletedDIDsMu.Unlock()

	data, _ := json.Marshal(ts) // marshalling a just-decoded struct cannot fail
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	if err := ix.DHT.PutValue(ctx, ix.genKey(ts.DID), data); err != nil {
		logger.Warn().Err(err).Str("did", ts.DID).Msg("[delete] DHT write tombstone failed")
	}
	cancel()

	// Invalidate the /pid/ secondary index so isPeerKnown returns false quickly.
	ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second)
	if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(ts.PeerID), []byte("")); err != nil {
		logger.Warn().Err(err).Str("pid", ts.PeerID).Msg("[delete] DHT clear pid failed")
	}
	cancel2()

	// Evict from active stream records.
	if pid, err := lpp.Decode(ts.PeerID); err == nil {
		ix.StreamMU.Lock()
		delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
		ix.StreamMU.Unlock()
	}
	logger.Info().Str("did", ts.DID).Str("peer", ts.PeerID).Msg("[delete] tombstone stored, peer evicted")
}
// handleIndirectProbe is the SWIM inter-indexer probe handler.
// A node opens this stream toward a live indexer to ask: "can you reach peer X?"
// The indexer attempts a ProtocolBandwidthProbe to X and reports back the
// reachability verdict plus the round-trip latency in milliseconds.
// This is the only protocol that indexers use to communicate with each other;
// no persistent inter-indexer connections are maintained.
func (ix *IndexerService) handleIndirectProbe(s network.Stream) {
	defer s.Close()
	s.SetDeadline(time.Now().Add(10 * time.Second))

	var req common.IndirectProbeRequest
	if err := json.NewDecoder(s).Decode(&req); err != nil {
		s.Reset()
		return
	}
	// respond writes the verdict back to the requester; an encode failure is
	// not actionable here (the requester will time out on its own deadline).
	respond := func(reachable bool, latencyMs int64) {
		json.NewEncoder(s).Encode(common.IndirectProbeResponse{
			Reachable: reachable,
			LatencyMs: latencyMs,
		})
	}

	// Connect to target if not already connected.
	ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second)
	defer cancel()
	if ix.Host.Network().Connectedness(req.Target.ID) != network.Connected {
		if err := ix.Host.Connect(ctx, req.Target); err != nil {
			respond(false, 0)
			return
		}
	}

	// Open a bandwidth probe stream — already registered on all nodes/indexers.
	start := time.Now()
	ps, err := ix.Host.NewStream(ctx, req.Target.ID, common.ProtocolBandwidthProbe)
	if err != nil {
		respond(false, 0)
		return
	}
	defer ps.Reset()
	ps.SetDeadline(time.Now().Add(3 * time.Second))
	// A failed write means the target is unreachable; bail out rather than
	// letting the subsequent Read time out and inflate the reported latency.
	if _, err := ps.Write([]byte("ping")); err != nil {
		respond(false, 0)
		return
	}
	buf := make([]byte, 4)
	_, err = ps.Read(buf)
	latency := time.Since(start).Milliseconds()
	respond(err == nil, latency)
}

View File

@@ -9,6 +9,7 @@ import (
"oc-discovery/daemons/node/common" "oc-discovery/daemons/node/common"
"strings" "strings"
"sync" "sync"
"sync/atomic"
"time" "time"
oclib "cloud.o-forge.io/core/oc-lib" oclib "cloud.o-forge.io/core/oc-lib"
@@ -66,7 +67,18 @@ type IndexerService struct {
// identity consistency, signature failures). // identity consistency, signature failures).
behavior *NodeBehaviorTracker behavior *NodeBehaviorTracker
// connGuard limits new-connection bursts to protect public indexers. // connGuard limits new-connection bursts to protect public indexers.
connGuard *ConnectionRateGuard // deletedDIDs tracks recently tombstoned DIDs to prevent AfterHeartbeat
// from republishing records that were explicitly deleted by the peer.
// Entries are cleared automatically after tombstoneTTL.
deletedDIDs map[string]time.Time
deletedDIDsMu sync.RWMutex
// SWIM incarnation: incremented when a connecting node signals suspicion via
// SuspectedIncarnation. The new value is broadcast back so nodes can clear
// their suspect state (refutation mechanism).
incarnation atomic.Uint64
// eventQueue holds SWIM membership events to be piggybacked on responses
// (infection-style dissemination toward connected nodes).
eventQueue *common.MembershipEventQueue
} }
// NewIndexerService creates an IndexerService. // NewIndexerService creates an IndexerService.
@@ -81,7 +93,8 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
referencedNodes: map[pp.ID]PeerRecord{}, referencedNodes: map[pp.ID]PeerRecord{},
pendingSearches: map[string]chan []common.SearchHit{}, pendingSearches: map[string]chan []common.SearchHit{},
behavior: newNodeBehaviorTracker(), behavior: newNodeBehaviorTracker(),
connGuard: newConnectionRateGuard(), deletedDIDs: make(map[string]time.Time),
eventQueue: &common.MembershipEventQueue{},
} }
if ps == nil { if ps == nil {
ps, err = pubsub.NewGossipSub(context.Background(), ix.Host) ps, err = pubsub.NewGossipSub(context.Background(), ix.Host)
@@ -96,6 +109,21 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2) common.ConnectToIndexers(h, conf.GetConfig().MinIndexer, conf.GetConfig().MaxIndexer*2)
logger.Info().Msg("subscribe to decentralized search flow as strict indexer...") logger.Info().Msg("subscribe to decentralized search flow as strict indexer...")
go ix.SubscribeToSearch(ix.PS, nil) go ix.SubscribeToSearch(ix.PS, nil)
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
/*if ix.behavior.IsBanned(remotePeer) {
return errors.New("peer is banned")
}*/
if isNew {
// DB blacklist check: blocks reconnection after EvictPeer + blacklist.
/*if !ix.isPeerKnown(remotePeer) {
return errors.New("peer is blacklisted or unknown")
}*/
if !ix.ConnGuard.Allow() {
return errors.New("connection rate limit exceeded, retry later")
}
}
return nil
}
} }
ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) { ix.LongLivedStreamRecordedService.AfterDelete = func(pid pp.ID, name, did string) {
@@ -106,16 +134,7 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
// AllowInbound: fired once per stream open, before any heartbeat is decoded. // AllowInbound: fired once per stream open, before any heartbeat is decoded.
// 1. Reject peers that are currently banned (behavioral strikes). // 1. Reject peers that are currently banned (behavioral strikes).
// 2. For genuinely new connections, apply the burst guard. // 2. For genuinely new connections, check the DB blacklist and apply the burst guard.
ix.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
if ix.behavior.IsBanned(remotePeer) {
return errors.New("peer is banned")
}
if isNew && !ix.connGuard.Allow() {
return errors.New("connection rate limit exceeded, retry later")
}
return nil
}
// ValidateHeartbeat: fired on every heartbeat tick for an established stream. // ValidateHeartbeat: fired on every heartbeat tick for an established stream.
// Checks heartbeat cadence — rejects if the node is sending too fast. // Checks heartbeat cadence — rejects if the node is sending too fast.
@@ -162,7 +181,11 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
// Build and send a HeartbeatResponse after each received node heartbeat. // Build and send a HeartbeatResponse after each received node heartbeat.
// Raw metrics only — no pre-cooked score. Node computes the score itself. // Raw metrics only — no pre-cooked score. Node computes the score itself.
ix.BuildHeartbeatResponse = func(remotePeer pp.ID, need int, challenges []string, challengeDID string, referent bool, rawRecord json.RawMessage) *common.HeartbeatResponse { ix.BuildHeartbeatResponse = func(remotePeer pp.ID, hb *common.Heartbeat) *common.HeartbeatResponse {
logger := oclib.GetLogger()
need, challenges, challengeDID, referent, rawRecord :=
hb.Need, hb.Challenges, hb.ChallengeDID, hb.Referent, hb.Record
ix.StreamMU.RLock() ix.StreamMU.RLock()
peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat]) peerCount := len(ix.StreamRecords[common.ProtocolHeartbeat])
// Collect lastSeen per active peer for challenge responses. // Collect lastSeen per active peer for challenge responses.
@@ -197,6 +220,31 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
// Update referent designation: node marks its best-scored indexer with Referent=true. // Update referent designation: node marks its best-scored indexer with Referent=true.
ix.updateReferent(remotePeer, remotePeerRecord, referent) ix.updateReferent(remotePeer, remotePeerRecord, referent)
// SWIM refutation: if the node signals our current incarnation as suspected,
// increment it and broadcast an alive event so other nodes can clear suspicion.
inc := ix.incarnation.Load()
if hb.SuspectedIncarnation != nil && *hb.SuspectedIncarnation == inc {
inc = ix.incarnation.Add(1)
logger.Info().
Str("suspected_by", remotePeer.String()).
Uint64("new_incarnation", inc).
Msg("[swim] refuting suspicion — incarnation incremented")
ix.eventQueue.Add(common.MemberEvent{
Type: common.MemberAlive,
PeerID: ix.Host.ID().String(),
Incarnation: inc,
HopsLeft: common.InitialEventHops,
})
}
// Relay incoming SWIM events from the node into our event queue so they
// propagate to other connected nodes (infection-style forwarding).
for _, ev := range hb.MembershipEvents {
if ev.HopsLeft > 0 {
ix.eventQueue.Add(ev)
}
}
maxN := ix.MaxNodesConn() maxN := ix.MaxNodesConn()
fillRate := 0.0 fillRate := 0.0
if maxN > 0 { if maxN > 0 {
@@ -356,6 +404,10 @@ func NewIndexerService(h host.Host, ps *pubsub.PubSub, maxNode int) *IndexerServ
}() }()
} }
// Attach SWIM incarnation and piggybacked membership events.
resp.Incarnation = ix.incarnation.Load()
resp.MembershipEvents = ix.eventQueue.Drain(5)
return resp return resp
} }
@@ -489,6 +541,23 @@ func (ix *IndexerService) startDHTProvide(fillRateFn func() float64) {
}() }()
} }
// EvictPeer immediately closes the heartbeat stream of a peer and removes it
// from the active stream records. Used when a peer is auto-blacklisted.
// Invalid peer IDs and peers without an active record are silently ignored.
func (ix *IndexerService) EvictPeer(peerID string) {
	pid, err := pp.Decode(peerID)
	if err != nil {
		return
	}

	ix.StreamMU.Lock()
	defer ix.StreamMU.Unlock()

	rec, ok := ix.StreamRecords[common.ProtocolHeartbeat][pid]
	if !ok {
		return
	}
	if hs := rec.HeartbeatStream; hs != nil && hs.Stream != nil {
		hs.Stream.Reset()
	}
	delete(ix.StreamRecords[common.ProtocolHeartbeat], pid)
}
func (ix *IndexerService) Close() { func (ix *IndexerService) Close() {
if ix.dhtProvideCancel != nil { if ix.dhtProvideCancel != nil {
ix.dhtProvideCancel() ix.dhtProvideCancel()

View File

@@ -19,6 +19,21 @@ func (v DefaultValidator) Select(key string, values [][]byte) (int, error) {
type PeerRecordValidator struct{} type PeerRecordValidator struct{}
func (v PeerRecordValidator) Validate(key string, value []byte) error { func (v PeerRecordValidator) Validate(key string, value []byte) error {
// Accept valid tombstones — deletion must be storable so it can propagate
// and win over stale live records on other DHT nodes via Select().
var ts TombstoneRecord
if err := json.Unmarshal(value, &ts); err == nil && ts.Tombstone {
if ts.PeerID == "" || ts.DID == "" {
return errors.New("tombstone: missing fields")
}
if time.Since(ts.DeletedAt) > tombstoneTTL {
return errors.New("tombstone: expired")
}
if _, err := ts.Verify(); err != nil {
return errors.New("tombstone: " + err.Error())
}
return nil
}
var rec PeerRecord var rec PeerRecord
if err := json.Unmarshal(value, &rec); err != nil { if err := json.Unmarshal(value, &rec); err != nil {
@@ -35,6 +50,12 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
return errors.New("record expired") return errors.New("record expired")
} }
// TTL cap: publisher cannot set an expiry further than maxTTLSeconds in
// the future. Prevents abuse (e.g. records designed to linger for years).
if rec.ExpiryDate.After(time.Now().UTC().Add(maxTTLSeconds * time.Second)) {
return errors.New("TTL exceeds maximum allowed")
}
// Signature verification // Signature verification
if _, err := rec.Verify(); err != nil { if _, err := rec.Verify(); err != nil {
return errors.New("invalid signature") return errors.New("invalid signature")
@@ -44,6 +65,14 @@ func (v PeerRecordValidator) Validate(key string, value []byte) error {
} }
func (v PeerRecordValidator) Select(key string, values [][]byte) (int, error) { func (v PeerRecordValidator) Select(key string, values [][]byte) (int, error) {
// Tombstone always wins: a signed delete supersedes any live record,
// even if the live record has a later ExpiryDate.
for i, val := range values {
var ts TombstoneRecord
if err := json.Unmarshal(val, &ts); err == nil && ts.Tombstone {
return i, nil
}
}
var newest time.Time var newest time.Time
index := 0 index := 0

View File

@@ -0,0 +1,99 @@
// Package location resolves the geographic position of this node via IP
// geolocation and applies a privacy-preserving random offset proportional
// to the chosen granularity level before publishing the result.
package location
import (
"encoding/json"
"fmt"
"math/rand"
"net/http"
"time"
peer "cloud.o-forge.io/core/oc-lib/models/peer"
)
// fuzzRadius returns the maximum random offset (in degrees) applied on each
// axis for a given granularity level.
//
//	0 → no location (handled by the caller)
//	1 → continent  ±15°   lat / ±20°   lng
//	2 → country    ±3°    lat / ±4°    lng (default)
//	3 → region     ±0.5°  lat / ±0.7°  lng
//	4 → city       ±0.05° lat / ±0.07° lng
//
// Unrecognized levels fall back to the country default.
func fuzzRadius(granularity int) (latR, lngR float64) {
	if granularity == 1 {
		return 15.0, 20.0
	}
	if granularity == 3 {
		return 0.5, 0.7
	}
	if granularity == 4 {
		return 0.05, 0.07
	}
	// Level 2 and anything unrecognized share the country-level radius.
	return 3.0, 4.0
}
// clamp keeps v inside the inclusive range [lo, hi].
// Parameters are named lo/hi (not min/max) so they do not shadow the Go 1.21
// builtin min and max functions.
func clamp(v, lo, hi float64) float64 {
	if v < lo {
		return lo
	}
	if v > hi {
		return hi
	}
	return v
}
// ipAPIResponse is the subset of fields returned by ip-api.com/json.
// Status is "success" on a successful lookup; any other value (e.g. "fail")
// means the coordinates must not be trusted.
type ipAPIResponse struct {
	Status  string  `json:"status"`     // "success" or "fail"
	Lat     float64 `json:"lat"`        // raw latitude in decimal degrees
	Lon     float64 `json:"lon"`        // raw longitude in decimal degrees
	Country string  `json:"country"`
	Region  string  `json:"regionName"`
	City    string  `json:"city"`
}
// Geolocate resolves the current public IP location via ip-api.com (free,
// no key required for non-commercial use), then fuzzes the result according
// to granularity.
//
// Returns nil if granularity == 0 (opt-out) or if the lookup fails for any
// reason: network error, non-200 HTTP status, malformed body, or a "fail"
// status in the API response.
func Geolocate(granularity int) *peer.PeerLocation {
	if granularity == 0 {
		return nil
	}
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get("http://ip-api.com/json?fields=status,lat,lon,country,regionName,city")
	if err != nil {
		return nil
	}
	defer resp.Body.Close()
	// Guard the HTTP status before decoding: ip-api returns 429 with a
	// non-JSON body when rate-limited, which would otherwise surface as a
	// confusing decode error.
	if resp.StatusCode != http.StatusOK {
		return nil
	}
	var result ipAPIResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil || result.Status != "success" {
		return nil
	}
	latR, lngR := fuzzRadius(granularity)
	rng := rand.New(rand.NewSource(time.Now().UnixNano()))
	fuzzedLat := clamp(result.Lat+(rng.Float64()*2-1)*latR, -85.0, 85.0)
	fuzzedLng := clamp(result.Lon+(rng.Float64()*2-1)*lngR, -180.0, 180.0)
	// NOTE(review): this prints the RAW coordinates to stdout, which defeats
	// the fuzzing for anyone with log access — consider removing or moving
	// behind a debug flag.
	fmt.Printf("[location] granularity=%d raw=(%.4f,%.4f) fuzzed=(%.4f,%.4f)\n",
		granularity, result.Lat, result.Lon, fuzzedLat, fuzzedLng)
	return &peer.PeerLocation{
		Latitude:    fuzzedLat,
		Longitude:   fuzzedLng,
		Granularity: granularity,
	}
}

View File

@@ -10,6 +10,7 @@ import (
oclib "cloud.o-forge.io/core/oc-lib" oclib "cloud.o-forge.io/core/oc-lib"
"cloud.o-forge.io/core/oc-lib/config" "cloud.o-forge.io/core/oc-lib/config"
pp_model "cloud.o-forge.io/core/oc-lib/models/peer"
"cloud.o-forge.io/core/oc-lib/tools" "cloud.o-forge.io/core/oc-lib/tools"
pp "github.com/libp2p/go-libp2p/core/peer" pp "github.com/libp2p/go-libp2p/core/peer"
"github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/core/protocol"
@@ -25,8 +26,10 @@ type executionConsidersPayload struct {
func ListenNATS(n *Node) { func ListenNATS(n *Node) {
tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){ tools.NewNATSCaller().ListenNats(map[tools.NATSMethod]func(tools.NATSResponse){
tools.PEER_BEHAVIOR_EVENT: func(resp tools.NATSResponse) { //nolint:typecheck
handlePeerBehaviorEvent(n, resp)
},
tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) { tools.PROPALGATION_EVENT: func(resp tools.NATSResponse) {
fmt.Println("PROPALGATION")
if resp.FromApp == config.GetAppName() { if resp.FromApp == config.GetAppName() {
return return
} }
@@ -41,7 +44,6 @@ func ListenNATS(n *Node) {
dtt := tools.DataType(propalgation.DataType) dtt := tools.DataType(propalgation.DataType)
dt = &dtt dt = &dtt
} }
fmt.Println("PROPALGATION ACT", propalgation.DataType, propalgation.Action, propalgation.Action == tools.PB_CREATE, err)
if err == nil { if err == nil {
switch propalgation.Action { switch propalgation.Action {
case tools.PB_ADMIRALTY_CONFIG, tools.PB_MINIO_CONFIG: case tools.PB_ADMIRALTY_CONFIG, tools.PB_MINIO_CONFIG:
@@ -116,6 +118,7 @@ func ListenNATS(n *Node) {
} }
n.StreamService.Mu.Unlock() n.StreamService.Mu.Unlock()
} else { } else {
fmt.Println("REACH PLANNER")
n.StreamService.PublishCommon(nil, resp.User, resp.Groups, fmt.Sprintf("%v", m["peer_id"]), stream.ProtocolSendPlanner, b) n.StreamService.PublishCommon(nil, resp.User, resp.Groups, fmt.Sprintf("%v", m["peer_id"]), stream.ProtocolSendPlanner, b)
} }
} }
@@ -158,6 +161,8 @@ func ListenNATS(n *Node) {
} else { } else {
m := map[string]interface{}{} m := map[string]interface{}{}
if err := json.Unmarshal(propalgation.Payload, &m); err == nil { if err := json.Unmarshal(propalgation.Payload, &m); err == nil {
fmt.Println("PB_SEARCH CATA", m)
n.PubSubService.SearchPublishEvent( n.PubSubService.SearchPublishEvent(
context.Background(), context.Background(),
dt, dt,
@@ -172,3 +177,66 @@ func ListenNATS(n *Node) {
}, },
}) })
} }
// handlePeerBehaviorEvent applies a PeerBehaviorReport received from a trusted
// service (oc-scheduler, oc-datacenter, …). It:
//  1. Loads the target peer from the local DB.
//  2. Deducts the trust penalty and appends a BehaviorWarning.
//  3. Auto-blacklists and evicts the peer stream when TrustScore ≤ threshold.
//
// oc-discovery does NOT re-emit a PROPALGATION_EVENT: propagation is strictly
// inbound (oc-catalog → oc-discovery). The blacklist takes effect locally at
// the next isPeerKnown() call, and immediately via EvictPeer().
//
// Logging goes through oclib.GetLogger() for consistency with the rest of the
// daemon (previously used bare fmt.Println).
func handlePeerBehaviorEvent(n *Node, resp tools.NATSResponse) {
	logger := oclib.GetLogger()
	var report tools.PeerBehaviorReport
	if err := json.Unmarshal(resp.Payload, &report); err != nil {
		logger.Warn().Err(err).Msg("[behavior] report unmarshal failed")
		return
	}
	if report.TargetPeerID == "" {
		return
	}
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
	data := access.LoadOne(report.TargetPeerID)
	if data.Data == nil {
		logger.Warn().Str("peer", report.TargetPeerID).Msg("[behavior] target peer not found")
		return
	}
	p := data.ToPeer()
	if p == nil {
		return
	}
	// Self-protection: never penalise ourselves.
	if self, err := oclib.GetMySelf(); err == nil && self != nil && self.GetID() == p.GetID() {
		return
	}
	shouldBlacklist := p.ApplyBehaviorReport(report)
	if shouldBlacklist && p.Relation != pp_model.BLACKLIST {
		p.Relation = pp_model.BLACKLIST
		logger.Warn().Str("peer", p.PeerID).Str("reason", p.BlacklistReason).
			Msg("[behavior] auto-blacklisting peer")
		// Immediately evict any active stream so the peer can no longer heartbeat.
		if n.IndexerService != nil {
			n.IndexerService.EvictPeer(p.PeerID)
		}
	}
	// Persist updated trust score + relation locally.
	if updated := access.UpdateOne(p.Serialize(p), p.GetID()); updated.Err != "" {
		logger.Warn().Str("err", updated.Err).Msg("[behavior] could not update peer")
		return
	}
	// Notify oc-peer (and any other local NATS consumer) of the updated peer record
	// via CREATE_RESOURCE so they can synchronise their own state.
	if b, err := json.Marshal(p.Serialize(p)); err == nil {
		tools.NewNATSCaller().SetNATSPub(tools.CREATE_RESOURCE, tools.NATSResponse{
			FromApp:  "oc-discovery",
			Datatype: tools.PEER,
			Method:   int(tools.CREATE_RESOURCE),
			Payload:  b,
		})
	}
}

View File

@@ -8,6 +8,7 @@ import (
"oc-discovery/conf" "oc-discovery/conf"
"oc-discovery/daemons/node/common" "oc-discovery/daemons/node/common"
"oc-discovery/daemons/node/indexer" "oc-discovery/daemons/node/indexer"
"oc-discovery/daemons/node/location"
"oc-discovery/daemons/node/pubsub" "oc-discovery/daemons/node/pubsub"
"oc-discovery/daemons/node/stream" "oc-discovery/daemons/node/stream"
"sync" "sync"
@@ -108,7 +109,11 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
return nil return nil
} }
fresh := *node.peerRecord fresh := *node.peerRecord
fresh.PeerRecordPayload.ExpiryDate = time.Now().UTC().Add(2 * time.Minute) ttl := time.Duration(fresh.TTLSeconds) * time.Second
if ttl <= 0 {
ttl = indexer.DefaultTTLSeconds * time.Second
}
fresh.PeerRecordPayload.ExpiryDate = time.Now().UTC().Add(ttl)
payload, _ := json.Marshal(fresh.PeerRecordPayload) payload, _ := json.Marshal(fresh.PeerRecordPayload)
fresh.Signature, err = priv.Sign(payload) fresh.Signature, err = priv.Sign(payload)
if err != nil { if err != nil {
@@ -170,10 +175,24 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
if err != nil || evt.From == node.PeerID.String() { if err != nil || evt.From == node.PeerID.String() {
return return
} }
fmt.Println("PUBSUB SendResponse bef peerrece")
if p, err := node.GetPeerRecord(ctx, evt.From); err == nil && len(p) > 0 && m["search"] != nil { if p, err := node.GetPeerRecord(ctx, evt.From); err == nil && len(p) > 0 && m["search"] != nil {
fmt.Println("PUBSUB SendResponse af peerrece", m)
node.StreamService.SendResponse(p[0], &evt, fmt.Sprintf("%v", m["search"])) node.StreamService.SendResponse(p[0], &evt, fmt.Sprintf("%v", m["search"]))
} }
} }
node.AllowInbound = func(remotePeer pp.ID, isNew bool) error {
if isNew {
// DB blacklist check: blocks reconnection after EvictPeer + blacklist.
if !node.isPeerKnown(remotePeer) {
return errors.New("peer is blacklisted or unknown")
}
if !node.ConnGuard.Allow() {
return errors.New("connection rate limit exceeded, retry later")
}
}
return nil
}
logger.Info().Msg("subscribe to decentralized search flow...") logger.Info().Msg("subscribe to decentralized search flow...")
go node.SubscribeToSearch(node.PS, &f) go node.SubscribeToSearch(node.PS, &f)
logger.Info().Msg("connect to NATS") logger.Info().Msg("connect to NATS")
@@ -187,6 +206,39 @@ func InitNode(isNode bool, isIndexer bool) (*Node, error) {
return node, nil return node, nil
} }
// isPeerKnown is the stream-level gate: returns true if pid is allowed.
// Check order (fast → slow):
//  1. In-memory stream records — currently heartbeating to this indexer.
//  2. Local DB by peer_id — known peer, blacklist enforced here.
//
// NOTE(review): the original comment also promised a third check
// (DHT /pid/{peerID} → /node/{DID}) which is NOT implemented below, and the
// final `return true` makes this gate fail-open for peers absent from both
// the stream records and the local DB — confirm that is intentional.
//
// ProtocolHeartbeat and ProtocolPublish handlers do NOT call this — they are
// the streams through which a node first makes itself known.
func (d *Node) isPeerKnown(pid pp.ID) bool {
	// 1. Fast path: active heartbeat session.
	d.StreamMU.RLock()
	_, active := d.StreamRecords[common.ProtocolHeartbeat][pid]
	d.StreamMU.RUnlock()
	if active {
		return true
	}
	// 2. Local DB: known peer (handles blacklist).
	access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil)
	results := access.Search(&dbs.Filters{
		And: map[string][]dbs.Filter{
			"peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}},
		},
	}, pid.String(), false)
	for _, item := range results.Data {
		p, ok := item.(*peer.Peer)
		if !ok || p.PeerID != pid.String() {
			continue
		}
		// First matching DB record decides: allowed unless blacklisted.
		return p.Relation != peer.BLACKLIST
	}
	// Unknown everywhere → allow (fail-open; see NOTE above).
	return true
}
func (d *Node) Close() { func (d *Node) Close() {
if d.isIndexer && d.IndexerService != nil { if d.isIndexer && d.IndexerService != nil {
d.IndexerService.Close() d.IndexerService.Close()
@@ -211,11 +263,16 @@ func (d *Node) publishPeerRecord(
continue continue
} }
stream := common.Indexers.Streams.GetPerID(common.ProtocolPublish, ad.Info.ID) stream := common.Indexers.Streams.GetPerID(common.ProtocolPublish, ad.Info.ID)
ttl := time.Duration(rec.TTLSeconds) * time.Second
if ttl <= 0 {
ttl = indexer.DefaultTTLSeconds * time.Second
}
base := indexer.PeerRecordPayload{ base := indexer.PeerRecordPayload{
Name: rec.Name, Name: rec.Name,
DID: rec.DID, DID: rec.DID,
PubKey: rec.PubKey, PubKey: rec.PubKey,
ExpiryDate: time.Now().UTC().Add(2 * time.Minute), TTLSeconds: rec.TTLSeconds,
ExpiryDate: time.Now().UTC().Add(ttl),
} }
payload, _ := json.Marshal(base) payload, _ := json.Marshal(base)
rec.PeerRecordPayload = base rec.PeerRecordPayload = base
@@ -377,13 +434,12 @@ func (d *Node) claimInfo(
} }
now := time.Now().UTC() now := time.Now().UTC()
expiry := now.Add(150 * time.Second)
pRec := indexer.PeerRecordPayload{ pRec := indexer.PeerRecordPayload{
Name: name, Name: name,
DID: did, // REAL PEER ID DID: did, // REAL PEER ID
PubKey: pubBytes, PubKey: pubBytes,
ExpiryDate: expiry, TTLSeconds: indexer.DefaultTTLSeconds,
ExpiryDate: now.Add(indexer.DefaultTTLSeconds * time.Second),
} }
d.PeerID = d.Host.ID() d.PeerID = d.Host.ID()
payload, _ := json.Marshal(pRec) payload, _ := json.Marshal(pRec)
@@ -400,6 +456,7 @@ func (d *Node) claimInfo(
rec.StreamAddress = "/ip4/" + conf.GetConfig().Hostname + "/tcp/" + fmt.Sprintf("%v", conf.GetConfig().NodeEndpointPort) + "/p2p/" + rec.PeerID rec.StreamAddress = "/ip4/" + conf.GetConfig().Hostname + "/tcp/" + fmt.Sprintf("%v", conf.GetConfig().NodeEndpointPort) + "/p2p/" + rec.PeerID
rec.NATSAddress = oclib.GetConfig().NATSUrl rec.NATSAddress = oclib.GetConfig().NATSUrl
rec.WalletAddress = "my-wallet" rec.WalletAddress = "my-wallet"
rec.Location = location.Geolocate(conf.GetConfig().LocationGranularity)
if err := d.publishPeerRecord(rec); err != nil { if err := d.publishPeerRecord(rec); err != nil {
return nil, err return nil, err
@@ -424,6 +481,55 @@ func (d *Node) claimInfo(
} }
} }
// DeleteRecord broadcasts a signed tombstone to all connected indexers, signalling
// that this node is voluntarily leaving the network.
// Each indexer verifies the signature, stores the tombstone in the DHT (replacing
// the live record), and evicts the peer from its active pool.
// After a successful call, d.peerRecord is set to nil.
//
// The broadcast is best-effort: an unreachable indexer is skipped, not fatal.
// Returns an error only when there is no record to delete or when building/
// signing the tombstone fails.
func (d *Node) DeleteRecord() error {
	if d.peerRecord == nil {
		return errors.New("no peer record to delete")
	}
	priv, err := tools.LoadKeyFromFilePrivate()
	if err != nil {
		return err
	}
	pubBytes, err := crypto.MarshalPublicKey(priv.GetPublic())
	if err != nil {
		return err
	}
	tp := indexer.TombstonePayload{
		DID:       d.peerRecord.DID,
		PeerID:    d.PeerID.String(),
		DeletedAt: time.Now().UTC(),
	}
	// The signature covers the canonical JSON of the payload; indexers
	// re-marshal and verify against PubKey.
	payloadBytes, err := json.Marshal(tp)
	if err != nil {
		return fmt.Errorf("marshalling tombstone payload: %w", err)
	}
	sig, err := priv.Sign(payloadBytes)
	if err != nil {
		return err
	}
	ts := &indexer.TombstoneRecord{
		TombstonePayload: tp,
		PubKey:           pubBytes,
		Tombstone:        true,
		Signature:        sig,
	}
	data, err := json.Marshal(ts)
	if err != nil {
		return fmt.Errorf("marshalling tombstone record: %w", err)
	}
	// Best-effort fan-out: each indexer gets its own short-lived stream so a
	// slow or dead indexer cannot block the departure of this node.
	for _, ad := range common.Indexers.GetAddrs() {
		d.sendTombstone(ad.Info.ID, data)
	}
	d.peerRecord = nil
	return nil
}

// sendTombstone opens a short-lived ProtocolDelete stream to one indexer and
// writes the serialized tombstone. Failures are deliberately swallowed: the
// broadcast in DeleteRecord is best-effort and indexers also expire live
// records via TTL, so a missed tombstone is eventually reconciled.
func (d *Node) sendTombstone(id pp.ID, data []byte) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	s, err := d.Host.NewStream(ctx, id, common.ProtocolDelete)
	if err != nil {
		return // indexer unreachable; skip it
	}
	defer s.Close()
	// Bound the write so a stalled remote cannot hang the caller.
	_ = s.SetDeadline(time.Now().Add(5 * time.Second))
	_, _ = s.Write(data) // best-effort; see function comment
}
/* /*
TODO: TODO:
- Le booking est un flow neuf décentralisé : - Le booking est un flow neuf décentralisé :

View File

@@ -4,6 +4,7 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt"
"oc-discovery/conf" "oc-discovery/conf"
"oc-discovery/daemons/node/common" "oc-discovery/daemons/node/common"
"oc-discovery/daemons/node/stream" "oc-discovery/daemons/node/stream"
@@ -46,6 +47,7 @@ func (ps *PubSubService) SearchPublishEvent(
// The returned composite key is used as User in the GossipSub event so that // The returned composite key is used as User in the GossipSub event so that
// remote peers echo it back unchanged, allowing IsActive to validate results. // remote peers echo it back unchanged, allowing IsActive to validate results.
searchKey := ps.StreamService.ResourceSearches.Register(user, cancel, idleTimeout) searchKey := ps.StreamService.ResourceSearches.Register(user, cancel, idleTimeout)
fmt.Println("PUBLISH ON PUBSUB", common.TopicPubSubSearch, searchKey)
return ps.publishEvent(searchCtx, dt, tools.PB_SEARCH, common.TopicPubSubSearch, searchKey, b) return ps.publishEvent(searchCtx, dt, tools.PB_SEARCH, common.TopicPubSubSearch, searchKey, b)
default: default:
return errors.New("no type of research found") return errors.New("no type of research found")

View File

@@ -35,6 +35,7 @@ func (ps *StreamService) handleEvent(protocol string, evt *common.Event) error {
} }
}*/ }*/
if protocol == ProtocolSendPlanner { if protocol == ProtocolSendPlanner {
fmt.Println("sendPlanner", evt)
if err := ps.sendPlanner(evt); err != nil { if err := ps.sendPlanner(evt); err != nil {
return err return err
} }

View File

@@ -34,7 +34,7 @@ func (ps *StreamService) PublishesCommon(dt *tools.DataType, user string, groups
} }
func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups []string, toPeerID string, proto protocol.ID, resource []byte) (*common.Stream, error) { func (ps *StreamService) PublishCommon(dt *tools.DataType, user string, groups []string, toPeerID string, proto protocol.ID, resource []byte) (*common.Stream, error) {
fmt.Println("PublishCommon") fmt.Println("PublishCommon", toPeerID)
if toPeerID == ps.Key.String() { if toPeerID == ps.Key.String() {
fmt.Println("Can't send to ourself !") fmt.Println("Can't send to ourself !")
return nil, errors.New("Can't send to ourself !") return nil, errors.New("Can't send to ourself !")
@@ -127,6 +127,7 @@ func (s *StreamService) write(
} }
// should create a very temp stream // should create a very temp stream
if s.Streams, err = common.TempStream(s.Host, *peerID, proto, did, s.Streams, pts, &s.Mu); err != nil { if s.Streams, err = common.TempStream(s.Host, *peerID, proto, did, s.Streams, pts, &s.Mu); err != nil {
fmt.Println("TempStream", err)
return nil, errors.New("no stream available for protocol " + fmt.Sprintf("%v", proto) + " from PID " + peerID.ID.String()) return nil, errors.New("no stream available for protocol " + fmt.Sprintf("%v", proto) + " from PID " + peerID.ID.String())
} }

2
go.mod
View File

@@ -3,7 +3,7 @@ module oc-discovery
go 1.25.0 go 1.25.0
require ( require (
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406 cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee
github.com/ipfs/go-cid v0.6.0 github.com/ipfs/go-cid v0.6.0
github.com/libp2p/go-libp2p v0.47.0 github.com/libp2p/go-libp2p v0.47.0
github.com/libp2p/go-libp2p-record v0.3.1 github.com/libp2p/go-libp2p-record v0.3.1

6
go.sum
View File

@@ -8,6 +8,12 @@ cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2 h1:DuB6SDThFVJVQ
cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA= cloud.o-forge.io/core/oc-lib v0.0.0-20260312141150-a335c905b3a2/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406 h1:FN1EtRWn228JprAbnY5K863Fzj+SzMqQtKRtwvECbLw= cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406 h1:FN1EtRWn228JprAbnY5K863Fzj+SzMqQtKRtwvECbLw=
cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA= cloud.o-forge.io/core/oc-lib v0.0.0-20260318143822-5976795d4406/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260325092016-4580200e8057 h1:pR+lZzcCWZ0kke2r2xXa7OpdbLpPW3gZSWZ8gGHh274=
cloud.o-forge.io/core/oc-lib v0.0.0-20260325092016-4580200e8057/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260331144112-c0722483b86c h1:wTIridvhud8zwMsMkwxgrQ+j+6UAo2IHDr3N80AA6zc=
cloud.o-forge.io/core/oc-lib v0.0.0-20260331144112-c0722483b86c/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee h1:iJ1kgMbBOBIHwS4jHOVB5zFqOd7J9ZlweQBuchnmvT0=
cloud.o-forge.io/core/oc-lib v0.0.0-20260331181901-f3b5a54545ee/go.mod h1:+ENuvBfZdESSvecoqGY/wSvRlT3vinEolxKgwbOhUpA=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=

866
logs.txt

File diff suppressed because one or more lines are too long

View File

@@ -35,6 +35,7 @@ func main() {
conf.GetConfig().MinIndexer = o.GetIntDefault("MIN_INDEXER", 1) conf.GetConfig().MinIndexer = o.GetIntDefault("MIN_INDEXER", 1)
conf.GetConfig().MaxIndexer = o.GetIntDefault("MAX_INDEXER", 5) conf.GetConfig().MaxIndexer = o.GetIntDefault("MAX_INDEXER", 5)
conf.GetConfig().LocationGranularity = o.GetIntDefault("LOCATION_GRANULARITY", 2)
ctx, stop := signal.NotifyContext( ctx, stop := signal.NotifyContext(
context.Background(), context.Background(),