package indexer

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"errors"
	"io"
	"math/rand"
	"strings"
	"time"

	oclib "cloud.o-forge.io/core/oc-lib"
	"cloud.o-forge.io/core/oc-lib/dbs"
	pp "cloud.o-forge.io/core/oc-lib/models/peer"
	"cloud.o-forge.io/core/oc-lib/models/utils"
	"cloud.o-forge.io/core/oc-lib/tools"
	"github.com/libp2p/go-libp2p/core/crypto"
	"github.com/libp2p/go-libp2p/core/network"
	lpp "github.com/libp2p/go-libp2p/core/peer"

	"oc-discovery/daemons/node/common"
)

// DefaultTTLSeconds is the default TTL for peer records when the publisher
// does not declare a custom TTL. Exported so the node package can reference it.
const DefaultTTLSeconds = 120

// maxTTLSeconds caps how far in the future a publisher can set their ExpiryDate.
const maxTTLSeconds = 86400 // 24h

// tombstoneTTL is how long a signed delete record stays alive in the DHT —
// long enough to propagate everywhere, short enough not to linger forever.
const tombstoneTTL = 10 * time.Minute

// PeerRecordPayload is the signed portion of a PeerRecord. Only fields inside
// this struct are covered by the signature; everything else on PeerRecord is
// unauthenticated transport metadata.
type PeerRecordPayload struct {
	Name       string    `json:"name"`
	DID        string    `json:"did"`
	PubKey     []byte    `json:"pub_key"`
	ExpiryDate time.Time `json:"expiry_date"`
	// TTLSeconds is the publisher's declared lifetime for this record in seconds.
	// 0 means "use the default (120 s)". Included in the signed payload so it
	// cannot be altered by an intermediary.
	TTLSeconds int `json:"ttl_seconds,omitempty"`
}

// PeerRecord is the full record published to the DHT at /node/{DID}.
// The embedded PeerRecordPayload is what Sign/Verify operate on.
type PeerRecord struct {
	PeerRecordPayload
	PeerID        string           `json:"peer_id"`
	APIUrl        string           `json:"api_url"`
	StreamAddress string           `json:"stream_address"`
	NATSAddress   string           `json:"nats_address"`
	WalletAddress string           `json:"wallet_address"`
	Location      *pp.PeerLocation `json:"location,omitempty"`
	Signature     []byte           `json:"signature"`
}

// Sign loads this node's private key from disk and signs the embedded
// PeerRecordPayload, storing the result in p.Signature.
func (p *PeerRecord) Sign() error {
	priv, err := tools.LoadKeyFromFilePrivate()
	if err != nil {
		return err
	}
	// The signature covers only the payload; marshal it deterministically.
	payload, err := json.Marshal(p.PeerRecordPayload)
	if err != nil {
		return err
	}
	sig, err := common.Sign(priv, payload)
	p.Signature = sig
	return err
}

// Verify checks p.Signature against the public key embedded in the record.
// It returns the unmarshalled public key so callers can reuse it (e.g. for
// ExtractPeer) without a second unmarshal.
func (p *PeerRecord) Verify() (crypto.PubKey, error) {
	pubKey, err := crypto.UnmarshalPublicKey(p.PubKey) // retrieve pub key in message
	if err != nil {
		return pubKey, err
	}
	payload, err := json.Marshal(p.PeerRecordPayload)
	if err != nil {
		return pubKey, err
	}
	// Treat a verification error the same as a mismatch: the record is not
	// provably signed by the embedded key.
	ok, err := pubKey.Verify(payload, p.Signature)
	if err != nil || !ok {
		return pubKey, errors.New("invalid signature")
	}
	return pubKey, nil
}

// ExtractPeer converts the DHT record into an oc-lib Peer model.
//
// ourkey/key are compared to decide whether the record describes this very
// node (Relation = SELF). The first return value reports exactly that.
// An expired record yields a nil Peer and a non-nil error.
func (pr *PeerRecord) ExtractPeer(ourkey string, key string, pubKey crypto.PubKey) (bool, *pp.Peer, error) {
	pubBytes, err := crypto.MarshalPublicKey(pubKey)
	if err != nil {
		return false, nil, err
	}
	rel := pp.NONE
	if ourkey == key {
		// The looked-up key is our own: this record describes us.
		rel = pp.SELF
	}
	p := &pp.Peer{
		AbstractObject: utils.AbstractObject{
			UUID: pr.DID,
			Name: pr.Name,
		},
		Relation:      rel,
		PeerID:        pr.PeerID,
		PublicKey:     base64.StdEncoding.EncodeToString(pubBytes),
		APIUrl:        pr.APIUrl,
		StreamAddress: pr.StreamAddress,
		NATSAddress:   pr.NATSAddress,
		WalletAddress: pr.WalletAddress,
		Location:      pr.Location,
	}
	isSelf := rel == pp.SELF
	// An expired record means the peer stopped refreshing it — treat as offline.
	if time.Now().UTC().After(pr.ExpiryDate) {
		return isSelf, nil, errors.New("peer " + key + " is offline")
	}
	return isSelf, p, nil
}

// TombstonePayload is the signed body of a delete request.
// Only the owner's private key can produce a valid signature over this payload.
type TombstonePayload struct { DID string `json:"did"` PeerID string `json:"peer_id"` DeletedAt time.Time `json:"deleted_at"` } // TombstoneRecord is stored in the DHT at /node/{DID} to signal that a peer // has voluntarily left the network. The Tombstone bool field acts as a // discriminator so validators can distinguish it from a live PeerRecord. type TombstoneRecord struct { TombstonePayload PubKey []byte `json:"pub_key"` Tombstone bool `json:"tombstone"` Signature []byte `json:"signature"` } func (ts *TombstoneRecord) Verify() (crypto.PubKey, error) { pubKey, err := crypto.UnmarshalPublicKey(ts.PubKey) if err != nil { return nil, err } payload, _ := json.Marshal(ts.TombstonePayload) if ok, _ := pubKey.Verify(payload, ts.Signature); !ok { return nil, errors.New("invalid tombstone signature") } return pubKey, nil } // isTombstone returns true if data is a valid, well-formed TombstoneRecord. func isTombstone(data []byte) bool { var ts TombstoneRecord return json.Unmarshal(data, &ts) == nil && ts.Tombstone } type GetValue struct { Key string `json:"key"` PeerID string `json:"peer_id,omitempty"` } type GetResponse struct { Found bool `json:"found"` Records map[string]PeerRecord `json:"records,omitempty"` } func (ix *IndexerService) genKey(did string) string { return "/node/" + did } func (ix *IndexerService) genPIDKey(peerID string) string { return "/pid/" + peerID } // isPeerKnown is the stream-level gate: returns true if pid is allowed. // Check order (fast → slow): // 1. In-memory stream records — currently heartbeating to this indexer. // 2. Local DB by peer_id — known peer, blacklist enforced here. // 3. DHT /pid/{peerID} → /node/{DID} — registered on any indexer. // // ProtocolHeartbeat and ProtocolPublish handlers do NOT call this — they are // the streams through which a node first makes itself known. func (ix *IndexerService) isPeerKnown(pid lpp.ID) bool { // 1. Fast path: active heartbeat session. 
ix.StreamMU.RLock() _, active := ix.StreamRecords[common.ProtocolHeartbeat][pid] ix.StreamMU.RUnlock() if active { return true } // 2. Local DB: known peer (handles blacklist). access := oclib.NewRequestAdmin(oclib.LibDataEnum(oclib.PEER), nil) results := access.Search(&dbs.Filters{ And: map[string][]dbs.Filter{ "peer_id": {{Operator: dbs.EQUAL.String(), Value: pid.String()}}, }, }, pid.String(), false) for _, item := range results.Data { p, ok := item.(*pp.Peer) if !ok || p.PeerID != pid.String() { continue } return p.Relation != pp.BLACKLIST } // 3. DHT lookup by peer_id. ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second) did, err := ix.DHT.GetValue(ctx, ix.genPIDKey(pid.String())) cancel() if err != nil || len(did) == 0 { return false } ctx2, cancel2 := context.WithTimeout(context.Background(), 3*time.Second) val, err := ix.DHT.GetValue(ctx2, ix.genKey(string(did))) cancel2() return err == nil && !isTombstone(val) } func (ix *IndexerService) initNodeHandler() { logger := oclib.GetLogger() logger.Info().Msg("Init Node Handler") // Each heartbeat from a node carries a freshly signed PeerRecord. // Republish it to the DHT so the record never expires as long as the node // is alive — no separate publish stream needed from the node side. ix.AfterHeartbeat = func(hb *common.Heartbeat) { // Priority 1: use the fresh signed PeerRecord embedded in the heartbeat. // Each heartbeat tick, the node re-signs with ExpiryDate = now+2min, so // this record is always fresh. Fetching from DHT would give a stale expiry. var rec PeerRecord if len(hb.Record) > 0 { if err := json.Unmarshal(hb.Record, &rec); err != nil { logger.Warn().Err(err).Msg("indexer: heartbeat embedded record unmarshal failed") return } } else { // Fallback: node didn't embed a record yet (first heartbeat before claimInfo). // Fetch from DHT using the DID resolved by HandleHeartbeat. 
ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second) res, err := ix.DHT.GetValue(ctx2, ix.genKey(hb.DID)) cancel2() if err != nil { logger.Warn().Err(err).Str("did", hb.DID).Msg("indexer: DHT fetch for refresh failed") return } if err := json.Unmarshal(res, &rec); err != nil { logger.Warn().Err(err).Str("did", hb.DID).Msg("indexer: heartbeat record unmarshal failed") return } } if _, err := rec.Verify(); err != nil { logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: heartbeat record signature invalid") return } // Don't republish if a tombstone was recently stored for this DID: // the peer explicitly left and we must not re-animate their record. ix.deletedDIDsMu.Lock() if t, ok := ix.deletedDIDs[rec.DID]; ok { if time.Since(t) < tombstoneTTL { ix.deletedDIDsMu.Unlock() return } // tombstoneTTL elapsed — peer is allowed to re-register. delete(ix.deletedDIDs, rec.DID) } ix.deletedDIDsMu.Unlock() // Keep StreamRecord.Record in sync so BuildHeartbeatResponse always // sees a populated PeerRecord (Name, DID, etc.) regardless of whether // handleNodePublish ran before or after the heartbeat stream was opened. if pid, err := lpp.Decode(rec.PeerID); err == nil { ix.StreamMU.Lock() if srec, ok := ix.StreamRecords[common.ProtocolHeartbeat][pid]; ok { srec.Record = rec } ix.StreamMU.Unlock() } data, err := json.Marshal(rec) if err != nil { return } ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx, ix.genKey(rec.DID), data); err != nil { logger.Warn().Err(err).Str("did", rec.DID).Msg("indexer: DHT refresh /node/ failed") } cancel() // /pid/ is written unconditionally — the gater queries by PeerID and this // index must stay fresh regardless of whether the /node/ write succeeded. 
if rec.PeerID != "" { ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(rec.PeerID), []byte(rec.DID)); err != nil { logger.Warn().Err(err).Str("pid", rec.PeerID).Msg("indexer: DHT refresh /pid/ failed") } cancel2() } } ix.Host.SetStreamHandler(common.ProtocolHeartbeat, ix.HandleHeartbeat) ix.Host.SetStreamHandler(common.ProtocolPublish, ix.handleNodePublish) ix.Host.SetStreamHandler(common.ProtocolGet, ix.handleNodeGet) ix.Host.SetStreamHandler(common.ProtocolDelete, ix.handleNodeDelete) ix.Host.SetStreamHandler(common.ProtocolIndirectProbe, ix.handleIndirectProbe) ix.Host.SetStreamHandler(common.ProtocolIndexerCandidates, ix.handleCandidateRequest) ix.initSearchHandlers() } // handleCandidateRequest responds to a node's consensus candidate request. // Returns a random sample of indexers from the local DHT cache. func (ix *IndexerService) handleCandidateRequest(s network.Stream) { defer s.Close() if !ix.isPeerKnown(s.Conn().RemotePeer()) { logger := oclib.GetLogger() logger.Warn().Str("peer", s.Conn().RemotePeer().String()).Msg("[candidates] unknown peer, rejecting stream") s.Reset() return } s.SetDeadline(time.Now().Add(5 * time.Second)) var req common.IndexerCandidatesRequest if err := json.NewDecoder(s).Decode(&req); err != nil { return } if req.Count <= 0 || req.Count > 10 { req.Count = 3 } ix.dhtCacheMu.RLock() cache := make([]dhtCacheEntry, len(ix.dhtCache)) copy(cache, ix.dhtCache) ix.dhtCacheMu.RUnlock() // Shuffle for randomness: each voter offers a different subset. 
rand.Shuffle(len(cache), func(i, j int) { cache[i], cache[j] = cache[j], cache[i] }) candidates := make([]lpp.AddrInfo, 0, req.Count) for _, e := range cache { if len(candidates) >= req.Count { break } candidates = append(candidates, e.AI) } json.NewEncoder(s).Encode(common.IndexerCandidatesResponse{Candidates: candidates}) } func (ix *IndexerService) handleNodePublish(s network.Stream) { defer s.Close() logger := oclib.GetLogger() remotePeer := s.Conn().RemotePeer() if err := ix.behavior.RecordPublish(remotePeer); err != nil { logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("publish refused") s.Reset() return } for { var rec PeerRecord if err := json.NewDecoder(s).Decode(&rec); err != nil { logger.Err(err) if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) || strings.Contains(err.Error(), "reset") || strings.Contains(err.Error(), "closed") || strings.Contains(err.Error(), "too many connections") { return } continue } if _, err := rec.Verify(); err != nil { ix.behavior.RecordBadSignature(remotePeer) logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("bad signature on publish") return } if err := ix.behavior.CheckIdentity(remotePeer, rec.DID); err != nil { logger.Warn().Err(err).Msg("identity mismatch on publish") s.Reset() return } if rec.PeerID == "" || rec.ExpiryDate.Before(time.Now().UTC()) { logger.Err(errors.New(rec.PeerID + " is expired.")) return } pid, err := lpp.Decode(rec.PeerID) if err != nil { return } ix.StreamMU.Lock() defer ix.StreamMU.Unlock() if ix.StreamRecords[common.ProtocolHeartbeat] == nil { ix.StreamRecords[common.ProtocolHeartbeat] = map[lpp.ID]*common.StreamRecord[PeerRecord]{} } streams := ix.StreamRecords[common.ProtocolHeartbeat] if srec, ok := streams[pid]; ok { srec.DID = rec.DID srec.Record = rec srec.HeartbeatStream.UptimeTracker.LastSeen = time.Now().UTC() } key := ix.genKey(rec.DID) data, err := json.Marshal(rec) if err != nil { logger.Err(err) return } ctx, cancel := 
context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx, key, data); err != nil { logger.Err(err) cancel() return } cancel() // Secondary index: /pid/ → DID, so peers can resolve by libp2p PeerID. if rec.PeerID != "" { ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(rec.PeerID), []byte(rec.DID)); err != nil { logger.Err(err).Str("pid", rec.PeerID).Msg("indexer: failed to write pid index") } cancel2() } return } } func (ix *IndexerService) handleNodeGet(s network.Stream) { defer s.Close() logger := oclib.GetLogger() remotePeer := s.Conn().RemotePeer() if !ix.isPeerKnown(remotePeer) { logger.Warn().Str("peer", remotePeer.String()).Msg("[get] unknown peer, rejecting stream") s.Reset() return } if err := ix.behavior.RecordGet(remotePeer); err != nil { logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("get refused") s.Reset() return } for { var req GetValue if err := json.NewDecoder(s).Decode(&req); err != nil { if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) || strings.Contains(err.Error(), "reset") || strings.Contains(err.Error(), "closed") || strings.Contains(err.Error(), "too many connections") { return } logger.Err(err) continue } resp := GetResponse{Found: false, Records: map[string]PeerRecord{}} // Resolve DID key: by PeerID (secondary /pid/ index) or direct DID key. 
var key string if req.PeerID != "" { pidCtx, pidCancel := context.WithTimeout(context.Background(), 5*time.Second) did, err := ix.DHT.GetValue(pidCtx, ix.genPIDKey(req.PeerID)) pidCancel() if err == nil { key = string(did) } } else { key = req.Key } if key != "" { ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) c, err := ix.DHT.GetValue(ctx, ix.genKey(key)) cancel() if err == nil && !isTombstone(c) { var rec PeerRecord if json.Unmarshal(c, &rec) == nil { resp.Records[rec.PeerID] = rec } } else if err != nil { logger.Err(err).Msg("Failed to fetch PeerRecord from DHT " + key) } } resp.Found = len(resp.Records) > 0 _ = json.NewEncoder(s).Encode(resp) break } } // handleNodeDelete processes a signed delete (tombstone) request from a peer. // It verifies that the request is: // - marked as a tombstone // - recent (within 5 minutes, preventing replay attacks) // - sent by the actual peer whose record is being deleted (PeerID == remotePeer) // - signed by the matching private key // // On success it stores the tombstone in the DHT, evicts the peer from the local // stream records, and marks the DID in deletedDIDs so AfterHeartbeat cannot // accidentally republish the record during the tombstoneTTL window. 
func (ix *IndexerService) handleNodeDelete(s network.Stream) { defer s.Close() logger := oclib.GetLogger() remotePeer := s.Conn().RemotePeer() s.SetDeadline(time.Now().Add(10 * time.Second)) var ts TombstoneRecord if err := json.NewDecoder(s).Decode(&ts); err != nil || !ts.Tombstone { s.Reset() return } if ts.PeerID == "" || ts.DID == "" { s.Reset() return } if time.Since(ts.DeletedAt) > 5*time.Minute { logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] stale tombstone rejected") s.Reset() return } if ts.PeerID != remotePeer.String() { logger.Warn().Str("peer", remotePeer.String()).Msg("[delete] tombstone PeerID mismatch") s.Reset() return } if _, err := ts.Verify(); err != nil { logger.Warn().Err(err).Str("peer", remotePeer.String()).Msg("[delete] invalid tombstone signature") s.Reset() return } // Mark DID as deleted in-memory before writing to DHT so AfterHeartbeat // cannot win a race and republish the live record on top of the tombstone. ix.deletedDIDsMu.Lock() ix.deletedDIDs[ts.DID] = ts.DeletedAt ix.deletedDIDsMu.Unlock() data, _ := json.Marshal(ts) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx, ix.genKey(ts.DID), data); err != nil { logger.Warn().Err(err).Str("did", ts.DID).Msg("[delete] DHT write tombstone failed") } cancel() // Invalidate the /pid/ secondary index so isPeerKnown returns false quickly. ctx2, cancel2 := context.WithTimeout(context.Background(), 10*time.Second) if err := ix.DHT.PutValue(ctx2, ix.genPIDKey(ts.PeerID), []byte("")); err != nil { logger.Warn().Err(err).Str("pid", ts.PeerID).Msg("[delete] DHT clear pid failed") } cancel2() // Evict from active stream records. 
if pid, err := lpp.Decode(ts.PeerID); err == nil { ix.StreamMU.Lock() delete(ix.StreamRecords[common.ProtocolHeartbeat], pid) ix.StreamMU.Unlock() } logger.Info().Str("did", ts.DID).Str("peer", ts.PeerID).Msg("[delete] tombstone stored, peer evicted") } // handleIndirectProbe is the SWIM inter-indexer probe handler. // A node opens this stream toward a live indexer to ask: "can you reach peer X?" // The indexer attempts a ProtocolBandwidthProbe to X and reports back. // This is the only protocol that indexers use to communicate with each other; // no persistent inter-indexer connections are maintained. func (ix *IndexerService) handleIndirectProbe(s network.Stream) { defer s.Close() s.SetDeadline(time.Now().Add(10 * time.Second)) var req common.IndirectProbeRequest if err := json.NewDecoder(s).Decode(&req); err != nil { s.Reset() return } respond := func(reachable bool, latencyMs int64) { json.NewEncoder(s).Encode(common.IndirectProbeResponse{ Reachable: reachable, LatencyMs: latencyMs, }) } // Connect to target if not already connected. ctx, cancel := context.WithTimeout(context.Background(), 6*time.Second) defer cancel() if ix.Host.Network().Connectedness(req.Target.ID) != network.Connected { if err := ix.Host.Connect(ctx, req.Target); err != nil { respond(false, 0) return } } // Open a bandwidth probe stream — already registered on all nodes/indexers. start := time.Now() ps, err := ix.Host.NewStream(ctx, req.Target.ID, common.ProtocolBandwidthProbe) if err != nil { respond(false, 0) return } defer ps.Reset() ps.SetDeadline(time.Now().Add(3 * time.Second)) ps.Write([]byte("ping")) buf := make([]byte, 4) _, err = ps.Read(buf) latency := time.Since(start).Milliseconds() respond(err == nil, latency) }