Collector implementation
This commit is contained in:
393
cmd/collector/store.go
Normal file
393
cmd/collector/store.go
Normal file
@@ -0,0 +1,393 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"container/heap"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
pb "git.ipng.ch/ipng/nginx-logtail/proto/logtailpb"
|
||||
)
|
||||
|
||||
// Tuning knobs for the in-memory store. Together the ring sizes and the
// merge cadence define retention: 1 hour at 1-minute resolution (fine ring)
// and 24 hours at 5-minute resolution (coarse ring).
const (
	liveMapCap = 100_000 // hard cap on live map entries

	fineRingSize   = 60  // 60 × 1-min buckets → 1 hour
	coarseRingSize = 288 // 288 × 5-min buckets → 24 hours

	fineTopK   = 50_000 // entries kept per fine snapshot
	coarseTopK = 5_000  // entries kept per coarse snapshot

	coarseEvery = 5 // merge every N fine ticks into one coarse bucket
)
|
||||
|
||||
// Tuple4 is the four-dimensional key under which requests are counted:
// one counter per (website, client prefix, request URI, HTTP status).
type Tuple4 struct {
	Website string
	Prefix  string
	URI     string
	Status  string
}
|
||||
|
||||
// Entry is a labelled count used in snapshots and query results.
// In ring snapshots Label is an encoded Tuple4 (see encodeTuple); in
// grouped query results it is a single dimension value (see dimensionLabel).
type Entry struct {
	Label string
	Count int64
}
|
||||
|
||||
// snapshot is one time bucket: the top-K entries observed during that
// bucket, sorted descending by Count, stamped with the rotation time.
type snapshot struct {
	Timestamp time.Time
	Entries   []Entry // sorted descending by Count
}
|
||||
|
||||
// Store holds the live (current-minute) counter map and two snapshot ring
// buffers: a fine ring of 1-minute buckets (last hour) and a coarse ring
// of 5-minute buckets (last 24 hours).
//
// Concurrency model: the live map is owned exclusively by the Run
// goroutine and needs no lock; the rings are written by Run under mu and
// read by query methods under RLock; the subscriber set has its own
// mutex (subMu) so broadcast never holds mu.
type Store struct {
	source string

	// live map — written only by Run goroutine, no locking needed for writes
	live    map[Tuple4]int64
	liveLen int // tracked separately to avoid map len() call in hot path

	// ring buffers — protected by mu for reads (Run goroutine writes)
	mu         sync.RWMutex
	fineRing   [fineRingSize]snapshot
	fineHead   int // index of next write slot
	fineFilled int // how many slots are populated

	coarseRing   [coarseRingSize]snapshot
	coarseHead   int
	coarseFilled int
	fineTick     int // counts fine ticks mod coarseEvery

	// fan-out to StreamSnapshots subscribers
	subMu sync.Mutex
	subs  map[chan snapshot]struct{}
}
|
||||
|
||||
func NewStore(source string) *Store {
|
||||
return &Store{
|
||||
source: source,
|
||||
live: make(map[Tuple4]int64, liveMapCap),
|
||||
subs: make(map[chan snapshot]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// ingest records one log record into the live map, incrementing the
// counter for its 4-tuple key. Once liveMapCap distinct keys exist, new
// keys are dropped (bounding memory) while existing keys keep counting.
// Must only be called from the Run goroutine — the map is unlocked.
func (s *Store) ingest(r LogRecord) {
	key := Tuple4{r.Website, r.ClientPrefix, r.URI, r.Status}
	if _, exists := s.live[key]; !exists {
		if s.liveLen >= liveMapCap {
			return // drop new keys when at cap
		}
		s.liveLen++
	}
	s.live[key]++
}
|
||||
|
||||
// rotate snapshots the live map into the fine ring, and every coarseEvery ticks
// also merges into the coarse ring. Called once per minute by Run — the
// sole writer — so the live map can be swapped without locking; only the
// ring updates run under mu.
func (s *Store) rotate(now time.Time) {
	// Build the fine snapshot before taking the lock: topK is the
	// expensive step and only reads the Run-goroutine-owned live map.
	fine := topK(s.live, fineTopK, now)

	s.mu.Lock()
	s.fineRing[s.fineHead] = fine
	s.fineHead = (s.fineHead + 1) % fineRingSize
	if s.fineFilled < fineRingSize {
		s.fineFilled++
	}

	s.fineTick++
	if s.fineTick >= coarseEvery {
		s.fineTick = 0
		// mergeFineBuckets reads the fine ring, so it must run with mu held.
		coarse := s.mergeFineBuckets(coarseTopK, now)
		s.coarseRing[s.coarseHead] = coarse
		s.coarseHead = (s.coarseHead + 1) % coarseRingSize
		if s.coarseFilled < coarseRingSize {
			s.coarseFilled++
		}
	}
	s.mu.Unlock()

	// reset live map for the next minute's counts
	s.live = make(map[Tuple4]int64, liveMapCap)
	s.liveLen = 0

	// notify subscribers — must be outside mu to avoid deadlock
	s.broadcast(fine)
}
|
||||
|
||||
// mergeFineBuckets merges the last coarseEvery fine snapshots into one.
|
||||
// Called with mu held.
|
||||
func (s *Store) mergeFineBuckets(k int, now time.Time) snapshot {
|
||||
merged := make(map[string]int64)
|
||||
count := coarseEvery
|
||||
if count > s.fineFilled {
|
||||
count = s.fineFilled
|
||||
}
|
||||
for i := 0; i < count; i++ {
|
||||
idx := (s.fineHead - 1 - i + fineRingSize) % fineRingSize
|
||||
for _, e := range s.fineRing[idx].Entries {
|
||||
merged[e.Label] += e.Count
|
||||
}
|
||||
}
|
||||
entries := topKFromMap(merged, k)
|
||||
return snapshot{Timestamp: now, Entries: entries}
|
||||
}
|
||||
|
||||
// QueryTopN answers a TopN request from the ring buffers.
|
||||
func (s *Store) QueryTopN(filter *pb.Filter, groupBy pb.GroupBy, n int, window pb.Window) []Entry {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
buckets, count := s.bucketsForWindow(window)
|
||||
|
||||
// Accumulate grouped counts
|
||||
grouped := make(map[string]int64)
|
||||
for i := 0; i < count; i++ {
|
||||
idx := (buckets.head - 1 - i + buckets.size) % buckets.size
|
||||
snap := buckets.ring[idx]
|
||||
for _, e := range snap.Entries {
|
||||
t := labelTuple(e.Label)
|
||||
if !matchesFilter(t, filter) {
|
||||
continue
|
||||
}
|
||||
grouped[dimensionLabel(t, groupBy)] += e.Count
|
||||
}
|
||||
}
|
||||
|
||||
return topKFromMap(grouped, n)
|
||||
}
|
||||
|
||||
// QueryTrend answers a Trend request from the ring buffers.
|
||||
func (s *Store) QueryTrend(filter *pb.Filter, window pb.Window) []trendPoint {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
buckets, count := s.bucketsForWindow(window)
|
||||
points := make([]trendPoint, count)
|
||||
for i := 0; i < count; i++ {
|
||||
// oldest first
|
||||
idx := (buckets.head - count + i + buckets.size) % buckets.size
|
||||
snap := buckets.ring[idx]
|
||||
var total int64
|
||||
for _, e := range snap.Entries {
|
||||
if matchesFilter(labelTuple(e.Label), filter) {
|
||||
total += e.Count
|
||||
}
|
||||
}
|
||||
points[i] = trendPoint{Timestamp: snap.Timestamp, Count: total}
|
||||
}
|
||||
return points
|
||||
}
|
||||
|
||||
// trendPoint is one bucket of a trend series: the bucket's timestamp and
// the total matching count within it.
type trendPoint struct {
	Timestamp time.Time
	Count     int64
}
|
||||
|
||||
// ringView is a helper to treat fine and coarse rings uniformly: a copied
// slice of the ring plus its head position and size.
type ringView struct {
	ring []snapshot
	head int // index of the next write slot; newest populated slot is head-1
	size int
}
|
||||
|
||||
func (s *Store) bucketsForWindow(window pb.Window) (ringView, int) {
|
||||
switch window {
|
||||
case pb.Window_W1M:
|
||||
return s.fineView(), min(1, s.fineFilled)
|
||||
case pb.Window_W5M:
|
||||
return s.fineView(), min(5, s.fineFilled)
|
||||
case pb.Window_W15M:
|
||||
return s.fineView(), min(15, s.fineFilled)
|
||||
case pb.Window_W60M:
|
||||
return s.fineView(), min(60, s.fineFilled)
|
||||
case pb.Window_W6H:
|
||||
return s.coarseView(), min(72, s.coarseFilled) // 72 × 5-min = 6h
|
||||
case pb.Window_W24H:
|
||||
return s.coarseView(), min(288, s.coarseFilled)
|
||||
default:
|
||||
return s.fineView(), min(5, s.fineFilled)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Store) fineView() ringView {
|
||||
ring := make([]snapshot, fineRingSize)
|
||||
copy(ring, s.fineRing[:])
|
||||
return ringView{ring: ring, head: s.fineHead, size: fineRingSize}
|
||||
}
|
||||
|
||||
func (s *Store) coarseView() ringView {
|
||||
ring := make([]snapshot, coarseRingSize)
|
||||
copy(ring, s.coarseRing[:])
|
||||
return ringView{ring: ring, head: s.coarseHead, size: coarseRingSize}
|
||||
}
|
||||
|
||||
// Subscribe returns a channel that receives a copy of each fine snapshot
|
||||
// after rotation. Buffer of 4 so a slow subscriber doesn't block rotation.
|
||||
func (s *Store) Subscribe() chan snapshot {
|
||||
ch := make(chan snapshot, 4)
|
||||
s.subMu.Lock()
|
||||
s.subs[ch] = struct{}{}
|
||||
s.subMu.Unlock()
|
||||
return ch
|
||||
}
|
||||
|
||||
// Unsubscribe removes and closes the subscriber channel.
|
||||
func (s *Store) Unsubscribe(ch chan snapshot) {
|
||||
s.subMu.Lock()
|
||||
delete(s.subs, ch)
|
||||
s.subMu.Unlock()
|
||||
close(ch)
|
||||
}
|
||||
|
||||
func (s *Store) broadcast(snap snapshot) {
|
||||
s.subMu.Lock()
|
||||
defer s.subMu.Unlock()
|
||||
for ch := range s.subs {
|
||||
select {
|
||||
case ch <- snap:
|
||||
default:
|
||||
// subscriber is slow; drop rather than block rotation
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run is the single goroutine that reads from ch, ingests records, and rotates
|
||||
// the ring buffer every minute. Exits when ch is closed.
|
||||
func (s *Store) Run(ch <-chan LogRecord) {
|
||||
ticker := time.NewTicker(time.Minute)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case r, ok := <-ch:
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
s.ingest(r)
|
||||
case t := <-ticker.C:
|
||||
s.rotate(t)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- heap-based top-K helpers ---
|
||||
|
||||
// entryHeap is a min-heap of Entry ordered by Count. It is used to keep
// the k largest entries while scanning a map: push every entry, then pop
// the minimum whenever the heap grows past k.
type entryHeap []Entry

func (h entryHeap) Len() int           { return len(h) }
func (h entryHeap) Less(i, j int) bool { return h[i].Count < h[j].Count } // min-heap
func (h entryHeap) Swap(i, j int)      { h[i], h[j] = h[j], h[i] }

// Push appends x; called by container/heap, not directly.
func (h *entryHeap) Push(x interface{}) { *h = append(*h, x.(Entry)) }

// Pop removes and returns the last element; container/heap has already
// swapped the heap minimum into that position before calling it.
func (h *entryHeap) Pop() interface{} {
	old := *h
	n := len(old)
	x := old[n-1]
	*h = old[:n-1]
	return x
}
|
||||
|
||||
// topK extracts the top-k entries from a Tuple4 map, labelled as "w|p|u|s".
|
||||
func topK(m map[Tuple4]int64, k int, ts time.Time) snapshot {
|
||||
// Build a string-keyed map for topKFromMap
|
||||
flat := make(map[string]int64, len(m))
|
||||
for t, c := range m {
|
||||
flat[encodeTuple(t)] = c
|
||||
}
|
||||
return snapshot{Timestamp: ts, Entries: topKFromMap(flat, k)}
|
||||
}
|
||||
|
||||
// topKFromMap selects the top-k entries from a string→count map, sorted desc.
|
||||
func topKFromMap(m map[string]int64, k int) []Entry {
|
||||
if k <= 0 {
|
||||
return nil
|
||||
}
|
||||
h := make(entryHeap, 0, k+1)
|
||||
for label, count := range m {
|
||||
heap.Push(&h, Entry{Label: label, Count: count})
|
||||
if h.Len() > k {
|
||||
heap.Pop(&h) // evict smallest
|
||||
}
|
||||
}
|
||||
result := make([]Entry, h.Len())
|
||||
for i := len(result) - 1; i >= 0; i-- {
|
||||
result[i] = heap.Pop(&h).(Entry)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// --- label encoding: "website\x00prefix\x00uri\x00status" ---
|
||||
|
||||
func encodeTuple(t Tuple4) string {
|
||||
return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status
|
||||
}
|
||||
|
||||
func labelTuple(label string) Tuple4 {
|
||||
parts := splitN(label, '\x00', 4)
|
||||
if len(parts) != 4 {
|
||||
return Tuple4{}
|
||||
}
|
||||
return Tuple4{parts[0], parts[1], parts[2], parts[3]}
|
||||
}
|
||||
|
||||
// splitN splits s on sep into at most n fields; the final field keeps any
// remaining separators (like strings.SplitN, but for a single byte and
// without importing strings). Fewer fields are returned when s contains
// fewer separators.
func splitN(s string, sep byte, n int) []string {
	fields := make([]string, 0, n)
	for len(fields) < n-1 {
		cut := -1
		for j := 0; j < len(s); j++ {
			if s[j] == sep {
				cut = j
				break
			}
		}
		if cut < 0 {
			break
		}
		fields = append(fields, s[:cut])
		s = s[cut+1:]
	}
	return append(fields, s)
}
|
||||
|
||||
// indexOf returns the offset of the first byte in s equal to b, or -1 if
// absent. A plain byte scan (not a rune range) so every byte position is
// examined, matching strings.IndexByte semantics without the import.
func indexOf(s string, b byte) int {
	n := len(s)
	for pos := 0; pos < n; pos++ {
		if b == s[pos] {
			return pos
		}
	}
	return -1
}
|
||||
|
||||
func matchesFilter(t Tuple4, f *pb.Filter) bool {
|
||||
if f == nil {
|
||||
return true
|
||||
}
|
||||
if f.Website != nil && t.Website != f.GetWebsite() {
|
||||
return false
|
||||
}
|
||||
if f.ClientPrefix != nil && t.Prefix != f.GetClientPrefix() {
|
||||
return false
|
||||
}
|
||||
if f.HttpRequestUri != nil && t.URI != f.GetHttpRequestUri() {
|
||||
return false
|
||||
}
|
||||
if f.HttpResponse != nil && t.Status != fmt.Sprint(f.GetHttpResponse()) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func dimensionLabel(t Tuple4, g pb.GroupBy) string {
|
||||
switch g {
|
||||
case pb.GroupBy_WEBSITE:
|
||||
return t.Website
|
||||
case pb.GroupBy_CLIENT_PREFIX:
|
||||
return t.Prefix
|
||||
case pb.GroupBy_REQUEST_URI:
|
||||
return t.URI
|
||||
case pb.GroupBy_HTTP_RESPONSE:
|
||||
return t.Status
|
||||
default:
|
||||
return t.Website
|
||||
}
|
||||
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||||
Reference in New Issue
Block a user