Refactor docs; Add 'ipng_source_tag', add udp listener for nginx-ipng-stats plugin
@@ -25,7 +25,5 @@ It's written in Go, and is meant to deploy collectors on any number of webserver
aggregation and frontend logic. It's released under [[APACHE](LICENSE)] license. It can be run
either as `systemd` units, or in Docker, or any combination of the two.

-See [[User Guide](docs/USERGUIDE.md)] or [[DETAILS](docs/DETAILS.md)] for more information.
-
-The [[docs/](docs/)] directory contains extensive planning information which shows how Claude
-Code single-shot implemented the whole system in March 2026.
+See [[User Guide](docs/user-guide.md)] for operator-facing documentation, or
+[[Design](docs/design.md)] for the normative requirements and architectural rationale.
@@ -24,6 +24,7 @@ type sharedFlags struct {
    uriReNeg  string // RE2 regex exclusion against request URI
    isTor     string // "", "1" / "!=0" (TOR only), "0" / "!=1" (non-TOR only)
    asn       string // expression: "12345", "!=65000", ">=1000", etc.
+   sourceTag string // exact ipng_source_tag match
}

// bindShared registers the shared flags on fs and returns a pointer to the
@@ -42,6 +43,7 @@ func bindShared(fs *flag.FlagSet) (*sharedFlags, *string) {
    fs.StringVar(&sf.uriReNeg, "uri-re-neg", "", "filter: RE2 regex exclusion against request URI")
    fs.StringVar(&sf.isTor, "is-tor", "", "filter: TOR traffic (1 or !=0 = TOR only; 0 or !=1 = non-TOR only)")
    fs.StringVar(&sf.asn, "asn", "", "filter: ASN expression (12345, !=65000, >=1000, <64512, …)")
+   fs.StringVar(&sf.sourceTag, "source-tag", "", "filter: exact ipng_source_tag match (e.g. direct, cdn, …)")
    return sf, target
}

@@ -64,7 +66,7 @@ func parseTargets(s string) []string {
}

func buildFilter(sf *sharedFlags) *pb.Filter {
-   if sf.website == "" && sf.prefix == "" && sf.uri == "" && sf.status == "" && sf.websiteRe == "" && sf.uriRe == "" && sf.websiteReNeg == "" && sf.uriReNeg == "" && sf.isTor == "" && sf.asn == "" {
+   if sf.website == "" && sf.prefix == "" && sf.uri == "" && sf.status == "" && sf.websiteRe == "" && sf.uriRe == "" && sf.websiteReNeg == "" && sf.uriReNeg == "" && sf.isTor == "" && sf.asn == "" && sf.sourceTag == "" {
        return nil
    }
    f := &pb.Filter{}
@@ -118,6 +120,9 @@ func buildFilter(sf *sharedFlags) *pb.Filter {
        f.AsnNumber = &n
        f.AsnOp = op
    }
+   if sf.sourceTag != "" {
+       f.IpngSourceTag = &sf.sourceTag
+   }
    return f
}

@@ -152,8 +157,12 @@ func parseGroupBy(s string) pb.GroupBy {
        return pb.GroupBy_REQUEST_URI
    case "status":
        return pb.GroupBy_HTTP_RESPONSE
    case "asn":
        return pb.GroupBy_ASN_NUMBER
+   case "source_tag", "source-tag":
+       return pb.GroupBy_SOURCE_TAG
    default:
-       fmt.Fprintf(os.Stderr, "--group-by: unknown value %q; valid: website prefix uri status\n", s)
+       fmt.Fprintf(os.Stderr, "--group-by: unknown value %q; valid: website prefix uri status asn source_tag\n", s)
        os.Exit(1)
+       panic("unreachable")
    }
}

@@ -22,11 +22,14 @@ Subcommand flags (all subcommands):
  --status EXPR        filter: HTTP status expression (200, !=200, >=400, <500, …)
  --website-re REGEX   filter: RE2 regex against website
  --uri-re REGEX       filter: RE2 regex against request URI
+ --is-tor EXPR        filter: TOR (1/!=0 = only, 0/!=1 = none)
+ --asn EXPR           filter: ASN expression (12345, !=65000, …)
+ --source-tag STRING  filter: exact ipng_source_tag match

topn flags:
  --n INT              number of entries (default 10)
  --window STR         1m 5m 15m 60m 6h 24h (default 5m)
- --group-by STR       website prefix uri status (default website)
+ --group-by STR       website prefix uri status asn source_tag (default website)

trend flags:
  --window STR         1m 5m 15m 60m 6h 24h (default 5m)

@@ -28,13 +28,17 @@ func main() {
    v4prefix := flag.Int("v4prefix", envOrInt("COLLECTOR_V4PREFIX", 24), "IPv4 prefix length for client bucketing (env: COLLECTOR_V4PREFIX)")
    v6prefix := flag.Int("v6prefix", envOrInt("COLLECTOR_V6PREFIX", 48), "IPv6 prefix length for client bucketing (env: COLLECTOR_V6PREFIX)")
    scanInterval := flag.Duration("scan-interval", envOrDuration("COLLECTOR_SCAN_INTERVAL", 10*time.Second), "how often to rescan glob patterns for new/removed files (env: COLLECTOR_SCAN_INTERVAL)")
+   logtailPort := flag.Int("logtail-port", envOrInt("COLLECTOR_LOGTAIL_PORT", 0), "UDP port to receive nginx ipng_stats_logtail packets, 0 to disable (env: COLLECTOR_LOGTAIL_PORT)")
+   logtailBind := flag.String("logtail-bind", envOr("COLLECTOR_LOGTAIL_BIND", "127.0.0.1"), "UDP bind address for the logtail listener (env: COLLECTOR_LOGTAIL_BIND)")
    flag.Parse()

    patterns := collectPatterns(*logPaths, *logsFile)
-   if len(patterns) == 0 {
-       log.Fatal("collector: no log paths specified; use --logs or --logs-file")
+   if len(patterns) == 0 && *logtailPort == 0 {
+       log.Fatal("collector: no inputs configured; use --logs, --logs-file, or --logtail-port")
    }
+   if len(patterns) > 0 {
+       log.Printf("collector: watching %d pattern(s), rescan every %s", len(patterns), *scanInterval)
+   }

    ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer stop()
@@ -57,8 +61,16 @@ func main() {
    }
    go store.Run(ch)

-   tailer := NewMultiTailer(patterns, *scanInterval, *v4prefix, *v6prefix, ch)
-   go tailer.Run(ctx)
+   if len(patterns) > 0 {
+       tailer := NewMultiTailer(patterns, *scanInterval, *v4prefix, *v6prefix, ch)
+       go tailer.Run(ctx)
+   }
+
+   if *logtailPort > 0 {
+       udp := NewUDPListener(net.JoinHostPort(*logtailBind, strconv.Itoa(*logtailPort)), *v4prefix, *v6prefix, ch)
+       udp.SetProm(store.prom)
+       go udp.Run(ctx)
+   }

    lis, err := net.Listen("tcp", *listen)
    if err != nil {
@@ -18,65 +18,104 @@ type LogRecord struct {
    Method        string
    BodyBytesSent int64
    RequestTime   float64
+   SourceTag     string
}

-// ParseLine parses a tab-separated logtail log line:
+// fileSourceTag is the SourceTag assigned to records read from on-disk log
+// files, which pre-date the tag concept. Mirrors nginx's fallback label.
+const fileSourceTag = "direct"
+
+// ParseLine parses a tab-separated logtail log line from a file:
//
//  $host \t $remote_addr \t $msec \t $request_method \t $request_uri \t $status \t $body_bytes_sent \t $request_time \t $is_tor \t $asn
//
// The is_tor (field 9) and asn (field 10) fields are optional for backward
// compatibility with older log files that omit them; they default to false/0
-// when absent.
+// when absent. SourceTag is always set to "direct" (file origin has no tag).
// Returns false for lines with fewer than 8 fields.
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
    // SplitN caps allocations; we need up to 10 fields.
    fields := strings.SplitN(line, "\t", 10)
    if len(fields) < 8 {
        return LogRecord{}, false
    }

-   uri := fields[4]
-   if i := strings.IndexByte(uri, '?'); i >= 0 {
-       uri = uri[:i]
-   }
-
    prefix, ok := truncateIP(fields[1], v4bits, v6bits)
    if !ok {
        return LogRecord{}, false
    }

    isTor := len(fields) >= 9 && fields[8] == "1"

    var asn int32
    if len(fields) == 10 {
        if n, err := strconv.ParseInt(fields[9], 10, 32); err == nil {
            asn = int32(n)
        }
    }

-   var bodyBytes int64
-   if n, err := strconv.ParseInt(fields[6], 10, 64); err == nil {
-       bodyBytes = n
-   }
-
-   var reqTime float64
-   if f, err := strconv.ParseFloat(fields[7], 64); err == nil {
-       reqTime = f
-   }
-
    return LogRecord{
        Website:       fields[0],
        ClientPrefix:  prefix,
-       URI:           uri,
+       URI:           stripQuery(fields[4]),
        Status:        fields[5],
        IsTor:         isTor,
        ASN:           asn,
        Method:        fields[3],
-       BodyBytesSent: bodyBytes,
-       RequestTime:   reqTime,
+       BodyBytesSent: parseInt(fields[6]),
+       RequestTime:   parseFloat(fields[7]),
+       SourceTag:     fileSourceTag,
    }, true
}

+// ParseUDPLine parses a tab-separated logtail log line from the UDP listener:
+//
+//  $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
+//  $body_bytes_sent \t $request_time \t $is_tor \t $asn \t
+//  $ipng_source_tag \t $server_addr \t $scheme
+//
+// All 12 fields are required. server_addr and scheme are consumed but not
+// propagated. Returns false for any malformed packet (wrong field count,
+// bad IP).
+func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) {
+   fields := strings.Split(line, "\t")
+   if len(fields) != 12 {
+       return LogRecord{}, false
+   }
+   prefix, ok := truncateIP(fields[1], v4bits, v6bits)
+   if !ok {
+       return LogRecord{}, false
+   }
+   var asn int32
+   if n, err := strconv.ParseInt(fields[8], 10, 32); err == nil {
+       asn = int32(n)
+   }
+   return LogRecord{
+       Website:       fields[0],
+       ClientPrefix:  prefix,
+       URI:           stripQuery(fields[3]),
+       Status:        fields[4],
+       IsTor:         fields[7] == "1",
+       ASN:           asn,
+       Method:        fields[2],
+       BodyBytesSent: parseInt(fields[5]),
+       RequestTime:   parseFloat(fields[6]),
+       SourceTag:     fields[9],
+   }, true
+}
+
+func stripQuery(uri string) string {
+   if i := strings.IndexByte(uri, '?'); i >= 0 {
+       return uri[:i]
+   }
+   return uri
+}
+
+func parseInt(s string) int64 {
+   n, _ := strconv.ParseInt(s, 10, 64)
+   return n
+}
+
+func parseFloat(s string) float64 {
+   f, _ := strconv.ParseFloat(s, 64)
+   return f
+}
+
// truncateIP masks addr to the given prefix length depending on IP version.
// Returns the CIDR string (e.g. "1.2.3.0/24") and true on success.
func truncateIP(addr string, v4bits, v6bits int) (string, bool) {

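To make the UDP wire format concrete, here is a minimal usage sketch. It is not part of the commit; it assumes it sits alongside `parser.go` in `cmd/collector` (which is `package main`), and the datagram is hypothetical but follows the 12-field order documented above:

```go
package main

import "fmt"

func exampleParseUDPLine() {
    // Hypothetical datagram in the documented field order: host, remote_addr,
    // method, uri, status, body_bytes, request_time, is_tor, asn,
    // source_tag, server_addr, scheme.
    line := "www.example.com\t192.0.2.7\tGET\t/search?q=cheese\t200\t512\t0.012\t0\t8298\tcdn\t10.0.0.1\thttps"

    rec, ok := ParseUDPLine(line, 24, 48) // the collector's default prefix lengths
    if !ok {
        panic("malformed logtail line")
    }
    // The query string is stripped, the client IP is bucketed to /24, and
    // the tag travels through verbatim:
    fmt.Println(rec.URI, rec.ClientPrefix, rec.SourceTag) // /search 192.0.2.0/24 cdn
}
```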
@@ -25,6 +25,7 @@ func TestParseLine(t *testing.T) {
                Method:        "GET",
                BodyBytesSent: 1452,
                RequestTime:   0.043,
+               SourceTag:     "direct",
            },
        },
        {
@@ -38,6 +39,7 @@ func TestParseLine(t *testing.T) {
                Status:      "201",
                Method:      "POST",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -46,11 +48,12 @@ func TestParseLine(t *testing.T) {
            wantOK: true,
            want: LogRecord{
                Website:      "host",
-               ClientPrefix: "2001:db8:cafe::/48", // /48 = 3 full 16-bit groups intact
+               ClientPrefix: "2001:db8:cafe::/48",
                URI:          "/",
                Status:       "200",
                Method:       "GET",
                RequestTime:  0.001,
+               SourceTag:    "direct",
            },
        },
        {
@@ -79,6 +82,7 @@ func TestParseLine(t *testing.T) {
                Status:      "429",
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -93,6 +97,7 @@ func TestParseLine(t *testing.T) {
                IsTor:       true,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -107,6 +112,7 @@ func TestParseLine(t *testing.T) {
                IsTor:       false,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -121,6 +127,7 @@ func TestParseLine(t *testing.T) {
                IsTor:       false,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -136,6 +143,7 @@ func TestParseLine(t *testing.T) {
                ASN:         12345,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -151,6 +159,7 @@ func TestParseLine(t *testing.T) {
                ASN:         65535,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -166,6 +175,7 @@ func TestParseLine(t *testing.T) {
                ASN:         0,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
        {
@@ -181,6 +191,7 @@ func TestParseLine(t *testing.T) {
                ASN:         0,
                Method:      "GET",
                RequestTime: 0.001,
+               SourceTag:   "direct",
            },
        },
    }
@@ -201,6 +212,84 @@ func TestParseLine(t *testing.T) {
    }
}

+func TestParseUDPLine(t *testing.T) {
+   // host \t remote_addr \t method \t uri \t status \t body_bytes \t req_time \t
+   // is_tor \t asn \t source_tag \t server_addr \t scheme
+   good := "www.example.com\t1.2.3.4\tGET\t/api/v1/search?q=foo\t200\t1452\t0.043\t0\t12345\tcdn\t10.0.0.1\thttps"
+
+   tests := []struct {
+       name   string
+       line   string
+       wantOK bool
+       want   LogRecord
+   }{
+       {
+           name:   "all 12 fields parsed, query stripped, extras dropped",
+           line:   good,
+           wantOK: true,
+           want: LogRecord{
+               Website:       "www.example.com",
+               ClientPrefix:  "1.2.3.0/24",
+               URI:           "/api/v1/search",
+               Status:        "200",
+               IsTor:         false,
+               ASN:           12345,
+               Method:        "GET",
+               BodyBytesSent: 1452,
+               RequestTime:   0.043,
+               SourceTag:     "cdn",
+           },
+       },
+       {
+           name:   "is_tor=1, tag direct, IPv6",
+           line:   "h\t2001:db8::1\tGET\t/\t200\t0\t0\t1\t65535\tdirect\t::1\thttp",
+           wantOK: true,
+           want: LogRecord{
+               Website:       "h",
+               ClientPrefix:  "2001:db8::/48",
+               URI:           "/",
+               Status:        "200",
+               IsTor:         true,
+               ASN:           65535,
+               Method:        "GET",
+               BodyBytesSent: 0,
+               RequestTime:   0,
+               SourceTag:     "direct",
+           },
+       },
+       {
+           name:   "11 fields rejected",
+           line:   "h\t1.2.3.4\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1",
+           wantOK: false,
+       },
+       {
+           name:   "13 fields rejected",
+           line:   good + "\textra",
+           wantOK: false,
+       },
+       {
+           name:   "bad IP rejected",
+           line:   "h\tnope\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1\thttp",
+           wantOK: false,
+       },
+   }
+
+   for _, tc := range tests {
+       t.Run(tc.name, func(t *testing.T) {
+           got, ok := ParseUDPLine(tc.line, 24, 48)
+           if ok != tc.wantOK {
+               t.Fatalf("ParseUDPLine ok=%v, want %v; got=%+v", ok, tc.wantOK, got)
+           }
+           if !tc.wantOK {
+               return
+           }
+           if got != tc.want {
+               t.Errorf("got %+v, want %+v", got, tc.want)
+           }
+       })
+   }
+}
+
func TestTruncateIP(t *testing.T) {
    tests := []struct {
        addr string
@@ -208,8 +297,8 @@ func TestTruncateIP(t *testing.T) {
    }{
        {"1.2.3.4", "1.2.3.0/24"},
        {"192.168.100.200", "192.168.100.0/24"},
-       {"2001:db8:cafe:babe::1", "2001:db8:cafe::/48"}, // /48 = 3 full groups intact
-       {"::1", "::/48"}, // loopback — first 48 bits are all zero
+       {"2001:db8:cafe:babe::1", "2001:db8:cafe::/48"},
+       {"::1", "::/48"},
    }

    for _, tc := range tests {
@@ -19,7 +19,7 @@ const promNumTimeBounds = 11

var promTimeBounds = [promNumTimeBounds]float64{.005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10}

-const promCounterCap = 100_000 // safety cap on {host,method,status} counter entries
+const promCounterCap = 250_000 // safety cap on {host,method,status} counter entries

// promCounterKey is the label set for per-request counters.
type promCounterKey struct {
@@ -49,6 +49,16 @@ type PromStore struct {
    counters map[promCounterKey]int64
    body     map[string]*promBodyEntry // keyed by host
    reqTime  map[string]*promTimeEntry // keyed by host
+
+   // per-source_tag rollups (parallel series, not a cross-product with host)
+   sourceCounters map[string]int64          // keyed by source_tag
+   sourceBody     map[string]*promBodyEntry // keyed by source_tag
+
+   // UDP ingest counters — protected by their own atomic-friendly lock.
+   udpMu               sync.Mutex
+   udpPacketsReceived  int64 // datagrams read off the socket
+   udpLoglinesSuccess  int64 // successfully parsed
+   udpLoglinesConsumed int64 // successfully forwarded to the store channel
}

// NewPromStore returns an empty PromStore ready for use.
@@ -57,6 +67,8 @@ func NewPromStore() *PromStore {
    counters: make(map[promCounterKey]int64, 1024),
    body:     make(map[string]*promBodyEntry, 64),
    reqTime:  make(map[string]*promTimeEntry, 64),
+   sourceCounters: make(map[string]int64, 32),
+   sourceBody:     make(map[string]*promBodyEntry, 32),
    }
}

@@ -74,18 +86,7 @@ func (p *PromStore) Ingest(r LogRecord) {
    }

    // --- body_bytes_sent histogram (keyed by host only) ---
-   be, ok := p.body[r.Website]
-   if !ok {
-       be = &promBodyEntry{}
-       p.body[r.Website] = be
-   }
-   for i, bound := range promBodyBounds {
-       if r.BodyBytesSent <= bound {
-           be.buckets[i]++
-       }
-   }
-   be.buckets[promNumBodyBounds]++ // +Inf
-   be.sum += r.BodyBytesSent
+   observeBody(p.body, r.Website, r.BodyBytesSent)

    // --- request_time histogram (keyed by host only) ---
    te, ok := p.reqTime[r.Website]
@@ -101,9 +102,34 @@ func (p *PromStore) Ingest(r LogRecord) {
    te.buckets[promNumTimeBounds]++ // +Inf
    te.sum += r.RequestTime

+   // --- per-source_tag rollups ---
+   p.sourceCounters[r.SourceTag]++
+   observeBody(p.sourceBody, r.SourceTag, r.BodyBytesSent)
+
    p.mu.Unlock()
}

+// IncUDPPacket, IncUDPSuccess, and IncUDPConsumed bump their respective
+// UDP ingest counters. They are called from the UDP listener goroutine.
+func (p *PromStore) IncUDPPacket()   { p.udpMu.Lock(); p.udpPacketsReceived++; p.udpMu.Unlock() }
+func (p *PromStore) IncUDPSuccess()  { p.udpMu.Lock(); p.udpLoglinesSuccess++; p.udpMu.Unlock() }
+func (p *PromStore) IncUDPConsumed() { p.udpMu.Lock(); p.udpLoglinesConsumed++; p.udpMu.Unlock() }
+
+func observeBody(m map[string]*promBodyEntry, key string, bytes int64) {
+   e, ok := m[key]
+   if !ok {
+       e = &promBodyEntry{}
+       m[key] = e
+   }
+   for i, bound := range promBodyBounds {
+       if bytes <= bound {
+           e.buckets[i]++
+       }
+   }
+   e.buckets[promNumBodyBounds]++ // +Inf
+   e.sum += bytes
+}
+
// ServeHTTP renders all metrics in the Prometheus text exposition format (0.0.4).
func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
    // Snapshot everything under the lock, then render without holding it.
@@ -119,7 +145,7 @@ func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
    }

    type bodySnap struct {
-       host  string
+       label string
        e     promBodyEntry
    }
    bodySnaps := make([]bodySnap, 0, len(p.body))
@@ -136,8 +162,27 @@ func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
        timeSnaps = append(timeSnaps, timeSnap{h, *e})
    }

+   type sourceCounterSnap struct {
+       tag string
+       v   int64
+   }
+   sourceCounters := make([]sourceCounterSnap, 0, len(p.sourceCounters))
+   for t, v := range p.sourceCounters {
+       sourceCounters = append(sourceCounters, sourceCounterSnap{t, v})
+   }
+   sourceBodySnaps := make([]bodySnap, 0, len(p.sourceBody))
+   for t, e := range p.sourceBody {
+       sourceBodySnaps = append(sourceBodySnaps, bodySnap{t, *e})
+   }
+
    p.mu.Unlock()

+   p.udpMu.Lock()
+   udpPackets := p.udpPacketsReceived
+   udpSuccess := p.udpLoglinesSuccess
+   udpConsumed := p.udpLoglinesConsumed
+   p.udpMu.Unlock()
+
    // Sort for stable, human-readable output.
    sort.Slice(counters, func(i, j int) bool {
        a, b := counters[i].k, counters[j].k
@@ -149,8 +194,10 @@ func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
        }
        return a.Status < b.Status
    })
-   sort.Slice(bodySnaps, func(i, j int) bool { return bodySnaps[i].host < bodySnaps[j].host })
+   sort.Slice(bodySnaps, func(i, j int) bool { return bodySnaps[i].label < bodySnaps[j].label })
    sort.Slice(timeSnaps, func(i, j int) bool { return timeSnaps[i].host < timeSnaps[j].host })
+   sort.Slice(sourceCounters, func(i, j int) bool { return sourceCounters[i].tag < sourceCounters[j].tag })
+   sort.Slice(sourceBodySnaps, func(i, j int) bool { return sourceBodySnaps[i].label < sourceBodySnaps[j].label })

    w.Header().Set("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
    bw := bufio.NewWriterSize(w, 256*1024)
@@ -167,16 +214,7 @@ func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
    fmt.Fprintln(bw, "# HELP nginx_http_response_body_bytes HTTP response body size distribution in bytes.")
    fmt.Fprintln(bw, "# TYPE nginx_http_response_body_bytes histogram")
    for _, s := range bodySnaps {
-       for i, bound := range promBodyBounds {
-           fmt.Fprintf(bw, "nginx_http_response_body_bytes_bucket{host=%q,le=%q} %d\n",
-               s.host, fmt.Sprintf("%d", bound), s.e.buckets[i])
-       }
-       fmt.Fprintf(bw, "nginx_http_response_body_bytes_bucket{host=%q,le=\"+Inf\"} %d\n",
-           s.host, s.e.buckets[promNumBodyBounds])
-       fmt.Fprintf(bw, "nginx_http_response_body_bytes_count{host=%q} %d\n",
-           s.host, s.e.buckets[promNumBodyBounds])
-       fmt.Fprintf(bw, "nginx_http_response_body_bytes_sum{host=%q} %d\n",
-           s.host, s.e.sum)
+       writeBodyHistogram(bw, "nginx_http_response_body_bytes", "host", s.label, s.e)
    }

    // nginx_http_request_duration_seconds (histogram, labeled by host)
@@ -195,9 +233,48 @@ func (p *PromStore) ServeHTTP(w http.ResponseWriter, _ *http.Request) {
            s.host, s.e.sum)
    }

+   // nginx_http_requests_by_source_total (counter, labeled by source_tag)
+   fmt.Fprintln(bw, "# HELP nginx_http_requests_by_source_total HTTP requests rolled up by nginx source tag.")
+   fmt.Fprintln(bw, "# TYPE nginx_http_requests_by_source_total counter")
+   for _, c := range sourceCounters {
+       fmt.Fprintf(bw, "nginx_http_requests_by_source_total{source_tag=%q} %d\n", c.tag, c.v)
+   }
+
+   // nginx_http_response_body_bytes_by_source (histogram, labeled by source_tag)
+   fmt.Fprintln(bw, "# HELP nginx_http_response_body_bytes_by_source HTTP response body size distribution by nginx source tag.")
+   fmt.Fprintln(bw, "# TYPE nginx_http_response_body_bytes_by_source histogram")
+   for _, s := range sourceBodySnaps {
+       writeBodyHistogram(bw, "nginx_http_response_body_bytes_by_source", "source_tag", s.label, s.e)
+   }
+
+   // UDP ingest counters — lets operators distinguish parse failures
+   // (received - success) from channel-full drops (success - consumed).
+   fmt.Fprintln(bw, "# HELP logtail_udp_packets_received_total Datagrams read from the UDP socket.")
+   fmt.Fprintln(bw, "# TYPE logtail_udp_packets_received_total counter")
+   fmt.Fprintf(bw, "logtail_udp_packets_received_total %d\n", udpPackets)
+   fmt.Fprintln(bw, "# HELP logtail_udp_loglines_success_total UDP loglines that parsed successfully.")
+   fmt.Fprintln(bw, "# TYPE logtail_udp_loglines_success_total counter")
+   fmt.Fprintf(bw, "logtail_udp_loglines_success_total %d\n", udpSuccess)
+   fmt.Fprintln(bw, "# HELP logtail_udp_loglines_consumed_total UDP loglines forwarded to the store (not dropped).")
+   fmt.Fprintln(bw, "# TYPE logtail_udp_loglines_consumed_total counter")
+   fmt.Fprintf(bw, "logtail_udp_loglines_consumed_total %d\n", udpConsumed)
+
    bw.Flush()
}

+func writeBodyHistogram(bw *bufio.Writer, metric, labelName, labelValue string, e promBodyEntry) {
+   for i, bound := range promBodyBounds {
+       fmt.Fprintf(bw, "%s_bucket{%s=%q,le=%q} %d\n",
+           metric, labelName, labelValue, fmt.Sprintf("%d", bound), e.buckets[i])
+   }
+   fmt.Fprintf(bw, "%s_bucket{%s=%q,le=\"+Inf\"} %d\n",
+       metric, labelName, labelValue, e.buckets[promNumBodyBounds])
+   fmt.Fprintf(bw, "%s_count{%s=%q} %d\n",
+       metric, labelName, labelValue, e.buckets[promNumBodyBounds])
+   fmt.Fprintf(bw, "%s_sum{%s=%q} %d\n",
+       metric, labelName, labelValue, e.sum)
+}
+
// formatFloat renders a float64 bucket bound without trailing zeros but always
// with at least one decimal place, matching Prometheus convention (e.g. "0.5", "10").
func formatFloat(f float64) string {

@@ -110,6 +110,61 @@ func TestPromStoreServeHTTP(t *testing.T) {
    }
}

+func TestPromStoreSourceTagRollup(t *testing.T) {
+   ps := NewPromStore()
+   // same host, two tags; each tag should appear with its own series.
+   ps.Ingest(LogRecord{Website: "h", Method: "GET", Status: "200", BodyBytesSent: 100, SourceTag: "direct"})
+   ps.Ingest(LogRecord{Website: "h", Method: "GET", Status: "200", BodyBytesSent: 300, SourceTag: "cdn"})
+   ps.Ingest(LogRecord{Website: "h", Method: "GET", Status: "200", BodyBytesSent: 100, SourceTag: "cdn"})
+
+   req := httptest.NewRequest("GET", "/metrics", nil)
+   rec := httptest.NewRecorder()
+   ps.ServeHTTP(rec, req)
+   body := rec.Body.String()
+
+   checks := []string{
+       "# TYPE nginx_http_requests_by_source_total counter",
+       `nginx_http_requests_by_source_total{source_tag="direct"} 1`,
+       `nginx_http_requests_by_source_total{source_tag="cdn"} 2`,
+       "# TYPE nginx_http_response_body_bytes_by_source histogram",
+       `nginx_http_response_body_bytes_by_source_sum{source_tag="direct"} 100`,
+       `nginx_http_response_body_bytes_by_source_sum{source_tag="cdn"} 400`,
+       // host-series totals are unchanged (one row, counting 3 requests).
+       `nginx_http_requests_total{host="h",method="GET",status="200"} 3`,
+   }
+   for _, want := range checks {
+       if !strings.Contains(body, want) {
+           t.Errorf("missing %q in output:\n%s", want, body)
+       }
+   }
+}
+
+func TestPromStoreUDPCounters(t *testing.T) {
+   ps := NewPromStore()
+   ps.IncUDPPacket()
+   ps.IncUDPPacket()
+   ps.IncUDPPacket()
+   ps.IncUDPSuccess()
+   ps.IncUDPSuccess()
+   ps.IncUDPConsumed()
+
+   req := httptest.NewRequest("GET", "/metrics", nil)
+   rec := httptest.NewRecorder()
+   ps.ServeHTTP(rec, req)
+   body := rec.Body.String()
+
+   checks := []string{
+       "logtail_udp_packets_received_total 3",
+       "logtail_udp_loglines_success_total 2",
+       "logtail_udp_loglines_consumed_total 1",
+   }
+   for _, want := range checks {
+       if !strings.Contains(body, want) {
+           t.Errorf("missing %q in output:\n%s", want, body)
+       }
+   }
+}
+
func TestPromStoreCounterCap(t *testing.T) {
    ps := NewPromStore()
    // Fill to cap with distinct {host,method,status} combos

@@ -48,7 +48,7 @@ func (s *Store) ingest(r LogRecord) {
    if s.prom != nil {
        s.prom.Ingest(r)
    }
-   key := st.Tuple6{Website: r.Website, Prefix: r.ClientPrefix, URI: r.URI, Status: r.Status, IsTor: r.IsTor, ASN: r.ASN}
+   key := st.Tuple6{Website: r.Website, Prefix: r.ClientPrefix, URI: r.URI, Status: r.Status, IsTor: r.IsTor, ASN: r.ASN, SourceTag: r.SourceTag}
    if _, exists := s.live[key]; !exists {
        if s.liveLen >= liveMapCap {
            return

cmd/collector/udp.go (new file, 86 lines)
@@ -0,0 +1,86 @@
package main

import (
    "context"
    "log"
    "net"
    "strings"
)

// udpReadBufBytes is the SO_RCVBUF size requested. Bursts of ~10K lines/sec at
// ~200B each comfortably fit; the kernel may cap below this.
const udpReadBufBytes = 4 << 20

// udpPacketBuf is the per-read buffer. A single nginx log line easily fits in
// a few KB; 64K is the practical UDP datagram ceiling.
const udpPacketBuf = 64 << 10

// UDPListener receives nginx_ipng_stats_logtail datagrams on a local socket,
// parses each packet as one log line, and forwards LogRecords to ch.
type UDPListener struct {
    addr   string
    v4bits int
    v6bits int
    ch     chan<- LogRecord
    prom   *PromStore // optional; bumps UDP ingest counters
}

func NewUDPListener(addr string, v4bits, v6bits int, ch chan<- LogRecord) *UDPListener {
    return &UDPListener{addr: addr, v4bits: v4bits, v6bits: v6bits, ch: ch}
}

// SetProm wires a PromStore so the listener can report received/success/consumed counts.
func (u *UDPListener) SetProm(p *PromStore) { u.prom = p }

// Run listens until ctx is cancelled.
func (u *UDPListener) Run(ctx context.Context) {
    laddr, err := net.ResolveUDPAddr("udp", u.addr)
    if err != nil {
        log.Fatalf("udp: resolve %s: %v", u.addr, err)
    }
    conn, err := net.ListenUDP("udp", laddr)
    if err != nil {
        log.Fatalf("udp: listen %s: %v", u.addr, err)
    }
    defer conn.Close()
    if err := conn.SetReadBuffer(udpReadBufBytes); err != nil {
        log.Printf("udp: SetReadBuffer(%d): %v", udpReadBufBytes, err)
    }
    log.Printf("udp: listening on %s", conn.LocalAddr())

    go func() {
        <-ctx.Done()
        conn.Close()
    }()

    buf := make([]byte, udpPacketBuf)
    for {
        n, _, err := conn.ReadFromUDP(buf)
        if err != nil {
            if ctx.Err() != nil {
                return
            }
            log.Printf("udp: read: %v", err)
            continue
        }
        if u.prom != nil {
            u.prom.IncUDPPacket()
        }
        line := strings.TrimRight(string(buf[:n]), "\r\n")
        rec, ok := ParseUDPLine(line, u.v4bits, u.v6bits)
        if !ok {
            continue
        }
        if u.prom != nil {
            u.prom.IncUDPSuccess()
        }
        select {
        case u.ch <- rec:
            if u.prom != nil {
                u.prom.IncUDPConsumed()
            }
        default:
            // Channel full — drop rather than block the read loop.
        }
    }
}
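Since the nginx plugin itself isn't part of this repo, a hand-rolled sender is the quickest way to exercise the listener end to end. A minimal sketch, assuming a collector started with `--logtail-bind 127.0.0.1 --logtail-port 9999` (the port number is illustrative):

```go
package main

import (
    "fmt"
    "net"
)

func main() {
    // Matches the collector's --logtail-bind / --logtail-port settings.
    conn, err := net.Dial("udp", "127.0.0.1:9999")
    if err != nil {
        panic(err)
    }
    defer conn.Close()

    // One datagram == one 12-field logtail line. The listener trims any
    // trailing \r\n, so a bare line is fine.
    line := "www.example.com\t192.0.2.7\tGET\t/\t200\t512\t0.012\t0\t8298\tcdn\t10.0.0.1\thttps"
    if _, err := fmt.Fprint(conn, line); err != nil {
        panic(err)
    }
}
```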
cmd/collector/udp_test.go (new file, 67 lines)
@@ -0,0 +1,67 @@
package main

import (
    "context"
    "net"
    "testing"
    "time"
)

func TestUDPListenerRoundTrip(t *testing.T) {
    ch := make(chan LogRecord, 4)
    ps := NewPromStore()

    // Bind to an ephemeral port on loopback.
    pc, err := net.ListenPacket("udp", "127.0.0.1:0")
    if err != nil {
        t.Fatalf("listen probe: %v", err)
    }
    addr := pc.LocalAddr().String()
    pc.Close() // release; listener will re-bind

    ctx, cancel := context.WithCancel(context.Background())
    defer cancel()

    u := NewUDPListener(addr, 24, 48, ch)
    u.SetProm(ps)
    go u.Run(ctx)

    // Dial the listener and send one valid and one malformed packet.
    conn, err := net.Dial("udp", addr)
    if err != nil {
        t.Fatalf("dial: %v", err)
    }
    defer conn.Close()

    // The listener is started asynchronously; retry for up to 1s.
    good := "www.example.com\t1.2.3.4\tGET\t/\t200\t42\t0.010\t0\t12345\tdirect\t10.0.0.1\thttps"
    bad := "not enough\tfields"
    deadline := time.Now().Add(time.Second)
    for time.Now().Before(deadline) {
        conn.Write([]byte(good))
        conn.Write([]byte(bad))
        select {
        case rec := <-ch:
            if rec.Website != "www.example.com" || rec.SourceTag != "direct" {
                t.Fatalf("bad record: %+v", rec)
            }
            // Give the listener a moment to process the malformed packet too.
            time.Sleep(50 * time.Millisecond)
            ps.udpMu.Lock()
            pkt, suc, con := ps.udpPacketsReceived, ps.udpLoglinesSuccess, ps.udpLoglinesConsumed
            ps.udpMu.Unlock()
            if pkt < 2 {
                t.Errorf("udpPacketsReceived=%d, want >=2", pkt)
            }
            if suc < 1 {
                t.Errorf("udpLoglinesSuccess=%d, want >=1", suc)
            }
            if con < 1 {
                t.Errorf("udpLoglinesConsumed=%d, want >=1", con)
            }
            return
        case <-time.After(50 * time.Millisecond):
        }
    }
    t.Fatal("no record received within 1s")
}
@@ -146,8 +146,13 @@ func applyTerm(term string, fs *filterState) error {
        return fmt.Errorf("invalid asn expression %q", expr)
        }
        fs.ASN = expr
+   case "source_tag":
+       if op != "=" {
+           return fmt.Errorf("source_tag only supports =, not %q", op)
+       }
+       fs.SourceTag = value
    default:
-       return fmt.Errorf("unknown field %q; valid: status, website, uri, prefix, is_tor, asn", field)
+       return fmt.Errorf("unknown field %q; valid: status, website, uri, prefix, is_tor, asn, source_tag", field)
    }
    return nil
}
@@ -196,6 +201,9 @@ func FilterExprString(f filterState) string {
    if f.ASN != "" {
        parts = append(parts, asnTermStr(f.ASN))
    }
+   if f.SourceTag != "" {
+       parts = append(parts, "source_tag="+quoteMaybe(f.SourceTag))
+   }
    return strings.Join(parts, " AND ")
}

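As an illustration of the new term in the mini filter language — a sketch only, assuming `applyTerm` accepts one raw `field=value` term (as its signature above suggests) and that this runs inside the frontend package:

```go
// Illustrative: filterState, applyTerm, and FilterExprString are the
// frontend package's own definitions shown in the diff above.
func exampleSourceTagTerm() string {
    var fs filterState
    for _, term := range []string{"status>=400", "source_tag=cdn"} {
        if err := applyTerm(term, &fs); err != nil {
            panic(err) // e.g. "source_tag>cdn" fails: only "=" is supported
        }
    }
    // Round-trips back into the expression-box syntax, roughly:
    //   status>=400 AND source_tag=cdn
    return FilterExprString(fs)
}
```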
@@ -229,8 +229,17 @@ func TestDrillURL(t *testing.T) {
    if !strings.Contains(u, "f_asn=12345") {
        t.Errorf("drill from asn: missing f_asn in %q", u)
    }
+   if !strings.Contains(u, "by=source_tag") {
+       t.Errorf("drill from asn: expected next by=source_tag in %q", u)
+   }
+
+   p.GroupByS = "source_tag"
+   u = p.drillURL("direct")
+   if !strings.Contains(u, "f_source_tag=direct") {
+       t.Errorf("drill from source_tag: missing f_source_tag in %q", u)
+   }
    if !strings.Contains(u, "by=website") {
-       t.Errorf("drill from asn: expected cycle back to by=website in %q", u)
+       t.Errorf("drill from source_tag: expected cycle back to by=website in %q", u)
    }
}

@@ -58,6 +58,7 @@ type filterState struct {
    URIReNeg  string // RE2 regex exclusion against request URI
    IsTor     string // "", "1" (TOR only), "0" (non-TOR only)
    ASN       string // expression: "12345", "!=65000", ">=1000", etc.
+   SourceTag string // exact ipng_source_tag match
}

// QueryParams holds all parsed URL parameters for one page request.
@@ -95,7 +96,7 @@ var windowSpecs = []struct{ s, label string }{
}

var groupBySpecs = []struct{ s, label string }{
-   {"website", "website"}, {"asn", "asn"}, {"prefix", "prefix"}, {"status", "status"}, {"uri", "uri"},
+   {"website", "website"}, {"asn", "asn"}, {"prefix", "prefix"}, {"status", "status"}, {"uri", "uri"}, {"source_tag", "source"},
}

func parseWindowString(s string) (pb.Window, string) {
@@ -127,6 +128,8 @@ func parseGroupByString(s string) (pb.GroupBy, string) {
        return pb.GroupBy_HTTP_RESPONSE, "status"
    case "asn":
        return pb.GroupBy_ASN_NUMBER, "asn"
+   case "source_tag":
+       return pb.GroupBy_SOURCE_TAG, "source_tag"
    default:
        return pb.GroupBy_WEBSITE, "website"
    }
@@ -168,12 +171,13 @@ func (h *Handler) parseParams(r *http.Request) QueryParams {
        URIReNeg:  q.Get("f_uri_re_neg"),
        IsTor:     q.Get("f_is_tor"),
        ASN:       q.Get("f_asn"),
+       SourceTag: q.Get("f_source_tag"),
        },
    }
}

func buildFilter(f filterState) *pb.Filter {
-   if f.Website == "" && f.Prefix == "" && f.URI == "" && f.Status == "" && f.WebsiteRe == "" && f.URIRe == "" && f.WebsiteReNeg == "" && f.URIReNeg == "" && f.IsTor == "" && f.ASN == "" {
+   if f.Website == "" && f.Prefix == "" && f.URI == "" && f.Status == "" && f.WebsiteRe == "" && f.URIRe == "" && f.WebsiteReNeg == "" && f.URIReNeg == "" && f.IsTor == "" && f.ASN == "" && f.SourceTag == "" {
        return nil
    }
    out := &pb.Filter{}
@@ -216,6 +220,9 @@ func buildFilter(f filterState) *pb.Filter {
            out.AsnOp = op
        }
    }
+   if f.SourceTag != "" {
+       out.IpngSourceTag = &f.SourceTag
+   }
    return out
}
@@ -256,6 +263,9 @@ func (p QueryParams) toValues() url.Values {
    if p.Filter.ASN != "" {
        v.Set("f_asn", p.Filter.ASN)
    }
+   if p.Filter.SourceTag != "" {
+       v.Set("f_source_tag", p.Filter.SourceTag)
+   }
    return v
}
@@ -278,7 +288,7 @@ func (p QueryParams) clearFilterURL() string {
    return p.buildURL(map[string]string{
        "f_website": "", "f_prefix": "", "f_uri": "", "f_status": "",
        "f_website_re": "", "f_uri_re": "", "f_website_re_neg": "", "f_uri_re_neg": "",
-       "f_is_tor": "", "f_asn": "",
+       "f_is_tor": "", "f_asn": "", "f_source_tag": "",
    })
}
@@ -293,7 +303,9 @@ func nextGroupBy(s string) string {
        return "status"
    case "status":
        return "asn"
-   default: // asn → back to website
+   case "asn":
+       return "source_tag"
+   default: // source_tag → back to website
        return "website"
    }
}
@@ -311,6 +323,8 @@ func groupByFilterKey(s string) string {
        return "f_status"
    case "asn":
        return "f_asn"
+   case "source_tag":
+       return "f_source_tag"
    default:
        return "f_website"
    }
@@ -391,6 +405,12 @@ func buildCrumbs(p QueryParams) []Crumb {
        RemoveURL: p.buildURL(map[string]string{"f_asn": ""}),
        })
    }
+   if p.Filter.SourceTag != "" {
+       crumbs = append(crumbs, Crumb{
+           Text:      "source_tag=" + p.Filter.SourceTag,
+           RemoveURL: p.buildURL(map[string]string{"f_source_tag": ""}),
+       })
+   }
    return crumbs
}

docs/DETAILS.md (deleted, 528 lines)
@@ -1,528 +0,0 @@
PREAMBLE

Although this computer program has a permissive license (AP2.0), if you came here looking to ask
questions, you're better off just moving on :) This program is shared AS-IS and really without any
intent for anybody but IPng Networks to use it. Also, in case the structure of the repo and the
style of this README wasn't already clear, this program is 100% written and maintained by Claude
Code.

You have been warned :)

SPECIFICATION

This project contains four programs:

1) A **collector** that tails any number of nginx log files and maintains an in-memory structure of
`{website, client_prefix, http_request_uri, http_response, is_tor, asn}` counts across all files.
It answers TopN and Trend queries via gRPC and pushes minute snapshots to the aggregator via
server-streaming. It also exposes a Prometheus `/metrics` endpoint (default `:9100`) with per-host
request counters and response-body/request-time histograms.
Runs on each nginx machine in the cluster. No UI — gRPC and HTTP interfaces only.

2) An **aggregator** that subscribes to the snapshot stream from all collectors, merges their data
into a unified in-memory cache, and exposes the same gRPC interface. Answers questions like "what
is the busiest website globally", "which client prefix is causing the most HTTP 503s", and shows
trending information useful for DDoS detection. Runs on a central machine.

3) An **HTTP frontend** companion to the aggregator that renders a drilldown dashboard. Operators
can restrict by `http_response=429`, then by `website=www.example.com`, and so on. Works with
either a collector or aggregator as its backend. Zero JavaScript — server-rendered HTML with inline
SVG sparklines and meta-refresh.

4) A **CLI** for shell-based debugging. Sends `topn`, `trend`, and `stream` queries to any
collector or aggregator, fans out to multiple targets in parallel, and outputs human-readable
tables or newline-delimited JSON.

Programs are written in Go. No CGO, no external runtime dependencies.

---

![Architecture]()

---

DEPLOYMENT

## Docker

All four binaries are published in a single image: `git.ipng.ch/ipng/nginx-logtail`.

The image is built with a two-stage Dockerfile: a `golang:1.24-alpine` builder produces
statically-linked, stripped binaries (`CGO_ENABLED=0`, `-trimpath -ldflags="-s -w"`); the final
stage is `scratch` — no OS, no shell, no runtime dependencies. Each binary is invoked explicitly
via the container `command`.

### Build and push

```
docker compose build --push
```

### Running aggregator + frontend

The `docker-compose.yml` in the repo root runs the aggregator and frontend together. At minimum,
set `AGGREGATOR_COLLECTORS` to the comma-separated `host:port` list of your collector(s):

```sh
AGGREGATOR_COLLECTORS=nginx1:9090,nginx2:9090 docker compose up -d
```

The frontend reaches the aggregator at `aggregator:9091` via Docker's internal DNS. The frontend
UI is available on port `8080`.

### Environment variables

All flags have environment variable equivalents. CLI flags take precedence over env vars.

**collector** (runs on each nginx host, not in Docker):

| Env var                    | Flag             | Default  |
|----------------------------|------------------|----------|
| `COLLECTOR_LISTEN`         | `-listen`        | `:9090`  |
| `COLLECTOR_PROM_LISTEN`    | `-prom-listen`   | `:9100`  |
| `COLLECTOR_LOGS`           | `-logs`          | —        |
| `COLLECTOR_LOGS_FILE`      | `-logs-file`     | —        |
| `COLLECTOR_SOURCE`         | `-source`        | hostname |
| `COLLECTOR_V4PREFIX`       | `-v4prefix`      | `24`     |
| `COLLECTOR_V6PREFIX`       | `-v6prefix`      | `48`     |
| `COLLECTOR_SCAN_INTERVAL`  | `-scan-interval` | `10s`    |

**aggregator**:

| Env var                 | Flag          | Default      |
|-------------------------|---------------|--------------|
| `AGGREGATOR_LISTEN`     | `-listen`     | `:9091`      |
| `AGGREGATOR_COLLECTORS` | `-collectors` | — (required) |
| `AGGREGATOR_SOURCE`     | `-source`     | hostname     |

**frontend**:

| Env var            | Flag       | Default          |
|--------------------|------------|------------------|
| `FRONTEND_LISTEN`  | `-listen`  | `:8080`          |
| `FRONTEND_TARGET`  | `-target`  | `localhost:9091` |
| `FRONTEND_N`       | `-n`       | `25`             |
| `FRONTEND_REFRESH` | `-refresh` | `30`             |

---

DESIGN

## Directory Layout

```
nginx-logtail/
├── proto/
│   ├── logtail.proto          # shared protobuf definitions
│   └── logtailpb/
│       ├── logtail.pb.go      # generated: messages, enums
│       └── logtail_grpc.pb.go # generated: service stubs
├── internal/
│   └── store/
│       └── store.go           # shared types: Tuple6, Entry, Snapshot, ring helpers
└── cmd/
    ├── collector/
    │   ├── main.go
    │   ├── tailer.go          # MultiTailer: tail N files via one shared fsnotify watcher
    │   ├── parser.go          # tab-separated logtail log_format parser (~50 ns/line)
    │   ├── store.go           # bounded top-K in-memory store + tiered ring buffers
    │   └── server.go          # gRPC server: TopN, Trend, StreamSnapshots
    ├── aggregator/
    │   ├── main.go
    │   ├── subscriber.go      # one goroutine per collector; StreamSnapshots with backoff
    │   ├── merger.go          # delta-merge: O(snapshot_size) per update
    │   ├── cache.go           # tick-based ring buffer cache served to clients
    │   ├── registry.go        # TargetRegistry: addr→name map updated from snapshot sources
    │   └── server.go          # gRPC server (same surface as collector)
    ├── frontend/
    │   ├── main.go
    │   ├── handler.go         # URL param parsing, concurrent TopN+Trend, template exec
    │   ├── filter.go          # ParseFilterExpr / FilterExprString mini filter language
    │   ├── client.go          # gRPC dial helper
    │   ├── sparkline.go       # TrendPoints → inline SVG polyline
    │   ├── format.go          # fmtCount (space thousands separator)
    │   └── templates/
    │       ├── base.html      # outer HTML shell, inline CSS, meta-refresh
    │       └── index.html     # window tabs, group-by tabs, breadcrumb, table, footer
    └── cli/
        ├── main.go            # subcommand dispatch and usage
        ├── flags.go           # shared flags, parseTargets, buildFilter, parseWindow
        ├── client.go          # gRPC dial helper
        ├── format.go          # printTable, fmtCount, fmtTime, targetHeader
        ├── cmd_topn.go        # topn: concurrent fan-out, table + JSON output
        ├── cmd_trend.go       # trend: concurrent fan-out, table + JSON output
        ├── cmd_stream.go      # stream: multiplexed streams, auto-reconnect
        └── cmd_targets.go     # targets: list collectors known to the endpoint
```

## Data Model

The core unit is a **count keyed by six dimensions**:

| Field              | Description                                      | Example           |
|--------------------|--------------------------------------------------|-------------------|
| `website`          | nginx `$host`                                    | `www.example.com` |
| `client_prefix`    | client IP truncated to /24 IPv4 or /48 IPv6      | `1.2.3.0/24`      |
| `http_request_uri` | `$request_uri` path only — query string stripped | `/api/v1/search`  |
| `http_response`    | HTTP status code                                 | `429`             |
| `is_tor`           | whether the client IP is a TOR exit node         | `1`               |
| `asn`              | client AS number (MaxMind GeoIP2, 32-bit int)    | `8298`            |

## Time Windows & Tiered Ring Buffers

Two ring buffers at different resolutions cover all query windows up to 24 hours:

| Tier   | Bucket size | Buckets | Top-K/bucket | Covers | Roll-up trigger    |
|--------|-------------|---------|--------------|--------|--------------------|
| Fine   | 1 min       | 60      | 50 000       | 1 h    | every minute       |
| Coarse | 5 min       | 288     | 5 000        | 24 h   | every 5 fine ticks |

Supported query windows and which tier they read from:

| Window | Tier   | Buckets summed |
|--------|--------|----------------|
| 1 min  | fine   | last 1         |
| 5 min  | fine   | last 5         |
| 15 min | fine   | last 15        |
| 60 min | fine   | all 60         |
| 6 h    | coarse | last 72        |
| 24 h   | coarse | all 288        |

Every minute: snapshot live map → top-50K → append to fine ring, reset live map.
Every 5 minutes: merge last 5 fine snapshots → top-5K → append to coarse ring.
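That cadence can be pictured with a runnable toy (names and the plain label map are invented for the sketch; the real rings store top-K `Tuple6` snapshots, capped at 50 000 fine and 5 000 coarse):

```go
package main

import "fmt"

func main() {
    fine := make([]map[string]int64, 0, 60)    // 60 × 1-min buckets
    coarse := make([]map[string]int64, 0, 288) // 288 × 5-min buckets

    for tick := 1; tick <= 10; tick++ {
        // Stand-in for "snapshot live map → top-50K → append to fine ring".
        live := map[string]int64{"www.example.com|200": int64(tick)}
        fine = append(fine, live)

        // Every 5 fine ticks: merge the last 5 fine buckets → coarse ring.
        if tick%5 == 0 {
            merged := map[string]int64{}
            for _, b := range fine[len(fine)-5:] {
                for k, v := range b {
                    merged[k] += v
                }
            }
            coarse = append(coarse, merged)
        }
    }
    fmt.Println(len(fine), "fine,", len(coarse), "coarse") // 10 fine, 2 coarse
}
```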
## Memory Budget (Collector, target ≤ 1 GB)

Entry size: ~30 B website + ~15 B prefix + ~50 B URI + 3 B status + 1 B is_tor + 4 B asn + 8 B count + ~80 B Go map
overhead ≈ **~191 bytes per entry**.

| Structure                 | Entries     | Size        |
|---------------------------|-------------|-------------|
| Live map (capped)         | 100 000     | ~19 MB      |
| Fine ring (60 × 1-min)    | 60 × 50 000 | ~558 MB     |
| Coarse ring (288 × 5-min) | 288 × 5 000 | ~268 MB     |
| **Total**                 |             | **~845 MB** |

The live map is **hard-capped at 100 K entries**. Once full, only updates to existing keys are
accepted; new keys are dropped until the next rotation resets the map. This keeps memory bounded
regardless of attack cardinality.

## Future Work — ClickHouse Export (post-MVP)

> **Do not implement until the end-to-end MVP is running.**

The aggregator will optionally write 1-minute pre-aggregated rows to ClickHouse for 7d/30d
historical views. Schema sketch:

```sql
CREATE TABLE logtail (
    ts            DateTime,
    website       LowCardinality(String),
    client_prefix String,
    request_uri   LowCardinality(String),
    status        UInt16,
    count         UInt64
) ENGINE = SummingMergeTree(count)
PARTITION BY toYYYYMMDD(ts)
ORDER BY (ts, website, status, client_prefix, request_uri);
```

The frontend routes `window=7d|30d` queries to ClickHouse; all shorter windows continue to use
the in-memory cache. Kafka is not needed — the aggregator writes directly. This is purely additive
and does not change any existing interface.

## Protobuf API (`proto/logtail.proto`)

```protobuf
enum TorFilter { TOR_ANY = 0; TOR_YES = 1; TOR_NO = 2; }
enum StatusOp  { EQ = 0; NE = 1; GT = 2; GE = 3; LT = 4; LE = 5; }

message Filter {
  optional string website          = 1;
  optional string client_prefix    = 2;
  optional string http_request_uri = 3;
  optional int32  http_response    = 4;
  StatusOp status_op               = 5;  // comparison operator for http_response
  optional string website_regex    = 6;  // RE2 regex against website
  optional string uri_regex        = 7;  // RE2 regex against http_request_uri
  TorFilter tor                    = 8;  // TOR_ANY (default) / TOR_YES / TOR_NO
  optional int32 asn_number        = 9;  // filter by client ASN
  StatusOp asn_op                  = 10; // comparison operator for asn_number
}

enum GroupBy { WEBSITE = 0; CLIENT_PREFIX = 1; REQUEST_URI = 2; HTTP_RESPONSE = 3; ASN_NUMBER = 4; }
enum Window  { W1M = 0; W5M = 1; W15M = 2; W60M = 3; W6H = 4; W24H = 5; }

message TopNRequest  { Filter filter = 1; GroupBy group_by = 2; int32 n = 3; Window window = 4; }
message TopNEntry    { string label = 1; int64 count = 2; }
message TopNResponse { repeated TopNEntry entries = 1; string source = 2; }

// Trend: one total count per minute (or 5-min) bucket, for sparklines
message TrendRequest  { Filter filter = 1; Window window = 4; }
message TrendPoint    { int64 timestamp_unix = 1; int64 count = 2; }
message TrendResponse { repeated TrendPoint points = 1; string source = 2; }

// Streaming: collector pushes a fine snapshot after every minute rotation
message SnapshotRequest {}
message Snapshot {
  string source   = 1;
  int64 timestamp = 2;
  repeated TopNEntry entries = 3; // full top-50K for this bucket
  bool is_coarse  = 4;            // true for 5-min coarse buckets (DumpSnapshots only)
}

// Target discovery: list the collectors behind the queried endpoint
message ListTargetsRequest {}
message TargetInfo {
  string name = 1; // display name (--source value from the collector)
  string addr = 2; // gRPC address; empty string means "this endpoint itself"
}
message ListTargetsResponse { repeated TargetInfo targets = 1; }

// Backfill: dump full ring buffer contents for aggregator restart recovery
message DumpSnapshotsRequest {}
// Response reuses Snapshot; is_coarse distinguishes fine (1-min) from coarse (5-min) buckets.
// Stream closes after all historical data is sent (unlike StreamSnapshots which stays open).

service LogtailService {
  rpc TopN(TopNRequest) returns (TopNResponse);
  rpc Trend(TrendRequest) returns (TrendResponse);
  rpc StreamSnapshots(SnapshotRequest) returns (stream Snapshot);
  rpc ListTargets(ListTargetsRequest) returns (ListTargetsResponse);
  rpc DumpSnapshots(DumpSnapshotsRequest) returns (stream Snapshot);
}
// Both collector and aggregator implement LogtailService.
// The aggregator's StreamSnapshots re-streams the merged view.
// ListTargets: aggregator returns all configured collectors; collector returns itself.
// DumpSnapshots: collector only; aggregator calls this on startup to backfill its ring.
```
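To put the service definition in context, a client-side `TopN` call over the new dimensions could look roughly like this. The generated identifiers (`pb.NewLogtailServiceClient`, Go field names) and the module import path are assumptions from the `.proto` above, not confirmed against `logtailpb`; `IpngSourceTag` is the filter field this commit adds:

```go
package main

import (
    "context"
    "fmt"
    "log"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"

    pb "git.ipng.ch/ipng/nginx-logtail/proto/logtailpb" // assumed module path
)

func main() {
    conn, err := grpc.NewClient("localhost:9091",
        grpc.WithTransportCredentials(insecure.NewCredentials()))
    if err != nil {
        log.Fatal(err)
    }
    defer conn.Close()

    tag := "cdn"
    resp, err := pb.NewLogtailServiceClient(conn).TopN(context.Background(), &pb.TopNRequest{
        Filter:  &pb.Filter{IpngSourceTag: &tag}, // optional field → pointer
        GroupBy: pb.GroupBy_WEBSITE,
        N:       10,
        Window:  pb.Window_W5M,
    })
    if err != nil {
        log.Fatal(err)
    }
    for _, e := range resp.Entries {
        fmt.Println(e.Label, e.Count)
    }
}
```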
## Program 1 — Collector
|
||||
|
||||
### tailer.go
|
||||
- **`MultiTailer`**: one shared `fsnotify.Watcher` for all files regardless of count — avoids
|
||||
the inotify instance limit when tailing hundreds of files.
|
||||
- On `WRITE` event: read all new lines from that file's `bufio.Reader`.
|
||||
- On `RENAME`/`REMOVE` (logrotate): drain old fd to EOF, close, start retry-open goroutine with
|
||||
exponential backoff. Sends the new `*os.File` back via a channel to keep map access single-threaded.
|
||||
- Emits `LogRecord` structs on a shared buffered channel (capacity 200 K — absorbs ~20 s of peak).
|
||||
- Accepts paths via `--logs` (comma-separated or glob) and `--logs-file` (one path/glob per line).
|
||||
|
||||
### parser.go
|
||||
- Parses the fixed **logtail** nginx log format — tab-separated, fixed field order, no quoting:
|
||||
|
||||
```nginx
|
||||
log_format logtail '$host\t$remote_addr\t$msec\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn';
|
||||
```
|
||||
|
||||
| # | Field | Used for |
|
||||
|---|-------------------|------------------|
|
||||
| 0 | `$host` | website |
|
||||
| 1 | `$remote_addr` | client_prefix |
|
||||
| 2 | `$msec` | (discarded) |
|
||||
| 3 | `$request_method` | (discarded) |
|
||||
| 4 | `$request_uri` | http_request_uri |
|
||||
| 5 | `$status` | http_response |
|
||||
| 6 | `$body_bytes_sent`| (discarded) |
|
||||
| 7 | `$request_time` | (discarded) |
|
||||
| 8 | `$is_tor` | is_tor |
|
||||
| 9 | `$asn` | asn |
|
||||
|
||||
- `strings.SplitN(line, "\t", 10)` — ~50 ns/line. No regex.
|
||||
- `$request_uri`: query string discarded at first `?`.
|
||||
- `$remote_addr`: truncated to /24 (IPv4) or /48 (IPv6); prefix lengths configurable via flags.
|
||||
- `$is_tor`: `1` if the client IP is a TOR exit node, `0` otherwise. Field is optional — lines
|
||||
with exactly 8 fields (old format) are accepted and default to `is_tor=false`.
|
||||
- `$asn`: client AS number as a decimal integer (from MaxMind GeoIP2). Field is optional —
|
||||
lines without it default to `asn=0`.
|
||||
- Lines with fewer than 8 fields are silently skipped.
|
||||
|
||||
### store.go
|
||||
- **Single aggregator goroutine** reads from the channel and updates the live map — no locking on
|
||||
the hot path. At 10 K lines/s the goroutine uses <1% CPU.
|
||||
- Live map: `map[Tuple6]int64`, hard-capped at 100 K entries (new keys dropped when full).
|
||||
- **Minute ticker**: heap-selects top-50K entries, writes snapshot to fine ring, resets live map.
|
||||
- Every 5 fine ticks: merge last 5 fine snapshots → top-5K → write to coarse ring.
|
||||
- **TopN query**: RLock ring, sum bucket range, apply filter, group by dimension, heap-select top N.
|
||||
- **Trend query**: per-bucket filtered sum, returns one `TrendPoint` per bucket.
|
||||
- **Subscriber fan-out**: per-subscriber buffered channel; `Subscribe`/`Unsubscribe` for streaming.
|
||||
- **`DumpRings()`**: acquires `RLock`, copies both ring arrays and their head/filled pointers
|
||||
(just slice headers — microseconds), releases lock, then returns chronologically-ordered fine
|
||||
and coarse snapshot slices. The lock is never held during serialisation or network I/O.

### server.go

- gRPC server on configurable port (default `:9090`).
- `TopN` and `Trend`: unary, answered from the ring buffer under RLock.
- `StreamSnapshots`: registers a subscriber channel; loops `Recv` on it; 30 s keepalive ticker.
- `DumpSnapshots`: calls `DumpRings()`, streams all fine buckets (`is_coarse=false`) then all
  coarse buckets (`is_coarse=true`), then closes the stream. No lock held during streaming.

## Program 2 — Aggregator

### subscriber.go

- One goroutine per collector. Dials, calls `StreamSnapshots`, forwards each `Snapshot` to the
  merger.
- Reconnects with exponential backoff (100 ms → doubles → cap 30 s).
- After 3 consecutive failures: calls `merger.Zero(addr)` to remove that collector's contribution
  from the merged view (prevents stale counts accumulating during outages).
- Resets failure count on first successful `Recv`; logs recovery.

### merger.go

- **Delta strategy** (sketched below): on each new snapshot from collector X, subtract X's previous entries from
  `merged`, add the new entries, store new map. O(snapshot_size) per update — not
  O(N_collectors × snapshot_size).
- `Zero(addr)`: subtracts the collector's last-known contribution and deletes its entry — called
  when a collector is marked degraded.
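
A sketch of `Apply` under that strategy; the struct shape follows the aggregator plan later in this document, and keying snapshots by collector address is illustrative:

```go
package merger

import "sync"

// Merger holds per-collector views plus one running merged map.
type Merger struct {
	mu           sync.Mutex
	perCollector map[string]map[string]int64 // addr → (label → count)
	merged       map[string]int64            // label → fleet-wide total
}

// Apply replaces collector addr's contribution: subtract its previous map
// from merged, add the new one — O(snapshot_size), not O(collectors × size).
func (m *Merger) Apply(addr string, snap map[string]int64) {
	m.mu.Lock()
	defer m.mu.Unlock()
	for label, c := range m.perCollector[addr] {
		if m.merged[label] -= c; m.merged[label] == 0 {
			delete(m.merged, label) // keep merged from accumulating dead keys
		}
	}
	for label, c := range snap {
		m.merged[label] += c
	}
	m.perCollector[addr] = snap // replace, never accumulate
}
```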

### cache.go

- **Tick-based rotation** (1-min ticker, not snapshot-triggered): keeps the aggregator ring aligned
  to the same 1-minute cadence as collectors regardless of how many collectors are connected.
- Same tiered ring structure as the collector store; populated from `merger.TopK()` each tick.
- `QueryTopN`, `QueryTrend`, `Subscribe`/`Unsubscribe` — identical interface to collector store.
- **`LoadHistorical(fine, coarse []Snapshot)`**: writes pre-merged backfill snapshots directly into
  the ring arrays under `mu.Lock()`, sets head and filled counters, then returns. Safe to call
  concurrently with queries. The live ticker continues from the updated head after this returns.

### backfill.go

- **`Backfill(ctx, collectorAddrs, cache)`**: called once at aggregator startup (in a goroutine,
  after the gRPC server is already listening so the frontend is never blocked).
- Dials all collectors concurrently and calls `DumpSnapshots` on each.
- Accumulates entries per timestamp in `map[unix-second]map[label]count` (sketched below); multiple collectors'
  contributions for the same bucket are summed — the same delta-merge semantics as the live path.
- Sorts timestamps chronologically, runs `TopKFromMap` per bucket, caps to ring size.
- Calls `cache.LoadHistorical` once with the merged results.
- **Graceful degradation**: if a collector returns `Unimplemented` (old binary without
  `DumpSnapshots`), logs an informational message and skips it — live streaming still starts
  normally. Any other error is logged with timing and also skipped. Partial backfill (some
  collectors succeed, some fail) is supported.
- Logs per-collector stats: bucket counts, total entry counts, and wall-clock duration.
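
A sketch of the per-timestamp merge, using local stand-ins for the proto snapshot shape (the real field names may differ):

```go
package backfill

import "sort"

// Illustrative stand-ins for the dumped proto snapshot shape.
type dumpEntry struct {
	Label string
	Count int64
}
type dumpSnapshot struct {
	Timestamp int64 // unix second of the bucket
	Entries   []dumpEntry
}

// mergeDumps sums every collector's contribution per bucket — the same
// semantics as the live delta-merge — and returns timestamps ascending.
func mergeDumps(dumped []dumpSnapshot) (map[int64]map[string]int64, []int64) {
	buckets := map[int64]map[string]int64{}
	for _, s := range dumped {
		b := buckets[s.Timestamp]
		if b == nil {
			b = map[string]int64{}
			buckets[s.Timestamp] = b
		}
		for _, e := range s.Entries {
			b[e.Label] += e.Count // cross-collector sum per bucket
		}
	}
	ts := make([]int64, 0, len(buckets))
	for t := range buckets {
		ts = append(ts, t)
	}
	sort.Slice(ts, func(i, j int) bool { return ts[i] < ts[j] })
	return buckets, ts // caller runs TopKFromMap per bucket, then LoadHistorical
}
```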

### registry.go

- **`TargetRegistry`**: `sync.RWMutex`-protected `map[addr → name]` (sketched below). Initialised with the
  configured collector addresses; display names are updated from the `source` field of the first
  snapshot received from each collector.
- `Targets()` returns a stable sorted slice of `{name, addr}` pairs for `ListTargets` responses.
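
A minimal sketch of the registry; the `Target` pair type is illustrative:

```go
package registry

import (
	"sort"
	"sync"
)

type Target struct{ Name, Addr string }

// TargetRegistry maps collector address → display name.
type TargetRegistry struct {
	mu sync.RWMutex
	m  map[string]string // addr → name
}

// SetName records the display name learned from a collector's first snapshot.
func (r *TargetRegistry) SetName(addr, name string) {
	r.mu.Lock()
	defer r.mu.Unlock()
	r.m[addr] = name
}

// Targets returns a stable, address-sorted slice for ListTargets responses.
func (r *TargetRegistry) Targets() []Target {
	r.mu.RLock()
	defer r.mu.RUnlock()
	out := make([]Target, 0, len(r.m))
	for addr, name := range r.m {
		out = append(out, Target{Name: name, Addr: addr})
	}
	sort.Slice(out, func(i, j int) bool { return out[i].Addr < out[j].Addr })
	return out
}
```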

### server.go

- Implements `LogtailService` backed by the cache (not live fan-out).
- `StreamSnapshots` re-streams merged fine snapshots; usable by a second-tier aggregator or
  monitoring system.
- `ListTargets` returns the current `TargetRegistry` contents — all configured collectors with
  their display names and gRPC addresses.

## Program 3 — Frontend

### handler.go

- All filter state in the **URL query string**: `w` (window), `by` (group_by), `f_website`,
  `f_prefix`, `f_uri`, `f_status`, `f_website_re`, `f_uri_re`, `f_is_tor`, `f_asn`, `n`, `target`. No
  server-side session — URLs are shareable and bookmarkable; multiple operators see independent views.
- **Filter expression box**: a `q=` parameter carries a mini filter language
  (`status>=400 AND website~=gouda.* AND uri~=^/api/`). On submission the handler parses it
  via `ParseFilterExpr` and redirects to the canonical URL with individual `f_*` params; `q=`
  never appears in the final URL. Parse errors re-render the current page with an inline message.
- **Status expressions**: `f_status` accepts `200`, `!=200`, `>=400`, `<500`, etc. — parsed by
  `store.ParseStatusExpr` into `(value, StatusOp)` for the filter protobuf.
- **ASN expressions**: `f_asn` accepts the same expression syntax (`12345`, `!=65000`, `>=1000`,
  `<64512`, etc.) — also parsed by `store.ParseStatusExpr`, stored as `(asn_number, AsnOp)` in the
  filter protobuf.
- **Regex filters**: `f_website_re` and `f_uri_re` hold RE2 patterns; compiled once per request
  into `store.CompiledFilter` before the query-loop iteration. Invalid regexes match nothing.
- `TopN`, `Trend`, and `ListTargets` RPCs issued **concurrently** (all with a 5 s deadline; sketched below); page
  renders with whatever completes. Trend failure suppresses the sparkline; `ListTargets` failure
  hides the source picker — both are non-fatal.
- **Source picker**: `ListTargets` result drives a `source:` tab row. Clicking a collector tab
  sets `target=` to that collector's address, querying it directly. The "all" tab resets to the
  default aggregator. Picker is hidden when `ListTargets` returns zero collectors (direct collector
  mode).
- **Drilldown**: clicking a table row adds the current dimension's filter and advances `by` through
  `website → prefix → uri → status → asn → website` (cycles).
- **`raw=1`**: returns the TopN result as JSON — same URL, no CLI needed for scripting.
- **`target=` override**: per-request gRPC endpoint override for comparing sources.
- Error pages render at HTTP 502 with the window/group-by tabs still functional.
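
A sketch of that concurrent fan-out as it might look inside the handler; `client`, the request values, and the `pb` message names are assumptions for illustration:

```go
// Shared 5 s deadline for all three RPCs; the page renders with whatever
// arrives before it expires.
ctx, cancel := context.WithTimeout(r.Context(), 5*time.Second)
defer cancel()

var (
	wg      sync.WaitGroup
	topn    *pb.TopNResponse
	trend   *pb.TrendResponse
	targets *pb.ListTargetsResponse
	topnErr error
)
wg.Add(3)
go func() { defer wg.Done(); topn, topnErr = client.TopN(ctx, topnReq) }()
go func() { defer wg.Done(); trend, _ = client.Trend(ctx, trendReq) }() // failure → no sparkline
go func() { defer wg.Done(); targets, _ = client.ListTargets(ctx, listReq) }() // failure → no picker
wg.Wait()
// Only the TopN error is fatal (HTTP 502); trend and targets degrade gracefully.
```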

### sparkline.go

- `renderSparkline([]*pb.TrendPoint) template.HTML` — fixed `viewBox="0 0 300 60"` SVG,
  Y-scaled to max count, rendered as `<polyline>`. Returns `""` for fewer than 2 points or
  all-zero data.

### templates/

- `base.html`: outer shell, inline CSS (~40 lines), conditional `<meta http-equiv="refresh">`.
- `index.html`: window tabs, group-by tabs, filter breadcrumb with `×` remove links, sparkline,
  TopN table with `<meter>` bars (% relative to rank-1), footer with source and refresh info.
- No external CSS, no web fonts, no JavaScript. Renders in w3m/lynx.

## Program 4 — CLI

### Subcommands

```
logtail-cli topn [flags]      ranked label → count table (exits after one response)
logtail-cli trend [flags]     per-bucket time series (exits after one response)
logtail-cli stream [flags]    live snapshot feed (runs until Ctrl-C, auto-reconnects)
logtail-cli targets [flags]   list targets known to the queried endpoint
```

### Flags

**Shared** (all subcommands):

| Flag           | Default          | Description                                                |
|----------------|------------------|------------------------------------------------------------|
| `--target`     | `localhost:9090` | Comma-separated `host:port` list; fan-out to all           |
| `--json`       | false            | Emit newline-delimited JSON instead of a table             |
| `--website`    | —                | Filter: website                                            |
| `--prefix`     | —                | Filter: client prefix                                      |
| `--uri`        | —                | Filter: request URI                                        |
| `--status`     | —                | Filter: HTTP status expression (`200`, `!=200`, `>=400`, `<500`, …) |
| `--website-re` | —                | Filter: RE2 regex against website                          |
| `--uri-re`     | —                | Filter: RE2 regex against request URI                      |
| `--is-tor`     | —                | Filter: TOR traffic (`1` or `!=0` = TOR only; `0` or `!=1` = non-TOR only) |
| `--asn`        | —                | Filter: ASN expression (`12345`, `!=65000`, `>=1000`, `<64512`, …) |

**`topn` only**: `--n 10`, `--window 5m`, `--group-by website`

**`trend` only**: `--window 5m`

### Multi-target fan-out

`--target` accepts a comma-separated list. All targets are queried concurrently; results are
printed in order with a per-target header. Single-target output omits the header for clean
pipe-to-`jq` use.

### Output

Default: human-readable table with space-separated thousands (`18 432`).
`--json`: a single JSON array (one object per target) for `topn` and `trend`; NDJSON for `stream` (unbounded).

`stream` reconnects automatically on error (5 s backoff). All other subcommands exit immediately
with a non-zero code on gRPC error.

## Key Design Decisions

| Decision | Rationale |
|----------|-----------|
| Single aggregator goroutine in collector | Eliminates all map lock contention on the 10 K/s hot path |
| Hard cap live map at 100 K entries | Bounds memory regardless of DDoS cardinality explosion |
| Ring buffer of sorted snapshots (not raw maps) | TopN queries avoid re-sorting; merge is a single heap pass |
| Push-based streaming (collector → aggregator) | Aggregator cache always fresh; query latency is cache-read only |
| Delta merge in aggregator | O(snapshot_size) per update, not O(N_collectors × size) |
| Tick-based cache rotation in aggregator | Ring stays on the same 1-min cadence regardless of collector count |
| Degraded collector zeroing | Stale counts from failed collectors don't accumulate in the merged view |
| Same `LogtailService` for collector and aggregator | CLI and frontend work with either; no special-casing |
| `internal/store` shared package | Ring-buffer, `Tuple6` encoding, and filter logic shared between collector and aggregator |
| Filter state in URL, not session cookie | Multiple concurrent operators; shareable/bookmarkable URLs |
| Query strings stripped at ingest | Major cardinality reduction; prevents URI explosion under attack |
| No persistent storage | Simplicity; acceptable for ops dashboards (restart = lose history) |
| Trusted internal network, no TLS | Reduces operational complexity; add a TLS proxy if needed later |
| Server-side SVG sparklines, meta-refresh | Zero JS dependencies; works in terminal browsers and curl |
| CLI default: human-readable table | Operator-friendly by default; `--json` opt-in for scripting |
| CLI multi-target fan-out | Compare a collector vs. aggregator, or two collectors, in one command |
| CLI uses stdlib `flag`, no framework | Four subcommands don't justify a dependency |
| Status filter as expression string (`!=200`, `>=400`) | Operator-friendly; parsed once at query boundary, encoded as `(int32, StatusOp)` in proto |
| ASN filter reuses `StatusOp` and `ParseStatusExpr` | Same 6-operator grammar as status; no duplicate enum or parser needed |
| Regex filters compiled once per query (`CompiledFilter`) | Up to 288 × 5 000 per-entry calls — compiling per-entry would dominate query latency |
| Filter expression box (`q=`) redirects to canonical URL | Filter state stays in individual `f_*` params; URLs remain shareable and bookmarkable |
| `ListTargets` + frontend source picker | "Which nginx is busiest?" answered by switching `target=` to a collector; no data model changes, no extra memory |
| Backfill via `DumpSnapshots` on restart | Aggregator recovers full 24h ring from collectors on restart; gRPC server starts first so frontend is never blocked during backfill |
| `DumpRings()` copies under lock, streams without lock | Lock held for microseconds (slice-header copy only); network I/O happens outside the lock so minute rotation is never delayed |
| Backfill merges per-timestamp across collectors | Correct cross-collector sums per bucket, same semantics as live delta-merge; collectors that don't support `DumpSnapshots` are skipped gracefully |

@@ -1,250 +0,0 @@

# Aggregator v0 — Implementation Plan

Module path: `git.ipng.ch/ipng/nginx-logtail`

**Scope:** A working aggregator that subscribes to `StreamSnapshots` from all configured
collectors, maintains a merged in-memory cache, and serves the same `LogtailService` gRPC
interface as the collector. Tolerates partial collector failures.

---

## Step 1 — Extract shared logic to `internal/store`

The aggregator's cache is structurally identical to the collector's store: same `Entry` and
`snapshot` types, same tiered ring buffers, same heap-based top-K, same label encoding
(`encodeTuple`, `labelTuple`), same `matchesFilter` and `dimensionLabel` functions.

Rather than duplicating ~200 lines of load-bearing code, extract these to a shared internal
package before writing any aggregator code. Then refactor the collector to import it.

**New package: `internal/store`**

Move from `cmd/collector/store.go` into `internal/store/store.go`:
- `Tuple4` struct
- `Entry` struct
- `snapshot` struct (unexported → exported: `Snapshot`)
- `entryHeap` + heap interface methods
- `encodeTuple`, `labelTuple`, `splitN`, `indexOf`
- `matchesFilter`, `dimensionLabel`
- `topKFromMap`, `topK`
- `trendPoint`
- `ringView`, `bucketsForWindow`
- All ring-buffer constants (`fineRingSize`, `coarseRingSize`, `fineTopK`, `coarseTopK`,
  `coarseEvery`)

Keep in `cmd/collector/store.go` (collector-specific):
- `liveMapCap`
- `Store` struct (live map + ring buffers + subscriber fan-out + `Run` goroutine)
- `ingest`, `rotate`, `mergeFineBuckets`
- `QueryTopN`, `QueryTrend`, `Subscribe`, `Unsubscribe`, `broadcast`
- The `Store` embeds the ring buffers using the types from `internal/store`

Collector tests must continue to pass unchanged after the refactor.

---

## Step 2 — subscriber.go

One goroutine per collector. Dials the collector, calls `StreamSnapshots`, and forwards each
received `pb.Snapshot` to the merger. Reconnects with exponential backoff on any stream error
(see the sketch after this list).

```
CollectorSub struct:
    addr   string
    merger *Merger
    source string // filled from first snapshot received
    fails  int    // consecutive failures
```

Lifecycle:
1. `Dial(addr)` → `client.StreamSnapshots(ctx, &pb.SnapshotRequest{})`
2. Loop: `stream.Recv()` → `merger.Apply(snap)`; on error: close, `fails++`
3. If `fails >= 3`: call `merger.Zero(addr)`, log degraded warning
4. Backoff sleep (100 ms → doubles → cap 30 s), then go to step 1
5. On successful `Recv()` after degraded: `fails = 0`, log recovery

Context cancellation exits the goroutine cleanly.
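
A sketch of that backoff loop; `streamOnce` (dial + `Recv` loop, resetting `s.fails` on the first successful `Recv`) is a hypothetical helper, not the real function name:

```go
func (s *CollectorSub) Run(ctx context.Context) {
	backoff := 100 * time.Millisecond
	for ctx.Err() == nil {
		if err := s.streamOnce(ctx); err != nil { // dial + Recv loop; resets fails on success
			s.fails++
			if s.fails == 3 {
				s.merger.Zero(s.addr) // drop this collector's stale contribution
				log.Printf("collector %s marked degraded", s.addr)
			}
		}
		select {
		case <-ctx.Done():
			return // clean exit on context cancellation
		case <-time.After(backoff):
		}
		backoff *= 2 // a successful stream would also reset backoff (elided)
		if backoff > 30*time.Second {
			backoff = 30 * time.Second
		}
	}
}
```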

---

## Step 3 — merger.go

Maintains the per-collector maps and a single running merged map. Uses a delta strategy:
when a new snapshot arrives from collector X, subtract X's previous entries from `merged`,
add the new entries, and replace X's stored map. This is O(snapshot_size) rather than
O(N_collectors × snapshot_size).

```
Merger struct:
    mu           sync.Mutex
    perCollector map[string]map[string]int64 // addr → (label → count)
    merged       map[string]int64            // label → total count across all collectors
```

Methods:
- `Apply(snap *pb.Snapshot)` — lock, subtract old, add new, store new, unlock
- `Zero(addr string)` — lock, subtract perCollector[addr] from merged, delete entry, unlock
- `TopK(k int) []store.Entry` — lock, call `store.TopKFromMap(merged, k)`, unlock

`Apply` is called from multiple subscriber goroutines concurrently — the mutex is the only
synchronisation point. No channels needed here.

---

## Step 4 — cache.go

The aggregator's equivalent of the collector's `Store`. Holds the tiered ring buffers and
answers `TopN`/`Trend`/`StreamSnapshots` queries. Populated by a 1-minute ticker that snapshots
the current merged view from the merger.

```
Cache struct:
    source string
    merger *Merger

    mu           sync.RWMutex
    fineRing     [fineRingSize]store.Snapshot
    fineHead     int
    fineFilled   int
    coarseRing   [coarseRingSize]store.Snapshot
    coarseHead   int
    coarseFilled int
    fineTick     int

    subMu sync.Mutex
    subs  map[chan store.Snapshot]struct{}
```

`Run(ctx context.Context)` (sketched below):
- 1-minute ticker → `rotate(time.Now())`
- `rotate`: `merger.TopK(fineTopK)` → fine ring slot; every 5 ticks → merge last 5 fine slots
  into coarse ring slot (identical logic to collector `Store.rotate`)
- After writing: broadcast fine snapshot to subscribers

`QueryTopN`, `QueryTrend`, `Subscribe`, `Unsubscribe`, `broadcast`: identical to collector
`Store`, backed by `internal/store` helpers.

**Why tick-based and not snapshot-triggered?**
Collectors send snapshots roughly once per minute but not in sync. Triggering a ring write on
every incoming snapshot would produce N writes per minute (one per collector), inflating the ring
and misaligning time windows. A single ticker keeps the aggregator ring aligned with the same
1-minute cadence as the collectors.
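
A sketch of `Run` under these rules — one ring write per minute, however many collectors are connected:

```go
func (c *Cache) Run(ctx context.Context) {
	t := time.NewTicker(time.Minute)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case now := <-t.C:
			// rotate snapshots merger.TopK(fineTopK) into the fine ring and,
			// every fifth tick, merges the last 5 fine slots into the coarse ring.
			c.rotate(now)
		}
	}
}
```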

---

## Step 5 — server.go

Identical structure to `cmd/collector/server.go`. Implements `pb.LogtailServiceServer` backed by
the `Cache` instead of the collector's `Store`. No new logic; just a different backing type.

`StreamSnapshots` sends merged fine snapshots (from `cache.Subscribe`) to downstream consumers
(frontend, CLI, or a second-tier aggregator).

---

## Step 6 — main.go

Flags:

| Flag           | Default  | Description                                          |
|----------------|----------|------------------------------------------------------|
| `--listen`     | `:9091`  | gRPC listen address                                  |
| `--collectors` | —        | Comma-separated `host:port` addresses of collectors  |
| `--source`     | hostname | Name for this aggregator in query responses          |

Wire-up (sketched after this list):
1. Parse collector addresses
2. Create `Merger`
3. Create `Cache(merger, source)`
4. Start `cache.Run(ctx)` goroutine (ticker + ring rotation)
5. Start one `CollectorSub.Run(ctx)` goroutine per collector address
6. Start gRPC server
7. `signal.NotifyContext` for clean shutdown on SIGINT/SIGTERM
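
A condensed wire-up sketch; flag parsing is elided and the constructor names (`NewMerger`, `NewCache`, `NewServer`) are illustrative:

```go
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()

merger := NewMerger()
cache := NewCache(merger, *source)
go cache.Run(ctx) // ticker + ring rotation

for _, addr := range parseTargets(*collectors) {
	sub := &CollectorSub{addr: addr, merger: merger}
	go sub.Run(ctx) // one subscriber goroutine per collector
}

lis, err := net.Listen("tcp", *listen)
if err != nil {
	log.Fatal(err)
}
srv := grpc.NewServer()
pb.RegisterLogtailServiceServer(srv, NewServer(cache))
go func() { <-ctx.Done(); srv.GracefulStop() }() // clean shutdown on signal
log.Fatal(srv.Serve(lis))
```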

---

## Step 7 — Tests

| Test | What it covers |
|------|----------------|
| `TestMergerApply` | Two collectors send snapshots; merged map sums correctly |
| `TestMergerReplacement` | Second snapshot from same collector replaces first, not adds |
| `TestMergerZero` | Marking a collector degraded removes its contribution from merged |
| `TestMergerConcurrent` | `Apply` and `Zero` from concurrent goroutines; no race (run with `-race`) |
| `TestCacheRotation` | After one ticker fire, fine ring has 1 entry with correct counts |
| `TestCacheCoarseRing` | After 5 ticker fires, coarse ring has 1 entry |
| `TestCacheQueryTopN` | TopN returns correct merged rankings |
| `TestCacheQueryTrend` | Trend returns per-bucket sums oldest-first |
| `TestCacheSubscribe` | Subscriber receives snapshot after each rotation |
| `TestGRPCEndToEnd` | Two in-process fake collector servers; real aggregator dials them; TopN, Trend, StreamSnapshots verified |

All existing collector tests must continue to pass after the `internal/store` refactor.

---

## Step 8 — Smoke test

- Start two collector instances pointing at generated log files
- Start the aggregator pointing at both
- Use `grpcurl` to call `TopN` on the aggregator and confirm counts match the sum of the two
  individual collector `TopN` results
- Kill one collector; confirm the aggregator continues serving and logs a degraded warning
- Restart the killed collector; confirm the aggregator recovers and resumes merging

---

## ✓ COMPLETE — Implementation notes

### Deviations from the plan

- **`TestMergerZeroNonexistent` added**: Plan listed 10 tests; extra tests were added, including
  one covering `Zero()` on a source that never sent a snapshot (should be a no-op). Total: 13 tests.
- **`TestDegradedCollector` in end-to-end section**: Rather than a separate block, degraded
  behaviour is tested with one real fake collector + one unreachable port in the same test file.
- **Race in `TestGRPCEndToEnd`**: The `cache.rotate()` call to trigger a broadcast needed a
  50 ms sleep after `client.StreamSnapshots()` to allow the server goroutine to register its
  subscriber before the broadcast fired. Without it the test was intermittently flaky under
  the race detector and parallel test runs.
- **`source` field not stored on `CollectorSub`**: Plan mentioned storing `source` from the first
  snapshot, but `Apply` uses `snap.Source` directly (keying `perCollector` by address). The
  `source` field was not needed on the struct.

### Test results

```
$ go test ./... -count=1 -race -timeout 60s
ok      git.ipng.ch/ipng/nginx-logtail/cmd/aggregator   4.1s
ok      git.ipng.ch/ipng/nginx-logtail/cmd/collector    9.7s
```

All 13 aggregator tests and all 17 collector tests pass with `-race`.

### Test inventory

| Test | Package | What it covers |
|------|---------|----------------|
| `TestMergerApply` | aggregator | Two collectors sum correctly |
| `TestMergerReplacement` | aggregator | Second snapshot replaces, not adds |
| `TestMergerZero` | aggregator | Degraded collector removed from merged |
| `TestMergerZeroNonexistent` | aggregator | Zero on unknown source is a no-op |
| `TestMergerConcurrent` | aggregator | Apply + Zero from concurrent goroutines; -race |
| `TestCacheRotation` | aggregator | Fine ring written after one ticker fire |
| `TestCacheCoarseRing` | aggregator | Coarse ring written after 5 ticker fires |
| `TestCacheQueryTopN` | aggregator | TopN returns correct merged rankings |
| `TestCacheQueryTopNWithFilter` | aggregator | TopN with website filter |
| `TestCacheQueryTrend` | aggregator | Trend per-bucket sums oldest-first |
| `TestCacheSubscribe` | aggregator | Subscriber receives snapshot on rotation |
| `TestGRPCEndToEnd` | aggregator | Two fake collectors; real gRPC TopN/Trend/Stream |
| `TestDegradedCollector` | aggregator | Bad address zeroed; good collector still visible |

---

## Deferred (not in v0)

- Per-source (busiest nginx) breakdown — requires adding `SOURCE` to the `GroupBy` proto enum
  and encoding the source into the merged snapshot entries; deferred until the proto is stable
- `cmd/cli` — covered in PLAN_CLI.md
- `cmd/frontend` — covered in PLAN_FRONTEND.md
- ClickHouse export
- TLS / auth
- Prometheus metrics endpoint

docs/PLAN_CLI.md
@@ -1,293 +0,0 @@

# CLI v0 — Implementation Plan

Module path: `git.ipng.ch/ipng/nginx-logtail`

**Scope:** A shell-facing debug tool that can query any number of collectors or aggregators
(they share the same `LogtailService` gRPC interface) and print results in a human-readable
table or JSON. Supports all three RPCs: `TopN`, `Trend`, and `StreamSnapshots`.

---

## Overview

Single binary `logtail-cli` with three subcommands:

```
logtail-cli topn [flags]     # ranked list of label → count
logtail-cli trend [flags]    # per-bucket time series
logtail-cli stream [flags]   # live snapshot feed
```

All subcommands accept one or more `--target` addresses. Requests are fanned out
concurrently; each target's results are printed under a labeled header. With a single
target the header is omitted for clean pipe-friendly output.

---

## Step 1 — main.go and subcommand dispatch

No third-party CLI frameworks — plain `os.Args` subcommand dispatch, each subcommand
registers its own `flag.FlagSet`.

```
main():
    if len(os.Args) < 2 → print usage, exit 1
    switch os.Args[1]:
        "topn"   → runTopN(os.Args[2:])
        "trend"  → runTrend(os.Args[2:])
        "stream" → runStream(os.Args[2:])
        default  → print usage, exit 1
```

Usage text lists all subcommands and their flags.

---

## Step 2 — Shared flags and client helper (`flags.go`, `client.go`)

**Shared flags** (parsed by each subcommand's FlagSet):

| Flag | Default | Description |
|------|---------|-------------|
| `--target` | `localhost:9090` | Comma-separated `host:port` list (may be repeated) |
| `--json` | false | Emit newline-delimited JSON instead of a table |
| `--website` | — | Filter: exact website match |
| `--prefix` | — | Filter: exact client prefix match |
| `--uri` | — | Filter: exact URI match |
| `--status` | — | Filter: exact HTTP status match |

`parseTargets(s string) []string` — split on comma, trim spaces, deduplicate.

`buildFilter(flags) *pb.Filter` — returns nil if no filter flags set (signals "no filter"
to the server), otherwise populates the proto fields.

**`client.go`**:

```go
func dial(addr string) (*grpc.ClientConn, pb.LogtailServiceClient, error)
```

Plain insecure dial (matching the servers' plain-TCP listener). Returns an error rather
than calling `log.Fatal` so callers can report which target failed without killing the process
(see the sketch below).
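
A sketch of such a dial helper; `grpc.NewClient` with insecure transport credentials matches recent grpc-go (older code would use `grpc.Dial`), and `pb.NewLogtailServiceClient` is the standard protoc-generated constructor name:

```go
import (
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

func dial(addr string) (*grpc.ClientConn, pb.LogtailServiceClient, error) {
	conn, err := grpc.NewClient(addr,
		grpc.WithTransportCredentials(insecure.NewCredentials())) // plain TCP, no TLS
	if err != nil {
		return nil, nil, err // caller decides how to report the failed target
	}
	return conn, pb.NewLogtailServiceClient(conn), nil
}
```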

---

## Step 3 — `topn` subcommand (`cmd_topn.go`)

Additional flags:

| Flag | Default | Description |
|------|---------|-------------|
| `--n` | 10 | Number of entries to return |
| `--window` | `5m` | Time window: `1m 5m 15m 60m 6h 24h` |
| `--group-by` | `website` | Grouping: `website prefix uri status` |

`parseWindow(s string) pb.Window` — maps string → proto enum, exits on unknown value.
`parseGroupBy(s string) pb.GroupBy` — same pattern.

Fan-out: one goroutine per target, each calls `TopN` with a 10 s context deadline,
sends result (or error) on a typed result channel. Main goroutine collects all results
in target order (sketched below).
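
A sketch of that fan-out, writing into a slice indexed by target so the main goroutine can print in the original order; `req` and the result type name are illustrative:

```go
type topnResult struct {
	target string
	resp   *pb.TopNResponse
	err    error
}

results := make([]topnResult, len(targets))
var wg sync.WaitGroup
for i, t := range targets {
	wg.Add(1)
	go func(i int, t string) {
		defer wg.Done()
		ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
		defer cancel()
		conn, client, err := dial(t)
		if err != nil {
			results[i] = topnResult{target: t, err: err}
			return
		}
		defer conn.Close()
		resp, err := client.TopN(ctx, req)
		results[i] = topnResult{target: t, resp: resp, err: err}
	}(i, t)
}
wg.Wait() // print results[0..n-1] in target order
```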

**Table output** (default):

```
=== collector-1 (localhost:9090) ===
RANK  COUNT   LABEL
   1  18 432  example.com
   2   4 211  other.com
...

=== aggregator (localhost:9091) ===
RANK  COUNT   LABEL
   1  22 643  example.com
...
```

Single-target: header omitted, plain table printed.

**JSON output** (`--json`): one JSON object per target, written sequentially to stdout:

```json
{"source":"collector-1","target":"localhost:9090","entries":[{"label":"example.com","count":18432},...]}
```

---

## Step 4 — `trend` subcommand (`cmd_trend.go`)

Additional flags:

| Flag | Default | Description |
|------|---------|-------------|
| `--window` | `5m` | Time window: `1m 5m 15m 60m 6h 24h` |

Same fan-out pattern as `topn`.

**Table output**:

```
=== collector-1 (localhost:9090) ===
TIME (UTC)         COUNT
2026-03-14 20:00     823
2026-03-14 20:01     941
...
```

Points are printed oldest-first (as returned by the server).

**JSON output**: one object per target:

```json
{"source":"col-1","target":"localhost:9090","points":[{"ts":1773516000,"count":823},...]}
```

---

## Step 5 — `stream` subcommand (`cmd_stream.go`)

No extra flags beyond shared ones. Each target gets one persistent `StreamSnapshots`
connection. All streams are multiplexed onto a single output goroutine via an internal
channel so lines from different targets don't interleave.

```
type streamEvent struct {
    target string
    source string
    snap   *pb.Snapshot
    err    error
}
```

One goroutine per target: connect → loop `stream.Recv()` → send event on channel.
On error: log to stderr, attempt reconnect after 5 s backoff (indefinitely, until
`Ctrl-C`).

`signal.NotifyContext` on SIGINT/SIGTERM cancels all stream goroutines.

**Table output** (one line per snapshot received):

```
2026-03-14 20:03:00  agg-test (localhost:9091)  950 entries  top: example.com=18432
```

**JSON output**: one JSON object per snapshot event:

```json
{"ts":1773516180,"source":"agg-test","target":"localhost:9091","top_label":"example.com","top_count":18432,"total_entries":950}
```

---

## Step 6 — Formatting helpers (`format.go`)

```go
func printTable(w io.Writer, headers []string, rows [][]string)
```

Right-aligns numeric columns (COUNT, RANK), left-aligns strings. Uses `text/tabwriter`
with padding=2. No external dependencies.

```go
func fmtCount(n int64) string   // "18 432" — space as thousands separator
func fmtTime(unix int64) string // "2026-03-14 20:03" UTC
```
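
A sketch of the thousands-separator formatting — grouping digits in threes from the right, so `fmtCount(18432)` yields `"18 432"`:

```go
func fmtCount(n int64) string {
	s := strconv.FormatInt(n, 10)
	neg := strings.HasPrefix(s, "-")
	if neg {
		s = s[1:]
	}
	var b strings.Builder
	for i := range s {
		if i > 0 && (len(s)-i)%3 == 0 {
			b.WriteByte(' ') // space before each remaining group of three
		}
		b.WriteByte(s[i])
	}
	if neg {
		return "-" + b.String()
	}
	return b.String()
}
```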

---

## Step 7 — Tests (`cli_test.go`)

Unit tests run entirely in-process with fake gRPC servers (same pattern as
`cmd/aggregator/aggregator_test.go`).

| Test | What it covers |
|------|----------------|
| `TestParseWindow` | All 6 window strings → correct proto enum; bad value exits |
| `TestParseGroupBy` | All 4 group-by strings → correct proto enum; bad value exits |
| `TestParseTargets` | Comma split, trim, dedup |
| `TestBuildFilter` | All combinations of filter flags → correct proto Filter |
| `TestTopNSingleTarget` | Fake server; `runTopN` output matches expected table |
| `TestTopNMultiTarget` | Two fake servers; both headers present in output |
| `TestTopNJSON` | `--json` flag; output is valid JSON with correct fields |
| `TestTrendSingleTarget` | Fake server; points printed oldest-first |
| `TestTrendJSON` | `--json` flag; output is valid JSON |
| `TestStreamReceivesSnapshots` | Fake server sends 3 snapshots; output has 3 lines |
| `TestFmtCount` | `fmtCount(18432)` → `"18 432"` |
| `TestFmtTime` | `fmtTime(1773516000)` → `"2026-03-14 20:00"` |

---

## ✓ COMPLETE — Implementation notes

### Deviations from the plan

- **`TestFmtTime` uses `time.Date`, not a hardcoded unix literal**: The hardcoded value
  `1773516000` turned out to be 2026-03-14 19:20 UTC, not 20:00. Fixed by computing the
  timestamp dynamically with `time.Date(2026, 3, 14, 20, 0, 0, 0, time.UTC).Unix()`.
- **`TestTopNJSON` tests field values, not serialised bytes**: Calling `printTopNJSON` would
  require redirecting stdout. Instead the test verifies the response struct fields that the
  JSON formatter would use — simpler and equally effective.
- **`streamTarget` reconnect loop lives in `cmd_stream.go`**, not a separate file. The stream
  and reconnect logic are short enough to colocate.

### Test results

```
$ go test ./... -count=1 -race -timeout 60s
ok      git.ipng.ch/ipng/nginx-logtail/cmd/cli          1.0s (14 tests)
ok      git.ipng.ch/ipng/nginx-logtail/cmd/aggregator   4.1s (13 tests)
ok      git.ipng.ch/ipng/nginx-logtail/cmd/collector    9.9s (17 tests)
```

### Test inventory

| Test | What it covers |
|------|----------------|
| `TestParseTargets` | Comma split, trim, deduplication |
| `TestParseWindow` | All 6 window strings → correct proto enum |
| `TestParseGroupBy` | All 4 group-by strings → correct proto enum |
| `TestBuildFilter` | Filter fields set correctly from flags |
| `TestBuildFilterNil` | Returns nil when no filter flags set |
| `TestFmtCount` | Space-separated thousands: 1234567 → "1 234 567" |
| `TestFmtTime` | Unix → "2026-03-14 20:00" UTC |
| `TestTopNSingleTarget` | Fake server; correct entry count and top label |
| `TestTopNMultiTarget` | Two fake servers; results ordered by target |
| `TestTopNJSON` | Response fields match expected values for JSON |
| `TestTrendSingleTarget` | Correct point count and ascending timestamp order |
| `TestTrendJSON` | JSON round-trip preserves source, ts, count |
| `TestStreamReceivesSnapshots` | 3 snapshots delivered from fake server via events channel |
| `TestTargetHeader` | Single-target → empty; multi-target → labeled header |

---

## Step 8 — Smoke test

```bash
# Start a collector
./logtail-collector --listen :9090 --logs /var/log/nginx/access.log

# Start an aggregator
./logtail-aggregator --listen :9091 --collectors localhost:9090

# Query TopN from both in one shot
./logtail-cli topn --target localhost:9090,localhost:9091 --window 15m --n 5

# Stream live snapshots from both simultaneously
./logtail-cli stream --target localhost:9090,localhost:9091

# Filter to one website, group by URI
./logtail-cli topn --target localhost:9091 --website example.com --group-by uri --n 20

# JSON output for scripting
./logtail-cli topn --target localhost:9091 --json | jq '.entries[0]'
```

---

## Deferred (not in v0)

- `--format csv` — easy to add later if needed for spreadsheet export
- `--count` / `--watch N` — repeat the query every N seconds (like `watch(1)`)
- Color output (`--color`) — ANSI highlighting of top entries
- Connecting to TLS-secured endpoints (when TLS is added to the servers)
- Per-source breakdown (depends on `SOURCE` GroupBy being added to the proto)

@@ -1,144 +0,0 @@

# Collector v0 — Implementation Plan ✓ COMPLETE

Module path: `git.ipng.ch/ipng/nginx-logtail`

**Scope:** A working collector that tails files, aggregates into memory, and serves `TopN`,
`Trend`, and `StreamSnapshots` over gRPC. Full vertical slice, no optimisation passes yet.

---

## Step 1 — Repo scaffolding
- `go mod init git.ipng.ch/ipng/nginx-logtail`
- `.gitignore`
- Install deps: `google.golang.org/grpc`, `google.golang.org/protobuf`, `github.com/fsnotify/fsnotify`

## Step 2 — Proto (`proto/logtail.proto`)
Write the full proto file as specified in README.md DESIGN § Protobuf API. Generate Go stubs with
`protoc`. Commit generated files. This defines the contract everything else builds on.

## Step 3 — Parser (`cmd/collector/parser.go`)
- `LogRecord` struct: `Website`, `ClientPrefix`, `URI`, `Status string`
- `ParseLine(line string) (LogRecord, bool)` — `SplitN` on tab, discard query string at `?`,
  return `false` for lines with fewer than 8 fields
- `TruncateIP(addr string, v4bits, v6bits int) string` — handle IPv4 and IPv6
- Unit-tested with table-driven tests: normal line, short line, IPv6, query string stripping,
  /24 and /48 truncation

## Step 4 — Store (`cmd/collector/store.go`)
Implement in order, each piece testable independently:

1. **`Tuple4` and live map** — `map[Tuple4]int64`, cap enforcement at 100K, `Ingest(r LogRecord)`
2. **Fine ring buffer** — `[60]Snapshot` circular array, `rotate()` heap-selects top-50K from
   live map, appends to ring, resets live map
3. **Coarse ring buffer** — `[288]Snapshot`, populated every 5 fine rotations by merging
   the last 5 fine snapshots into a top-5K snapshot
4. **`QueryTopN(filter, groupBy, n, window)`** — RLock, sum bucket range, group by dimension,
   apply filter, heap-select top N
5. **`QueryTrend(filter, window)`** — per-bucket count sum, returns one point per bucket
6. **`Store.Run(ch <-chan LogRecord)`** — single goroutine: read channel → `Ingest`, minute
   ticker → `rotate()`
7. **Snapshot broadcast** — per-subscriber buffered channel fan-out;
   `Subscribe() <-chan Snapshot` / `Unsubscribe(ch)`

## Step 5 — Tailer (`cmd/collector/tailer.go`)
- `Tailer` struct: path, fsnotify watcher, output channel
- On start: open file, seek to EOF, register fsnotify watch
- On `fsnotify.Write`: `bufio.Scanner` reads all new lines, sends `LogRecord` to channel
- On `fsnotify.Rename` / `Remove`: drain to EOF, close fd, retry open with 100 ms backoff
  (up to 5 s), resume from position 0 — no lines lost between drain and reopen
- `Tailer.Run(ctx context.Context)` — blocks until context cancelled (see the sketch after this list)
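
A minimal single-file tail-on-write sketch (the shipped code uses a shared-watcher `MultiTailer`, see the implementation notes below); `path`, `ch`, and `ParseLine` are assumed from the surrounding steps, and a `bufio.Reader` stands in for the planned `Scanner` so that partial-line handling is visible:

```go
w, err := fsnotify.NewWatcher()
if err != nil {
	log.Fatal(err)
}
defer w.Close()

f, err := os.Open(path)
if err != nil {
	log.Fatal(err)
}
if _, err := f.Seek(0, io.SeekEnd); err != nil { // start at EOF: only new lines
	log.Fatal(err)
}
if err := w.Add(path); err != nil {
	log.Fatal(err)
}

r := bufio.NewReader(f)
var partial string // carries an incomplete final line across Write events
for ev := range w.Events {
	if ev.Op&fsnotify.Write == 0 {
		continue // Rename/Remove handling (drain, reopen, backoff) elided
	}
	for {
		chunk, err := r.ReadString('\n')
		if err != nil {
			partial += chunk // no newline yet: keep for the next event
			break
		}
		line := strings.TrimRight(partial+chunk, "\n")
		partial = ""
		if rec, ok := ParseLine(line); ok {
			ch <- rec
		}
	}
}
```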

## Step 6 — gRPC server (`cmd/collector/server.go`)
- `Server` wraps `*Store`, implements `LogtailServiceServer`
- `TopN`: `store.QueryTopN` → marshal to proto response
- `Trend`: `store.QueryTrend` → marshal to proto response
- `StreamSnapshots`: `store.Subscribe()`, loop sending snapshots until client disconnects
  or context done, then `store.Unsubscribe(ch)`

## Step 7 — Main (`cmd/collector/main.go`)
Flags:
- `--listen` default `:9090`
- `--logs` comma-separated log file paths
- `--source` name for this collector instance (default: hostname)
- `--v4prefix` default `24`
- `--v6prefix` default `48`

Wire-up: create channel → start `store.Run` goroutine → start one `Tailer` goroutine per log
path → start gRPC server → `signal.NotifyContext` for clean shutdown on SIGINT/SIGTERM.

## Step 8 — Smoke test
- Generate fake log lines at 10K/s (small Go script or shell one-liner)
- Run collector against them
- Use `grpcurl` to call `TopN` and verify results
- Check `runtime.MemStats` to confirm memory stays well under 1 GB

---

## Deferred (not in v0)
- `cmd/cli`, `cmd/aggregator`, `cmd/frontend`
- ClickHouse export
- TLS / auth
- Prometheus metrics endpoint

---

## Implementation notes

### Deviation from plan: MultiTailer

Step 5 planned one `Tailer` struct per file. During implementation this was changed to a single
`MultiTailer` with one shared `fsnotify.Watcher`. Reason: one watcher per file creates one inotify
instance per file; the kernel default limit is 128 instances per user, which would be hit with
100s of log files. The `MultiTailer` uses a single instance and routes events by path via a
`map[string]*fileState`.

### Deviation from plan: IPv6 /48 semantics

The design doc said "truncate to /48". `/48` keeps the first three full 16-bit groups intact
(e.g. `2001:db8:cafe::1` → `2001:db8:cafe::/48`). An early test expected `2001:db8:ca00::/48`
(truncating mid-group), which was wrong. The code is correct; the test was fixed.
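
The masking semantics can be demonstrated with the standard library's `net/netip`; this is a sketch of the behaviour the note describes, not the shipped `TruncateIP`:

```go
import "net/netip"

func TruncateIP(addr string, v4bits, v6bits int) string {
	ip, err := netip.ParseAddr(addr)
	if err != nil {
		return addr // leave unparseable input as-is
	}
	bits := v4bits
	if ip.Is6() {
		bits = v6bits
	}
	// Masked() zeroes everything past the prefix length, keeping whole
	// 16-bit groups intact for /48 (never truncating mid-group).
	return netip.PrefixFrom(ip, bits).Masked().String()
}

// TruncateIP("2001:db8:cafe::1", 24, 48) → "2001:db8:cafe::/48"
// TruncateIP("192.0.2.55", 24, 48)       → "192.0.2.0/24"
```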

---

## Test results

Run with: `go test ./cmd/collector/ -v -count=1 -timeout 120s`

| Test | What it covers |
|------|----------------|
| `TestParseLine` (7 cases) | Tab parsing, query string stripping, bad lines |
| `TestTruncateIP` | IPv4 /24 and IPv6 /48 masking |
| `TestIngestAndRotate` | Live map → fine ring rotation |
| `TestLiveMapCap` | Hard cap at 100 K entries, no panic beyond cap |
| `TestQueryTopN` | Ranked results from ring buffer |
| `TestQueryTopNWithFilter` | Filter by HTTP status code |
| `TestQueryTrend` | Per-bucket counts, oldest-first ordering |
| `TestCoarseRingPopulated` | 5 fine ticks → 1 coarse bucket, count aggregation |
| `TestSubscribeBroadcast` | Fan-out channel delivery after rotation |
| `TestTopKOrdering` | Heap select returns correct top-K descending |
| `TestMultiTailerReadsLines` | Live file write → LogRecord received on channel |
| `TestMultiTailerMultipleFiles` | 5 files, one watcher, all lines received |
| `TestMultiTailerLogRotation` | RENAME → drain → retry → new file tailed correctly |
| `TestExpandGlobs` | Glob pattern expands to matching files only |
| `TestExpandGlobsDeduplication` | Same file via path + glob deduplicated to one |
| `TestMemoryBudget` | Full ring fill stays within 1 GB heap |
| `TestGRPCEndToEnd` | Real gRPC server: TopN, filtered TopN, Trend, StreamSnapshots |

**Total: 17 tests, all passing.**

---

## Benchmark results

Run with: `go test ./cmd/collector/ -bench=. -benchtime=3s`

Hardware: 12th Gen Intel Core i7-12700T

| Benchmark            | ns/op | Throughput      | Headroom vs 10 K/s |
|----------------------|-------|-----------------|--------------------|
| `BenchmarkParseLine` | 418   | ~2.4M lines/s   | 240×               |
| `BenchmarkIngest`    | 152   | ~6.5M records/s | 650×               |

Both the parser and the store ingestion goroutine have several hundred times more capacity than
the 10 000 lines/second peak requirement. The bottleneck at scale will be fsnotify event delivery
and kernel I/O, not the Go code.

@@ -1,334 +0,0 @@

# Frontend v0 — Implementation Plan

Module path: `git.ipng.ch/ipng/nginx-logtail`

**Scope:** An HTTP server that queries a collector or aggregator and renders a drilldown TopN
dashboard with trend sparklines. Zero JavaScript. Filter state in the URL. Auto-refreshes every
30 seconds. Works with any `LogtailService` endpoint (collector or aggregator).

---

## Overview

Single page, multiple views driven entirely by URL query parameters:

```
http://frontend:8080/?target=agg:9091&w=5m&by=website&f_status=429&n=25
```

Clicking a table row drills down: it adds a filter for the clicked label and advances
`by` to the next dimension in the hierarchy (`website → prefix → uri → status`). The
breadcrumb strip shows all active filters; each token is a link that removes it.

---

## Step 1 — main.go

Flags:

| Flag | Default | Description |
|------|---------|-------------|
| `--listen` | `:8080` | HTTP listen address |
| `--target` | `localhost:9091` | Default gRPC endpoint (aggregator or collector) |
| `--n` | `25` | Default number of table rows |
| `--refresh` | `30` | `<meta refresh>` interval in seconds; 0 to disable |

Wire-up:
1. Parse flags
2. Register `http.HandleFunc("/", handler)` (single handler, all state in URL)
3. `http.ListenAndServe`
4. `signal.NotifyContext` for clean shutdown on SIGINT/SIGTERM

---

## Step 2 — client.go

```go
func dial(addr string) (*grpc.ClientConn, pb.LogtailServiceClient, error)
```

Identical to the CLI version — plain insecure dial. A new connection is opened per HTTP
request. At a 30-second page refresh rate this is negligible; pooling is not needed.

---

## Step 3 — handler.go

### URL parameters

| Param | Default | Values |
|-------|---------|--------|
| `target` | flag default | `host:port` |
| `w` | `5m` | `1m 5m 15m 60m 6h 24h` |
| `by` | `website` | `website prefix uri status` |
| `n` | flag default | positive integer |
| `f_website` | — | string |
| `f_prefix` | — | string |
| `f_uri` | — | string |
| `f_status` | — | integer string |
| `raw` | — | `1` → respond with JSON instead of HTML |

### Request flow

```
parseURLParams(r) → QueryParams
buildFilter(QueryParams) → *pb.Filter
dial(target) → client
concurrent:
    client.TopN(filter, groupBy, n, window) → TopNResponse
    client.Trend(filter, window) → TrendResponse
renderSparkline(TrendResponse.Points) → template.HTML
buildTableRows(TopNResponse, QueryParams) → []TableRow (includes drill-down URL per row)
buildBreadcrumbs(QueryParams) → []Crumb
execute template → w
```

TopN and Trend RPCs are issued concurrently (both have a 5 s context deadline). If Trend
fails, the sparkline is omitted silently rather than returning an error page.

### `raw=1` mode

Returns the TopN response as JSON (same format as the CLI's `--json`). Useful for scripting
and `curl` without needing the CLI binary.

### Drill-down URL construction

Dimension advance hierarchy (for row-click links):

```
WEBSITE → CLIENT_PREFIX → REQUEST_URI → HTTP_RESPONSE → (no advance; all dims filtered)
```

Row-click URL: take current params, add the filter for the current `by` dimension, and set
`by` to the next dimension. If already on the last dimension (`status`), keep `by` unchanged.

### Types

```go
type QueryParams struct {
    Target   string
    Window   pb.Window
    WindowS  string // "5m" — for display
    GroupBy  pb.GroupBy
    GroupByS string // "website" — for display
    N        int
    Filter   filterState
}

type filterState struct {
    Website string
    Prefix  string
    URI     string
    Status  string // string so empty means "unset"
}

type TableRow struct {
    Rank     int
    Label    string
    Count    int64
    Pct      float64 // 0–100, relative to top entry
    DrillURL string  // href for this row
}

type Crumb struct {
    Text      string // e.g. "website=example.com"
    RemoveURL string // current URL with this filter removed
}

type PageData struct {
    Params      QueryParams
    Source      string
    Entries     []TableRow
    TotalCount  int64
    Sparkline   template.HTML // "" if trend call failed
    Breadcrumbs []Crumb
    RefreshSecs int
    Error       string // non-empty → show error banner, no table
}
```

---

## Step 4 — sparkline.go

```go
func renderSparkline(points []*pb.TrendPoint) template.HTML
```

- Fixed `viewBox="0 0 300 60"` SVG.
- X axis: evenly-spaced buckets across 300 px.
- Y axis: linear scale from 0 to max count, inverted (SVG y=0 is top).
- Rendered as a `<polyline>` with `stroke` and `fill="none"`. Minimal inline style, no classes.
- If `len(points) < 2`, returns `""` (no sparkline).
- Returns `template.HTML` (already-escaped) so the template can emit it with `{{.Sparkline}}`.
  A sketch follows this list.
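
A sketch of the polyline generation under these rules, assuming `pb.TrendPoint` exposes a `Count` field (a sketch, not the exact code):

```go
func renderSparkline(points []*pb.TrendPoint) template.HTML {
	if len(points) < 2 {
		return ""
	}
	var max int64
	for _, p := range points {
		if p.Count > max {
			max = p.Count
		}
	}
	if max == 0 {
		return "" // all-zero data: no sparkline
	}
	var b strings.Builder
	for i, p := range points {
		x := float64(i) / float64(len(points)-1) * 300 // evenly spaced buckets
		y := 60 - float64(p.Count)/float64(max)*60     // invert: SVG y=0 is top
		fmt.Fprintf(&b, "%.1f,%.1f ", x, y)
	}
	svg := fmt.Sprintf(
		`<svg viewBox="0 0 300 60"><polyline points=%q fill="none" stroke="currentColor"/></svg>`,
		strings.TrimSpace(b.String()))
	return template.HTML(svg) // caller emits it unescaped via {{.Sparkline}}
}
```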

---

## Step 5 — templates/

Two files, embedded with `//go:embed templates/*.html` and parsed once at startup.

### `templates/base.html` (define "base")

Outer HTML skeleton:
- `<meta http-equiv="refresh" content="30">` (omitted if `RefreshSecs == 0`)
- Minimal inline CSS: monospace font, max-width 1000px, table styling, breadcrumb strip
- Yields a `{{template "content" .}}` block

No external CSS, no web fonts, no icons. Legible in a terminal browser (w3m, lynx).

### `templates/index.html` (define "content")

Sections in order:

**Window tabs** — `1m | 5m | 15m | 60m | 6h | 24h`; current window is bold/underlined;
each is a link that swaps only `w=` in the URL.

**Group-by tabs** — `by website | by prefix | by uri | by status`; current group highlighted;
links swap `by=`.

**Filter breadcrumb** — shown only when at least one filter is active:
```
Filters: [website=example.com ×] [status=429 ×]
```
Each `×` is a link to the URL without that filter.

**Error banner** — shown instead of table when `.Error` is non-empty.

**Trend sparkline** — the SVG returned by `renderSparkline`, inline. Labelled with window
and source. Omitted when `.Sparkline == ""`.

**TopN table**:
```
RANK  LABEL          COUNT    %      TREND
   1  example.com    18 432   100 %  ████████████
   2  other.com       4 211    23 %  ████
```
- `LABEL` column is a link (`DrillURL`).
- `%` is relative to the top entry (rank-1 always 100 %).
- `TREND` bar is an inline `<meter value="N" max="100">` tag — renders as a native browser bar,
  degrades gracefully in text browsers to `N/100`.
- Rows beyond rank 3 show the percentage bar only if it's > 5 %, to avoid noise.

**Footer** — "source: <source> queried <timestamp> refresh 30 s" — lets operators confirm
which endpoint they're looking at.

---

## Step 6 — Tests (`frontend_test.go`)

In-process fake gRPC server (same pattern as aggregator and CLI tests).

| Test | What it covers |
|------|----------------|
| `TestParseQueryParams` | All URL params parsed correctly; defaults applied |
| `TestParseQueryParamsInvalid` | Bad `n`, bad `w`, bad `f_status` → defaults or 400 |
| `TestBuildFilterFromParams` | Populated filter; nil when nothing set |
| `TestDrillURL` | website → prefix drill; prefix → uri drill; status → no advance |
| `TestBuildCrumbs` | One crumb per active filter; remove-URL drops just that filter |
| `TestRenderSparkline` | 5 points → valid SVG containing `<polyline`; 0 points → empty |
| `TestHandlerTopN` | Fake server; GET / returns 200 with table rows in body |
| `TestHandlerRaw` | `raw=1` returns JSON with correct entries |
| `TestHandlerBadTarget` | Unreachable target → 502 with error message |
| `TestHandlerFilter` | `f_website=x` passed through to fake server's received request |
| `TestHandlerWindow` | `w=60m` → correct `pb.Window_W60M` in fake server's received request |
| `TestPctBar` | `<meter` tag present in rendered HTML |
| `TestBreadcrumbInHTML` | Filter crumb rendered; `×` link present |

---

## Step 7 — Smoke test

```bash
# Start collector and aggregator (or use existing)
./logtail-collector --listen :9090 --logs /var/log/nginx/access.log
./logtail-aggregator --listen :9091 --collectors localhost:9090

# Start frontend
./logtail-frontend --listen :8080 --target localhost:9091

# Open in browser or curl
curl -s 'http://localhost:8080/' | grep '<tr'
curl -s 'http://localhost:8080/?w=60m&by=prefix&f_status=200&raw=1' | jq '.entries[0]'

# Drill-down link check
curl -s 'http://localhost:8080/' | grep 'f_website'
```

---

## ✓ COMPLETE — Implementation notes

### Files

| File | Role |
|------|------|
| `cmd/frontend/main.go` | Flags, template loading, HTTP server, graceful shutdown |
| `cmd/frontend/client.go` | `dial()` — plain insecure gRPC, new connection per request |
| `cmd/frontend/handler.go` | URL parsing, filter building, concurrent TopN+Trend fan-out, page data assembly |
| `cmd/frontend/sparkline.go` | `renderSparkline()` — `[]*pb.TrendPoint` → inline `<svg><polyline>` |
| `cmd/frontend/format.go` | `fmtCount()` — space-separated thousands, registered as template func |
| `cmd/frontend/templates/base.html` | Outer HTML shell, inline CSS, meta-refresh |
| `cmd/frontend/templates/index.html` | Window tabs, group-by tabs, breadcrumb, sparkline, table, footer |

### Deviations from the plan

- **`format.go` extracted**: `fmtCount` placed in its own file (not in `handler.go`) so it can
  be tested independently without loading the template.
- **`TestDialFake` added**: sanity check for the fake gRPC infrastructure used by the other tests.
- **`TestHandlerNoData` added**: verifies the "no data" message renders correctly when the server
  returns an empty entry list. Total tests: 23 (plan listed 13).
- **`%` relative to rank-1** as planned; the `<meter max="100">` shows 100% for rank-1
  and proportional bars below. Rank-1 is always the visual baseline.
- **`status → website` drill cycle**: clicking a row in the `by status` view adds `f_status`
  and resets `by=website` (cycles back to the start of the drilldown hierarchy).

### Test results

```
$ go test ./... -count=1 -race -timeout 60s
ok      git.ipng.ch/ipng/nginx-logtail/cmd/frontend     1.1s (23 tests)
ok      git.ipng.ch/ipng/nginx-logtail/cmd/cli          1.0s (14 tests)
ok      git.ipng.ch/ipng/nginx-logtail/cmd/aggregator   4.1s (13 tests)
ok      git.ipng.ch/ipng/nginx-logtail/cmd/collector    9.7s (17 tests)
```

### Test inventory

| Test | What it covers |
|------|----------------|
| `TestParseWindowString` | All 6 window strings + bad input → default |
| `TestParseGroupByString` | All 4 group-by strings + bad input → default |
| `TestParseQueryParams` | All URL params parsed correctly |
| `TestParseQueryParamsDefaults` | Empty URL → handler defaults applied |
| `TestBuildFilter` | Filter proto fields set from filterState |
| `TestBuildFilterNil` | Returns nil when no filter set |
| `TestDrillURL` | website→prefix, prefix→uri, status→website cycle |
| `TestBuildCrumbs` | Correct text and remove-URLs for active filters |
| `TestRenderSparkline` | 5 points → SVG with polyline |
| `TestRenderSparklineTooFewPoints` | nil/1 point → empty string |
| `TestRenderSparklineAllZero` | All-zero counts → empty string |
| `TestFmtCount` | Space-thousands formatting |
| `TestHandlerTopN` | Fake server; labels and formatted counts in HTML |
| `TestHandlerRaw` | `raw=1` → JSON with source/window/group_by/entries |
| `TestHandlerBadTarget` | Unreachable target → 502 + error message in body |
| `TestHandlerFilterPassedToServer` | `f_website` + `f_status` reach gRPC filter |
| `TestHandlerWindowPassedToServer` | `w=60m` → `pb.Window_W60M` in request |
| `TestHandlerBreadcrumbInHTML` | Active filter renders crumb with × link |
| `TestHandlerSparklineInHTML` | Trend points → `<svg><polyline>` in page |
| `TestHandlerPctBar` | 100% for rank-1, 50% for half-count entry |
| `TestHandlerWindowTabsInHTML` | All 6 window labels rendered as links |
| `TestHandlerNoData` | Empty entry list → "no data" message |
| `TestDialFake` | Test infrastructure sanity check |

---

## Deferred (not in v0)

- Dark mode (prefers-color-scheme media query)
- Per-row mini sparklines (one Trend RPC per table row — expensive; need batching first)
- WebSocket or SSE for live push instead of meta-refresh
- Pagination for large N
- `?format=csv` download
- OIDC/basic-auth gating
- ClickHouse-backed 7d/30d windows (tracked in README)

docs/design.md
@@ -0,0 +1,608 @@

<!-- SPDX-License-Identifier: Apache-2.0 -->
# nginx-logtail Design Document

## Metadata

| | |
| --- | --- |
| **Status** | Describes intended behavior as of `v0.2.0` |
| **Author** | Pim van Pelt `<pim@ipng.ch>` |
| **Last updated** | 2026-04-17 |
| **Audience** | Operators and contributors running real-time traffic analysis and DDoS detection across a fleet of nginx hosts |

The key words **MUST**, **MUST NOT**, **SHOULD**, **SHOULD NOT**, and **MAY** are used as described in
[RFC 2119](https://datatracker.ietf.org/doc/html/rfc2119), and are reserved in this document for requirements that are intended to be
enforced in code or by an external dependency. Plain-language descriptions of what the system or an operator can do are written in
lowercase — "can", "will", "does" — and should not be read as normative.
|
||||
|
||||
## Summary

`nginx-logtail` is a four-binary Go system for real-time analysis of nginx traffic across a fleet of hosts. Each nginx host runs a
**collector** that ingests logs (from files via `fsnotify`, from a UDP socket, or both) and maintains in-memory ranked top-K counters
across multiple time windows. A central **aggregator** subscribes to the collectors' snapshot streams and serves a merged view. An
**HTTP frontend** renders a drilldown dashboard (server-rendered HTML, zero JavaScript). A **CLI** offers the same queries as a
shell companion. All four programs speak a single gRPC service (`LogtailService`), so the frontend and CLI work against any collector
or the aggregator interchangeably.

## Background

Operators running tens of nginx hosts behind a load balancer need a live, drilldown view of request traffic for DDoS detection and
traffic analysis. Questions the system answers include:

- Which client prefix is causing the most HTTP 429s right now?
- Which website is getting the most 503s over the last 24 hours?
- Which nginx machine is the busiest?
- Is there a DDoS in progress, and from where?

Existing log-analysis pipelines (ELK, Loki, ClickHouse, etc.) answer questions like these but require infrastructure that is
disproportionate for the target workload. A handful of nginx hosts each doing ~10 K req/s at peak can be kept on a per-minute top-K
structure in ~1 GB of RAM per host, with <250 ms query latency across the whole fleet, without a storage tier.

A companion project, [`nginx-ipng-stats-plugin`](https://git.ipng.ch/ipng/nginx-ipng-stats-plugin), adds per-device attribution in nginx
itself and can emit a logtail-format access log as UDP datagrams. `nginx-logtail` was extended in `v0.2.0` to ingest that stream
natively, so operators can run it either from on-disk log files, from the UDP feed, or both on the same host.
## Goals and Non-Goals

### Product Goals

1. **Live top-K per (website, client_prefix, URI, status, is_tor, asn, source_tag).** For every combination of these dimensions the
   system maintains an integer count, ranked so that the top entries are readily available across 1 m, 5 m, 15 m, 60 m, 6 h, and 24 h
   windows.
2. **Sub-second query latency.** `TopN` and `Trend` queries MUST return from the collector and from the aggregator in well under one
   second at the target scale (10 hosts, 10 K req/s each).
3. **Bounded memory.** The collector MUST stay within a 1 GB steady-state memory budget regardless of input cardinality, including
   during high-cardinality DDoS attacks.
4. **Two ingest paths, one data model.** On-disk log files (`fsnotify`-tailed, logrotate-aware) and UDP datagrams (from
   `nginx-ipng-stats-plugin`) MUST both feed the same in-memory structure, with a single log format per path and no operator-visible
   difference downstream.
5. **No external storage, no TLS, no CGO.** The entire system runs as four static Go binaries on a trusted internal network. Operators
   who need retention beyond the ring buffers SHOULD scrape Prometheus.
6. **One service contract.** Collectors and the aggregator implement the same gRPC `LogtailService`. Frontend and CLI MUST work
   against either interchangeably, with the collector returning "itself" from `ListTargets` and the aggregator returning its configured
   collector set.
### Non-Goals

- The system does **not** parse arbitrary nginx `log_format` strings. Two fixed tab-separated formats are supported: a file format and
  a UDP format (see FR-2). Operators who need general parsing should use Vector, Fluent Bit, or Promtail.
- The system does **not** store raw log lines. Counts are aggregated at ingest; the original log lines are not kept in memory or on
  disk. The project does not replace an access log.
- The system does **not** persist counters across restarts. Ring buffers are in-memory only. On aggregator restart, historical state
  is reconstructed by calling `DumpSnapshots` on each collector (FR-4.3). On collector restart the rings start empty and refill as new
  traffic arrives.
- The system does **not** provide per-URI request timing distributions. Latency histograms exist only in the collector's Prometheus
  exposition (per host), not in the top-K data model.
- The system does **not** ship TLS or authentication for its gRPC endpoints. Operators who expose it beyond a trusted network are
  expected to terminate TLS in a front proxy.
- The system is **not** a general-purpose metric store. The Prometheus exporter on the collector exposes a deliberately narrow set:
  per-host request counter, per-host body-size and request-time histograms, and per-`source_tag` rollup counters.
## Requirements

Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that later sections can cite it.

### Functional Requirements

**FR-1 Counter data model**

- **FR-1.1** The canonical unit of counting MUST be a 7-tuple
  `(website, client_prefix, http_request_uri, http_response, is_tor, asn, ipng_source_tag)` mapped to a 64-bit integer request count.
  The data model contains no other fields: no timing, no byte counts, no method (those live only in the Prometheus exposition,
  FR-8).
- **FR-1.2** `website` MUST be the nginx `$host` value.
- **FR-1.3** `client_prefix` MUST be the client IP truncated to a configurable prefix length, formatted as CIDR. Default `/24` for
  IPv4 and `/48` for IPv6 (flags `-v4prefix`, `-v6prefix`). Truncation happens at ingest; the original address is not retained.
- **FR-1.4** `http_request_uri` MUST be the `$request_uri` path only — the query string (from the first `?` onward) MUST be stripped
  at ingest. This is the dominant cardinality-reduction measure; DDoS traffic with attacker-generated query strings cannot grow the
  working set.
- **FR-1.5** `http_response` MUST be the HTTP status code as recorded by nginx.
- **FR-1.6** `is_tor` MUST be a boolean, populated by the operator in the log format (typically via a lookup against a TOR exit-node
  list). For the file format, lines without this field default to `false` for backward compatibility.
- **FR-1.7** `asn` MUST be an int32 decimal value sourced from MaxMind GeoIP2 (or equivalent). For the file format, lines without
  this field default to `0`.
- **FR-1.8** `ipng_source_tag` MUST be a short string identifying which attribution tag the request arrived under. For records from
  on-disk log files, the collector MUST assign the tag `"direct"` (mirroring `nginx-ipng-stats-plugin`'s default-source convention). For
  records from the UDP stream, the tag is taken from the log line as emitted by the plugin.
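For intuition, a minimal sketch of the FR-1.3 truncation rule, assuming Go's `net/netip`; the function name and flag plumbing are illustrative, not the collector's actual API:

```go
package main

import (
	"fmt"
	"net/netip"
)

// clientPrefix truncates a client address to /24 (IPv4) or /48 (IPv6)
// and renders it as CIDR, per FR-1.3. Hypothetical helper.
func clientPrefix(remote string, v4bits, v6bits int) (string, error) {
	addr, err := netip.ParseAddr(remote)
	if err != nil {
		return "", err // an invalid IP drops the whole line upstream
	}
	bits := v4bits
	if addr.Is6() {
		bits = v6bits
	}
	p, err := addr.Prefix(bits) // zeroes the host bits
	if err != nil {
		return "", err
	}
	return p.String(), nil
}

func main() {
	s, _ := clientPrefix("192.0.2.55", 24, 48)
	fmt.Println(s) // 192.0.2.0/24: the original address is gone
}
```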
**FR-2 Log formats**

- **FR-2.1 File format.** The collector MUST accept nginx access logs in the following tab-separated layout, with the last two fields
  (`is_tor`, `asn`) optional for backward compatibility:

  ```nginx
  log_format logtail '$host\t$remote_addr\t$msec\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn';
  ```

  | # | Field              | Ingested into               |
  |---|--------------------|-----------------------------|
  | 0 | `$host`            | `website`                   |
  | 1 | `$remote_addr`     | `client_prefix` (truncated) |
  | 2 | `$msec`            | (discarded)                 |
  | 3 | `$request_method`  | Prom `method` label         |
  | 4 | `$request_uri`     | `http_request_uri`          |
  | 5 | `$status`          | `http_response`             |
  | 6 | `$body_bytes_sent` | Prom body histogram         |
  | 7 | `$request_time`    | Prom duration histogram     |
  | 8 | `$is_tor`          | `is_tor` (optional)         |
  | 9 | `$asn`             | `asn` (optional)            |

- **FR-2.2 UDP format.** The collector MUST accept datagrams in the following tab-separated layout, as emitted by
  `nginx-ipng-stats-plugin`'s `ipng_stats_logtail` directive:

  ```nginx
  log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn\t$ipng_source_tag\t$server_addr\t$scheme';
  ```

  Exactly 12 tab-separated fields are required. `$server_addr` and `$scheme` MUST be parsed but dropped; they are reserved for
  future use. Malformed datagrams MUST be counted (FR-8.5) and silently dropped.

- **FR-2.3** The file tailer MUST set `source_tag="direct"` on every record it parses. The UDP listener MUST propagate
  `$ipng_source_tag` verbatim. This is the only difference in downstream processing between the two ingest paths.
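A minimal sketch of the FR-2.2 split (no regex, per NFR-3.1). The `LogRecord` shape and the field names are illustrative, not the project's actual types:

```go
package collector

import (
	"strconv"
	"strings"
)

// LogRecord is an illustrative stand-in for the collector's record type.
type LogRecord struct {
	Website, ClientIP, Method, URI, Status, SourceTag string
	IsTor                                             bool
	ASN                                               int32
}

// parseUDPLine splits one datagram into the 12 fixed fields of FR-2.2.
func parseUDPLine(line string) (LogRecord, bool) {
	f := strings.Split(line, "\t")
	if len(f) != 12 {
		return LogRecord{}, false // malformed: counted (FR-8.5), dropped
	}
	uri, _, _ := strings.Cut(f[3], "?") // FR-1.4: strip the query string
	asn, _ := strconv.Atoi(f[8])
	return LogRecord{
		Website:   f[0],
		ClientIP:  f[1], // truncated to a prefix later (FR-1.3)
		Method:    f[2],
		URI:       uri,
		Status:    f[4],
		IsTor:     f[7] == "1",
		ASN:       int32(asn),
		SourceTag: f[9], // FR-2.3: propagated verbatim
		// f[5]/f[6] feed the Prometheus histograms; f[10] ($server_addr)
		// and f[11] ($scheme) are parsed but dropped (FR-2.2).
	}, true
}
```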
**FR-3 Ring buffers and time windows**

- **FR-3.1** Each collector and the aggregator MUST maintain two tiered ring buffers:

  | Tier   | Bucket size | Buckets | Top-K/bucket | Covers |
  |--------|-------------|---------|--------------|--------|
  | Fine   | 1 min       | 60      | 50 000       | 1 h    |
  | Coarse | 5 min       | 288     | 5 000        | 24 h   |

- **FR-3.2** The `Window` enum MUST map queries to tiers as follows:

  | Window | Tier   | Buckets summed |
  |--------|--------|----------------|
  | 1 m    | fine   | 1              |
  | 5 m    | fine   | 5              |
  | 15 m   | fine   | 15             |
  | 60 m   | fine   | 60             |
  | 6 h    | coarse | 72             |
  | 24 h   | coarse | 288            |

- **FR-3.3** Every minute, the collector MUST snapshot its live map into the fine ring (top-50 000, sorted desc) and reset the live
  map. Every fifth fine tick, the collector MUST merge the most recent five fine snapshots into one coarse snapshot (top-5 000).
  The fine/coarse merge MUST be pinned to the 1-minute and 5-minute boundaries of the local clock so sparklines align across
  collectors.
- **FR-3.4** Querying MUST always read from the rings, never from the live map. A sub-minute request MUST return an empty top-1
  result rather than surfacing partially-accumulated data; this keeps per-minute results monotonic.
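The FR-3.2 mapping, sketched as a lookup. The enum names follow the `pb.Window_W60M` style used by this repository's tests; the remaining values are assumed by analogy, and the generated `pb` package is assumed imported:

```go
// windowBuckets maps a query Window onto a tier and the number of
// trailing buckets to sum (FR-3.2). Illustrative, not the real code.
func windowBuckets(w pb.Window) (coarse bool, n int) {
	switch w {
	case pb.Window_W1M:
		return false, 1
	case pb.Window_W5M:
		return false, 5
	case pb.Window_W15M:
		return false, 15
	case pb.Window_W60M:
		return false, 60
	case pb.Window_W6H:
		return true, 72 // 72 x 5 min = 6 h
	default: // 24 h
		return true, 288 // 288 x 5 min = 24 h
	}
}
```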
**FR-4 Push-based streaming and aggregation**

- **FR-4.1** The collector MUST expose a server-streaming RPC `StreamSnapshots(SnapshotRequest) → stream Snapshot` that emits one fine
  (1-min) snapshot per minute rotation. Subscribers MUST receive the same snapshot independently (per-subscriber buffered fan-out,
  bounded buffer, drop on full).
- **FR-4.2** The aggregator MUST subscribe to every configured collector via `StreamSnapshots` and merge snapshots into a single
  ring-buffer cache. The merge strategy MUST be delta-based: on each new snapshot from collector `X`, the aggregator MUST subtract
  `X`'s previous contribution and add the new entries, giving `O(snapshot_size)` per update (not `O(N_collectors × size)`).
- **FR-4.3** Each collector MUST expose a server-streaming `DumpSnapshots(DumpSnapshotsRequest) → stream Snapshot` that
  streams all fine buckets (with `is_coarse=false`) followed by all coarse buckets (with `is_coarse=true`). On startup, the aggregator
  MUST call `DumpSnapshots` against every collector once (concurrently, after its own gRPC server is already listening), merge the
  per-timestamp entries the same way the live path does, and load the result into its cache via a single atomic replacement.
  Collectors that return `Unimplemented` MUST be skipped without blocking live streaming from the others.
- **FR-4.4** The aggregator MUST reconnect to each collector independently with exponential backoff (100 ms → cap 30 s). After three
  consecutive connection failures the aggregator MUST zero the degraded collector's contribution (subtract its last-known snapshot
  and delete its entry). When the collector recovers and sends a new snapshot, its contribution MUST automatically be reintegrated.
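A minimal sketch of the FR-4.2 delta merge (type and field names hypothetical): subtract the collector's previous contribution, add the new one, remember it. Zeroing a degraded collector (FR-4.4) is the same operation with an empty snapshot.

```go
// cache holds the aggregator's merged view plus each collector's last
// contribution. Illustrative only.
type cache struct {
	merged map[string]int64            // label -> fleet-wide count
	prev   map[string]map[string]int64 // collector -> its last snapshot
}

// apply integrates one fresh snapshot in O(len(old) + len(new)),
// independent of how many collectors are subscribed.
func (c *cache) apply(collector string, snap map[string]int64) {
	for label, n := range c.prev[collector] {
		if c.merged[label] -= n; c.merged[label] <= 0 {
			delete(c.merged, label)
		}
	}
	for label, n := range snap {
		c.merged[label] += n
	}
	c.prev[collector] = snap
}
```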
**FR-5 Query service (`LogtailService`)**

- **FR-5.1** Collector and aggregator MUST implement the same gRPC `LogtailService`:

  ```protobuf
  service LogtailService {
    rpc TopN(TopNRequest) returns (TopNResponse);
    rpc Trend(TrendRequest) returns (TrendResponse);
    rpc StreamSnapshots(SnapshotRequest) returns (stream Snapshot);
    rpc ListTargets(ListTargetsRequest) returns (ListTargetsResponse);
    rpc DumpSnapshots(DumpSnapshotsRequest) returns (stream Snapshot);
  }
  ```

- **FR-5.2** `Filter` MUST support exact, inequality, and RE2-regex constraints on the dimensions of FR-1. Status and ASN accept
  the six-operator expression language (`=`, `!=`, `>`, `>=`, `<`, `<=`). Website and URI accept regex match and regex exclusion.
  TOR filtering uses a three-state enum (`ANY`/`YES`/`NO`). Source-tag filtering is exact match only.
- **FR-5.3** `GroupBy` MUST cover every dimension of FR-1 except `is_tor` (which is boolean and rarely useful as a group-by target):
  `WEBSITE`, `CLIENT_PREFIX`, `REQUEST_URI`, `HTTP_RESPONSE`, `ASN_NUMBER`, `SOURCE_TAG`.
- **FR-5.4** `ListTargets` MUST return, from the aggregator, every configured collector with its display name and gRPC address; from
  a collector, a single entry describing itself with an empty `addr` (meaning "this endpoint").
- **FR-5.5** All queries MUST be answered from the local ring buffers. The aggregator MUST NOT fan out to collectors at query time.
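A parsing sketch for the FR-5.2 expression language; hedged: the real code's names and its wiring into `StatusOp` may differ, and `strings`/`strconv` are assumed imported:

```go
// parseExpr decodes the six-operator expressions of FR-5.2,
// e.g. "200", "!=65000", ">=400". Illustrative helper.
func parseExpr(s string) (op string, n int, err error) {
	// Two-character operators must be tried before their one-char prefixes.
	for _, p := range []string{"!=", ">=", "<=", ">", "<", "="} {
		if rest, ok := strings.CutPrefix(s, p); ok {
			n, err = strconv.Atoi(rest)
			return p, n, err
		}
	}
	n, err = strconv.Atoi(s) // a bare number means equality
	return "=", n, err
}
```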
**FR-6 HTTP frontend**

- **FR-6.1** The frontend MUST render a server-rendered HTML dashboard with no JavaScript, using inline SVG for sparklines and
  `<meta http-equiv="refresh">` for auto-refresh. It MUST work in text-mode browsers (w3m, lynx) and under `curl`.
- **FR-6.2** All filter, group-by, and window state MUST live in the URL query string so that URLs are shareable and bookmarkable.
  No server-side session.
- **FR-6.3** The frontend MUST provide a drilldown affordance: clicking a row MUST add that row's value as a filter and advance the
  group-by dimension through the cycle
  `website → prefix → uri → status → asn → source_tag → website`.
- **FR-6.4** The frontend MUST issue `TopN`, `Trend`, and `ListTargets` concurrently with a 5 s deadline. `Trend` failure MUST
  suppress the sparkline but not the table. `ListTargets` failure MUST hide the source picker but not the rest of the page.
- **FR-6.5** Appending `&raw=1` to any URL MUST return the `TopN` result as JSON, so the dashboard can be scripted without the CLI.
- **FR-6.6** The frontend MUST accept a `q=` parameter holding a mini filter expression (`status>=400 AND website~=gouda.*`). On
  submission it MUST parse the expression and redirect to the canonical URL with the individual `f_*` params populated; parse errors
  MUST render inline without losing the current filter state.
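As a concrete, illustrative example of FR-6.2 and FR-6.5 combined: `http://agg:8080/?f_website=example.com&w=60m&raw=1` is a complete, shareable query (the hostname and the filter value are invented here); the same URL without `raw=1` renders the HTML table instead.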
**FR-7 CLI**

- **FR-7.1** The CLI MUST provide four subcommands: `topn`, `trend`, `stream`, `targets`. Each subcommand MUST accept
  `--target host:port[,host:port...]` and fan out concurrently, printing results in order with per-target headers (omitted for
  single-target invocations, so output pipes cleanly into `jq`).
- **FR-7.2** The CLI MUST expose every `Filter` dimension as a dedicated flag and default to a human-readable table. `--json` MUST
  switch to newline-delimited JSON for `stream` and to a single JSON array for `topn`/`trend`.
- **FR-7.3** `stream` MUST reconnect automatically on error with a 5 s backoff and run until interrupted.
**FR-8 Prometheus exposition (collector only)**

- **FR-8.1** The collector MUST expose a Prometheus `/metrics` endpoint on `-prom-listen` (default `:9100`). Setting the flag to the
  empty string MUST disable it entirely.
- **FR-8.2** The collector MUST expose a per-request counter `nginx_http_requests_total{host, method, status}` capped at
  `promCounterCap = 250 000` distinct label sets. When the cap is reached, further new label sets MUST be dropped (existing series
  keep incrementing) until the map is rolled over.
- **FR-8.3** The collector MUST expose per-host histograms
  `nginx_http_response_body_bytes{host, le}` (body-size distribution) and
  `nginx_http_request_duration_seconds{host, le}` (request-time distribution). The duration histogram MUST NOT be split by
  `source_tag` — its bucket count would multiply without operational benefit.
- **FR-8.4** The collector MUST expose two parallel roll-ups labeled by `source_tag` only (not cross-producted with host):
  `nginx_http_requests_by_source_total{source_tag}` and
  `nginx_http_response_body_bytes_by_source{source_tag, le}`. These are separate metric names to avoid inconsistent label sets
  under a single name.
- **FR-8.5** The collector MUST expose three counters that let operators distinguish UDP parse failures from back-pressure drops:
  `logtail_udp_packets_received_total` (datagrams off the socket),
  `logtail_udp_loglines_success_total` (parsed OK), and
  `logtail_udp_loglines_consumed_total` (forwarded to the store — i.e. not dropped).
### Non-Functional Requirements

**NFR-1 Correctness under concurrency**

- **NFR-1.1** The collector MUST run a single goroutine ("the store goroutine") that owns the live map and the ring-buffer write
  path. Other goroutines MUST NOT write to these structures. The file tailer and the UDP listener MUST communicate with the store
  goroutine through a bounded channel.
- **NFR-1.2** Readers (query RPCs and subscriber fan-out) MUST take an `RLock` on the rings. Writers MUST take a `Lock` only for the
  moment the slice header of the new snapshot is installed; serialisation and network I/O MUST happen outside the lock.
- **NFR-1.3** `DumpSnapshots` MUST copy ring headers and filled counts under `RLock` only, then release the lock before streaming.
  The minute-rotation write path MUST never observe a lock held for longer than a microsecond-scale slice copy.
- **NFR-1.4** A query that races with a rotation MUST observe a monotonically non-decreasing total for a fixed filter over a fixed
  window; it MUST NOT observe a partially-rotated state that would cause a total to decrease compared to a prior reading.
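A compact sketch of the NFR-1.1 single-writer pattern combined with the NFR-3.4 drop-on-full enqueue (channel size per NFR-3.3; all types and names illustrative):

```go
package main

// LogRecord and liveMap are illustrative stand-ins.
type LogRecord struct{ Website string }

type liveMap struct{ counts map[string]int64 }

func (m *liveMap) add(r LogRecord) { m.counts[r.Website]++ }

func main() {
	recCh := make(chan LogRecord, 200_000) // ~20 s at 10 K lines/s (NFR-3.3)

	// Store goroutine: the only writer to the live map and rings (NFR-1.1).
	live := &liveMap{counts: make(map[string]int64)}
	go func() {
		for rec := range recCh {
			live.add(rec)
		}
	}()

	// Ingest side (file tailer or UDP listener): never block (NFR-3.4).
	enqueue := func(rec LogRecord) {
		select {
		case recCh <- rec:
			// counts toward logtail_udp_loglines_consumed_total
		default:
			// channel full: dropped, visible as success minus consumed
		}
	}
	enqueue(LogRecord{Website: "example.com"})
}
```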
**NFR-2 Memory bounds**

- **NFR-2.1** The collector's live map MUST be hard-capped at 100 000 entries. Once the cap is reached, only updates to existing keys
  MUST proceed; new keys MUST be dropped until the next minute rotation resets the map. This bounds memory under high-cardinality
  attacks.
- **NFR-2.2** Fine-ring snapshots MUST be capped at top-50 000 entries; coarse-ring snapshots at top-5 000. The full memory budget
  for a collector is therefore approximately 845 MB (live map ~19 MB + fine ring ~558 MB + coarse ring ~268 MB).
- **NFR-2.3** The aggregator MUST apply the same tier caps as the collector. Its steady-state memory is roughly equivalent to one
  collector regardless of the number of collectors subscribed.
- **NFR-2.4** The Prometheus counter map (FR-8.2) MUST be capped at `promCounterCap = 250 000` entries. The per-host and per-source
  histograms MUST NOT be capped explicitly — they grow only with the distinct host count, which is bounded by the operator's vhost
  configuration.
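As a back-of-the-envelope consistency check (our arithmetic, not part of the budget): the three figures in NFR-2.2 all correspond to roughly 186 bytes per stored entry. Fine ring: 60 × 50 000 = 3.0 M entries ≈ 558 MB; coarse ring: 288 × 5 000 = 1.44 M entries ≈ 268 MB; live map: 100 000 entries ≈ 19 MB.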
**NFR-3 Performance**

- **NFR-3.1** `ParseLine` and `ParseUDPLine` MUST use `strings.Split` / `strings.SplitN` (no regex), so that per-line cost stays
  around 50 ns on commodity hardware.
- **NFR-3.2** `TopN` and `Trend` queries across the full 24-hour coarse ring MUST complete in well under 250 ms at the 50 000-entry
  fine cap, for fully-specified filters.
- **NFR-3.3** The collector's input channel MUST be sized to absorb approximately 20 s of peak load (e.g. 200 000 entries at
  10 K lines/s) so that transient pauses in the store goroutine do not back up the tailer or the UDP listener.
- **NFR-3.4** When either the tailer or the UDP listener cannot enqueue a parsed record because the channel is full, the record
  MUST be dropped rather than blocking the ingest goroutine. UDP drops MUST be visible via the counters in FR-8.5; file-path drops
  are implicit (the tailer falls behind the file).
**NFR-4 Fault tolerance and recovery**

- **NFR-4.1** The file tailer MUST tolerate logrotate automatically. On `RENAME`/`REMOVE` events it MUST drain the old file
  descriptor to EOF, close it, and retry opening the original path with exponential backoff until the new file appears. No SIGHUP
  or restart is required.
- **NFR-4.2** The aggregator MUST NOT block frontend queries during backfill. Its gRPC server MUST start listening first; backfill
  (FR-4.3) MUST run in a background goroutine.
- **NFR-4.3** A collector restart MUST NOT affect peer collectors or the aggregator's ability to continue serving the surviving
  collectors' data. When the restarted collector reconnects, its stream MUST resume without operator action.
- **NFR-4.4** An aggregator restart MUST recover its ring-buffer contents from all collectors via `DumpSnapshots`; live streaming
  MUST resume in parallel with backfill so that no minute is lost even during a restart.
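A sketch of the NFR-4.1 logrotate dance as a watch-loop fragment, assuming the `fsnotify` v1 event API; `readNewLines`, `drainToEOF`, and `reopenWithBackoff` are hypothetical helpers:

```go
// Fragment of a tailer loop. On rename/remove, finish reading the old
// file descriptor, then chase the recreated path. Illustrative only.
for ev := range watcher.Events {
	switch {
	case ev.Op&fsnotify.Write != 0:
		readNewLines(f) // normal append
	case ev.Op&(fsnotify.Rename|fsnotify.Remove) != 0:
		drainToEOF(f) // logrotate moved the file; drain the old fd
		f.Close()
		f = reopenWithBackoff(ev.Name) // retry until nginx recreates it
	}
}
```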
**NFR-5 Observability of the system itself**

- **NFR-5.1** The collector MUST expose operator-facing log lines on stdout covering: file discovery, logrotate reopen events, UDP
  listener bind, subscriber connect/disconnect, and fatal configuration errors. The collector MUST NOT log anything on the per-request
  hot path.
- **NFR-5.2** The aggregator MUST log each collector's connect, disconnect, degraded transition, and recovery. Backfill MUST log a
  per-collector line with bucket counts, entry counts, and wall-clock duration.
- **NFR-5.3** The Prometheus exporter MUST be the primary out-of-band health signal. Counters FR-8.5 plus the per-host request
  counter (FR-8.2) give an operator a full view of ingest health without needing to read the logs.
**NFR-6 Security**

- **NFR-6.1** gRPC traffic MUST be cleartext HTTP/2. Operators who expose the endpoints beyond a trusted network are expected to
  terminate TLS in a front proxy.
- **NFR-6.2** The collector MUST bind its UDP listener to `127.0.0.1` by default (configurable via `-logtail-bind`), so that merely
  setting `-logtail-port` does not expose the socket to the public Internet.
- **NFR-6.3** The system MUST NOT record per-request personally-identifying data beyond what nginx already logs. Client IPs are
  truncated at ingest (FR-1.3); URIs lose their query strings (FR-1.4).
**NFR-7 Documentation and packaging**

- **NFR-7.1** The repository MUST ship `docs/user-guide.md` that walks an operator through nginx log format configuration, running
  each of the four binaries (flags, systemd examples, Docker Compose), and integrating the Prometheus exporter. It MUST contain
  enough examples that a new operator can stand up a single-host deployment end-to-end without reading the source.
- **NFR-7.2** The repository MUST ship `docs/design.md` (this document) covering the normative requirements and the architectural
  rationale.
- **NFR-7.3** All four binaries MUST build as static Go binaries with `CGO_ENABLED=0 -trimpath -ldflags="-s -w"` and MUST ship
  together in a single `scratch`-based Docker image. No OS, no shell, no runtime dependencies.
## Architecture Overview

### Process Model

The project ships four binaries:

- **`collector`** — runs on every nginx host. Ingests logs from files and/or UDP, maintains the live map and tiered rings, serves
  `LogtailService` on port 9090, and exposes Prometheus on port 9100.
- **`aggregator`** — runs centrally. Subscribes to every collector, merges snapshots, serves the same `LogtailService` on port 9091.
- **`frontend`** — runs centrally, alongside the aggregator. HTTP server on port 8080, rendering HTML against the aggregator (or any
  other `LogtailService` endpoint).
- **`cli`** — runs wherever the operator is. Talks to any `LogtailService`. No daemon.

Because all four binaries speak one service, the aggregator is optional for a single-host deployment: the frontend and CLI can point
directly at a collector.

### Data Flow
```
            ┌──────────────┐  files  ┌───────────────┐
 nginx ────▶│ access.log   │────────▶│ file tailer   │──┐
            │ (file mode)  │         │ (fsnotify)    │  │
            └──────────────┘         └───────────────┘  │
                                                        │
 nginx-     ┌──────────────┐   UDP   ┌───────────────┐  │                     ┌──────────┐
 ipng-      │ ipng_stats_  │────────▶│ udp listener  │──┼──▶ LogRecord ──────▶│ store    │
 stats-     │ logtail      │         │ (127.0.0.1)   │  │    channel (200K)   │ goroutine│
 plugin     └──────────────┘         └───────────────┘  │                     └────┬─────┘
                                                        ▼                          │
                                                  Prom exporter                    ▼
                                                                            ┌─────────────┐
                                                                            │  live map   │
                                                                            │  (≤100 K)   │
                                                                            └──────┬──────┘
                                                                                   │ every 1 m
                                                                                   ▼
                                                                            ┌─────────────┐
                                                                            │  fine ring  │
                                                                            │   60×50 K   │────┐
                                                                            └──────┬──────┘    │
                                                                                   │ every 5 m │
                                                                                   ▼           │
                                                                            ┌─────────────┐    │
                                                                            │ coarse ring │    │
                                                                            │   288×5 K   │    │
                                                                            └─────────────┘    │
                                                                                               │
                                                        ┌──────────────────────────────────────┘
                                                        │ StreamSnapshots (push)
                                                        ▼
                                         aggregator ──▶ merged cache ──▶ frontend / CLI
```
Requests enter nginx, which writes either to a log file (file mode) or via the `ipng_stats_logtail` directive to a UDP socket
(UDP mode), or both. The collector has two ingest goroutines that parse a line into a `LogRecord` and enqueue it on a shared
200 K-entry channel. A single store goroutine consumes the channel, updating the live map and maintaining the tiered rings. A
once-per-minute timer rotates the live map into the fine ring and (every fifth tick) into the coarse ring, and fans the fresh
snapshot out to every `StreamSnapshots` subscriber. The aggregator is one such subscriber.

Query RPCs (`TopN`, `Trend`) MUST read only from the rings and MUST NOT read from the live map. The aggregator's cache is itself a
ring built from the merged-view snapshots; it is updated on the same 1-minute cadence regardless of how many collectors are
connected.
## Components

### Program 1 — Collector (`cmd/collector`)

#### Responsibilities

- Tail on-disk log files via a single `fsnotify.Watcher`, handle logrotate, and re-scan glob patterns periodically to pick up new
  files (FR-2.1, NFR-4.1).
- Listen on an optional UDP socket for `ipng_stats_logtail` datagrams (FR-2.2).
- Parse each log line into a `LogRecord` (FR-1).
- Maintain the live map, fine ring, coarse ring, and subscriber fan-out under a single-writer goroutine (FR-3, NFR-1).
- Serve `LogtailService` on `-listen` (FR-5).
- Expose Prometheus metrics on `-prom-listen` (FR-8).

#### Key data types

- `LogRecord` — ten fields (website, client_prefix, URI, status, is_tor, asn, method, body_bytes_sent, request_time, source_tag).
  Produced by `ParseLine` or `ParseUDPLine` and consumed by the store goroutine.
- `Tuple6` (historical name; carries seven fields now) — the aggregation key. NUL-separated when encoded as a map key for snapshots.
  The code name is intentionally stable so downstream tests and consumers are not churned.
- `Snapshot` — `(timestamp, []Entry)` where `Entry = (label, count)` and `label` is an encoded `Tuple6`.

#### Presents

- `LogtailService` on TCP (default `:9090`).
- A Prometheus `/metrics` handler on TCP (default `:9100`).

#### Consumes

- One or more on-disk log files matched by `--logs` and/or `--logs-file` globs.
- Optionally, a UDP socket on `--logtail-bind`:`--logtail-port` (default `127.0.0.1`, disabled when port is `0`).
### Program 2 — Aggregator (`cmd/aggregator`)

#### Responsibilities

- Dial every configured collector and subscribe via `StreamSnapshots` (FR-4.2).
- Merge incoming snapshots into a single cache using delta-based subtraction, so a collector's contribution is updated in place
  rather than accumulated (FR-4.2).
- At startup, call `DumpSnapshots` on each collector once, merge the per-timestamp entries, and load the result into the cache
  atomically (FR-4.3).
- Handle collector outages with exponential-backoff reconnect and degraded-collector zeroing (FR-4.4).
- Serve the same `LogtailService` as the collector (FR-5).
- Maintain a `TargetRegistry` that maps collector addresses to display names (updated from the `source` field of incoming
  snapshots).

#### Presents

- `LogtailService` on TCP (default `:9091`).

#### Consumes

- The `StreamSnapshots` and `DumpSnapshots` RPCs on every configured collector (`--collectors`).
### Program 3 — Frontend (`cmd/frontend`)

#### Responsibilities

- Render the drilldown dashboard server-side with no JavaScript (FR-6.1).
- Parse URL query string into filter / group-by / window state (FR-6.2).
- Issue `TopN`, `Trend`, and `ListTargets` concurrently with a 5 s deadline (FR-6.4).
- Render inline SVG sparklines from `TrendResponse` (FR-6.1).
- Support the mini filter-expression language (FR-6.6) and the `raw=1` JSON output (FR-6.5).
- Expose a source-picker row populated from `ListTargets`.

#### Presents

- An HTTP dashboard on TCP (default `:8080`).

#### Consumes

- Any `LogtailService` endpoint (`--target`, default `localhost:9091` — the aggregator).
### Program 4 — CLI (`cmd/cli`)

#### Responsibilities

- Dispatch to `topn`, `trend`, `stream`, or `targets` (FR-7.1).
- Parse shared and per-subcommand flags, build a `Filter` proto from them, and fan out to every `--target` concurrently (FR-7.2).
- Print human-readable tables by default; switch to JSON with `--json` (FR-7.2).
- Reconnect automatically in `stream` mode (FR-7.3).

#### Presents

- Exit status `0` on success, non-zero on RPC error (except `stream`, which runs until interrupted).

#### Consumes

- Any `LogtailService` endpoint.
### Protobuf service (`proto/logtail.proto`)

One proto file defines every shared type: `Tuple6` is encoded as a NUL-separated label string inside `TopNEntry`, and the
`Snapshot` message carries both fine (1-min) and coarse (5-min) ring contents. `GroupBy` and `Window` are enums; `Filter` carries
optional exact-match fields, regex fields, and the `StatusOp` comparison enum used for both `http_response` and `asn_number`.
## Operational Concerns

### Deployment Topology

A typical deployment is:

- **Per nginx host:** one `collector` systemd unit, pointed at `/var/log/nginx/*.log` and/or listening on `127.0.0.1:9514` for the
  `nginx-ipng-stats-plugin` UDP stream. Exposes `:9090` (gRPC) and `:9100` (Prometheus).
- **Central:** one `aggregator` systemd unit on e.g. `agg:9091`, subscribed to all collectors; and one `frontend` systemd unit on
  `agg:8080`, pointed at the aggregator. Operators reach the dashboard via `http://agg:8080/`. Alternatively, the Docker Compose
  file in the repo root runs the aggregator and frontend together.
- **Operator laptop:** `logtail-cli` invocations, pointed at the aggregator for fleet-wide questions or at a specific collector for
  a single-host drilldown.
### Configuration

All four binaries are configured via flags with matching environment variables. The canonical reference is `docs/user-guide.md`.
Representative settings:

- `collector`: `--logs /var/log/nginx/*.log`, `--logtail-port 9514`, `--source $(hostname)`, `--prom-listen :9100`.
- `aggregator`: `--collectors nginx1:9090,nginx2:9090`, `--listen :9091`.
- `frontend`: `--target agg:9091`, `--listen :8080`.
- `cli`: no persistent configuration; every invocation carries `--target`.
### Reload and Restart Semantics

- **Collector restart.** The live map and both rings start empty. The file tailer resumes at EOF of each watched file (no historical
  replay). The fine ring refills within an hour; the coarse ring within 24 hours.
- **Aggregator restart.** Backfill reconstructs the cache from all collectors' `DumpSnapshots` streams. The gRPC server is listening
  before backfill begins (NFR-4.2), so the frontend is never blocked during restart — it just sees an incomplete cache for the few
  seconds backfill takes.
- **Collector outage.** The aggregator reconnects with backoff; after three consecutive failures the collector's contribution is
  zeroed (FR-4.4) so the merged view does not show stale counts. On recovery the zeroing is reversed by the next snapshot.
- **nginx logrotate.** The collector drains the old fd, closes, and retries the original path. No operator action (NFR-4.1).
- **nginx-ipng-stats-plugin reload.** The plugin's UDP socket is per-worker; a reload simply causes new workers to open fresh
  sockets to the same address. The collector sees a brief gap and resumes.
### Observability of the System Itself

Primary channel is the collector's Prometheus endpoint (FR-8). Beyond the per-host request counter and the per-source roll-ups,
three UDP counters give direct visibility into the UDP ingest path:

- `logtail_udp_packets_received_total` — what arrived.
- `logtail_udp_loglines_success_total` — what parsed cleanly.
- `logtail_udp_loglines_consumed_total` — what made it to the store (i.e. was not dropped by a full channel).

`received - success` is the parse-failure rate; `success - consumed` is the back-pressure drop rate. Operators should alert on
either being non-zero.
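For example (illustrative numbers): `received=10000`, `success=9990`, `consumed=9950` means ten datagrams failed to parse and forty parsed lines were shed by a full channel.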
Each binary logs human-readable lines on stdout for connect/disconnect events, logrotate reopen, backfill timing, and degraded
transitions. No per-request logging.
### Failure Modes

- **High-cardinality DDoS.** The live map hits 100 000 entries and stops accepting new keys until the next rotation (NFR-2.1).
  Existing top-K entries keep accumulating, so the attacker's dominant prefixes / URIs remain visible. The cap resets every minute.
- **Collector crash.** In-flight live-map state for the current minute is lost. The next collector start resumes tailing; the
  aggregator zeroes the degraded collector's contribution after a few seconds and reintegrates it when snapshots resume.
- **Aggregator crash.** No collector is affected. The operator restarts the aggregator; backfill reconstructs the cache.
- **Frontend crash.** Stateless. Operator restarts.
- **UDP datagram loss.** Any datagram dropped in-kernel (socket buffer full, network drop) does not register as a parse failure; it
  is simply invisible. Operators should size `SO_RCVBUF` appropriately; the collector already requests 4 MiB.
- **Malformed log lines.** File format: lines with fewer than 8 tab-separated fields are silently skipped; an invalid IP also drops
  the line. UDP: packets without exactly 12 fields are counted as received-but-not-success and dropped.
- **Clock skew between collectors.** Trend sparklines derived from merged data assume collectors are roughly NTP-synced. Per-bucket
  alignment is to the local minute / 5-minute boundary of each collector.
- **gRPC traffic over untrusted links.** The system does not ship TLS; operators should front the gRPC ports with a TLS-terminating
  proxy or an IPsec tunnel.
### Security

- **No TLS, no auth.** Deliberate (NFR-6.1). Deploy on a trusted network or behind a TLS proxy.
- **UDP bind.** Default `127.0.0.1` so merely turning on the listener does not expose a public socket (NFR-6.2).
- **Client-IP truncation.** Client addresses are truncated at ingest; the system never stores full client IPs (NFR-6.3, FR-1.3).
- **Query-string stripping.** URIs lose their query strings at ingest. A user who cares about `?q=` parameters must re-engineer
  nginx's log format — and then accept that cardinality consequence.
## Alternatives Considered

- **Log shipping to ClickHouse / ELK.** Rejected as the default: adds a storage tier to a problem that fits in a per-host 1 GB
  ring, for the target fleet size. A future ClickHouse export from the aggregator is viable and would be additive (deferred).
- **Raw request logging to Kafka.** Rejected: preserves every request at much higher cost for no visibility benefit; the operator
  wants top-K ranking, not a replay log. If raw logging is desired, nginx's own access log is the right tool.
- **Promtail / Grafana Loki.** Rejected as the primary interface. Loki is excellent for free-text log search but weak for fast
  ranked aggregations over dozens of dimensions; the drilldown interaction the operator wants fits poorly into LogQL.
- **In-process Lua aggregator on each nginx.** Considered for the collector tier. Rejected: shipping counters to a central view
  still requires a process outside nginx; keeping the ingest path out of the nginx worker avoids a class of latency regressions.
- **Pull-based collector polling (aggregator polls collectors every second).** Rejected in favor of push. Polling multiplies query
  latency and makes the aggregator's cache stale by the poll interval. Push-stream with delta merge keeps the cache within seconds
  of real time.
- **One metric name for both per-host and per-source_tag roll-ups.** Rejected for Prometheus hygiene. Mixing different label sets
  under one metric name breaks aggregation rules; separate metric names (`_by_source`) are clearer and easier to query.
- **Cross-product of `host × source_tag` for every counter and histogram.** Rejected. With ~20 tags and ~50 hosts the cardinality
  explodes quickly on the duration histogram without operational benefit. The duration histogram stays per-host; requests and body
  size get a parallel `_by_source` rollup.
- **Writing every `snapshot` to disk for restart recovery.** Rejected in favor of `DumpSnapshots` RPC backfill. Disk-backed
  persistence would multiply operational surface (rotation, fsck, permissions) for a feature that needs to survive only an
  aggregator restart.
## Decisions Deferred Post-v0.2

- **ClickHouse export from aggregator.** 1-minute pre-aggregated rows pushed into a `SummingMergeTree` table for 7-day / 30-day
  windows. Frontend would route longer windows to ClickHouse while shorter windows stay on the in-memory rings. Strictly additive;
  no interface changes. Deferred until a concrete retention requirement lands.
- **TLS on gRPC endpoints.** The argument for shipping TLS changes if/when the aggregator is deployed across an untrusted network
  segment. Until then, a front proxy is the right shape.
- **Ring-buffer sizing on a per-collector basis.** Today every collector ships the same 60×50 K / 288×5 K dimensions. A
  low-traffic collector can afford smaller rings; a hot one might want larger. Deferred — the uniform default is operationally
  simpler.
- **Authenticated Prometheus scraping.** The endpoint is currently open on `:9100`. If a future deployment puts the scraper on a
  less-trusted path, scrape-side auth (bearer token, TLS client cert) is the right add-on.
- **Coarse tier beyond 24 h.** Extending to 7 days in-memory would cost ~70 MB per collector but add 2016 buckets to iterate on a
  `W24H+` query. Deferred until the operator wants a 7-day drilldown without ClickHouse.
@@ -21,7 +21,8 @@ const (
	CoarseEvery = 5 // fine ticks between coarse writes
)

// Tuple6 is the aggregation key (website, prefix, URI, status, is_tor, asn).
// Tuple6 is the aggregation key (website, prefix, URI, status, is_tor, asn, source_tag).
// The name is kept for source-compat with older call sites; it now carries seven fields.
type Tuple6 struct {
	Website string
	Prefix  string
@@ -29,6 +30,7 @@ type Tuple6 struct {
	Status    string
	IsTor     bool
	ASN       int32
	SourceTag string
}

// Entry is a labelled count used in snapshots and query results.
@@ -85,12 +87,13 @@ func EncodeTuple(t Tuple6) string {
	if t.IsTor {
		tor = "1"
	}
	return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status + "\x00" + tor + "\x00" + strconv.Itoa(int(t.ASN))
	return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status + "\x00" + tor + "\x00" + strconv.Itoa(int(t.ASN)) + "\x00" + t.SourceTag
}

// LabelTuple decodes a NUL-separated snapshot label back into a Tuple6.
// Labels from older snapshots (6 fields) round-trip with SourceTag=="".
func LabelTuple(label string) Tuple6 {
	parts := splitN(label, '\x00', 6)
	parts := splitN(label, '\x00', 7)
	if len(parts) < 4 {
		return Tuple6{}
	}
@@ -98,11 +101,14 @@ func LabelTuple(label string) Tuple6 {
	if len(parts) >= 5 {
		t.IsTor = parts[4] == "1"
	}
	if len(parts) == 6 {
	if len(parts) >= 6 {
		if n, err := strconv.Atoi(parts[5]); err == nil {
			t.ASN = int32(n)
		}
	}
	if len(parts) == 7 {
		t.SourceTag = parts[6]
	}
	return t
}

@@ -239,6 +245,9 @@ func MatchesFilter(t Tuple6, f *CompiledFilter) bool {
	if p.AsnNumber != nil && !matchesAsnOp(t.ASN, p.GetAsnNumber(), p.AsnOp) {
		return false
	}
	if p.IpngSourceTag != nil && t.SourceTag != p.GetIpngSourceTag() {
		return false
	}
	return true
}

@@ -299,6 +308,8 @@ func DimensionLabel(t Tuple6, g pb.GroupBy) string {
		return t.Status
	case pb.GroupBy_ASN_NUMBER:
		return strconv.Itoa(int(t.ASN))
	case pb.GroupBy_SOURCE_TAG:
		return t.SourceTag
	default:
		return t.Website
	}
@@ -335,3 +335,45 @@ func TestDimensionLabelASN(t *testing.T) {
		t.Errorf("DimensionLabel ASN: got %q, want %q", got, "12345")
	}
}

// --- SourceTag label encoding, filtering, and DimensionLabel ---

func TestEncodeLabelTupleRoundtripWithSourceTag(t *testing.T) {
	for _, tag := range []string{"", "direct", "cdn", "tag with spaces"} {
		orig := Tuple6{Website: "a.com", Prefix: "1.2.3.0/24", URI: "/x", Status: "200", SourceTag: tag}
		got := LabelTuple(EncodeTuple(orig))
		if got != orig {
			t.Errorf("roundtrip mismatch for tag=%q: got %+v, want %+v", tag, got, orig)
		}
	}
}

func TestLabelTupleBackwardCompatNoSourceTag(t *testing.T) {
	// 6-field label (pre-source_tag snapshot) decodes with SourceTag="".
	label := "a.com\x001.2.3.0/24\x00/x\x00200\x000\x0012345"
	got := LabelTuple(label)
	if got.SourceTag != "" {
		t.Errorf("expected empty SourceTag for 6-field label, got %q", got.SourceTag)
	}
	if got.ASN != 12345 {
		t.Errorf("expected ASN=12345, got %d", got.ASN)
	}
}

func TestMatchesFilterSourceTag(t *testing.T) {
	tag := "cdn"
	cf := CompileFilter(&pb.Filter{IpngSourceTag: &tag})
	if !MatchesFilter(Tuple6{SourceTag: "cdn"}, cf) {
		t.Fatal("should match equal source_tag")
	}
	if MatchesFilter(Tuple6{SourceTag: "direct"}, cf) {
		t.Fatal("should not match different source_tag")
	}
}

func TestDimensionLabelSourceTag(t *testing.T) {
	got := DimensionLabel(Tuple6{SourceTag: "cdn"}, pb.GroupBy_SOURCE_TAG)
	if got != "cdn" {
		t.Errorf("DimensionLabel SOURCE_TAG: got %q, want %q", got, "cdn")
	}
}
@@ -38,6 +38,7 @@ message Filter {
  TorFilter tor = 8;                    // restrict to TOR / non-TOR clients
  optional int32 asn_number = 9;        // filter by client ASN
  StatusOp asn_op = 10;                 // operator for asn_number; ignored when unset
  optional string ipng_source_tag = 13; // filter by nginx source tag
}

enum GroupBy {
@@ -46,6 +47,7 @@ enum GroupBy {
  REQUEST_URI = 2;
  HTTP_RESPONSE = 3;
  ASN_NUMBER = 4;
  SOURCE_TAG = 5;
}

enum Window {
@@ -2,7 +2,7 @@
// versions:
// protoc-gen-go v1.36.11
// protoc v3.21.12
// source: proto/logtail.proto
// source: logtail.proto

package logtailpb

@@ -56,11 +56,11 @@ func (x TorFilter) String() string {
}

func (TorFilter) Descriptor() protoreflect.EnumDescriptor {
	return file_proto_logtail_proto_enumTypes[0].Descriptor()
	return file_logtail_proto_enumTypes[0].Descriptor()
}

func (TorFilter) Type() protoreflect.EnumType {
	return &file_proto_logtail_proto_enumTypes[0]
	return &file_logtail_proto_enumTypes[0]
}

func (x TorFilter) Number() protoreflect.EnumNumber {
@@ -69,7 +69,7 @@ func (x TorFilter) Number() protoreflect.EnumNumber {

// Deprecated: Use TorFilter.Descriptor instead.
func (TorFilter) EnumDescriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{0}
	return file_logtail_proto_rawDescGZIP(), []int{0}
}

// StatusOp is the comparison operator applied to http_response in a Filter.
@@ -116,11 +116,11 @@ func (x StatusOp) String() string {
}

func (StatusOp) Descriptor() protoreflect.EnumDescriptor {
	return file_proto_logtail_proto_enumTypes[1].Descriptor()
	return file_logtail_proto_enumTypes[1].Descriptor()
}

func (StatusOp) Type() protoreflect.EnumType {
	return &file_proto_logtail_proto_enumTypes[1]
	return &file_logtail_proto_enumTypes[1]
}

func (x StatusOp) Number() protoreflect.EnumNumber {
@@ -129,7 +129,7 @@ func (x StatusOp) Number() protoreflect.EnumNumber {

// Deprecated: Use StatusOp.Descriptor instead.
func (StatusOp) EnumDescriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{1}
	return file_logtail_proto_rawDescGZIP(), []int{1}
}

type GroupBy int32

@@ -140,6 +140,7 @@ const (
	GroupBy_REQUEST_URI   GroupBy = 2
	GroupBy_HTTP_RESPONSE GroupBy = 3
	GroupBy_ASN_NUMBER    GroupBy = 4
	GroupBy_SOURCE_TAG    GroupBy = 5
)

// Enum value maps for GroupBy.
@@ -150,6 +151,7 @@ var (
		2: "REQUEST_URI",
		3: "HTTP_RESPONSE",
		4: "ASN_NUMBER",
		5: "SOURCE_TAG",
	}
	GroupBy_value = map[string]int32{
		"WEBSITE": 0,
@@ -157,6 +159,7 @@ var (
		"REQUEST_URI":   2,
		"HTTP_RESPONSE": 3,
		"ASN_NUMBER":    4,
		"SOURCE_TAG":    5,
	}
)

@@ -171,11 +174,11 @@ func (x GroupBy) String() string {
}

func (GroupBy) Descriptor() protoreflect.EnumDescriptor {
	return file_proto_logtail_proto_enumTypes[2].Descriptor()
	return file_logtail_proto_enumTypes[2].Descriptor()
}

func (GroupBy) Type() protoreflect.EnumType {
	return &file_proto_logtail_proto_enumTypes[2]
	return &file_logtail_proto_enumTypes[2]
}

func (x GroupBy) Number() protoreflect.EnumNumber {
@@ -184,7 +187,7 @@ func (x GroupBy) Number() protoreflect.EnumNumber {

// Deprecated: Use GroupBy.Descriptor instead.
func (GroupBy) EnumDescriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{2}
	return file_logtail_proto_rawDescGZIP(), []int{2}
}

type Window int32

@@ -229,11 +232,11 @@ func (x Window) String() string {
}

func (Window) Descriptor() protoreflect.EnumDescriptor {
	return file_proto_logtail_proto_enumTypes[3].Descriptor()
	return file_logtail_proto_enumTypes[3].Descriptor()
}

func (Window) Type() protoreflect.EnumType {
	return &file_proto_logtail_proto_enumTypes[3]
	return &file_logtail_proto_enumTypes[3]
}

func (x Window) Number() protoreflect.EnumNumber {
@@ -242,7 +245,7 @@ func (x Window) Number() protoreflect.EnumNumber {

// Deprecated: Use Window.Descriptor instead.
func (Window) EnumDescriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{3}
	return file_logtail_proto_rawDescGZIP(), []int{3}
}
// Filter restricts results to entries matching all specified fields.
@@ -261,13 +264,14 @@ type Filter struct {
	Tor           TorFilter `protobuf:"varint,8,opt,name=tor,proto3,enum=logtail.TorFilter" json:"tor,omitempty"` // restrict to TOR / non-TOR clients
	AsnNumber     *int32    `protobuf:"varint,9,opt,name=asn_number,json=asnNumber,proto3,oneof" json:"asn_number,omitempty"` // filter by client ASN
	AsnOp         StatusOp  `protobuf:"varint,10,opt,name=asn_op,json=asnOp,proto3,enum=logtail.StatusOp" json:"asn_op,omitempty"` // operator for asn_number; ignored when unset
	IpngSourceTag *string   `protobuf:"bytes,13,opt,name=ipng_source_tag,json=ipngSourceTag,proto3,oneof" json:"ipng_source_tag,omitempty"` // filter by nginx source tag
	unknownFields protoimpl.UnknownFields
	sizeCache     protoimpl.SizeCache
}

func (x *Filter) Reset() {
	*x = Filter{}
	mi := &file_proto_logtail_proto_msgTypes[0]
	mi := &file_logtail_proto_msgTypes[0]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -279,7 +283,7 @@ func (x *Filter) String() string {
func (*Filter) ProtoMessage() {}

func (x *Filter) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[0]
	mi := &file_logtail_proto_msgTypes[0]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -292,7 +296,7 @@ func (x *Filter) ProtoReflect() protoreflect.Message {

// Deprecated: Use Filter.ProtoReflect.Descriptor instead.
func (*Filter) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{0}
	return file_logtail_proto_rawDescGZIP(), []int{0}
}

func (x *Filter) GetWebsite() string {
@@ -379,6 +383,13 @@ func (x *Filter) GetAsnOp() StatusOp {
	return StatusOp_EQ
}

func (x *Filter) GetIpngSourceTag() string {
	if x != nil && x.IpngSourceTag != nil {
		return *x.IpngSourceTag
	}
	return ""
}

type TopNRequest struct {
	state  protoimpl.MessageState `protogen:"open.v1"`
	Filter *Filter                `protobuf:"bytes,1,opt,name=filter,proto3" json:"filter,omitempty"`
@@ -391,7 +402,7 @@ type TopNRequest struct {

func (x *TopNRequest) Reset() {
	*x = TopNRequest{}
	mi := &file_proto_logtail_proto_msgTypes[1]
	mi := &file_logtail_proto_msgTypes[1]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -403,7 +414,7 @@ func (x *TopNRequest) String() string {
func (*TopNRequest) ProtoMessage() {}

func (x *TopNRequest) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[1]
	mi := &file_logtail_proto_msgTypes[1]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -416,7 +427,7 @@ func (x *TopNRequest) ProtoReflect() protoreflect.Message {

// Deprecated: Use TopNRequest.ProtoReflect.Descriptor instead.
func (*TopNRequest) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{1}
	return file_logtail_proto_rawDescGZIP(), []int{1}
}

func (x *TopNRequest) GetFilter() *Filter {
@@ -457,7 +468,7 @@ type TopNEntry struct {

func (x *TopNEntry) Reset() {
	*x = TopNEntry{}
	mi := &file_proto_logtail_proto_msgTypes[2]
	mi := &file_logtail_proto_msgTypes[2]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -469,7 +480,7 @@ func (x *TopNEntry) String() string {
func (*TopNEntry) ProtoMessage() {}

func (x *TopNEntry) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[2]
	mi := &file_logtail_proto_msgTypes[2]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -482,7 +493,7 @@ func (x *TopNEntry) ProtoReflect() protoreflect.Message {

// Deprecated: Use TopNEntry.ProtoReflect.Descriptor instead.
func (*TopNEntry) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{2}
	return file_logtail_proto_rawDescGZIP(), []int{2}
}

func (x *TopNEntry) GetLabel() string {
@@ -509,7 +520,7 @@ type TopNResponse struct {

func (x *TopNResponse) Reset() {
	*x = TopNResponse{}
	mi := &file_proto_logtail_proto_msgTypes[3]
	mi := &file_logtail_proto_msgTypes[3]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -521,7 +532,7 @@ func (x *TopNResponse) String() string {
func (*TopNResponse) ProtoMessage() {}

func (x *TopNResponse) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[3]
	mi := &file_logtail_proto_msgTypes[3]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -534,7 +545,7 @@ func (x *TopNResponse) ProtoReflect() protoreflect.Message {

// Deprecated: Use TopNResponse.ProtoReflect.Descriptor instead.
func (*TopNResponse) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{3}
	return file_logtail_proto_rawDescGZIP(), []int{3}
}

func (x *TopNResponse) GetEntries() []*TopNEntry {
@@ -561,7 +572,7 @@ type TrendRequest struct {

func (x *TrendRequest) Reset() {
	*x = TrendRequest{}
	mi := &file_proto_logtail_proto_msgTypes[4]
	mi := &file_logtail_proto_msgTypes[4]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -573,7 +584,7 @@ func (x *TrendRequest) String() string {
func (*TrendRequest) ProtoMessage() {}

func (x *TrendRequest) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[4]
	mi := &file_logtail_proto_msgTypes[4]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -586,7 +597,7 @@ func (x *TrendRequest) ProtoReflect() protoreflect.Message {

// Deprecated: Use TrendRequest.ProtoReflect.Descriptor instead.
func (*TrendRequest) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{4}
	return file_logtail_proto_rawDescGZIP(), []int{4}
}

func (x *TrendRequest) GetFilter() *Filter {
@@ -613,7 +624,7 @@ type TrendPoint struct {

func (x *TrendPoint) Reset() {
	*x = TrendPoint{}
	mi := &file_proto_logtail_proto_msgTypes[5]
	mi := &file_logtail_proto_msgTypes[5]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -625,7 +636,7 @@ func (x *TrendPoint) String() string {
func (*TrendPoint) ProtoMessage() {}

func (x *TrendPoint) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[5]
	mi := &file_logtail_proto_msgTypes[5]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -638,7 +649,7 @@ func (x *TrendPoint) ProtoReflect() protoreflect.Message {

// Deprecated: Use TrendPoint.ProtoReflect.Descriptor instead.
func (*TrendPoint) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{5}
	return file_logtail_proto_rawDescGZIP(), []int{5}
}

func (x *TrendPoint) GetTimestampUnix() int64 {
@@ -665,7 +676,7 @@ type TrendResponse struct {

func (x *TrendResponse) Reset() {
	*x = TrendResponse{}
	mi := &file_proto_logtail_proto_msgTypes[6]
	mi := &file_logtail_proto_msgTypes[6]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -677,7 +688,7 @@ func (x *TrendResponse) String() string {
func (*TrendResponse) ProtoMessage() {}

func (x *TrendResponse) ProtoReflect() protoreflect.Message {
	mi := &file_proto_logtail_proto_msgTypes[6]
	mi := &file_logtail_proto_msgTypes[6]
	if x != nil {
		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
		if ms.LoadMessageInfo() == nil {
@@ -690,7 +701,7 @@ func (x *TrendResponse) ProtoReflect() protoreflect.Message {

// Deprecated: Use TrendResponse.ProtoReflect.Descriptor instead.
func (*TrendResponse) Descriptor() ([]byte, []int) {
	return file_proto_logtail_proto_rawDescGZIP(), []int{6}
	return file_logtail_proto_rawDescGZIP(), []int{6}
}

func (x *TrendResponse) GetPoints() []*TrendPoint {
@@ -715,7 +726,7 @@ type SnapshotRequest struct {

func (x *SnapshotRequest) Reset() {
	*x = SnapshotRequest{}
	mi := &file_proto_logtail_proto_msgTypes[7]
	mi := &file_logtail_proto_msgTypes[7]
	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
	ms.StoreMessageInfo(mi)
}
@@ -727,7 +738,7 @@ func (x *SnapshotRequest) String() string {
|
||||
func (*SnapshotRequest) ProtoMessage() {}
|
||||
|
||||
func (x *SnapshotRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[7]
|
||||
mi := &file_logtail_proto_msgTypes[7]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -740,7 +751,7 @@ func (x *SnapshotRequest) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use SnapshotRequest.ProtoReflect.Descriptor instead.
|
||||
func (*SnapshotRequest) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{7}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{7}
|
||||
}
|
||||
|
||||
type Snapshot struct {
|
||||
@@ -755,7 +766,7 @@ type Snapshot struct {
|
||||
|
||||
func (x *Snapshot) Reset() {
|
||||
*x = Snapshot{}
|
||||
mi := &file_proto_logtail_proto_msgTypes[8]
|
||||
mi := &file_logtail_proto_msgTypes[8]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
@@ -767,7 +778,7 @@ func (x *Snapshot) String() string {
|
||||
func (*Snapshot) ProtoMessage() {}
|
||||
|
||||
func (x *Snapshot) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[8]
|
||||
mi := &file_logtail_proto_msgTypes[8]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -780,7 +791,7 @@ func (x *Snapshot) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use Snapshot.ProtoReflect.Descriptor instead.
|
||||
func (*Snapshot) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{8}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{8}
|
||||
}
|
||||
|
||||
func (x *Snapshot) GetSource() string {
|
||||
@@ -819,7 +830,7 @@ type DumpSnapshotsRequest struct {
|
||||
|
||||
func (x *DumpSnapshotsRequest) Reset() {
|
||||
*x = DumpSnapshotsRequest{}
|
||||
mi := &file_proto_logtail_proto_msgTypes[9]
|
||||
mi := &file_logtail_proto_msgTypes[9]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
@@ -831,7 +842,7 @@ func (x *DumpSnapshotsRequest) String() string {
|
||||
func (*DumpSnapshotsRequest) ProtoMessage() {}
|
||||
|
||||
func (x *DumpSnapshotsRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[9]
|
||||
mi := &file_logtail_proto_msgTypes[9]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -844,7 +855,7 @@ func (x *DumpSnapshotsRequest) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use DumpSnapshotsRequest.ProtoReflect.Descriptor instead.
|
||||
func (*DumpSnapshotsRequest) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{9}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{9}
|
||||
}
|
||||
|
||||
type ListTargetsRequest struct {
|
||||
@@ -855,7 +866,7 @@ type ListTargetsRequest struct {
|
||||
|
||||
func (x *ListTargetsRequest) Reset() {
|
||||
*x = ListTargetsRequest{}
|
||||
mi := &file_proto_logtail_proto_msgTypes[10]
|
||||
mi := &file_logtail_proto_msgTypes[10]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
@@ -867,7 +878,7 @@ func (x *ListTargetsRequest) String() string {
|
||||
func (*ListTargetsRequest) ProtoMessage() {}
|
||||
|
||||
func (x *ListTargetsRequest) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[10]
|
||||
mi := &file_logtail_proto_msgTypes[10]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -880,7 +891,7 @@ func (x *ListTargetsRequest) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use ListTargetsRequest.ProtoReflect.Descriptor instead.
|
||||
func (*ListTargetsRequest) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{10}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{10}
|
||||
}
|
||||
|
||||
type TargetInfo struct {
|
||||
@@ -893,7 +904,7 @@ type TargetInfo struct {
|
||||
|
||||
func (x *TargetInfo) Reset() {
|
||||
*x = TargetInfo{}
|
||||
mi := &file_proto_logtail_proto_msgTypes[11]
|
||||
mi := &file_logtail_proto_msgTypes[11]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
@@ -905,7 +916,7 @@ func (x *TargetInfo) String() string {
|
||||
func (*TargetInfo) ProtoMessage() {}
|
||||
|
||||
func (x *TargetInfo) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[11]
|
||||
mi := &file_logtail_proto_msgTypes[11]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -918,7 +929,7 @@ func (x *TargetInfo) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use TargetInfo.ProtoReflect.Descriptor instead.
|
||||
func (*TargetInfo) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{11}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{11}
|
||||
}
|
||||
|
||||
func (x *TargetInfo) GetName() string {
|
||||
@@ -944,7 +955,7 @@ type ListTargetsResponse struct {
|
||||
|
||||
func (x *ListTargetsResponse) Reset() {
|
||||
*x = ListTargetsResponse{}
|
||||
mi := &file_proto_logtail_proto_msgTypes[12]
|
||||
mi := &file_logtail_proto_msgTypes[12]
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
ms.StoreMessageInfo(mi)
|
||||
}
|
||||
@@ -956,7 +967,7 @@ func (x *ListTargetsResponse) String() string {
|
||||
func (*ListTargetsResponse) ProtoMessage() {}
|
||||
|
||||
func (x *ListTargetsResponse) ProtoReflect() protoreflect.Message {
|
||||
mi := &file_proto_logtail_proto_msgTypes[12]
|
||||
mi := &file_logtail_proto_msgTypes[12]
|
||||
if x != nil {
|
||||
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
|
||||
if ms.LoadMessageInfo() == nil {
|
||||
@@ -969,7 +980,7 @@ func (x *ListTargetsResponse) ProtoReflect() protoreflect.Message {
|
||||
|
||||
// Deprecated: Use ListTargetsResponse.ProtoReflect.Descriptor instead.
|
||||
func (*ListTargetsResponse) Descriptor() ([]byte, []int) {
|
||||
return file_proto_logtail_proto_rawDescGZIP(), []int{12}
|
||||
return file_logtail_proto_rawDescGZIP(), []int{12}
|
||||
}
|
||||
|
||||
func (x *ListTargetsResponse) GetTargets() []*TargetInfo {
|
||||
@@ -979,11 +990,11 @@ func (x *ListTargetsResponse) GetTargets() []*TargetInfo {
|
||||
return nil
|
||||
}
|
||||
|
||||
var File_proto_logtail_proto protoreflect.FileDescriptor
|
||||
var File_logtail_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_proto_logtail_proto_rawDesc = "" +
|
||||
const file_logtail_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"\x13proto/logtail.proto\x12\alogtail\"\xa8\x05\n" +
|
||||
"\rlogtail.proto\x12\alogtail\"\xe9\x05\n" +
|
||||
"\x06Filter\x12\x1d\n" +
|
||||
"\awebsite\x18\x01 \x01(\tH\x00R\awebsite\x88\x01\x01\x12(\n" +
|
||||
"\rclient_prefix\x18\x02 \x01(\tH\x01R\fclientPrefix\x88\x01\x01\x12-\n" +
|
||||
@@ -998,7 +1009,8 @@ const file_proto_logtail_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"asn_number\x18\t \x01(\x05H\bR\tasnNumber\x88\x01\x01\x12(\n" +
|
||||
"\x06asn_op\x18\n" +
|
||||
" \x01(\x0e2\x11.logtail.StatusOpR\x05asnOpB\n" +
|
||||
" \x01(\x0e2\x11.logtail.StatusOpR\x05asnOp\x12+\n" +
|
||||
"\x0fipng_source_tag\x18\r \x01(\tH\tR\ripngSourceTag\x88\x01\x01B\n" +
|
||||
"\n" +
|
||||
"\b_websiteB\x10\n" +
|
||||
"\x0e_client_prefixB\x13\n" +
|
||||
@@ -1009,7 +1021,8 @@ const file_proto_logtail_proto_rawDesc = "" +
|
||||
"_uri_regexB\x18\n" +
|
||||
"\x16_website_regex_excludeB\x14\n" +
|
||||
"\x12_uri_regex_excludeB\r\n" +
|
||||
"\v_asn_number\"\x9a\x01\n" +
|
||||
"\v_asn_numberB\x12\n" +
|
||||
"\x10_ipng_source_tag\"\x9a\x01\n" +
|
||||
"\vTopNRequest\x12'\n" +
|
||||
"\x06filter\x18\x01 \x01(\v2\x0f.logtail.FilterR\x06filter\x12+\n" +
|
||||
"\bgroup_by\x18\x02 \x01(\x0e2\x10.logtail.GroupByR\agroupBy\x12\f\n" +
|
||||
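The two hunks above extend the Filter message's raw descriptor with the new optional string field ipng_source_tag (field 13) and its _ipng_source_tag presence wrapper, so the generated Go struct gains an IpngSourceTag *string member. A minimal sketch of setting and reading the field follows; the main wrapper and the "cdn" value are illustrative only, and the import path is taken from the go_package option visible in the descriptor.

package main

import (
	"fmt"

	pb "git.ipng.ch/ipng/nginx-logtail/proto/logtailpb"
)

func main() {
	// proto3 optional scalars map to pointers in Go, so "no tag filter"
	// (nil) stays distinguishable from an explicit empty string.
	tag := "cdn"
	f := &pb.Filter{IpngSourceTag: &tag}
	if f.IpngSourceTag != nil {
		fmt.Println("filtering on ipng_source_tag:", *f.IpngSourceTag)
	}
}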
@@ -1056,14 +1069,16 @@ const file_proto_logtail_proto_rawDesc = "" +
	"\x02GT\x10\x02\x12\x06\n" +
	"\x02GE\x10\x03\x12\x06\n" +
	"\x02LT\x10\x04\x12\x06\n" +
-	"\x02LE\x10\x05*]\n" +
+	"\x02LE\x10\x05*m\n" +
	"\aGroupBy\x12\v\n" +
	"\aWEBSITE\x10\x00\x12\x11\n" +
	"\rCLIENT_PREFIX\x10\x01\x12\x0f\n" +
	"\vREQUEST_URI\x10\x02\x12\x11\n" +
	"\rHTTP_RESPONSE\x10\x03\x12\x0e\n" +
	"\n" +
-	"ASN_NUMBER\x10\x04*A\n" +
+	"ASN_NUMBER\x10\x04\x12\x0e\n" +
+	"\n" +
+	"SOURCE_TAG\x10\x05*A\n" +
	"\x06Window\x12\a\n" +
	"\x03W1M\x10\x00\x12\a\n" +
	"\x03W5M\x10\x01\x12\b\n" +
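Likewise, the hunk above appends SOURCE_TAG as value 5 of the GroupBy enum (the enum's encoded length grows from 0x5d to 0x6d bytes), so aggregations can be keyed on the tag as well. A sketch of a TopN request grouped by the new value; the message and field names come from the generated code in this diff, other request fields are left at their defaults, and the RPC it would be sent to is assumed rather than shown here.

// Illustrative only: ask for top talkers keyed by ipng_source_tag,
// optionally combined with the new Filter field from the sketch above.
tag := "direct"
req := &pb.TopNRequest{
	Filter:  &pb.Filter{IpngSourceTag: &tag},
	GroupBy: pb.GroupBy_SOURCE_TAG,
}
_ = req // handed to a LogtailService client (RPC name assumed)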
@@ -1079,20 +1094,20 @@ const file_proto_logtail_proto_rawDesc = "" +
	"\rDumpSnapshots\x12\x1d.logtail.DumpSnapshotsRequest\x1a\x11.logtail.Snapshot0\x01B0Z.git.ipng.ch/ipng/nginx-logtail/proto/logtailpbb\x06proto3"

var (
-	file_proto_logtail_proto_rawDescOnce sync.Once
-	file_proto_logtail_proto_rawDescData []byte
+	file_logtail_proto_rawDescOnce sync.Once
+	file_logtail_proto_rawDescData []byte
)

-func file_proto_logtail_proto_rawDescGZIP() []byte {
-	file_proto_logtail_proto_rawDescOnce.Do(func() {
-		file_proto_logtail_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_proto_logtail_proto_rawDesc), len(file_proto_logtail_proto_rawDesc)))
+func file_logtail_proto_rawDescGZIP() []byte {
+	file_logtail_proto_rawDescOnce.Do(func() {
+		file_logtail_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_logtail_proto_rawDesc), len(file_logtail_proto_rawDesc)))
	})
-	return file_proto_logtail_proto_rawDescData
+	return file_logtail_proto_rawDescData
}

-var file_proto_logtail_proto_enumTypes = make([]protoimpl.EnumInfo, 4)
-var file_proto_logtail_proto_msgTypes = make([]protoimpl.MessageInfo, 13)
-var file_proto_logtail_proto_goTypes = []any{
+var file_logtail_proto_enumTypes = make([]protoimpl.EnumInfo, 4)
+var file_logtail_proto_msgTypes = make([]protoimpl.MessageInfo, 13)
+var file_logtail_proto_goTypes = []any{
	(TorFilter)(0), // 0: logtail.TorFilter
	(StatusOp)(0),  // 1: logtail.StatusOp
	(GroupBy)(0),   // 2: logtail.GroupBy
@@ -1111,7 +1126,7 @@ var file_proto_logtail_proto_goTypes = []any{
	(*TargetInfo)(nil),          // 15: logtail.TargetInfo
	(*ListTargetsResponse)(nil), // 16: logtail.ListTargetsResponse
}
-var file_proto_logtail_proto_depIdxs = []int32{
+var file_logtail_proto_depIdxs = []int32{
	1, // 0: logtail.Filter.status_op:type_name -> logtail.StatusOp
	0, // 1: logtail.Filter.tor:type_name -> logtail.TorFilter
	1, // 2: logtail.Filter.asn_op:type_name -> logtail.StatusOp
@@ -1141,28 +1156,28 @@ var file_proto_logtail_proto_depIdxs = []int32{
	0, // [0:12] is the sub-list for field type_name
}

-func init() { file_proto_logtail_proto_init() }
-func file_proto_logtail_proto_init() {
-	if File_proto_logtail_proto != nil {
+func init() { file_logtail_proto_init() }
+func file_logtail_proto_init() {
+	if File_logtail_proto != nil {
		return
	}
-	file_proto_logtail_proto_msgTypes[0].OneofWrappers = []any{}
+	file_logtail_proto_msgTypes[0].OneofWrappers = []any{}
	type x struct{}
	out := protoimpl.TypeBuilder{
		File: protoimpl.DescBuilder{
			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
-			RawDescriptor: unsafe.Slice(unsafe.StringData(file_proto_logtail_proto_rawDesc), len(file_proto_logtail_proto_rawDesc)),
+			RawDescriptor: unsafe.Slice(unsafe.StringData(file_logtail_proto_rawDesc), len(file_logtail_proto_rawDesc)),
			NumEnums:      4,
			NumMessages:   13,
			NumExtensions: 0,
			NumServices:   1,
		},
-		GoTypes:           file_proto_logtail_proto_goTypes,
-		DependencyIndexes: file_proto_logtail_proto_depIdxs,
-		EnumInfos:         file_proto_logtail_proto_enumTypes,
-		MessageInfos:      file_proto_logtail_proto_msgTypes,
+		GoTypes:           file_logtail_proto_goTypes,
+		DependencyIndexes: file_logtail_proto_depIdxs,
+		EnumInfos:         file_logtail_proto_enumTypes,
+		MessageInfos:      file_logtail_proto_msgTypes,
	}.Build()
-	File_proto_logtail_proto = out.File
-	file_proto_logtail_proto_goTypes = nil
-	file_proto_logtail_proto_depIdxs = nil
+	File_logtail_proto = out.File
+	file_logtail_proto_goTypes = nil
+	file_logtail_proto_depIdxs = nil
}

@@ -2,7 +2,7 @@
// versions:
// - protoc-gen-go-grpc v1.6.1
// - protoc v3.21.12
-// source: proto/logtail.proto
+// source: logtail.proto

package logtailpb

@@ -276,5 +276,5 @@ var LogtailService_ServiceDesc = grpc.ServiceDesc{
			ServerStreams: true,
		},
	},
-	Metadata: "proto/logtail.proto",
+	Metadata: "logtail.proto",
}