Add is_tor plumbing from collector->aggregator->frontend/cli

This commit is contained in:
2026-03-23 22:17:39 +01:00
parent b89caa594c
commit cd7f15afaf
20 changed files with 1815 additions and 212 deletions

View File

@@ -20,12 +20,13 @@ const (
CoarseEvery = 5 // fine ticks between coarse writes
)
// Tuple4 is the four-dimensional aggregation key.
type Tuple4 struct {
// Tuple5 is the aggregation key (website, prefix, URI, status, is_tor).
type Tuple5 struct {
Website string
Prefix string
URI string
Status string
IsTor bool
}
// Entry is a labelled count used in snapshots and query results.
@@ -73,21 +74,29 @@ func BucketsForWindow(window pb.Window, fine, coarse RingView, fineFilled, coars
}
}
// --- label encoding: "website\x00prefix\x00uri\x00status" ---
// --- label encoding: "website\x00prefix\x00uri\x00status\x00is_tor" ---
// EncodeTuple encodes a Tuple4 as a NUL-separated string suitable for use
// EncodeTuple encodes a Tuple5 as a NUL-separated string suitable for use
// as a map key in snapshots.
func EncodeTuple(t Tuple4) string {
return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status
func EncodeTuple(t Tuple5) string {
tor := "0"
if t.IsTor {
tor = "1"
}
return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status + "\x00" + tor
}
// LabelTuple decodes a NUL-separated snapshot label back into a Tuple4.
func LabelTuple(label string) Tuple4 {
parts := splitN(label, '\x00', 4)
if len(parts) != 4 {
return Tuple4{}
// LabelTuple decodes a NUL-separated snapshot label back into a Tuple5.
func LabelTuple(label string) Tuple5 {
parts := splitN(label, '\x00', 5)
if len(parts) < 4 {
return Tuple5{}
}
return Tuple4{parts[0], parts[1], parts[2], parts[3]}
t := Tuple5{Website: parts[0], Prefix: parts[1], URI: parts[2], Status: parts[3]}
if len(parts) == 5 {
t.IsTor = parts[4] == "1"
}
return t
}
func splitN(s string, sep byte, n int) []string {
@@ -150,7 +159,7 @@ func CompileFilter(f *pb.Filter) *CompiledFilter {
// MatchesFilter returns true if t satisfies all constraints in f.
// A nil filter matches everything.
func MatchesFilter(t Tuple4, f *CompiledFilter) bool {
func MatchesFilter(t Tuple5, f *CompiledFilter) bool {
if f == nil || f.Proto == nil {
return true
}
@@ -180,6 +189,16 @@ func MatchesFilter(t Tuple4, f *CompiledFilter) bool {
if p.HttpResponse != nil && !matchesStatusOp(t.Status, p.GetHttpResponse(), p.StatusOp) {
return false
}
switch p.Tor {
case pb.TorFilter_TOR_YES:
if !t.IsTor {
return false
}
case pb.TorFilter_TOR_NO:
if t.IsTor {
return false
}
}
return true
}
@@ -210,7 +229,7 @@ func matchesStatusOp(statusStr string, want int32, op pb.StatusOp) bool {
}
// DimensionLabel returns the string value of t for the given group-by dimension.
func DimensionLabel(t Tuple4, g pb.GroupBy) string {
func DimensionLabel(t Tuple5, g pb.GroupBy) string {
switch g {
case pb.GroupBy_WEBSITE:
return t.Website
@@ -299,9 +318,9 @@ func TopKFromMap(m map[string]int64, k int) []Entry {
return result
}
// TopKFromTupleMap encodes a Tuple4 map and returns the top-k as a Snapshot.
// TopKFromTupleMap encodes a Tuple5 map and returns the top-k as a Snapshot.
// Used by the collector to snapshot its live map.
func TopKFromTupleMap(m map[Tuple4]int64, k int, ts time.Time) Snapshot {
func TopKFromTupleMap(m map[Tuple5]int64, k int, ts time.Time) Snapshot {
flat := make(map[string]int64, len(m))
for t, c := range m {
flat[EncodeTuple(t)] = c

View File

@@ -83,10 +83,10 @@ func compiledEQ(status int32) *CompiledFilter {
}
func TestMatchesFilterNil(t *testing.T) {
if !MatchesFilter(Tuple4{Website: "x"}, nil) {
if !MatchesFilter(Tuple5{Website: "x"}, nil) {
t.Fatal("nil filter should match everything")
}
if !MatchesFilter(Tuple4{Website: "x"}, &CompiledFilter{}) {
if !MatchesFilter(Tuple5{Website: "x"}, &CompiledFilter{}) {
t.Fatal("empty compiled filter should match everything")
}
}
@@ -94,10 +94,10 @@ func TestMatchesFilterNil(t *testing.T) {
func TestMatchesFilterExactWebsite(t *testing.T) {
w := "example.com"
cf := CompileFilter(&pb.Filter{Website: &w})
if !MatchesFilter(Tuple4{Website: "example.com"}, cf) {
if !MatchesFilter(Tuple5{Website: "example.com"}, cf) {
t.Fatal("expected match")
}
if MatchesFilter(Tuple4{Website: "other.com"}, cf) {
if MatchesFilter(Tuple5{Website: "other.com"}, cf) {
t.Fatal("expected no match")
}
}
@@ -105,10 +105,10 @@ func TestMatchesFilterExactWebsite(t *testing.T) {
func TestMatchesFilterWebsiteRegex(t *testing.T) {
re := "gouda.*"
cf := CompileFilter(&pb.Filter{WebsiteRegex: &re})
if !MatchesFilter(Tuple4{Website: "gouda.example.com"}, cf) {
if !MatchesFilter(Tuple5{Website: "gouda.example.com"}, cf) {
t.Fatal("expected match")
}
if MatchesFilter(Tuple4{Website: "edam.example.com"}, cf) {
if MatchesFilter(Tuple5{Website: "edam.example.com"}, cf) {
t.Fatal("expected no match")
}
}
@@ -116,10 +116,10 @@ func TestMatchesFilterWebsiteRegex(t *testing.T) {
func TestMatchesFilterURIRegex(t *testing.T) {
re := "^/api/.*"
cf := CompileFilter(&pb.Filter{UriRegex: &re})
if !MatchesFilter(Tuple4{URI: "/api/users"}, cf) {
if !MatchesFilter(Tuple5{URI: "/api/users"}, cf) {
t.Fatal("expected match")
}
if MatchesFilter(Tuple4{URI: "/health"}, cf) {
if MatchesFilter(Tuple5{URI: "/health"}, cf) {
t.Fatal("expected no match")
}
}
@@ -127,17 +127,17 @@ func TestMatchesFilterURIRegex(t *testing.T) {
func TestMatchesFilterInvalidRegexMatchesNothing(t *testing.T) {
re := "[invalid"
cf := CompileFilter(&pb.Filter{WebsiteRegex: &re})
if MatchesFilter(Tuple4{Website: "anything"}, cf) {
if MatchesFilter(Tuple5{Website: "anything"}, cf) {
t.Fatal("invalid regex should match nothing")
}
}
func TestMatchesFilterStatusEQ(t *testing.T) {
cf := compiledEQ(200)
if !MatchesFilter(Tuple4{Status: "200"}, cf) {
if !MatchesFilter(Tuple5{Status: "200"}, cf) {
t.Fatal("expected match")
}
if MatchesFilter(Tuple4{Status: "404"}, cf) {
if MatchesFilter(Tuple5{Status: "404"}, cf) {
t.Fatal("expected no match")
}
}
@@ -145,10 +145,10 @@ func TestMatchesFilterStatusEQ(t *testing.T) {
func TestMatchesFilterStatusNE(t *testing.T) {
v := int32(200)
cf := CompileFilter(&pb.Filter{HttpResponse: &v, StatusOp: pb.StatusOp_NE})
if MatchesFilter(Tuple4{Status: "200"}, cf) {
if MatchesFilter(Tuple5{Status: "200"}, cf) {
t.Fatal("expected no match for 200 != 200")
}
if !MatchesFilter(Tuple4{Status: "404"}, cf) {
if !MatchesFilter(Tuple5{Status: "404"}, cf) {
t.Fatal("expected match for 404 != 200")
}
}
@@ -156,13 +156,13 @@ func TestMatchesFilterStatusNE(t *testing.T) {
func TestMatchesFilterStatusGE(t *testing.T) {
v := int32(400)
cf := CompileFilter(&pb.Filter{HttpResponse: &v, StatusOp: pb.StatusOp_GE})
if !MatchesFilter(Tuple4{Status: "400"}, cf) {
if !MatchesFilter(Tuple5{Status: "400"}, cf) {
t.Fatal("expected match: 400 >= 400")
}
if !MatchesFilter(Tuple4{Status: "500"}, cf) {
if !MatchesFilter(Tuple5{Status: "500"}, cf) {
t.Fatal("expected match: 500 >= 400")
}
if MatchesFilter(Tuple4{Status: "200"}, cf) {
if MatchesFilter(Tuple5{Status: "200"}, cf) {
t.Fatal("expected no match: 200 >= 400")
}
}
@@ -170,17 +170,17 @@ func TestMatchesFilterStatusGE(t *testing.T) {
func TestMatchesFilterStatusLT(t *testing.T) {
v := int32(400)
cf := CompileFilter(&pb.Filter{HttpResponse: &v, StatusOp: pb.StatusOp_LT})
if !MatchesFilter(Tuple4{Status: "200"}, cf) {
if !MatchesFilter(Tuple5{Status: "200"}, cf) {
t.Fatal("expected match: 200 < 400")
}
if MatchesFilter(Tuple4{Status: "400"}, cf) {
if MatchesFilter(Tuple5{Status: "400"}, cf) {
t.Fatal("expected no match: 400 < 400")
}
}
func TestMatchesFilterStatusNonNumeric(t *testing.T) {
cf := compiledEQ(200)
if MatchesFilter(Tuple4{Status: "ok"}, cf) {
if MatchesFilter(Tuple5{Status: "ok"}, cf) {
t.Fatal("non-numeric status should not match")
}
}
@@ -193,13 +193,67 @@ func TestMatchesFilterCombined(t *testing.T) {
HttpResponse: &v,
StatusOp: pb.StatusOp_EQ,
})
if !MatchesFilter(Tuple4{Website: "example.com", Status: "200"}, cf) {
if !MatchesFilter(Tuple5{Website: "example.com", Status: "200"}, cf) {
t.Fatal("expected match")
}
if MatchesFilter(Tuple4{Website: "other.com", Status: "200"}, cf) {
if MatchesFilter(Tuple5{Website: "other.com", Status: "200"}, cf) {
t.Fatal("expected no match: wrong website")
}
if MatchesFilter(Tuple4{Website: "example.com", Status: "404"}, cf) {
if MatchesFilter(Tuple5{Website: "example.com", Status: "404"}, cf) {
t.Fatal("expected no match: wrong status")
}
}
// --- IsTor label encoding and filtering ---
func TestEncodeLabelTupleRoundtripWithTor(t *testing.T) {
for _, isTor := range []bool{false, true} {
orig := Tuple5{Website: "a.com", Prefix: "1.2.3.0/24", URI: "/x", Status: "200", IsTor: isTor}
got := LabelTuple(EncodeTuple(orig))
if got != orig {
t.Errorf("roundtrip mismatch: got %+v, want %+v", got, orig)
}
}
}
func TestLabelTupleBackwardCompat(t *testing.T) {
// Old 4-field label (no is_tor field) should decode with IsTor=false.
label := "a.com\x001.2.3.0/24\x00/x\x00200"
got := LabelTuple(label)
if got.IsTor {
t.Errorf("expected IsTor=false for old label, got true")
}
if got.Website != "a.com" || got.Status != "200" {
t.Errorf("unexpected tuple: %+v", got)
}
}
func TestMatchesFilterTorYes(t *testing.T) {
cf := CompileFilter(&pb.Filter{Tor: pb.TorFilter_TOR_YES})
if !MatchesFilter(Tuple5{IsTor: true}, cf) {
t.Fatal("TOR_YES should match TOR tuple")
}
if MatchesFilter(Tuple5{IsTor: false}, cf) {
t.Fatal("TOR_YES should not match non-TOR tuple")
}
}
func TestMatchesFilterTorNo(t *testing.T) {
cf := CompileFilter(&pb.Filter{Tor: pb.TorFilter_TOR_NO})
if !MatchesFilter(Tuple5{IsTor: false}, cf) {
t.Fatal("TOR_NO should match non-TOR tuple")
}
if MatchesFilter(Tuple5{IsTor: true}, cf) {
t.Fatal("TOR_NO should not match TOR tuple")
}
}
func TestMatchesFilterTorAny(t *testing.T) {
cf := CompileFilter(&pb.Filter{Tor: pb.TorFilter_TOR_ANY})
if !MatchesFilter(Tuple5{IsTor: true}, cf) {
t.Fatal("TOR_ANY should match TOR tuple")
}
if !MatchesFilter(Tuple5{IsTor: false}, cf) {
t.Fatal("TOR_ANY should match non-TOR tuple")
}
}