Add is_tor plumbing from collector->aggregator->frontend/cli

This commit is contained in:
2026-03-23 22:17:39 +01:00
parent b89caa594c
commit cd7f15afaf
20 changed files with 1815 additions and 212 deletions

View File

@@ -20,12 +20,13 @@ const (
CoarseEvery = 5 // fine ticks between coarse writes
)
// Tuple4 is the four-dimensional aggregation key.
type Tuple4 struct {
// Tuple5 is the aggregation key (website, prefix, URI, status, is_tor).
//
// It is used directly as a map key (see the map[Tuple5]int64 taken by
// TopKFromTupleMap) and is flattened into a NUL-separated string label by
// EncodeTuple; LabelTuple performs the reverse conversion.
type Tuple5 struct {
Website string
Prefix string
URI string
// Status is carried as a string; MatchesFilter hands it to matchesStatusOp
// as statusStr for comparison against the filter's numeric response code.
Status string
// IsTor is written as "1" (true) or "0" (false) in the fifth label field by
// EncodeTuple, and is what the TOR_YES / TOR_NO filter cases test.
IsTor bool
}
// Entry is a labelled count used in snapshots and query results.
@@ -73,21 +74,29 @@ func BucketsForWindow(window pb.Window, fine, coarse RingView, fineFilled, coars
}
}
// --- label encoding: "website\x00prefix\x00uri\x00status" ---
// --- label encoding: "website\x00prefix\x00uri\x00status\x00is_tor" ---
// EncodeTuple encodes a Tuple4 as a NUL-separated string suitable for use
// EncodeTuple encodes a Tuple5 as a NUL-separated string suitable for use
// as a map key in snapshots.
//
// Fields are joined in the order Website, Prefix, URI, Status, IsTor, with
// IsTor rendered as "1" when true and "0" when false. Field values are
// concatenated without escaping.
// NOTE(review): a field containing a NUL byte would make the label ambiguous
// when decoded — presumably inputs never contain NUL; confirm against callers.
func EncodeTuple(t Tuple4) string {
return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status
func EncodeTuple(t Tuple5) string {
// Render the boolean as a one-character flag so it fits the string label.
tor := "0"
if t.IsTor {
tor = "1"
}
return t.Website + "\x00" + t.Prefix + "\x00" + t.URI + "\x00" + t.Status + "\x00" + tor
}
// LabelTuple decodes a NUL-separated snapshot label back into a Tuple4.
func LabelTuple(label string) Tuple4 {
parts := splitN(label, '\x00', 4)
if len(parts) != 4 {
return Tuple4{}
// LabelTuple decodes a NUL-separated snapshot label back into a Tuple5.
//
// A label that splits into fewer than four parts yields the zero Tuple5.
// A label with exactly four parts (no is_tor field) is still accepted and
// decodes with IsTor left false. IsTor is set to true only when exactly five
// parts are present and the fifth equals "1".
// NOTE(review): this assumes splitN returns at most n parts, like
// strings.SplitN — its body is not visible here; confirm.
func LabelTuple(label string) Tuple5 {
parts := splitN(label, '\x00', 5)
// Only the first four fields are mandatory; the fifth (is_tor) is optional.
if len(parts) < 4 {
return Tuple5{}
}
return Tuple4{parts[0], parts[1], parts[2], parts[3]}
t := Tuple5{Website: parts[0], Prefix: parts[1], URI: parts[2], Status: parts[3]}
// Any fifth value other than "1" (including "0") leaves IsTor false.
if len(parts) == 5 {
t.IsTor = parts[4] == "1"
}
return t
}
func splitN(s string, sep byte, n int) []string {
@@ -150,7 +159,7 @@ func CompileFilter(f *pb.Filter) *CompiledFilter {
// MatchesFilter returns true if t satisfies all constraints in f.
// A nil filter matches everything.
func MatchesFilter(t Tuple4, f *CompiledFilter) bool {
func MatchesFilter(t Tuple5, f *CompiledFilter) bool {
if f == nil || f.Proto == nil {
return true
}
@@ -180,6 +189,16 @@ func MatchesFilter(t Tuple4, f *CompiledFilter) bool {
if p.HttpResponse != nil && !matchesStatusOp(t.Status, p.GetHttpResponse(), p.StatusOp) {
return false
}
switch p.Tor {
case pb.TorFilter_TOR_YES:
if !t.IsTor {
return false
}
case pb.TorFilter_TOR_NO:
if t.IsTor {
return false
}
}
return true
}
@@ -210,7 +229,7 @@ func matchesStatusOp(statusStr string, want int32, op pb.StatusOp) bool {
}
// DimensionLabel returns the string value of t for the given group-by dimension.
func DimensionLabel(t Tuple4, g pb.GroupBy) string {
func DimensionLabel(t Tuple5, g pb.GroupBy) string {
switch g {
case pb.GroupBy_WEBSITE:
return t.Website
@@ -299,9 +318,9 @@ func TopKFromMap(m map[string]int64, k int) []Entry {
return result
}
// TopKFromTupleMap encodes a Tuple4 map and returns the top-k as a Snapshot.
// TopKFromTupleMap encodes a Tuple5 map and returns the top-k as a Snapshot.
// Used by the collector to snapshot its live map.
func TopKFromTupleMap(m map[Tuple4]int64, k int, ts time.Time) Snapshot {
func TopKFromTupleMap(m map[Tuple5]int64, k int, ts time.Time) Snapshot {
flat := make(map[string]int64, len(m))
for t, c := range m {
flat[EncodeTuple(t)] = c