From 456452afc4fc041cbd71004bb6f1196d0d46c5c3 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Wed, 25 Mar 2026 07:32:39 +0100 Subject: [PATCH] Allow !~= for website/uri --- cmd/cli/flags.go | 32 +++++++++++------ cmd/frontend/filter.go | 22 +++++++++--- cmd/frontend/handler.go | 65 +++++++++++++++++++++++++---------- internal/store/store.go | 36 +++++++++++++++++-- proto/logtail.proto | 6 ++-- proto/logtailpb/logtail.pb.go | 54 ++++++++++++++++++++--------- 6 files changed, 160 insertions(+), 55 deletions(-) diff --git a/cmd/cli/flags.go b/cmd/cli/flags.go index 41ea0fd..9518d84 100644 --- a/cmd/cli/flags.go +++ b/cmd/cli/flags.go @@ -12,16 +12,18 @@ import ( // sharedFlags holds the flags common to every subcommand. type sharedFlags struct { - targets []string - jsonOut bool - website string - prefix string - uri string - status string // expression: "200", "!=200", ">=400", etc. - websiteRe string // RE2 regex against website - uriRe string // RE2 regex against request URI - isTor string // "", "1" / "!=0" (TOR only), "0" / "!=1" (non-TOR only) - asn string // expression: "12345", "!=65000", ">=1000", etc. + targets []string + jsonOut bool + website string + prefix string + uri string + status string // expression: "200", "!=200", ">=400", etc. + websiteRe string // RE2 regex against website + uriRe string // RE2 regex against request URI + websiteReNeg string // RE2 regex exclusion against website + uriReNeg string // RE2 regex exclusion against request URI + isTor string // "", "1" / "!=0" (TOR only), "0" / "!=1" (non-TOR only) + asn string // expression: "12345", "!=65000", ">=1000", etc. } // bindShared registers the shared flags on fs and returns a pointer to the @@ -36,6 +38,8 @@ func bindShared(fs *flag.FlagSet) (*sharedFlags, *string) { fs.StringVar(&sf.status, "status", "", "filter: HTTP status expression (200, !=200, >=400, <500, …)") fs.StringVar(&sf.websiteRe, "website-re", "", "filter: RE2 regex against website") fs.StringVar(&sf.uriRe, "uri-re", "", "filter: RE2 regex against request URI") + fs.StringVar(&sf.websiteReNeg, "website-re-neg", "", "filter: RE2 regex exclusion against website") + fs.StringVar(&sf.uriReNeg, "uri-re-neg", "", "filter: RE2 regex exclusion against request URI") fs.StringVar(&sf.isTor, "is-tor", "", "filter: TOR traffic (1 or !=0 = TOR only; 0 or !=1 = non-TOR only)") fs.StringVar(&sf.asn, "asn", "", "filter: ASN expression (12345, !=65000, >=1000, <64512, …)") return sf, target @@ -60,7 +64,7 @@ func parseTargets(s string) []string { } func buildFilter(sf *sharedFlags) *pb.Filter { - if sf.website == "" && sf.prefix == "" && sf.uri == "" && sf.status == "" && sf.websiteRe == "" && sf.uriRe == "" && sf.isTor == "" && sf.asn == "" { + if sf.website == "" && sf.prefix == "" && sf.uri == "" && sf.status == "" && sf.websiteRe == "" && sf.uriRe == "" && sf.websiteReNeg == "" && sf.uriReNeg == "" && sf.isTor == "" && sf.asn == "" { return nil } f := &pb.Filter{} @@ -88,6 +92,12 @@ func buildFilter(sf *sharedFlags) *pb.Filter { if sf.uriRe != "" { f.UriRegex = &sf.uriRe } + if sf.websiteReNeg != "" { + f.WebsiteRegexExclude = &sf.websiteReNeg + } + if sf.uriReNeg != "" { + f.UriRegexExclude = &sf.uriReNeg + } switch sf.isTor { case "1", "!=0": f.Tor = pb.TorFilter_TOR_YES diff --git a/cmd/frontend/filter.go b/cmd/frontend/filter.go index 2aadf89..ff797fe 100644 --- a/cmd/frontend/filter.go +++ b/cmd/frontend/filter.go @@ -19,9 +19,11 @@ var andRe = regexp.MustCompile(`(?i)\s+and\s+`) // // status=200 status!=200 status>=400 status>400 status<=500 status<500 // website=example.com — exact match -// website~=gouda.* — RE2 regex +// website~=gouda.* — RE2 regex match +// website!~=gouda.* — RE2 regex exclusion // uri=/api/v1/ — exact match -// uri~=^/api/.* — RE2 regex +// uri~=^/api/.* — RE2 regex match +// uri!~=^/ct/.* — RE2 regex exclusion // prefix=1.2.3.0/24 — exact match // // Values may be enclosed in double or single quotes. @@ -57,6 +59,8 @@ func applyTerm(term string, fs *filterState) error { var op, value string switch { + case strings.HasPrefix(rest, "!~="): + op, value = "!~=", rest[3:] case strings.HasPrefix(rest, "~="): op, value = "~=", rest[2:] case strings.HasPrefix(rest, "!="): @@ -96,8 +100,10 @@ func applyTerm(term string, fs *filterState) error { fs.Website = value case "~=": fs.WebsiteRe = value + case "!~=": + fs.WebsiteReNeg = value default: - return fmt.Errorf("website only supports = and ~=, not %q", op) + return fmt.Errorf("website only supports =, ~=, and !~=, not %q", op) } case "uri": switch op { @@ -105,8 +111,10 @@ func applyTerm(term string, fs *filterState) error { fs.URI = value case "~=": fs.URIRe = value + case "!~=": + fs.URIReNeg = value default: - return fmt.Errorf("uri only supports = and ~=, not %q", op) + return fmt.Errorf("uri only supports =, ~=, and !~=, not %q", op) } case "prefix": if op != "=" { @@ -164,6 +172,9 @@ func FilterExprString(f filterState) string { if f.WebsiteRe != "" { parts = append(parts, "website~="+quoteMaybe(f.WebsiteRe)) } + if f.WebsiteReNeg != "" { + parts = append(parts, "website!~="+quoteMaybe(f.WebsiteReNeg)) + } if f.Prefix != "" { parts = append(parts, "prefix="+quoteMaybe(f.Prefix)) } @@ -173,6 +184,9 @@ func FilterExprString(f filterState) string { if f.URIRe != "" { parts = append(parts, "uri~="+quoteMaybe(f.URIRe)) } + if f.URIReNeg != "" { + parts = append(parts, "uri!~="+quoteMaybe(f.URIReNeg)) + } if f.Status != "" { parts = append(parts, statusTermStr(f.Status)) } diff --git a/cmd/frontend/handler.go b/cmd/frontend/handler.go index 8117186..79d62d3 100644 --- a/cmd/frontend/handler.go +++ b/cmd/frontend/handler.go @@ -48,14 +48,16 @@ type TableRow struct { // filterState holds the filter fields parsed from URL params. type filterState struct { - Website string - Prefix string - URI string - Status string // expression: "200", "!=200", ">=400", etc. - WebsiteRe string // RE2 regex against website - URIRe string // RE2 regex against request URI - IsTor string // "", "1" (TOR only), "0" (non-TOR only) - ASN string // expression: "12345", "!=65000", ">=1000", etc. + Website string + Prefix string + URI string + Status string // expression: "200", "!=200", ">=400", etc. + WebsiteRe string // RE2 regex against website + URIRe string // RE2 regex against request URI + WebsiteReNeg string // RE2 regex exclusion against website + URIReNeg string // RE2 regex exclusion against request URI + IsTor string // "", "1" (TOR only), "0" (non-TOR only) + ASN string // expression: "12345", "!=65000", ">=1000", etc. } // QueryParams holds all parsed URL parameters for one page request. @@ -156,20 +158,22 @@ func (h *Handler) parseParams(r *http.Request) QueryParams { GroupByS: grpS, N: n, Filter: filterState{ - Website: q.Get("f_website"), - Prefix: q.Get("f_prefix"), - URI: q.Get("f_uri"), - Status: q.Get("f_status"), - WebsiteRe: q.Get("f_website_re"), - URIRe: q.Get("f_uri_re"), - IsTor: q.Get("f_is_tor"), - ASN: q.Get("f_asn"), + Website: q.Get("f_website"), + Prefix: q.Get("f_prefix"), + URI: q.Get("f_uri"), + Status: q.Get("f_status"), + WebsiteRe: q.Get("f_website_re"), + URIRe: q.Get("f_uri_re"), + WebsiteReNeg: q.Get("f_website_re_neg"), + URIReNeg: q.Get("f_uri_re_neg"), + IsTor: q.Get("f_is_tor"), + ASN: q.Get("f_asn"), }, } } func buildFilter(f filterState) *pb.Filter { - if f.Website == "" && f.Prefix == "" && f.URI == "" && f.Status == "" && f.WebsiteRe == "" && f.URIRe == "" && f.IsTor == "" && f.ASN == "" { + if f.Website == "" && f.Prefix == "" && f.URI == "" && f.Status == "" && f.WebsiteRe == "" && f.URIRe == "" && f.WebsiteReNeg == "" && f.URIReNeg == "" && f.IsTor == "" && f.ASN == "" { return nil } out := &pb.Filter{} @@ -194,6 +198,12 @@ func buildFilter(f filterState) *pb.Filter { if f.URIRe != "" { out.UriRegex = &f.URIRe } + if f.WebsiteReNeg != "" { + out.WebsiteRegexExclude = &f.WebsiteReNeg + } + if f.URIReNeg != "" { + out.UriRegexExclude = &f.URIReNeg + } switch f.IsTor { case "1": out.Tor = pb.TorFilter_TOR_YES @@ -234,6 +244,12 @@ func (p QueryParams) toValues() url.Values { if p.Filter.URIRe != "" { v.Set("f_uri_re", p.Filter.URIRe) } + if p.Filter.WebsiteReNeg != "" { + v.Set("f_website_re_neg", p.Filter.WebsiteReNeg) + } + if p.Filter.URIReNeg != "" { + v.Set("f_uri_re_neg", p.Filter.URIReNeg) + } if p.Filter.IsTor != "" { v.Set("f_is_tor", p.Filter.IsTor) } @@ -261,7 +277,8 @@ func (p QueryParams) buildURL(overrides map[string]string) string { func (p QueryParams) clearFilterURL() string { return p.buildURL(map[string]string{ "f_website": "", "f_prefix": "", "f_uri": "", "f_status": "", - "f_website_re": "", "f_uri_re": "", "f_is_tor": "", "f_asn": "", + "f_website_re": "", "f_uri_re": "", "f_website_re_neg": "", "f_uri_re_neg": "", + "f_is_tor": "", "f_asn": "", }) } @@ -344,6 +361,18 @@ func buildCrumbs(p QueryParams) []Crumb { RemoveURL: p.buildURL(map[string]string{"f_uri_re": ""}), }) } + if p.Filter.WebsiteReNeg != "" { + crumbs = append(crumbs, Crumb{ + Text: "website!~=" + p.Filter.WebsiteReNeg, + RemoveURL: p.buildURL(map[string]string{"f_website_re_neg": ""}), + }) + } + if p.Filter.URIReNeg != "" { + crumbs = append(crumbs, Crumb{ + Text: "uri!~=" + p.Filter.URIReNeg, + RemoveURL: p.buildURL(map[string]string{"f_uri_re_neg": ""}), + }) + } switch p.Filter.IsTor { case "1": crumbs = append(crumbs, Crumb{ diff --git a/internal/store/store.go b/internal/store/store.go index 74b222f..8966fc4 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -133,9 +133,11 @@ func indexOf(s string, b byte) int { // CompiledFilter wraps a pb.Filter with pre-compiled regular expressions. // Use CompileFilter to construct one before a query loop. type CompiledFilter struct { - Proto *pb.Filter - WebsiteRe *regexp.Regexp // nil if no website_regex or compilation failed - URIRe *regexp.Regexp // nil if no uri_regex or compilation failed + Proto *pb.Filter + WebsiteRe *regexp.Regexp // nil if no website_regex or compilation failed + URIRe *regexp.Regexp // nil if no uri_regex or compilation failed + WebsiteReExcl *regexp.Regexp // nil if no website_regex_exclude or compilation failed + URIReExcl *regexp.Regexp // nil if no uri_regex_exclude or compilation failed } // CompileFilter compiles the regex fields in f once. Invalid regexes are @@ -161,6 +163,22 @@ func CompileFilter(f *pb.Filter) *CompiledFilter { cf.URIRe = re } } + if f.WebsiteRegexExclude != nil { + re, err := regexp.Compile(f.GetWebsiteRegexExclude()) + if err != nil { + log.Printf("store: invalid website_regex_exclude %q: %v", f.GetWebsiteRegexExclude(), err) + } else { + cf.WebsiteReExcl = re + } + } + if f.UriRegexExclude != nil { + re, err := regexp.Compile(f.GetUriRegexExclude()) + if err != nil { + log.Printf("store: invalid uri_regex_exclude %q: %v", f.GetUriRegexExclude(), err) + } else { + cf.URIReExcl = re + } + } return cf } @@ -193,6 +211,18 @@ func MatchesFilter(t Tuple6, f *CompiledFilter) bool { if p.UriRegex != nil && f.URIRe == nil { return false } + if f.WebsiteReExcl != nil && f.WebsiteReExcl.MatchString(t.Website) { + return false + } + if p.WebsiteRegexExclude != nil && f.WebsiteReExcl == nil { + return false + } + if f.URIReExcl != nil && f.URIReExcl.MatchString(t.URI) { + return false + } + if p.UriRegexExclude != nil && f.URIReExcl == nil { + return false + } if p.HttpResponse != nil && !matchesStatusOp(t.Status, p.GetHttpResponse(), p.StatusOp) { return false } diff --git a/proto/logtail.proto b/proto/logtail.proto index 7338efd..a44b022 100644 --- a/proto/logtail.proto +++ b/proto/logtail.proto @@ -31,8 +31,10 @@ message Filter { optional string http_request_uri = 3; optional int32 http_response = 4; StatusOp status_op = 5; // operator for http_response; ignored when unset - optional string website_regex = 6; // RE2 regex matched against website - optional string uri_regex = 7; // RE2 regex matched against http_request_uri + optional string website_regex = 6; // RE2 regex matched against website + optional string uri_regex = 7; // RE2 regex matched against http_request_uri + optional string website_regex_exclude = 11; // RE2 regex; entries matching this are excluded + optional string uri_regex_exclude = 12; // RE2 regex; entries matching this are excluded TorFilter tor = 8; // restrict to TOR / non-TOR clients optional int32 asn_number = 9; // filter by client ASN StatusOp asn_op = 10; // operator for asn_number; ignored when unset diff --git a/proto/logtailpb/logtail.pb.go b/proto/logtailpb/logtail.pb.go index d39ce4d..4092b34 100644 --- a/proto/logtailpb/logtail.pb.go +++ b/proto/logtailpb/logtail.pb.go @@ -248,19 +248,21 @@ func (Window) EnumDescriptor() ([]byte, []int) { // Filter restricts results to entries matching all specified fields. // Unset fields match everything. Exact-match and regex fields are ANDed. type Filter struct { - state protoimpl.MessageState `protogen:"open.v1"` - Website *string `protobuf:"bytes,1,opt,name=website,proto3,oneof" json:"website,omitempty"` - ClientPrefix *string `protobuf:"bytes,2,opt,name=client_prefix,json=clientPrefix,proto3,oneof" json:"client_prefix,omitempty"` - HttpRequestUri *string `protobuf:"bytes,3,opt,name=http_request_uri,json=httpRequestUri,proto3,oneof" json:"http_request_uri,omitempty"` - HttpResponse *int32 `protobuf:"varint,4,opt,name=http_response,json=httpResponse,proto3,oneof" json:"http_response,omitempty"` - StatusOp StatusOp `protobuf:"varint,5,opt,name=status_op,json=statusOp,proto3,enum=logtail.StatusOp" json:"status_op,omitempty"` // operator for http_response; ignored when unset - WebsiteRegex *string `protobuf:"bytes,6,opt,name=website_regex,json=websiteRegex,proto3,oneof" json:"website_regex,omitempty"` // RE2 regex matched against website - UriRegex *string `protobuf:"bytes,7,opt,name=uri_regex,json=uriRegex,proto3,oneof" json:"uri_regex,omitempty"` // RE2 regex matched against http_request_uri - Tor TorFilter `protobuf:"varint,8,opt,name=tor,proto3,enum=logtail.TorFilter" json:"tor,omitempty"` // restrict to TOR / non-TOR clients - AsnNumber *int32 `protobuf:"varint,9,opt,name=asn_number,json=asnNumber,proto3,oneof" json:"asn_number,omitempty"` // filter by client ASN - AsnOp StatusOp `protobuf:"varint,10,opt,name=asn_op,json=asnOp,proto3,enum=logtail.StatusOp" json:"asn_op,omitempty"` // operator for asn_number; ignored when unset - unknownFields protoimpl.UnknownFields - sizeCache protoimpl.SizeCache + state protoimpl.MessageState `protogen:"open.v1"` + Website *string `protobuf:"bytes,1,opt,name=website,proto3,oneof" json:"website,omitempty"` + ClientPrefix *string `protobuf:"bytes,2,opt,name=client_prefix,json=clientPrefix,proto3,oneof" json:"client_prefix,omitempty"` + HttpRequestUri *string `protobuf:"bytes,3,opt,name=http_request_uri,json=httpRequestUri,proto3,oneof" json:"http_request_uri,omitempty"` + HttpResponse *int32 `protobuf:"varint,4,opt,name=http_response,json=httpResponse,proto3,oneof" json:"http_response,omitempty"` + StatusOp StatusOp `protobuf:"varint,5,opt,name=status_op,json=statusOp,proto3,enum=logtail.StatusOp" json:"status_op,omitempty"` // operator for http_response; ignored when unset + WebsiteRegex *string `protobuf:"bytes,6,opt,name=website_regex,json=websiteRegex,proto3,oneof" json:"website_regex,omitempty"` // RE2 regex matched against website + UriRegex *string `protobuf:"bytes,7,opt,name=uri_regex,json=uriRegex,proto3,oneof" json:"uri_regex,omitempty"` // RE2 regex matched against http_request_uri + WebsiteRegexExclude *string `protobuf:"bytes,11,opt,name=website_regex_exclude,json=websiteRegexExclude,proto3,oneof" json:"website_regex_exclude,omitempty"` // RE2 regex; entries matching this are excluded + UriRegexExclude *string `protobuf:"bytes,12,opt,name=uri_regex_exclude,json=uriRegexExclude,proto3,oneof" json:"uri_regex_exclude,omitempty"` // RE2 regex; entries matching this are excluded + Tor TorFilter `protobuf:"varint,8,opt,name=tor,proto3,enum=logtail.TorFilter" json:"tor,omitempty"` // restrict to TOR / non-TOR clients + AsnNumber *int32 `protobuf:"varint,9,opt,name=asn_number,json=asnNumber,proto3,oneof" json:"asn_number,omitempty"` // filter by client ASN + AsnOp StatusOp `protobuf:"varint,10,opt,name=asn_op,json=asnOp,proto3,enum=logtail.StatusOp" json:"asn_op,omitempty"` // operator for asn_number; ignored when unset + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache } func (x *Filter) Reset() { @@ -342,6 +344,20 @@ func (x *Filter) GetUriRegex() string { return "" } +func (x *Filter) GetWebsiteRegexExclude() string { + if x != nil && x.WebsiteRegexExclude != nil { + return *x.WebsiteRegexExclude + } + return "" +} + +func (x *Filter) GetUriRegexExclude() string { + if x != nil && x.UriRegexExclude != nil { + return *x.UriRegexExclude + } + return "" +} + func (x *Filter) GetTor() TorFilter { if x != nil { return x.Tor @@ -967,7 +983,7 @@ var File_proto_logtail_proto protoreflect.FileDescriptor const file_proto_logtail_proto_rawDesc = "" + "\n" + - "\x13proto/logtail.proto\x12\alogtail\"\x8e\x04\n" + + "\x13proto/logtail.proto\x12\alogtail\"\xa8\x05\n" + "\x06Filter\x12\x1d\n" + "\awebsite\x18\x01 \x01(\tH\x00R\awebsite\x88\x01\x01\x12(\n" + "\rclient_prefix\x18\x02 \x01(\tH\x01R\fclientPrefix\x88\x01\x01\x12-\n" + @@ -975,10 +991,12 @@ const file_proto_logtail_proto_rawDesc = "" + "\rhttp_response\x18\x04 \x01(\x05H\x03R\fhttpResponse\x88\x01\x01\x12.\n" + "\tstatus_op\x18\x05 \x01(\x0e2\x11.logtail.StatusOpR\bstatusOp\x12(\n" + "\rwebsite_regex\x18\x06 \x01(\tH\x04R\fwebsiteRegex\x88\x01\x01\x12 \n" + - "\turi_regex\x18\a \x01(\tH\x05R\buriRegex\x88\x01\x01\x12$\n" + + "\turi_regex\x18\a \x01(\tH\x05R\buriRegex\x88\x01\x01\x127\n" + + "\x15website_regex_exclude\x18\v \x01(\tH\x06R\x13websiteRegexExclude\x88\x01\x01\x12/\n" + + "\x11uri_regex_exclude\x18\f \x01(\tH\aR\x0furiRegexExclude\x88\x01\x01\x12$\n" + "\x03tor\x18\b \x01(\x0e2\x12.logtail.TorFilterR\x03tor\x12\"\n" + "\n" + - "asn_number\x18\t \x01(\x05H\x06R\tasnNumber\x88\x01\x01\x12(\n" + + "asn_number\x18\t \x01(\x05H\bR\tasnNumber\x88\x01\x01\x12(\n" + "\x06asn_op\x18\n" + " \x01(\x0e2\x11.logtail.StatusOpR\x05asnOpB\n" + "\n" + @@ -988,7 +1006,9 @@ const file_proto_logtail_proto_rawDesc = "" + "\x0e_http_responseB\x10\n" + "\x0e_website_regexB\f\n" + "\n" + - "_uri_regexB\r\n" + + "_uri_regexB\x18\n" + + "\x16_website_regex_excludeB\x14\n" + + "\x12_uri_regex_excludeB\r\n" + "\v_asn_number\"\x9a\x01\n" + "\vTopNRequest\x12'\n" + "\x06filter\x18\x01 \x01(\v2\x0f.logtail.FilterR\x06filter\x12+\n" +