From 143aad9063c30ae9a0e58f7dc323a7b3ab2fbd40 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Fri, 17 Apr 2026 10:35:08 +0200 Subject: [PATCH] PRE-RELEASE 0.9.1: Makefile, Debian packaging, versioned UDP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Build and release tooling: - Makefile with help as default; targets: build/build-amd64/build-arm64, test, lint, proto, pkg-deb, docker, docker-push, clean, plus install-deps (+ three sub-targets for apt / Go toolchain / Go tools). - internal/version package; -ldflags -X injects Version/Commit/Date into every binary. -version flag on all four binaries (nginx-logtail version for the CLI). - Dockerfile takes VERSION/COMMIT/DATE build-args and forwards them. - .deb output lands in build/; .gitignore ignores /build/. Debian package: - debian/build-deb.sh packages all four static binaries into a single nginx-logtail_<version>_<arch>.deb using dpkg-deb. - Binary layout: /usr/sbin/nginx-logtail-{collector,aggregator,frontend} and /usr/bin/nginx-logtail. - nginx-logtail(8) manpage. - Three systemd units (collector, aggregator, frontend) shipped under /lib/systemd/system/. Installed but never enabled or started — the operator opts in per host. - Collector runs as _logtail:www-data (log access); aggregator and frontend as _logtail:_logtail. postinst creates the system user/group idempotently. - Single shared env file /etc/default/nginx-logtail rendered from a template at first install with %HOSTNAME% substituted. Sensible defaults for every COLLECTOR_*, AGGREGATOR_*, FRONTEND_* variable; plus COLLECTOR_ARGS / AGGREGATOR_ARGS / FRONTEND_ARGS escape hatches appended to ExecStart. Not a dpkg conffile: operator edits survive upgrades and dpkg --purge removes it. Versioned UDP wire format: - ParseUDPLine dispatches on a leading "v<N>\t" tag; v1 routes to the existing 12-field parser. Unknown/missing versions fail closed so future v2 parsers can land before emitters are upgraded. 
- Tests updated; design.md FR-2.2 rewritten to make the version tag normative. Docs: - README.md gains a Quick Start (Debian / Docker Compose / from source). - user-guide.md rewritten around Installation and Configuration: full env-var table, UDP-only default explained, precise file/UDP log_format layouts, note that operators can emit "0" for unknown \$is_tor / \$asn. - Drilldown cycle, frontend filter table, and CLI --group-by list all include source_tag. UDP counters documented in the Prometheus section. Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 3 + Dockerfile | 15 +- Makefile | 201 +++++++++++++++ README.md | 66 ++++- cmd/aggregator/main.go | 8 + cmd/cli/main.go | 4 + cmd/collector/main.go | 8 + cmd/collector/parser.go | 28 +- cmd/collector/parser_test.go | 35 ++- cmd/collector/udp_test.go | 2 +- cmd/frontend/main.go | 9 + debian/build-deb.sh | 91 +++++++ debian/default.template | 97 +++++++ debian/nginx-logtail-aggregator.service | 23 ++ debian/nginx-logtail-collector.service | 26 ++ debian/nginx-logtail-frontend.service | 22 ++ debian/nginx-logtail.8 | 240 +++++++++++++++++ debian/postinst | 50 ++++ debian/postrm | 23 ++ debian/prerm | 17 ++ docs/design.md | 16 +- docs/user-guide.md | 328 ++++++++++++++++++------ internal/version/version.go | 16 ++ 23 files changed, 1214 insertions(+), 114 deletions(-) create mode 100644 Makefile create mode 100755 debian/build-deb.sh create mode 100644 debian/default.template create mode 100644 debian/nginx-logtail-aggregator.service create mode 100644 debian/nginx-logtail-collector.service create mode 100644 debian/nginx-logtail-frontend.service create mode 100644 debian/nginx-logtail.8 create mode 100755 debian/postinst create mode 100755 debian/postrm create mode 100755 debian/prerm create mode 100644 internal/version/version.go diff --git a/.gitignore b/.gitignore index 6b672b3..39a00c2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ /frontend /cli +# Build output — per-arch binaries and .deb 
packages, all under build/. +/build/ + # Editor .idea/ .vscode/ diff --git a/Dockerfile b/Dockerfile index 63ae17e..845ca44 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,14 +1,21 @@ FROM golang:1.24-alpine AS builder +ARG VERSION=dev +ARG COMMIT=unknown +ARG DATE=unknown + +ENV CGO_ENABLED=0 \ + LDFLAGS="-s -w -X git.ipng.ch/ipng/nginx-logtail/internal/version.Version=${VERSION} -X git.ipng.ch/ipng/nginx-logtail/internal/version.Commit=${COMMIT} -X git.ipng.ch/ipng/nginx-logtail/internal/version.Date=${DATE}" + WORKDIR /src COPY go.mod go.sum ./ RUN go mod download COPY . . -RUN CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/collector ./cmd/collector && \ - CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/aggregator ./cmd/aggregator && \ - CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/frontend ./cmd/frontend && \ - CGO_ENABLED=0 go build -trimpath -ldflags="-s -w" -o /out/cli ./cmd/cli +RUN go build -trimpath -ldflags="${LDFLAGS}" -o /out/collector ./cmd/collector && \ + go build -trimpath -ldflags="${LDFLAGS}" -o /out/aggregator ./cmd/aggregator && \ + go build -trimpath -ldflags="${LDFLAGS}" -o /out/frontend ./cmd/frontend && \ + go build -trimpath -ldflags="${LDFLAGS}" -o /out/cli ./cmd/cli FROM scratch COPY --from=builder /out/ /usr/local/bin/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f6dca90 --- /dev/null +++ b/Makefile @@ -0,0 +1,201 @@ +BINARIES := collector aggregator frontend cli +MODULE := git.ipng.ch/ipng/nginx-logtail +PROTO_DIR := proto +PROTO_FILE := $(PROTO_DIR)/logtail.proto +GEN_FILES := proto/logtailpb/logtail.pb.go proto/logtailpb/logtail_grpc.pb.go + +NATIVE_ARCH := $(shell go env GOARCH) +VERSION := 0.9.1 +COMMIT_HASH := $(shell git rev-parse --short HEAD 2>/dev/null || echo unknown) +DATE := $(shell date -u +%Y-%m-%dT%H:%M:%SZ) +LDFLAGS := -s -w \ + -X '$(MODULE)/internal/version.Version=$(VERSION)' \ + -X '$(MODULE)/internal/version.Commit=$(COMMIT_HASH)' \ + -X 
'$(MODULE)/internal/version.Date=$(DATE)' + +# CGO_ENABLED=0 produces fully static binaries: no libc dependency, so the +# .deb runs on any Linux-amd64/arm64 host regardless of glibc version. +export CGO_ENABLED := 0 + +IMAGE := git.ipng.ch/ipng/nginx-logtail + +# GO_VERSION is the floor install-deps-go enforces. go.mod currently requires +# 1.24; override on the command line to pull a specific patch release: +# make install-deps GO_VERSION=1.25.0 +GO_VERSION ?= 1.24.6 + +# GOLANGCI_LINT_VERSION is the minimum golangci-lint version install-deps-go-tools +# accepts. Older releases can't parse recent Go syntax. +GOLANGCI_LINT_VERSION ?= 1.64.0 + +.PHONY: help all build build-amd64 build-arm64 test lint proto pkg-deb docker docker-push clean \ + install-deps install-deps-apt install-deps-go install-deps-go-tools + +# help is the default target. Keep the list aligned with the .PHONY block above. +help: + @echo "nginx-logtail — make targets (version $(VERSION))" + @echo "" + @echo " build build all four binaries for the native arch into build/$(NATIVE_ARCH)/" + @echo " build-amd64 build all four binaries for linux/amd64 into build/amd64/" + @echo " build-arm64 build all four binaries for linux/arm64 into build/arm64/" + @echo " test run 'go test ./...'" + @echo " lint run 'golangci-lint run ./...'" + @echo " proto regenerate proto/logtailpb/*.pb.go from proto/logtail.proto" + @echo " pkg-deb build amd64 + arm64 .deb packages (requires dpkg-deb)" + @echo " docker buildx --load two tags ($(IMAGE):v$(VERSION), $(IMAGE):latest) for the native arch" + @echo " docker-push buildx multi-arch push (amd64+arm64) to $(IMAGE)" + @echo " clean remove build/ and generated proto files" + @echo "" + @echo " install-deps apt + Go toolchain + Go tools (runs the three sub-targets)" + @echo " install-deps-apt apt-install protobuf-compiler, git, make, dpkg-dev, curl, tar" + @echo " install-deps-go download Go $(GO_VERSION)+ to /usr/local/go if missing" + @echo " install-deps-go-tools go 
install protoc-gen-go, protoc-gen-go-grpc, golangci-lint" + @echo "" + @echo "Overridable variables:" + @echo " VERSION=$(VERSION) GO_VERSION=$(GO_VERSION) GOLANGCI_LINT_VERSION=$(GOLANGCI_LINT_VERSION)" + +all: build + +build: $(GEN_FILES) + mkdir -p build/$(NATIVE_ARCH) + $(foreach b,$(BINARIES),go build -trimpath -ldflags "$(LDFLAGS)" -o build/$(NATIVE_ARCH)/$(b) ./cmd/$(b) &&) true + +build-amd64: $(GEN_FILES) + mkdir -p build/amd64 + $(foreach b,$(BINARIES),GOOS=linux GOARCH=amd64 go build -trimpath -ldflags "$(LDFLAGS)" -o build/amd64/$(b) ./cmd/$(b) &&) true + +build-arm64: $(GEN_FILES) + mkdir -p build/arm64 + $(foreach b,$(BINARIES),GOOS=linux GOARCH=arm64 go build -trimpath -ldflags "$(LDFLAGS)" -o build/arm64/$(b) ./cmd/$(b) &&) true + +test: $(GEN_FILES) + go test ./... + +lint: + golangci-lint run ./... + +proto: $(GEN_FILES) + +# protoc's go_package option places output at the go_package path (not source-relative). +# We invoke protoc from the repo root so the resulting proto/logtailpb/*.pb.go tree +# lands alongside the .proto file. +$(GEN_FILES): $(PROTO_FILE) + protoc \ + --go_out=. --go_opt=module=$(MODULE) \ + --go-grpc_out=. --go-grpc_opt=module=$(MODULE) \ + $(PROTO_FILE) + +# pkg-deb builds one package per arch; both contain all four static binaries: the daemons as +# /usr/sbin/nginx-logtail-* and the CLI as /usr/bin/nginx-logtail. Each build-<arch> target is a hard +# prerequisite — the packaging script refuses to run without build/<arch>/ populated. +pkg-deb: build-amd64 build-arm64 + debian/build-deb.sh amd64 $(VERSION) + debian/build-deb.sh arm64 $(VERSION) + +# docker — build one image for the native host arch and --load it into the local +# daemon. Tagged both :v$(VERSION) and :latest in a single build, so bumping +# VERSION is the only change needed to cut a release. +docker: + docker buildx build --load \ + --build-arg VERSION=$(VERSION) \ + --build-arg COMMIT=$(COMMIT_HASH) \ + --build-arg DATE=$(DATE) \ + -t $(IMAGE):v$(VERSION) -t $(IMAGE):latest . 
+ +# docker-push — build a multi-arch (amd64+arm64) manifest and push it. Buildx +# won't --load a multi-platform result, so this is the only path that produces +# the combined manifest. Assumes the caller is already logged in to the registry. +docker-push: + docker buildx build --platform linux/amd64,linux/arm64 --push \ + --build-arg VERSION=$(VERSION) \ + --build-arg COMMIT=$(COMMIT_HASH) \ + --build-arg DATE=$(DATE) \ + -t $(IMAGE):v$(VERSION) -t $(IMAGE):latest . + +clean: + rm -rf build/ + rm -f $(GEN_FILES) + +# install-deps is an opt-in "set up a fresh developer box" target. Tested on +# Debian Trixie; the apt half should also work on Bookworm and recent Ubuntu LTS. +install-deps: install-deps-apt install-deps-go install-deps-go-tools + @echo "" + @echo "==> All build dependencies installed." + @echo " Make sure these are on PATH:" + @echo " /usr/local/go/bin (Go toolchain)" + @echo " \$$(go env GOPATH)/bin (protoc-gen-go, golangci-lint, ...)" + +install-deps-apt: + @set -eu; \ + if [ "$$(id -u)" = 0 ]; then SUDO=""; else SUDO="sudo"; fi; \ + echo "==> Installing apt packages (protoc, git, make, dpkg-dev, curl, tar)"; \ + $$SUDO apt-get update; \ + $$SUDO apt-get install -y --no-install-recommends \ + protobuf-compiler git make dpkg-dev ca-certificates curl tar + +# install-deps-go short-circuits when go env GOVERSION already reports a version +# >= GO_VERSION. Otherwise it downloads the official upstream tarball and extracts +# it to /usr/local/go. 
+install-deps-go: + @set -eu; \ + if [ "$$(id -u)" = 0 ]; then SUDO=""; else SUDO="sudo"; fi; \ + echo "==> Checking Go toolchain (required: $(GO_VERSION)+)"; \ + if command -v go >/dev/null 2>&1; then \ + CURRENT=$$(go env GOVERSION 2>/dev/null | sed 's/^go//'); \ + OLDEST=$$(printf '%s\n%s\n' "$(GO_VERSION)" "$$CURRENT" | sort -V | head -n1); \ + if [ "$$OLDEST" = "$(GO_VERSION)" ] && [ -n "$$CURRENT" ]; then \ + echo " go$$CURRENT already installed (>= $(GO_VERSION)), skipping."; \ + exit 0; \ + fi; \ + echo " go$$CURRENT is older than $(GO_VERSION), upgrading."; \ + else \ + echo " no Go toolchain on PATH, installing."; \ + fi; \ + DEB_ARCH=$$(dpkg --print-architecture); \ + case "$$DEB_ARCH" in \ + amd64) GOARCH=amd64 ;; \ + arm64) GOARCH=arm64 ;; \ + armhf) GOARCH=armv6l ;; \ + *) echo " unsupported architecture: $$DEB_ARCH" >&2; exit 1 ;; \ + esac; \ + TARBALL="go$(GO_VERSION).linux-$$GOARCH.tar.gz"; \ + URL="https://go.dev/dl/$$TARBALL"; \ + echo " downloading $$URL"; \ + curl -fsSL -o "/tmp/$$TARBALL" "$$URL"; \ + echo " installing to /usr/local/go"; \ + $$SUDO rm -rf /usr/local/go; \ + $$SUDO tar -C /usr/local -xzf "/tmp/$$TARBALL"; \ + rm -f "/tmp/$$TARBALL"; \ + echo " installed $$(/usr/local/go/bin/go version)" + +install-deps-go-tools: + @set -eu; \ + if ! 
command -v go >/dev/null 2>&1; then \ + export PATH="/usr/local/go/bin:$$PATH"; \ + fi; \ + echo "==> Installing Go tools via 'go install'"; \ + echo " google.golang.org/protobuf/cmd/protoc-gen-go"; \ + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest; \ + echo " google.golang.org/grpc/cmd/protoc-gen-go-grpc"; \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest; \ + echo " github.com/golangci/golangci-lint/v2/cmd/golangci-lint"; \ + go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest; \ + GOBIN="$$(go env GOBIN)"; \ + if [ -z "$$GOBIN" ]; then GOBIN="$$(go env GOPATH)/bin"; fi; \ + echo "==> Asserting golangci-lint version >= $(GOLANGCI_LINT_VERSION)"; \ + if ! "$$GOBIN/golangci-lint" version >/dev/null 2>&1; then \ + echo " ERROR: $$GOBIN/golangci-lint is not executable" >&2; \ + exit 1; \ + fi; \ + INSTALLED=$$("$$GOBIN/golangci-lint" version 2>&1 | sed -En 's/.*has version v?([0-9][0-9.]*).*/\1/p' | head -n1); \ + if [ -z "$$INSTALLED" ]; then \ + echo " ERROR: could not parse golangci-lint version output" >&2; \ + "$$GOBIN/golangci-lint" version >&2; \ + exit 1; \ + fi; \ + OLDEST=$$(printf '%s\n%s\n' "$(GOLANGCI_LINT_VERSION)" "$$INSTALLED" | sort -V | head -n1); \ + if [ "$$OLDEST" != "$(GOLANGCI_LINT_VERSION)" ]; then \ + echo " ERROR: golangci-lint $$INSTALLED is older than the required $(GOLANGCI_LINT_VERSION)" >&2; \ + exit 1; \ + fi; \ + echo " golangci-lint $$INSTALLED (>= $(GOLANGCI_LINT_VERSION)) OK" diff --git a/README.md b/README.md index 5794daf..05ed054 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,63 @@ You have been warned :) ## What is this? This project consists of four components: -1. A log collector that tails NGINX (or Apache) logs of a certain format, and aggregates - information per website, client address, status, and so on. It buckets these into windows - of 1min, 5min, 15min, 60min, 6hrs and 24hrs. It exposes this on a gRPC endpoint. -1. 
An aggregator that can scrape any number of collectors into a merged regional (or global) - view. The aggregator exposes the same gRPC endpoint as the collectors. -1. A Frontend that allows to query this data structure very quickly. -1. A CLI that allows to query this data also, returning JSON for further processing. +1. A log **collector** that tails NGINX (or Apache) logs and/or receives logs over UDP from + [`nginx-ipng-stats-plugin`](https://git.ipng.ch/ipng/nginx-ipng-stats-plugin), aggregating + counts per website, client address, URI, status, ASN, and source tag. It buckets these into + windows of 1m, 5m, 15m, 60m, 6h, and 24h and exposes them over gRPC. +1. An **aggregator** that subscribes to any number of collectors and serves a merged view on + the same gRPC surface. +1. An HTTP **frontend** that renders a drilldown dashboard (zero JavaScript, server-side SVG + sparklines) against any collector or the aggregator. +1. A **CLI** for shell queries, returning tables or JSON. -It's written in Go, and is meant to deploy collectors on any number of webservers, and central -aggregation and frontend logic. It's released under [[APACHE](LICENSE)] license. It can be run -either as `systemd` units, or in Docker, or any combination of the two. +Written in Go, released under [[APACHE](LICENSE)]. Runs as `systemd` units, in Docker, or any +combination. + +## Quick Start + +Three deployment flavors. Pick whichever suits the host. + +**Debian package.** Build once, install the `.deb` on every nginx host (for the collector) and +on one central host (for the aggregator + frontend): + +```bash +make install-deps # one-time: apt deps, Go toolchain, go tools +make pkg-deb # produces nginx-logtail__{amd64,arm64}.deb + +# on each nginx host: +sudo dpkg -i nginx-logtail_*_amd64.deb +sudo $EDITOR /etc/default/nginx-logtail # defaults to UDP-only on :9514; set COLLECTOR_LOGS=... 
to also tail files +sudo systemctl enable --now nginx-logtail-collector.service + +# on the central host: +sudo dpkg -i nginx-logtail_*_amd64.deb +sudo systemctl enable --now nginx-logtail-aggregator.service nginx-logtail-frontend.service +# dashboard now at http://<host>:8080 +``` + +Binaries land at `/usr/sbin/nginx-logtail-{collector,aggregator,frontend}` and the CLI at +`/usr/bin/nginx-logtail`. All three services run as the `_logtail` system user (collector uses +`Group=www-data` for log access). None are auto-enabled, so installing the package is safe on +any host. + +**Docker Compose.** Runs the aggregator and frontend in one stack; point the aggregator at the +collectors running on each nginx host: + +```bash +AGGREGATOR_COLLECTORS=nginx1:9090,nginx2:9090 docker compose up -d +# frontend on :8080, aggregator gRPC on :9091 +``` + +**From source (`make`).** + +```bash +make build # build/<arch>/{collector,aggregator,frontend,cli} +make test +./build/*/cli -version +``` + +`make help` lists every target. See [[User Guide](docs/user-guide.md)] for operator-facing documentation, or [[Design](docs/design.md)] for the normative requirements and architectural rationale. 
diff --git a/cmd/aggregator/main.go b/cmd/aggregator/main.go index 67ba62e..311b695 100644 --- a/cmd/aggregator/main.go +++ b/cmd/aggregator/main.go @@ -3,6 +3,7 @@ package main import ( "context" "flag" + "fmt" "log" "net" "os" @@ -10,6 +11,7 @@ import ( "strings" "syscall" + "git.ipng.ch/ipng/nginx-logtail/internal/version" pb "git.ipng.ch/ipng/nginx-logtail/proto/logtailpb" "google.golang.org/grpc" ) @@ -18,8 +20,14 @@ func main() { listen := flag.String("listen", envOr("AGGREGATOR_LISTEN", ":9091"), "gRPC listen address (env: AGGREGATOR_LISTEN)") collectors := flag.String("collectors", envOr("AGGREGATOR_COLLECTORS", ""), "comma-separated collector host:port addresses (env: AGGREGATOR_COLLECTORS)") source := flag.String("source", envOr("AGGREGATOR_SOURCE", hostname()), "name for this aggregator in responses (env: AGGREGATOR_SOURCE, default: hostname)") + showVersion := flag.Bool("version", false, "print version and exit") flag.Parse() + if *showVersion { + fmt.Printf("aggregator %s\n", version.String()) + return + } + if *collectors == "" { log.Fatal("aggregator: --collectors / AGGREGATOR_COLLECTORS is required") } diff --git a/cmd/cli/main.go b/cmd/cli/main.go index e123d01..7490450 100644 --- a/cmd/cli/main.go +++ b/cmd/cli/main.go @@ -3,6 +3,8 @@ package main import ( "fmt" "os" + + "git.ipng.ch/ipng/nginx-logtail/internal/version" ) const usage = `logtail-cli — debug shell for nginx-logtail collectors and aggregators @@ -51,6 +53,8 @@ func main() { runTargets(os.Args[2:]) case "-h", "--help", "help": fmt.Print(usage) + case "-version", "--version", "version": + fmt.Printf("logtail-cli %s\n", version.String()) default: fmt.Fprintf(os.Stderr, "unknown subcommand %q\n\n%s", os.Args[1], usage) os.Exit(1) diff --git a/cmd/collector/main.go b/cmd/collector/main.go index a84f3cf..9388fb0 100644 --- a/cmd/collector/main.go +++ b/cmd/collector/main.go @@ -4,6 +4,7 @@ import ( "bufio" "context" "flag" + "fmt" "log" "net" "net/http" @@ -15,6 +16,7 @@ import ( "syscall" 
"time" + "git.ipng.ch/ipng/nginx-logtail/internal/version" pb "git.ipng.ch/ipng/nginx-logtail/proto/logtailpb" "google.golang.org/grpc" ) @@ -30,8 +32,14 @@ func main() { scanInterval := flag.Duration("scan-interval", envOrDuration("COLLECTOR_SCAN_INTERVAL", 10*time.Second), "how often to rescan glob patterns for new/removed files (env: COLLECTOR_SCAN_INTERVAL)") logtailPort := flag.Int("logtail-port", envOrInt("COLLECTOR_LOGTAIL_PORT", 0), "UDP port to receive nginx ipng_stats_logtail packets, 0 to disable (env: COLLECTOR_LOGTAIL_PORT)") logtailBind := flag.String("logtail-bind", envOr("COLLECTOR_LOGTAIL_BIND", "127.0.0.1"), "UDP bind address for the logtail listener (env: COLLECTOR_LOGTAIL_BIND)") + showVersion := flag.Bool("version", false, "print version and exit") flag.Parse() + if *showVersion { + fmt.Printf("collector %s\n", version.String()) + return + } + patterns := collectPatterns(*logPaths, *logsFile) if len(patterns) == 0 && *logtailPort == 0 { log.Fatal("collector: no inputs configured; use --logs, --logs-file, or --logtail-port") diff --git a/cmd/collector/parser.go b/cmd/collector/parser.go index 3e90f37..59c7dd4 100644 --- a/cmd/collector/parser.go +++ b/cmd/collector/parser.go @@ -63,17 +63,33 @@ func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) { }, true } -// ParseUDPLine parses a tab-separated logtail log line from the UDP listener: +// ParseUDPLine dispatches on the version prefix emitted by +// nginx-ipng-stats-plugin's ipng_stats_logtail directive. The wire format is +// "v<N>\t<payload>", where <payload> is version-specific. Unknown or missing +// versions return false so operators can roll out a v2 parser before +// upgrading emitters. 
+func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) { + i := strings.IndexByte(line, '\t') + if i < 0 { + return LogRecord{}, false + } + switch line[:i] { + case "v1": + return parseUDPLineV1(line[i+1:], v4bits, v6bits) + default: + return LogRecord{}, false + } +} + +// parseUDPLineV1 parses the v1 payload (12 tab-separated fields): // // $host \t $remote_addr \t $request_method \t $request_uri \t $status \t // $body_bytes_sent \t $request_time \t $is_tor \t $asn \t // $ipng_source_tag \t $server_addr \t $scheme // -// All 12 fields are required. server_addr and scheme are consumed but not -// propagated. Returns false for any malformed packet (wrong field count, -// bad IP). -func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) { - fields := strings.Split(line, "\t") +// server_addr and scheme are parsed but discarded. +func parseUDPLineV1(payload string, v4bits, v6bits int) (LogRecord, bool) { + fields := strings.Split(payload, "\t") if len(fields) != 12 { return LogRecord{}, false } diff --git a/cmd/collector/parser_test.go b/cmd/collector/parser_test.go index c7e755e..5020cc5 100644 --- a/cmd/collector/parser_test.go +++ b/cmd/collector/parser_test.go @@ -213,9 +213,9 @@ func TestParseLine(t *testing.T) { } func TestParseUDPLine(t *testing.T) { - // host \t remote_addr \t method \t uri \t status \t body_bytes \t req_time \t + // v1 \t host \t remote_addr \t method \t uri \t status \t body_bytes \t req_time \t // is_tor \t asn \t source_tag \t server_addr \t scheme - good := "www.example.com\t1.2.3.4\tGET\t/api/v1/search?q=foo\t200\t1452\t0.043\t0\t12345\tcdn\t10.0.0.1\thttps" + good := "v1\twww.example.com\t1.2.3.4\tGET\t/api/v1/search?q=foo\t200\t1452\t0.043\t0\t12345\tcdn\t10.0.0.1\thttps" tests := []struct { name string @@ -224,7 +224,7 @@ func TestParseUDPLine(t *testing.T) { want LogRecord }{ { - name: "all 12 fields parsed, query stripped, extras dropped", + name: "v1 payload parsed, query stripped, extras dropped", 
line: good, wantOK: true, want: LogRecord{ @@ -241,8 +241,8 @@ func TestParseUDPLine(t *testing.T) { }, }, { - name: "is_tor=1, tag direct, IPv6", - line: "h\t2001:db8::1\tGET\t/\t200\t0\t0\t1\t65535\tdirect\t::1\thttp", + name: "v1 IPv6 tor=1 direct tag", + line: "v1\th\t2001:db8::1\tGET\t/\t200\t0\t0\t1\t65535\tdirect\t::1\thttp", wantOK: true, want: LogRecord{ Website: "h", @@ -258,18 +258,33 @@ func TestParseUDPLine(t *testing.T) { }, }, { - name: "11 fields rejected", - line: "h\t1.2.3.4\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1", + name: "v1 payload with 11 fields rejected", + line: "v1\th\t1.2.3.4\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1", wantOK: false, }, { - name: "13 fields rejected", + name: "v1 payload with 13 fields rejected", line: good + "\textra", wantOK: false, }, { - name: "bad IP rejected", - line: "h\tnope\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1\thttp", + name: "v1 bad IP rejected", + line: "v1\th\tnope\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1\thttp", + wantOK: false, + }, + { + name: "unknown version rejected (future v2)", + line: "v2\twww.example.com\t1.2.3.4\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1\thttp", + wantOK: false, + }, + { + name: "missing version prefix rejected (legacy 12-field line)", + line: "www.example.com\t1.2.3.4\tGET\t/\t200\t0\t0\t0\t0\ttag\t10.0.0.1\thttp", + wantOK: false, + }, + { + name: "no tab at all rejected", + line: "v1", wantOK: false, }, } diff --git a/cmd/collector/udp_test.go b/cmd/collector/udp_test.go index 6105ad9..94ccc52 100644 --- a/cmd/collector/udp_test.go +++ b/cmd/collector/udp_test.go @@ -34,7 +34,7 @@ func TestUDPListenerRoundTrip(t *testing.T) { defer conn.Close() // The listener is started asynchronously; retry for up to 1s. 
- good := "www.example.com\t1.2.3.4\tGET\t/\t200\t42\t0.010\t0\t12345\tdirect\t10.0.0.1\thttps" + good := "v1\twww.example.com\t1.2.3.4\tGET\t/\t200\t42\t0.010\t0\t12345\tdirect\t10.0.0.1\thttps" bad := "not enough\tfields" deadline := time.Now().Add(time.Second) for time.Now().Before(deadline) { diff --git a/cmd/frontend/main.go b/cmd/frontend/main.go index 5dcaa86..aa0c37e 100644 --- a/cmd/frontend/main.go +++ b/cmd/frontend/main.go @@ -4,6 +4,7 @@ import ( "context" "embed" "flag" + "fmt" "html/template" "log" "net/http" @@ -11,6 +12,8 @@ import ( "os/signal" "strconv" "syscall" + + "git.ipng.ch/ipng/nginx-logtail/internal/version" ) //go:embed templates @@ -21,8 +24,14 @@ func main() { target := flag.String("target", envOr("FRONTEND_TARGET", "localhost:9091"), "default gRPC endpoint, aggregator or collector (env: FRONTEND_TARGET)") n := flag.Int("n", envOrInt("FRONTEND_N", 25), "default number of table rows (env: FRONTEND_N)") refresh := flag.Int("refresh", envOrInt("FRONTEND_REFRESH", 30), "meta-refresh interval in seconds, 0 to disable (env: FRONTEND_REFRESH)") + showVersion := flag.Bool("version", false, "print version and exit") flag.Parse() + if *showVersion { + fmt.Printf("frontend %s\n", version.String()) + return + } + funcMap := template.FuncMap{"fmtCount": fmtCount} tmpl := template.Must( template.New("").Funcs(funcMap).ParseFS(templatesFS, "templates/*.html"), diff --git a/debian/build-deb.sh b/debian/build-deb.sh new file mode 100755 index 0000000..a1deea1 --- /dev/null +++ b/debian/build-deb.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Build a minimal .deb for nginx-logtail containing the four static binaries. +# Expects `make build-` to have already populated build//. +# +# Usage: debian/build-deb.sh +# arch: amd64 | arm64 +# version: e.g. 
0.9.1 + +set -euo pipefail + +if [ "$#" -ne 2 ]; then + echo "usage: $0 " >&2 + exit 1 +fi + +ARCH="$1" +VERSION="$2" +PKG="nginx-logtail" +STAGE="$(mktemp -d)" +chmod 0755 "$STAGE" + +# Output into build/ alongside the per-arch binary trees so `make clean` +# wipes everything in one rm and .gitignore only needs to ignore build/. +OUT_DIR="build" +mkdir -p "${OUT_DIR}" +OUT="${OUT_DIR}/${PKG}_${VERSION}_${ARCH}.deb" + +trap 'rm -rf "$STAGE"' EXIT + +BUILD_DIR="build/${ARCH}" +for b in collector aggregator frontend cli; do + if [ ! -x "${BUILD_DIR}/${b}" ]; then + echo "error: ${BUILD_DIR}/${b} not found — run 'make build-${ARCH}' first" >&2 + exit 1 + fi +done + +install -d -m 0755 \ + "${STAGE}/DEBIAN" \ + "${STAGE}/usr/sbin" \ + "${STAGE}/usr/bin" \ + "${STAGE}/usr/share/doc/${PKG}" \ + "${STAGE}/usr/share/man/man8" \ + "${STAGE}/usr/share/${PKG}" \ + "${STAGE}/lib/systemd/system" + +install -m 0755 "${BUILD_DIR}/collector" "${STAGE}/usr/sbin/nginx-logtail-collector" +install -m 0755 "${BUILD_DIR}/aggregator" "${STAGE}/usr/sbin/nginx-logtail-aggregator" +install -m 0755 "${BUILD_DIR}/frontend" "${STAGE}/usr/sbin/nginx-logtail-frontend" +install -m 0755 "${BUILD_DIR}/cli" "${STAGE}/usr/bin/nginx-logtail" + +install -m 0644 LICENSE "${STAGE}/usr/share/doc/${PKG}/copyright" +install -m 0644 README.md "${STAGE}/usr/share/doc/${PKG}/README.md" + +# Manpage: gzip per Debian policy (lintian only checks for .gz). +gzip -n -9 -c debian/nginx-logtail.8 > "${STAGE}/usr/share/man/man8/nginx-logtail.8.gz" +chmod 0644 "${STAGE}/usr/share/man/man8/nginx-logtail.8.gz" + +# systemd units. Installed, not enabled or started — operator opts in. +install -m 0644 debian/nginx-logtail-collector.service "${STAGE}/lib/systemd/system/" +install -m 0644 debian/nginx-logtail-aggregator.service "${STAGE}/lib/systemd/system/" +install -m 0644 debian/nginx-logtail-frontend.service "${STAGE}/lib/systemd/system/" + +# Defaults template. 
postinst renders this to /etc/default/nginx-logtail on +# first install with %HOSTNAME% substituted. Not a dpkg conffile — operator +# edits survive upgrades because postinst only writes when the file is absent. +install -m 0644 debian/default.template "${STAGE}/usr/share/${PKG}/default.template" + +# Maintainer scripts: postinst creates _logtail user and renders defaults; +# prerm stops running services; postrm reloads systemd and removes the +# generated defaults file on purge. +install -m 0755 debian/postinst "${STAGE}/DEBIAN/postinst" +install -m 0755 debian/postrm "${STAGE}/DEBIAN/postrm" +install -m 0755 debian/prerm "${STAGE}/DEBIAN/prerm" + +cat > "${STAGE}/DEBIAN/control" < +Homepage: https://git.ipng.ch/ipng/nginx-logtail +Description: Real-time top-K traffic analysis for nginx clusters + nginx-logtail is a four-binary Go system that ingests nginx access + logs (from files or UDP) and answers ranked top-K queries over + configurable time windows. See /usr/share/doc/nginx-logtail/README.md. +EOF + +dpkg-deb --build --root-owner-group "${STAGE}" "${OUT}" +echo "built ${OUT}" diff --git a/debian/default.template b/debian/default.template new file mode 100644 index 0000000..76b8ab7 --- /dev/null +++ b/debian/default.template @@ -0,0 +1,97 @@ +# /etc/default/nginx-logtail +# +# Shared configuration for the nginx-logtail collector, aggregator, and +# frontend systemd units. Every flag that every binary accepts has a +# matching environment variable (COLLECTOR_*, AGGREGATOR_*, FRONTEND_*); +# the units start their binary with no explicit arguments beyond the +# optional *_ARGS escape hatch, so everything is driven from here. +# +# This file is generated by nginx-logtail's postinst on first install +# (hostname substituted) and is NOT a dpkg conffile. Operator edits are +# preserved across upgrades. `dpkg --purge nginx-logtail` removes it. 
+ +# ========================================================================== +# Collector (nginx-logtail-collector.service) +# ========================================================================== + +# gRPC listen address for TopN/Trend queries and the aggregator's +# StreamSnapshots subscription. +COLLECTOR_LISTEN=:9090 + +# Prometheus /metrics listen address. Set to "" to disable the endpoint. +COLLECTOR_PROM_LISTEN=:9100 + +# Comma-separated log file paths or glob patterns to tail. At least one of +# COLLECTOR_LOGS, COLLECTOR_LOGS_FILE, or COLLECTOR_LOGTAIL_PORT must be set, +# otherwise the collector refuses to start. Leave empty to run UDP-only (no +# file tailer goroutine is started when no patterns are supplied). +COLLECTOR_LOGS= + +# Alternative to COLLECTOR_LOGS: a file listing one path/glob per line. +# Lines starting with # are ignored. +COLLECTOR_LOGS_FILE= + +# Name for this collector in query responses, ListTargets, and snapshot +# streams. Defaults to the short hostname at install time. +COLLECTOR_SOURCE=%HOSTNAME% + +# IPv4 prefix length for client address bucketing (CIDR). /24 groups a +# class-C worth of clients into one key. +COLLECTOR_V4PREFIX=24 + +# IPv6 prefix length. /48 matches the typical residential allocation. +COLLECTOR_V6PREFIX=48 + +# How often to rescan COLLECTOR_LOGS globs for new/removed files. +COLLECTOR_SCAN_INTERVAL=10s + +# UDP port that receives ipng_stats_logtail datagrams from the companion +# nginx-ipng-stats-plugin. Set to 0 to disable the UDP listener entirely. +COLLECTOR_LOGTAIL_PORT=9514 + +# UDP bind address. Keep as 127.0.0.1 unless the plugin emits from a +# different host; the listener has no authentication. +COLLECTOR_LOGTAIL_BIND=127.0.0.1 + +# Extra arguments appended to the collector argv after the env-var-derived +# flags. Useful for flags without an env-var form, or temporary overrides. 
+COLLECTOR_ARGS= + +# ========================================================================== +# Aggregator (nginx-logtail-aggregator.service) +# ========================================================================== + +# gRPC listen address. Frontend and CLI point their --target at this. +AGGREGATOR_LISTEN=:9091 + +# Comma-separated host:port addresses of every collector this aggregator +# should subscribe to. Mandatory — aggregator refuses to start empty. +AGGREGATOR_COLLECTORS=localhost:9090 + +# Display name for this aggregator in query responses. +AGGREGATOR_SOURCE=%HOSTNAME% + +# Extra arguments appended to the aggregator argv. +AGGREGATOR_ARGS= + +# ========================================================================== +# Frontend (nginx-logtail-frontend.service) +# ========================================================================== + +# HTTP listen address for the dashboard. +FRONTEND_LISTEN=:8080 + +# Default gRPC endpoint the dashboard queries. The aggregator by default; +# override with ?target=host:port per request, or change here to point +# directly at a collector. +FRONTEND_TARGET=localhost:9091 + +# Default number of table rows shown per view. Dashboard users can +# override with ?n=N on individual URLs. +FRONTEND_N=25 + +# Meta-refresh interval (seconds). Set 0 to disable auto-refresh. +FRONTEND_REFRESH=30 + +# Extra arguments appended to the frontend argv. 
+FRONTEND_ARGS= diff --git a/debian/nginx-logtail-aggregator.service b/debian/nginx-logtail-aggregator.service new file mode 100644 index 0000000..16f8d1e --- /dev/null +++ b/debian/nginx-logtail-aggregator.service @@ -0,0 +1,23 @@ +[Unit] +Description=nginx-logtail aggregator +Documentation=man:nginx-logtail(8) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=_logtail +Group=_logtail +EnvironmentFile=-/etc/default/nginx-logtail +ExecStart=/usr/sbin/nginx-logtail-aggregator $AGGREGATOR_ARGS +Restart=on-failure +RestartSec=5 + +# Aggregator needs no filesystem access beyond its binary. +ProtectSystem=strict +ProtectHome=yes +PrivateTmp=yes +NoNewPrivileges=yes + +[Install] +WantedBy=multi-user.target diff --git a/debian/nginx-logtail-collector.service b/debian/nginx-logtail-collector.service new file mode 100644 index 0000000..0168332 --- /dev/null +++ b/debian/nginx-logtail-collector.service @@ -0,0 +1,26 @@ +[Unit] +Description=nginx-logtail collector +Documentation=man:nginx-logtail(8) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +# Group=www-data lets the collector read nginx access logs that are group-readable +# by www-data. Override with a drop-in if your nginx uses a different group. +User=_logtail +Group=www-data +EnvironmentFile=-/etc/default/nginx-logtail +ExecStart=/usr/sbin/nginx-logtail-collector $COLLECTOR_ARGS +Restart=on-failure +RestartSec=5 + +# Basic hardening — override with a drop-in if your deployment needs more. 
+ProtectSystem=strict +ProtectHome=yes +PrivateTmp=yes +NoNewPrivileges=yes +ReadOnlyPaths=/var/log + +[Install] +WantedBy=multi-user.target diff --git a/debian/nginx-logtail-frontend.service b/debian/nginx-logtail-frontend.service new file mode 100644 index 0000000..cd1f4e9 --- /dev/null +++ b/debian/nginx-logtail-frontend.service @@ -0,0 +1,22 @@ +[Unit] +Description=nginx-logtail frontend +Documentation=man:nginx-logtail(8) +After=network-online.target +Wants=network-online.target + +[Service] +Type=simple +User=_logtail +Group=_logtail +EnvironmentFile=-/etc/default/nginx-logtail +ExecStart=/usr/sbin/nginx-logtail-frontend $FRONTEND_ARGS +Restart=on-failure +RestartSec=5 + +ProtectSystem=strict +ProtectHome=yes +PrivateTmp=yes +NoNewPrivileges=yes + +[Install] +WantedBy=multi-user.target diff --git a/debian/nginx-logtail.8 b/debian/nginx-logtail.8 new file mode 100644 index 0000000..6fd26b7 --- /dev/null +++ b/debian/nginx-logtail.8 @@ -0,0 +1,240 @@ +.TH NGINX-LOGTAIL 8 "April 2026" "nginx-logtail 0.9.1" "System Manager's Manual" +.SH NAME +nginx-logtail \- real-time top-K traffic analysis for nginx clusters +.SH SYNOPSIS +.B nginx-logtail-collector +.RI [ options ] +.br +.B nginx-logtail-aggregator +.RI [ options ] +.br +.B nginx-logtail-frontend +.RI [ options ] +.br +.B nginx-logtail +.IR subcommand +.RI [ options ] +.SH DESCRIPTION +.PP +.B nginx-logtail +is a four-binary Go system for real-time analysis of nginx traffic across a +fleet of hosts. Each nginx host runs a +.B collector +that ingests logs from files (via +.BR fsnotify ), +from a UDP socket (fed by the +.B nginx-ipng-stats-plugin +\fBipng_stats_logtail\fR directive), or both. The collector maintains +in-memory ranked top-K counters over 1m/5m/15m/60m/6h/24h windows and +exposes them via gRPC on +.IR :9090 . +A central +.B aggregator +subscribes to every collector, merges their snapshot streams, and serves +the same gRPC contract on +.IR :9091 . 
+The +.B frontend +renders a server-side HTML dashboard (no JavaScript) on +.I :8080 +against any +.I LogtailService +endpoint. The CLI, +.BR nginx-logtail , +offers the same queries as a shell companion. +.PP +Operators typically run the collector on every nginx host as a systemd +unit, the aggregator and frontend on a central host (either as systemd +units or via the shipped +.BR docker-compose.yml ), +and invoke +.B nginx-logtail +from an operator laptop. +.PP +The Debian package installs three systemd units — +.BR nginx-logtail-collector.service , +.BR nginx-logtail-aggregator.service , +.BR nginx-logtail-frontend.service +— under +.IR /lib/systemd/system/ . +None are enabled or started on install: the operator opts into each +service per-host with +.BR "systemctl enable --now" . +Services run as the system user +.B _logtail +(created by +.BR postinst ). +The collector uses +.B _logtail:www-data +so it can read nginx access logs that are group-readable by +.BR www-data ; +the aggregator and frontend use +.BR _logtail:_logtail . +All three units read a single environment file, +.IR /etc/default/nginx-logtail , +generated by the package's postinst on first install (with the short +hostname substituted for +.B COLLECTOR_SOURCE +and +.BR AGGREGATOR_SOURCE ). +The file is not a dpkg conffile: the template lives at +.IR /usr/share/nginx-logtail/default.template , +the operator's edits to +.I /etc/default/nginx-logtail +survive upgrades, and +.B dpkg --purge +removes it. +Every flag of every binary has a matching +.BR COLLECTOR_* , +.BR AGGREGATOR_* , +or +.B FRONTEND_* +env var; set them in the defaults file. For flags without an env-var form, +or temporary overrides, append to +.BR COLLECTOR_ARGS , +.BR AGGREGATOR_ARGS , +or +.BR FRONTEND_ARGS . +.SH COMPONENTS +.TP +.B nginx-logtail-collector +Installed in +.IR /usr/sbin . +Tails nginx access logs and/or receives UDP datagrams on +.B --logtail-port +(default disabled).
Exposes +.I LogtailService +gRPC on +.B --listen +(default +.IR :9090 ) +and Prometheus metrics on +.B --prom-listen +(default +.IR :9100 ). +Pass +.B --version +to print build metadata. +.TP +.B nginx-logtail-aggregator +Installed in +.IR /usr/sbin . +Subscribes to each address in +.B --collectors +and merges their streams. Serves +.I LogtailService +on +.B --listen +(default +.IR :9091 ). +On restart, backfills its ring buffers from every collector via +.IR DumpSnapshots . +.TP +.B nginx-logtail-frontend +Installed in +.IR /usr/sbin . +HTTP dashboard on +.B --listen +(default +.IR :8080 ) +against +.B --target +(default +.IR localhost:9091 , +the aggregator). URL-driven filter state; append +.I &raw=1 +to any dashboard URL for JSON output. +.TP +.B nginx-logtail +Installed in +.IR /usr/bin . +CLI for +.BR topn , +.BR trend , +.BR stream , +and +.B targets +queries. Accepts +.BI \-\-target " host:port[,host:port...]" +for concurrent fan-out. +.SH FILES +.TP +.I /usr/sbin/nginx-logtail-collector +Collector daemon binary. +.TP +.I /usr/sbin/nginx-logtail-aggregator +Aggregator daemon binary. +.TP +.I /usr/sbin/nginx-logtail-frontend +Frontend HTTP server binary. +.TP +.I /usr/bin/nginx-logtail +CLI binary. +.TP +.I /usr/share/doc/nginx-logtail/ +README, copyright, and pointer to the design and user-guide documents. +.SH EXAMPLES +.PP +Run a collector reading one log file and listening on UDP 9514: +.PP +.RS +.nf +nginx-logtail-collector \\ + --logs /var/log/nginx/access.log \\ + --logtail-port 9514 \\ + --source $(hostname) +.fi +.RE +.PP +Query the top 10 websites over the last 5 minutes: +.PP +.RS +.nf +nginx-logtail topn --target agg:9091 --window 5m --n 10 +.fi +.RE +.PP +Show all HTTP 429s by client prefix over the last minute: +.PP +.RS +.nf +nginx-logtail topn --target agg:9091 --window 1m \\ + --group-by prefix --status 429 +.fi +.RE +.SH ENVIRONMENT +All three daemons read +.IR /etc/default/nginx-logtail . 
+The file is self-documenting — every env var each binary recognises is +listed with a short description and its default value. Representative +variables: +.IP \fBCOLLECTOR_LOGS\fR +Comma-separated log file paths or globs. +.IP \fBCOLLECTOR_LOGTAIL_PORT\fR +UDP port for +.I ipng_stats_logtail +input; 0 disables the listener. +.IP \fBAGGREGATOR_COLLECTORS\fR +Comma-separated collector addresses. Mandatory. +.IP \fBFRONTEND_TARGET\fR +gRPC endpoint the frontend queries (aggregator or collector). +.IP \fBCOLLECTOR_ARGS\fR, \fBAGGREGATOR_ARGS\fR, \fBFRONTEND_ARGS\fR +Raw argv appended after the env-var-derived flags; use for flags +without an env-var form or for temporary overrides. +.SH SECURITY +gRPC endpoints are cleartext HTTP/2 by default. The UDP listener binds to +.I 127.0.0.1 +unless +.B --logtail-bind +is set explicitly. Expose beyond a trusted network only behind a TLS +terminator. +.SH SEE ALSO +.BR nginx (8), +.BR systemd (1). +.PP +Full design and operator guide: +.IR /usr/share/doc/nginx-logtail/README.md . +.SH AUTHORS +Pim van Pelt , with Claude Code. +.SH BUGS +Report issues at https://git.ipng.ch/ipng/nginx-logtail. diff --git a/debian/postinst b/debian/postinst new file mode 100755 index 0000000..96cd964 --- /dev/null +++ b/debian/postinst @@ -0,0 +1,50 @@ +#!/bin/sh +# Runs after the package is unpacked. We: +# 1. create the system user/group _logtail (idempotent); +# 2. on first install, render /etc/default/nginx-logtail from the template; +# 3. reload systemd. +# +# We deliberately do NOT enable or start the units — some hosts run only the +# collector, some only the aggregator, some run both with the frontend, some +# run neither. The operator is expected to run: +# +# systemctl enable --now nginx-logtail-collector.service +# systemctl enable --now nginx-logtail-aggregator.service +# systemctl enable --now nginx-logtail-frontend.service +# +# on the hosts that should run each service. 
+set -e + +TEMPLATE=/usr/share/nginx-logtail/default.template +TARGET=/etc/default/nginx-logtail + +if [ "$1" = configure ]; then + if ! getent group _logtail >/dev/null; then + addgroup --system _logtail + fi + if ! getent passwd _logtail >/dev/null; then + adduser --system --ingroup _logtail \ + --no-create-home --home /nonexistent \ + --shell /usr/sbin/nologin \ + --gecos "nginx-logtail" \ + _logtail + fi + + # First install: $2 is empty. Render the template with the current + # short hostname, but never clobber an existing file (in case the + # operator dropped one in manually before installing). + if [ -z "$2" ] && [ ! -e "$TARGET" ]; then + HOSTNAME_SHORT="$(hostname -s 2>/dev/null || hostname 2>/dev/null || echo localhost)" + # Use a delimiter unlikely to appear in hostnames. + sed "s|%HOSTNAME%|${HOSTNAME_SHORT}|g" "$TEMPLATE" > "$TARGET" + chmod 0644 "$TARGET" + chown root:root "$TARGET" + fi + + if [ -d /run/systemd/system ]; then + systemctl daemon-reload || true + fi +fi + +#DEBHELPER# +exit 0 diff --git a/debian/postrm b/debian/postrm new file mode 100755 index 0000000..7639da3 --- /dev/null +++ b/debian/postrm @@ -0,0 +1,23 @@ +#!/bin/sh +# Runs after the package is removed or purged. Drop systemd's view of the +# units so they disappear from `systemctl list-unit-files`. On purge, also +# remove the generated /etc/default/nginx-logtail (we don't ship it as a +# conffile; postinst renders it from a template on first install). +set -e + +case "$1" in + purge) + rm -f /etc/default/nginx-logtail + if [ -d /run/systemd/system ]; then + systemctl daemon-reload || true + fi + ;; + remove) + if [ -d /run/systemd/system ]; then + systemctl daemon-reload || true + fi + ;; +esac + +#DEBHELPER# +exit 0 diff --git a/debian/prerm b/debian/prerm new file mode 100755 index 0000000..561c30a --- /dev/null +++ b/debian/prerm @@ -0,0 +1,17 @@ +#!/bin/sh +# Runs before the package is removed. 
Stop any running instances cleanly so +# the files we're about to delete aren't held open. +set -e + +case "$1" in + remove|upgrade|deconfigure) + if [ -d /run/systemd/system ]; then + for unit in nginx-logtail-collector.service nginx-logtail-aggregator.service nginx-logtail-frontend.service; do + systemctl stop "$unit" 2>/dev/null || true + done + fi + ;; +esac + +#DEBHELPER# +exit 0 diff --git a/docs/design.md b/docs/design.md index 9f93617..e63db0b 100644 --- a/docs/design.md +++ b/docs/design.md @@ -127,15 +127,18 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat | 8 | `$is_tor` | `is_tor` (optional) | | 9 | `$asn` | `asn` (optional) | -- **FR-2.2 UDP format.** The collector MUST accept datagrams in the following tab-separated layout, as emitted by - `nginx-ipng-stats-plugin`'s `ipng_stats_logtail` directive: +- **FR-2.2 UDP format.** The collector MUST accept datagrams in a versioned tab-separated layout, as emitted by + `nginx-ipng-stats-plugin`'s `ipng_stats_logtail` directive. Every datagram MUST begin with a literal version tag + (`v<N>\t`) so the collector can route each packet to the appropriate parser. Only `v1` is defined in this revision; + unknown versions MUST be counted as parse failures and dropped. ```nginx - log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn\t$ipng_source_tag\t$server_addr\t$scheme'; + log_format ipng_stats_logtail 'v1\t$host\t$remote_addr\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn\t$ipng_source_tag\t$server_addr\t$scheme'; ``` - Exactly 12 tab-separated fields are required. `$server_addr` and `$scheme` MUST be parsed but dropped; they are reserved for - future use. Malformed datagrams MUST be counted (FR-8.5) and silently dropped. + The v1 payload MUST have exactly 12 tab-separated fields after the `v1` tag (13 fields total).
`$server_addr` and + `$scheme` MUST be parsed but dropped; they are reserved for future use. Malformed datagrams (wrong version, wrong + field count, bad IP) MUST be counted (FR-8.5) and silently dropped. - **FR-2.3** The file tailer MUST set `source_tag="direct"` on every record it parses. The UDP listener MUST propagate `$ipng_source_tag` verbatim. This is the only difference in downstream processing between the two ingest paths. @@ -556,7 +559,8 @@ transitions. No per-request logging. - **UDP datagram loss.** Any datagram dropped in-kernel (socket buffer full, network drop) does not register as a parse failure; it is simply invisible. Operators should size `SO_RCVBUF` appropriately; the collector already requests 4 MiB. - **Malformed log lines.** File format: lines with <8 tab-separated fields are silently skipped; an invalid IP also drops the line. - UDP: packets without exactly 12 fields are counted as received-but-not-success and dropped. + UDP: packets without a recognised `v<N>\t` prefix, or with the wrong field count for the claimed version, or with a bad IP, are + counted as received-but-not-success and dropped. - **Clock skew between collectors.** Trend sparklines derived from merged data assume collectors are roughly NTP-synced. Per-bucket alignment is to the local minute / 5-minute boundary of each collector.
- **gRPC traffic over untrusted links.** The system does not ship TLS; operators should front the gRPC ports with a TLS-terminating diff --git a/docs/user-guide.md b/docs/user-guide.md index 83b8d57..7d40e84 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -14,16 +14,126 @@ Components: | Binary | Runs on | Role | |---------------|------------------|----------------------------------------------------| -| `collector` | each nginx host | Tails log files, aggregates in memory, serves gRPC | +| `collector` | each nginx host | Tails log files and/or UDP datagrams, aggregates in memory, serves gRPC | | `aggregator` | central host | Merges all collectors, serves unified gRPC | | `frontend` | central host | HTTP dashboard with drilldown UI | | `cli` | operator laptop | Shell queries against collector or aggregator | +Every binary accepts `-version` (or `nginx-logtail version` for the CLI) and prints its version, +git commit, and build date. + --- -## nginx Configuration +## Installation -Add the `logtail` log format to your `nginx.conf` and apply it to each `server` block: +Three flavors. `make help` lists every target; `make install-deps` sets up a fresh build box +(apt deps, Go toolchain, `protoc-gen-go`, `golangci-lint`). 
+ +### Debian package + +```bash +make pkg-deb # produces nginx-logtail_<version>_{amd64,arm64}.deb +sudo dpkg -i nginx-logtail_*_amd64.deb +``` + +The package installs: + +| Path | Contents | +|---------------------------------------------------------------|---------------------------------------------------| +| `/usr/sbin/nginx-logtail-{collector,aggregator,frontend}` | Service binaries | +| `/usr/bin/nginx-logtail` | CLI | +| `/lib/systemd/system/nginx-logtail-*.service` | Three systemd units | +| `/usr/share/man/man8/nginx-logtail.8.gz` | Manpage (`man 8 nginx-logtail`) | +| `/usr/share/nginx-logtail/default.template` | Defaults template | +| `/etc/default/nginx-logtail` | **Generated on first install** from the template | + +The postinst creates a system user/group `_logtail` if absent and renders the template into +`/etc/default/nginx-logtail` with the short hostname substituted. **None of the services are +enabled or started automatically** — installing the package is safe on any host. Operators +opt in per service: + +```bash +sudo systemctl enable --now nginx-logtail-collector.service # on each nginx host +sudo systemctl enable --now nginx-logtail-aggregator.service # on the central host +sudo systemctl enable --now nginx-logtail-frontend.service # on the central host +``` + +The collector runs as `_logtail:www-data` so it can read nginx access logs that are +group-readable by `www-data`; aggregator and frontend run as `_logtail:_logtail`. + +### Docker / Docker Compose + +The repo's `docker-compose.yml` runs the aggregator and frontend together from a single image +that contains all four binaries. + +```bash +make docker # builds git.ipng.ch/ipng/nginx-logtail:v<version> + :latest, native arch +make docker-push # multi-arch (amd64+arm64) buildx push + +AGGREGATOR_COLLECTORS=nginx1:9090,nginx2:9090 docker compose up -d +# frontend on :8080, aggregator gRPC on :9091 +``` + +Each container explicitly selects its binary via `command: ["/usr/local/bin/<binary>"]`.
+ +### From source + +```bash +git clone https://git.ipng.ch/ipng/nginx-logtail +cd nginx-logtail +make build # -> build/<arch>/{collector,aggregator,frontend,cli} +make test +./build/*/cli version +``` + +Requires Go ≥ 1.24 (see `go.mod`). No CGO, no external runtime dependencies. + +--- + +## Configuration + +### /etc/default/nginx-logtail + +The Debian package ships one shared environment file read by all three systemd units via +`EnvironmentFile=-/etc/default/nginx-logtail`. It enumerates every flag the three daemons +accept as a `COLLECTOR_*`, `AGGREGATOR_*`, or `FRONTEND_*` env var. Defaults on first install +are sensible for a single-host deployment: + +| Variable | First-install default | Purpose | +|----------------------------|------------------------------|---------------------------------------------------| +| `COLLECTOR_LISTEN` | `:9090` | gRPC listen address | +| `COLLECTOR_PROM_LISTEN` | `:9100` | Prometheus metrics; set `""` to disable | +| `COLLECTOR_LOGS` | *(empty — UDP-only)* | Comma-sep log paths/globs | +| `COLLECTOR_LOGS_FILE` | *(empty)* | File with one path/glob per line | +| `COLLECTOR_SOURCE` | `$(hostname -s)` at install | Display name in query responses | +| `COLLECTOR_V4PREFIX` | `24` | IPv4 bucket prefix | +| `COLLECTOR_V6PREFIX` | `48` | IPv6 bucket prefix | +| `COLLECTOR_SCAN_INTERVAL` | `10s` | Log-glob rescan cadence | +| `COLLECTOR_LOGTAIL_PORT` | `9514` | UDP port for `ipng_stats_logtail` (0 disables) | +| `COLLECTOR_LOGTAIL_BIND` | `127.0.0.1` | UDP bind address | +| `AGGREGATOR_LISTEN` | `:9091` | gRPC listen address | +| `AGGREGATOR_COLLECTORS` | `localhost:9090` | Comma-sep collectors (mandatory) | +| `AGGREGATOR_SOURCE` | `$(hostname -s)` at install | Display name | +| `FRONTEND_LISTEN` | `:8080` | HTTP dashboard address | +| `FRONTEND_TARGET` | `localhost:9091` | Default gRPC endpoint | +| `FRONTEND_N` | `25` | Default table row count | +| `FRONTEND_REFRESH` | `30` | Meta-refresh seconds; `0` disables | + +At least one of
`COLLECTOR_LOGS`, `COLLECTOR_LOGS_FILE`, or `COLLECTOR_LOGTAIL_PORT > 0` must +be set, otherwise the collector refuses to start. The shipped default (`COLLECTOR_LOGS=` empty +plus `COLLECTOR_LOGTAIL_PORT=9514`) makes the collector UDP-only — no file tailer goroutine +is launched when no log patterns are supplied. + +Three escape-hatch variables — `COLLECTOR_ARGS`, `AGGREGATOR_ARGS`, `FRONTEND_ARGS` — are +appended verbatim to each unit's `ExecStart` argv. Use them for flags without an env-var form, +or for temporary overrides, without editing the unit. + +The file is **not a dpkg conffile**: postinst writes it only when absent, so operator edits +survive upgrades, and `dpkg --purge` removes it. + +### nginx — file-based ingest + +Add the `logtail` format and attach it to whichever `server` blocks you want tracked: ```nginx http { @@ -37,64 +147,128 @@ http { } ``` -The format is tab-separated with fixed field positions. Query strings are stripped from the URI -by the collector at ingest time — only the path is tracked. +Tab-separated, fixed field order, ten fields. The precise layout: -`$is_tor` must be set to `1` when the client IP is a TOR exit node and `0` otherwise (typically -populated by a custom nginx variable or a Lua script that checks the IP against a TOR exit list). -The field is optional for backward compatibility — log lines without it are accepted and treated -as `is_tor=0`. 
+| # | Field | Ingested into | +|---|-------------------|--------------------------| +| 0 | `$host` | `website` | +| 1 | `$remote_addr` | `client_prefix` (truncated) | +| 2 | `$msec` | *(discarded)* | +| 3 | `$request_method` | Prom `method` label | +| 4 | `$request_uri` | `http_request_uri` (query stripped) | +| 5 | `$status` | `http_response` | +| 6 | `$body_bytes_sent`| Prom body histogram | +| 7 | `$request_time` | Prom duration histogram | +| 8 | `$is_tor` | `is_tor` (optional) | +| 9 | `$asn` | `asn` (optional) | -`$asn` must be set to the client's AS number as a decimal integer (e.g. from MaxMind GeoIP2's -`$geoip2_data_autonomous_system_number`). The field is optional — log lines without it default -to `asn=0`. +`$is_tor` is `1` if the client IP is a TOR exit node and `0` otherwise (typically populated +via a Lua script or `$geoip2_data_*`). `$asn` is the client AS number as a decimal integer +(e.g. MaxMind GeoIP2's `$geoip2_data_autonomous_system_number`). ---- +**If either is unknown, emit `0`.** A literal `0` in `$is_tor` parses as `false`; a literal +`0` in `$asn` parses as ASN `0`, which you can exclude at query time with `--asn '!=0'` / the +`asn!=0` filter expression. Operators who don't have TOR or GeoIP data can simply emit `0` for +both columns and everything works. -## Building +Both fields are also **positionally optional** for backward compatibility — older 8-field +lines are accepted and default to `false` / `0`. Records from the file tailer are always +tagged `source_tag="direct"`. -```bash -git clone https://git.ipng.ch/ipng/nginx-logtail -cd nginx-logtail -go build ./cmd/collector/ -go build ./cmd/aggregator/ -go build ./cmd/frontend/ -go build ./cmd/cli/ +Then point the collector at the log files via `COLLECTOR_LOGS` — comma-separated paths or +glob patterns. Make sure the files are group-readable by `www-data` (the collector's primary +group in the systemd unit). 
+ +### nginx — UDP ingest (`nginx-ipng-stats-plugin`) + +If the nginx host runs [`nginx-ipng-stats-plugin`](https://git.ipng.ch/ipng/nginx-ipng-stats-plugin), +the plugin's `ipng_stats_logtail` directive emits one UDP datagram per request directly to +the collector, no log file involved. The wire format is **versioned** — every datagram starts +with a literal `v1\t` prefix so the collector can ship new parser versions (v2, v3, …) before +emitters are upgraded and route each packet accordingly. + +```nginx +http { + log_format ipng_stats_logtail + 'v1\t$host\t$remote_addr\t$request_method\t$request_uri\t$status\t$body_bytes_sent\t$request_time\t$is_tor\t$asn\t$ipng_source_tag\t$server_addr\t$scheme'; + + ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=64k flush=1s; +} ``` -Requires Go 1.21+. No CGO, no external runtime dependencies. +Precise v1 layout — 13 tab-separated fields total (version prefix + 12 payload fields): + +| # | Field | Ingested into | +|---|-------------------|------------------------------| +| 0 | `v1` | version tag | +| 1 | `$host` | `website` | +| 2 | `$remote_addr` | `client_prefix` (truncated) | +| 3 | `$request_method` | Prom `method` label | +| 4 | `$request_uri` | `http_request_uri` (query stripped) | +| 5 | `$status` | `http_response` | +| 6 | `$body_bytes_sent`| Prom body histogram | +| 7 | `$request_time` | Prom duration histogram | +| 8 | `$is_tor` | `is_tor` | +| 9 | `$asn` | `asn` | +| 10| `$ipng_source_tag`| `source_tag` | +| 11| `$server_addr` | *(parsed and discarded)* | +| 12| `$scheme` | *(parsed and discarded)* | + +Compared to the file format: the version tag is added, `$msec` is dropped, and three fields +are appended — `$ipng_source_tag` (propagated into the data model), `$server_addr` and +`$scheme` (reserved for future use). + +**Unknown `$is_tor` / `$asn`: emit `0`.** Same convention as the file format — operators +without TOR or GeoIP data can emit `0` for both columns and everything works. 
A literal `0` +in `$is_tor` is `false`; a literal `0` in `$asn` is ASN `0`, filterable at query time. + +All 13 fields are required for v1 — malformed packets (wrong version, wrong field count, bad +IP) are silently dropped and counted via `logtail_udp_packets_received_total` minus +`logtail_udp_loglines_success_total`. Both paths (file + UDP) can feed the same collector +simultaneously; they converge on the same aggregation pipeline. --- ## Collector -Runs on each nginx machine. Tails log files, maintains in-memory top-K counters across six time +Runs on each nginx machine. Ingests logs from files (via `fsnotify`) and/or UDP datagrams +(from `nginx-ipng-stats-plugin`), maintains in-memory top-K counters across six time windows, and exposes a gRPC interface for the aggregator (and directly for the CLI). ### Flags -| Flag | Default | Description | -|-------------------|--------------|-----------------------------------------------------------| -| `--listen` | `:9090` | gRPC listen address | -| `--prom-listen` | `:9100` | Prometheus metrics address; empty string to disable | -| `--logs` | — | Comma-separated log file paths or glob patterns | -| `--logs-file` | — | File containing one log path/glob per line | -| `--source` | hostname | Name for this collector in query responses | -| `--v4prefix` | `24` | IPv4 prefix length for client bucketing (e.g. 
/24 → /23) | -| `--v6prefix` | `48` | IPv6 prefix length for client bucketing | -| `--scan-interval` | `10s` | How often to rescan glob patterns for new/removed files | +| Flag | Default | Description | +|-------------------|---------------|-------------------------------------------------------------------| +| `--listen` | `:9090` | gRPC listen address | +| `--prom-listen` | `:9100` | Prometheus metrics address; empty string to disable | +| `--logs` | — | Comma-separated log file paths or glob patterns | +| `--logs-file` | — | File containing one log path/glob per line | +| `--source` | hostname | Name for this collector in query responses | +| `--v4prefix` | `24` | IPv4 prefix length for client bucketing | +| `--v6prefix` | `48` | IPv6 prefix length for client bucketing | +| `--scan-interval` | `10s` | How often to rescan glob patterns for new/removed files | +| `--logtail-port` | `0` (off) | UDP port receiving `ipng_stats_logtail` datagrams | +| `--logtail-bind` | `127.0.0.1` | UDP bind address | +| `--version` | — | Print version, commit, build date and exit | -At least one of `--logs` or `--logs-file` is required. +At least one of `--logs`, `--logs-file`, or `--logtail-port > 0` is required; otherwise the +collector refuses to start. ### Examples ```bash +# UDP-only (nginx-ipng-stats-plugin feed) +./collector --logtail-port 9514 + # Single file ./collector --logs /var/log/nginx/access.log # Multiple files via glob (one inotify instance regardless of count) ./collector --logs "/var/log/nginx/*/access.log" +# Files and UDP at the same time +./collector --logs "/var/log/nginx/*.log" --logtail-port 9514 + # Many files via a config file ./collector --logs-file /etc/nginx-logtail/logs.conf @@ -129,30 +303,30 @@ the new file appears. No restart or SIGHUP required. The collector exposes a Prometheus-compatible `/metrics` endpoint on `--prom-listen` (default `:9100`). Set `--prom-listen ""` to disable it entirely. 
-Three metrics are exported: +**Per-host series:** -**`nginx_http_requests_total`** — counter, labeled `{host, method, status}`: -``` -nginx_http_requests_total{host="example.com",method="GET",status="200"} 18432 -nginx_http_requests_total{host="example.com",method="POST",status="201"} 304 -nginx_http_requests_total{host="api.example.com",method="GET",status="429"} 57 -``` +- `nginx_http_requests_total{host, method, status}` — counter. Map capped at 250 000 distinct + label sets; new entries beyond the cap are dropped until the map is rolled over. +- `nginx_http_response_body_bytes_{bucket,count,sum}{host, le}` — histogram of + `$body_bytes_sent`. Buckets (bytes): `256, 1024, 4096, 16384, 65536, 262144, 1048576, +Inf`. +- `nginx_http_request_duration_seconds_{bucket,count,sum}{host, le}` — histogram of + `$request_time`. Buckets (seconds): `0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, + 10, +Inf`. Not split by `source_tag` (duration histogram stays per-host to avoid cardinality + blow-up). -**`nginx_http_response_body_bytes`** — histogram, labeled `{host}`. Observes the -`$body_bytes_sent` value for every request. Bucket upper bounds (bytes): -`256, 1024, 4096, 16384, 65536, 262144, 1048576, +Inf`. +**Per-`source_tag` roll-ups** (parallel series, not a cross-product with `host`): -**`nginx_http_request_duration_seconds`** — histogram, labeled `{host}`. Observes the -`$request_time` value for every request. Bucket upper bounds (seconds): -`0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, +Inf`. +- `nginx_http_requests_by_source_total{source_tag}` — counter. +- `nginx_http_response_body_bytes_by_source_{bucket,count,sum}{source_tag, le}` — histogram. -Body and request-time histograms use only the `host` label (not method/status) to keep -cardinality bounded — the label sets stay proportional to the number of virtual hosts, not -the number of unique method × status combinations. 
+**UDP ingest counters** — lets operators distinguish parse failures from back-pressure drops: -The counter map is capped at 100 000 distinct `{host, method, status}` tuples. Entries beyond -the cap are silently dropped for the current scrape interval, so memory is bounded regardless -of traffic patterns. +- `logtail_udp_packets_received_total` — datagrams read off the socket. +- `logtail_udp_loglines_success_total` — parsed OK. +- `logtail_udp_loglines_consumed_total` — forwarded to the store (not dropped). + +`received - success` is the parse-failure rate; `success - consumed` is the back-pressure +drop rate. Alert on either being non-zero. **Prometheus scrape config:** @@ -221,25 +395,22 @@ Data is served from two tiered ring buffers: History is lost on restart — the collector resumes tailing immediately but all ring buffers start empty. The fine ring fills in 1 hour; the coarse ring fills in 24 hours. -### Systemd unit example +### Running under systemd -```ini -[Unit] -Description=nginx-logtail collector -After=network.target +The Debian package ships `nginx-logtail-collector.service` ready to run under the `_logtail` +system user with `Group=www-data` (for log-file access). Every flag comes from +`/etc/default/nginx-logtail`. To operate it: -[Service] -ExecStart=/usr/local/bin/collector \ - --logs-file /etc/nginx-logtail/logs.conf \ - --listen :9090 \ - --source %H -Restart=on-failure -RestartSec=5 - -[Install] -WantedBy=multi-user.target +```bash +sudo $EDITOR /etc/default/nginx-logtail # set COLLECTOR_LOGS / COLLECTOR_LOGTAIL_PORT +sudo systemctl enable --now nginx-logtail-collector.service +sudo systemctl status nginx-logtail-collector.service +sudo journalctl -u nginx-logtail-collector.service -f ``` +If you run from source without the package, compose a unit from the packaged template at +`debian/nginx-logtail-collector.service`. + --- ## Aggregator @@ -326,13 +497,13 @@ the selected dimension and time window. 
**Window tabs** — switch between `1m / 5m / 15m / 60m / 6h / 24h`. Only the window changes; all active filters are preserved. -**Dimension tabs** — switch between grouping by `website / asn / prefix / status / uri`. +**Dimension tabs** — switch between grouping by `website / asn / prefix / status / uri / source`. **Drilldown** — click any table row to add that value as a filter and advance to the next dimension in the hierarchy: ``` -website → client prefix → request URI → HTTP status → ASN → website (cycles) +website → client prefix → request URI → HTTP status → ASN → source_tag → website (cycles) ``` Example: click `example.com` in the website view to see which client prefixes are hitting it; @@ -364,6 +535,7 @@ Supported fields and operators: | `prefix` | `=` | `prefix=1.2.3.0/24` | | `is_tor` | `=` `!=` | `is_tor=1`, `is_tor!=0` | | `asn` | `=` `!=` `>` `>=` `<` `<=` | `asn=8298`, `asn>=1000` | +| `source_tag` | `=` | `source_tag=direct`, `source_tag=cdn` | `is_tor=1` and `is_tor!=0` are equivalent (TOR traffic only). `is_tor=0` and `is_tor!=1` are equivalent (non-TOR traffic only). @@ -389,8 +561,9 @@ accept RE2 regular expressions. The breadcrumb strip shows them as `website~=gou `uri~=^/api/` with the usual `×` remove link. **URL sharing** — all filter state is in the URL query string (`w`, `by`, `f_website`, -`f_prefix`, `f_uri`, `f_status`, `f_website_re`, `f_uri_re`, `f_is_tor`, `f_asn`, `n`). Copy -the URL to share an exact view with another operator, or bookmark a recurring query. +`f_prefix`, `f_uri`, `f_status`, `f_website_re`, `f_uri_re`, `f_is_tor`, `f_asn`, +`f_source_tag`, `n`). Copy the URL to share an exact view with another operator, or bookmark +a recurring query. **JSON output** — append `&raw=1` to any URL to receive the TopN result as JSON instead of HTML. 
Useful for scripting without the CLI binary: @@ -447,14 +620,15 @@ logtail-cli targets [flags] list targets known to the queried endpoint | `--uri-re` | — | Filter: RE2 regex against request URI | | `--is-tor` | — | Filter: `1` or `!=0` = TOR only; `0` or `!=1` = non-TOR only | | `--asn` | — | Filter: ASN expression (`12345`, `!=65000`, `>=1000`, `<64512`, …) | +| `--source-tag`| — | Filter: exact `ipng_source_tag` (e.g. `direct`, `cdn`) | ### `topn` flags -| Flag | Default | Description | -|---------------|------------|----------------------------------------------------------| -| `--n` | `10` | Number of entries | -| `--window` | `5m` | `1m` `5m` `15m` `60m` `6h` `24h` | -| `--group-by` | `website` | `website` `prefix` `uri` `status` `asn` | +| Flag | Default | Description | +|---------------|------------|-----------------------------------------------------------------------| +| `--n` | `10` | Number of entries | +| `--window` | `5m` | `1m` `5m` `15m` `60m` `6h` `24h` | +| `--group-by` | `website` | `website` `prefix` `uri` `status` `asn` `source_tag` | ### `trend` flags diff --git a/internal/version/version.go b/internal/version/version.go new file mode 100644 index 0000000..1eb1c14 --- /dev/null +++ b/internal/version/version.go @@ -0,0 +1,16 @@ +// Package version exposes the build-time version metadata injected via +// -ldflags -X. The defaults apply to unversioned builds (e.g. plain `go run`). +package version + +import "fmt" + +var ( + Version = "0.9.1" + Commit = "unknown" + Date = "unknown" +) + +// String returns "Version (commit C, built D)". +func String() string { + return fmt.Sprintf("%s (commit %s, built %s)", Version, Commit, Date) +}