From 87050bcf134be3a4e6df0a7f2f0a95f5a334c858 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Thu, 16 Apr 2026 18:49:13 +0200 Subject: [PATCH] Add logtail if=$variable filtering and update log format examples - ipng_stats_logtail now accepts an optional if=$variable parameter that suppresses log lines when the variable is empty or "0", following the same semantics as nginx's access_log if=. The condition is checked before format rendering for zero overhead on filtered requests. Filtered requests are still counted by stats. - Log format examples updated to include $scheme for http/https visibility, and renamed to ipng_stats_logtail to match production. - Robot test added for the if= filter (19 tests, 19 pass). - FR-8.5 added to design doc for the if= semantics. --- docs/config-guide.md | 40 ++++++++++++++++++------ docs/design.md | 13 +++++--- docs/user-guide.md | 44 +++++++++++++++++++-------- src/ngx_http_ipng_stats_module.c | 35 +++++++++++++++++++++ tests/01-module/01-e2e.robot | 9 ++++++ tests/01-module/lab/server/nginx.conf | 13 +++++--- 6 files changed, 124 insertions(+), 30 deletions(-) diff --git a/docs/config-guide.md b/docs/config-guide.md index 0245327..7453224 100644 --- a/docs/config-guide.md +++ b/docs/config-guide.md @@ -122,20 +122,21 @@ endpoint does not inflate its own counters. See FR-5.5. -### `ipng_stats_logtail udp://: [buffer=] [flush=]` +### `ipng_stats_logtail udp://: [buffer=] [flush=] [if=<$variable>]` **Context:** `http`. **Value:** `` is the name of an existing `log_format` defined earlier in the same `http` block. The destination MUST be a `udp://host:port` URI. `buffer=` is an optional nginx size spec (default `64k`, minimum `1k`). `flush=` is an optional -nginx duration string (default `1s`, minimum `100ms`). +nginx duration string (default `1s`, minimum `100ms`). `if=<$variable>` is an optional condition variable — when set, the log line is +only emitted if the variable evaluates to a non-empty value other than `"0"`. **Default:** not set — the directive is optional. When absent, no global logtail output is written. -**Effect:** registers a global log-phase writer that fires unconditionally for every request, regardless of `server` or `location` -context. The named `log_format` is looked up from nginx's log module at configuration time; nginx's standard variable-expansion -machinery renders each line, so any variable usable in a regular `log_format` — including `$ipng_source_tag` and `$server_addr` — is -available here. +**Effect:** registers a global log-phase writer that fires unconditionally for every request (unless suppressed by `if=`), regardless +of `server` or `location` context. The named `log_format` is looked up from nginx's log module at configuration time; nginx's standard +variable-expansion machinery renders each line, so any variable usable in a regular `log_format` — including `$ipng_source_tag` and +`$server_addr` — is available here. Each worker maintains a private in-memory write buffer of `buffer=` bytes. Each buffer flush is transmitted as a single `sendto()` call on a per-worker `SOCK_DGRAM` socket that is opened at worker init and closed at worker exit. The address is resolved @@ -152,12 +153,31 @@ in every log line at no extra configuration cost. File-based access logging is intentionally not supported by this directive — use nginx's built-in `access_log` directive for that. ```nginx -log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' - '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' - '$request_time'; -ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=16k flush=1s; +log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t' + '$status\t$body_bytes_sent\t' + '$ipng_source_tag\t$server_addr\t$scheme'; +ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=16k flush=1s; ``` +#### Conditional logging with `if=` + +The `if=$variable` parameter suppresses log lines for requests where the variable is empty, not found, or `"0"`. This uses the same +semantics as nginx's built-in `access_log ... if=` and works well with `map` blocks: + +```nginx +# Suppress health checks from the logtail stream: +map $request_uri $logtail_enabled { + ~^/\.well-known/ipng/healthz 0; + default 1; +} + +ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 if=$logtail_enabled; +``` + +The `map` compiles to a hash table at configuration time; at request time it costs a single hash probe, evaluated lazily only when +the variable is read. The condition is checked before the log format is rendered, so filtered requests skip the format rendering +entirely. + **Constraints and behavior:** - `host` MUST be a literal IPv4 address. Hostnames and IPv6 addresses are not supported in v0.1. diff --git a/docs/design.md b/docs/design.md index c3b5615..13943bf 100644 --- a/docs/design.md +++ b/docs/design.md @@ -175,10 +175,10 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat **FR-8 Logtail** -- **FR-8.1** The module MUST support an `ipng_stats_logtail udp://host:port [buffer=] [flush=]` directive - at the `http` level that registers a global log-phase writer which fires unconditionally for every request, regardless of which - `server` or `location` block handled it. One directive at the `http` level is sufficient to cover all vhosts — operators MUST NOT be - required to repeat an `access_log` directive in every `server` block to achieve a single global access log. +- **FR-8.1** The module MUST support an `ipng_stats_logtail udp://host:port [buffer=] [flush=] [if=$var]` + directive at the `http` level that registers a global log-phase writer which fires for every request (unless suppressed by `if=`), + regardless of which `server` or `location` block handled it. One directive at the `http` level is sufficient to cover all vhosts — + operators MUST NOT be required to repeat an `access_log` directive in every `server` block to achieve a single global access log. - **FR-8.2** The `` argument MUST be the name of an existing nginx `log_format` defined in the same `http` block before this directive. The module MUST look up the compiled log format from nginx's log module at configuration time and use it to render each log line at request time. The module MUST NOT define its own format language; all `$variable` expansion is handled by nginx's standard @@ -194,6 +194,11 @@ Each requirement carries a unique identifier (`FR-X.Y` or `NFR-X.Y`) so that lat present are intentional; the UDP transport is designed for fire-and-forget analytics pipelines where delivery guarantees are unnecessary and zero disk I/O is preferred over persistence. File-based access logging is not supported by this directive — operators should use nginx's built-in `access_log` for that purpose. +- **FR-8.5** The directive MAY include an `if=$variable` parameter. When present, the logtail writer MUST evaluate the named nginx + variable at log phase and MUST suppress the log line if the variable is not found, is empty, or equals the string `"0"`. The + condition MUST be checked before the log format is rendered, so that filtered requests incur no formatting cost. This follows the same + semantics as nginx's built-in `access_log ... if=` and is intended for suppressing high-frequency requests (e.g. health checks) from + the logtail stream. Filtered requests MUST still be counted by the stats module — the `if=` condition affects only logtail output. ### Non-Functional Requirements diff --git a/docs/user-guide.md b/docs/user-guide.md index be42801..54a4128 100644 --- a/docs/user-guide.md +++ b/docs/user-guide.md @@ -261,13 +261,13 @@ would add unwanted I/O pressure. For file-based access logging, use nginx's buil Add a `log_format` declaration inside the `http { ... }` block, **before** the `ipng_stats_logtail` directive that references it: ```nginx -log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' - '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' - '$request_time'; +log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t' + '$status\t$body_bytes_sent\t' + '$ipng_source_tag\t$server_addr\t$scheme'; ``` -Any nginx variable is usable here, including `$ipng_source_tag` (the device attribution tag, FR-6.1) and `$server_addr` (the VIP -that received the request). +Any nginx variable is usable here, including `$ipng_source_tag` (the device attribution tag, FR-6.1), `$server_addr` (the VIP +that received the request), and `$scheme` (`http` or `https` — useful since `$server_addr` alone doesn't distinguish ports). ### Configuration @@ -275,17 +275,17 @@ that received the request). http { ipng_stats_zone ipng:4m; - log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' - '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' - '$request_time'; + log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t' + '$status\t$body_bytes_sent\t' + '$ipng_source_tag\t$server_addr\t$scheme'; - ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=16k flush=1s; + ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=16k flush=1s; server { ... } } ``` -- **`logtail`** (first argument) — the `log_format` name. +- **`ipng_stats_logtail`** (first argument) — the `log_format` name. - **`udp://127.0.0.1:9514`** — destination as a `udp://host:port` URI. `host` must be a literal IPv4 address (no hostnames, no IPv6 in v0.1). - **`buffer=16k`** — per-worker write buffer. Lines are held in memory until the buffer fills, the flush timer fires, or the worker @@ -304,6 +304,25 @@ lost datagrams are acceptable and disk I/O is not. configured buffer sizes. On routed paths, path MTU applies. - There is no acknowledgment, retry, or sequence number. If the receiver is down, the data is gone. +### Filtering with `if=` + +High-frequency requests like health checks can be suppressed from the logtail stream using the `if=$variable` parameter. Use a `map` +block to define which requests should be logged: + +```nginx +map $request_uri $logtail_enabled { + ~^/\.well-known/ipng/healthz 0; + default 1; +} + +ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=16k flush=1s if=$logtail_enabled; +``` + +Filtered requests are still counted by the stats module — only the logtail output is suppressed. The condition is checked before the +log format is rendered, so filtered requests have zero logtail overhead. Multiple conditions can be combined using nested `map` blocks. + +See [`config-guide.md`](config-guide.md#conditional-logging-with-if) for the full semantics. + **Starting a receiver** is trivial: ```bash @@ -317,10 +336,11 @@ datagram stream and processes it into structured log output. A typical received log line (with the format above, tab-separated) looks like: ``` -example.com 203.0.113.42 mg1 192.0.2.10 GET /index.html 200 4321 0.003 +example.com 203.0.113.42 GET /index.html 200 4321 mg1 192.0.2.10 https ``` -The third field (`mg1`) comes from `$ipng_source_tag` — free per-device attribution in every log line. +The `mg1` field comes from `$ipng_source_tag` and `https` from `$scheme` — free per-device attribution and protocol visibility in +every log line. ### Why this complements per-server `access_log` diff --git a/src/ngx_http_ipng_stats_module.c b/src/ngx_http_ipng_stats_module.c index 6f4ba52..14893e3 100644 --- a/src/ngx_http_ipng_stats_module.c +++ b/src/ngx_http_ipng_stats_module.c @@ -194,6 +194,7 @@ typedef struct { struct sockaddr_in logtail_udp_addr; /* destination address */ size_t logtail_buf_size; /* per-worker buffer, default 64k */ ngx_msec_t logtail_flush; /* max flush interval, default 1s */ + ngx_uint_t logtail_if_index; /* if=$var: variable index, 0=none */ } ngx_http_ipng_stats_main_conf_t; @@ -908,6 +909,27 @@ ngx_http_ipng_stats_logtail(ngx_conf_t *cf, ngx_command_t *cmd, void *conf) } continue; } + if (value[i].len > 3 + && ngx_strncmp(value[i].data, "if=", 3) == 0) + { + ngx_str_t var_name = { value[i].len - 3, value[i].data + 3 }; + if (var_name.len < 2 || var_name.data[0] != '$') { + ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, + "ipng_stats_logtail: if= requires a $variable, " + "got \"%V\"", &value[i]); + return NGX_CONF_ERROR; + } + var_name.data++; + var_name.len--; + imcf->logtail_if_index = ngx_http_get_variable_index(cf, + &var_name); + if (imcf->logtail_if_index == (ngx_uint_t) NGX_ERROR) { + return NGX_CONF_ERROR; + } + /* Shift from 0-based to 1-based so 0 means "not set". */ + imcf->logtail_if_index++; + continue; + } ngx_conf_log_error(NGX_LOG_EMERG, cf, 0, "ipng_stats_logtail: unknown parameter \"%V\"", &value[i]); return NGX_CONF_ERROR; @@ -1720,6 +1742,19 @@ ngx_http_ipng_stats_logtail_write(ngx_http_request_t *r, return; } + /* if=$variable: skip this request when the variable is empty or "0". */ + if (imcf->logtail_if_index) { + ngx_http_variable_value_t *val; + + val = ngx_http_get_indexed_variable(r, imcf->logtail_if_index - 1); + if (val == NULL || val->not_found + || val->len == 0 + || (val->len == 1 && val->data[0] == '0')) + { + return; + } + } + ops = imcf->logtail_fmt->ops->elts; nops = imcf->logtail_fmt->ops->nelts; diff --git a/tests/01-module/01-e2e.robot b/tests/01-module/01-e2e.robot index abab3b8..24b0e6e 100644 --- a/tests/01-module/01-e2e.robot +++ b/tests/01-module/01-e2e.robot @@ -146,6 +146,15 @@ UDP logtail # Tab-separated format Should Match Regexp ${output} \\t +Logtail if= filter + [Documentation] Requests to /notfound are suppressed from logtail by + ... the if=$logtail_enabled condition, but still counted. + ${output} = Docker Exec ${SERVER} cat /var/log/nginx/logtail-udp.log + Should Not Contain ${output} /notfound + # But /notfound IS in the regular access log (not filtered there). + ${access} = Docker Exec ${SERVER} cat /var/log/nginx/access.log + Should Contain ${access} /notfound + VIP in access log [Documentation] $server_addr resolves to real IPs, not 0.0.0.0. ${output} = Docker Exec ${SERVER} cat /var/log/nginx/access.log diff --git a/tests/01-module/lab/server/nginx.conf b/tests/01-module/lab/server/nginx.conf index eb7c18d..de3d84c 100644 --- a/tests/01-module/lab/server/nginx.conf +++ b/tests/01-module/lab/server/nginx.conf @@ -19,10 +19,15 @@ http { access_log /var/log/nginx/access.log tagged; # Global logtail — fires for ALL requests regardless of server block. - log_format logtail '$host\t$remote_addr\t$ipng_source_tag\t$server_addr\t' - '$request_method\t$request_uri\t$status\t$body_bytes_sent\t' - '$request_time'; - ipng_stats_logtail logtail udp://127.0.0.1:9514 buffer=4k flush=500ms; + # The if= condition suppresses /notfound from the logtail stream. + map $request_uri $logtail_enabled { + ~^/notfound 0; + default 1; + } + log_format ipng_stats_logtail '$host\t$remote_addr\t$request_method\t$request_uri\t' + '$status\t$body_bytes_sent\t' + '$ipng_source_tag\t$server_addr\t$scheme'; + ipng_stats_logtail ipng_stats_logtail udp://127.0.0.1:9514 buffer=4k flush=500ms if=$logtail_enabled; server { # Mgmt-only listener for direct traffic (tagged "direct").