RELEASE 1.0.1: v2 log format, source_tag-labeled metrics, lint cleanup

Wire-format and metric overhaul. Both file and UDP ingest now share one
versioned ParseLine that dispatches on the v<N>\t prefix; v1 stays
unchanged, v2 adds $bytes_sent (replacing $body_bytes_sent),
$request_length, $upstream_response_time, and $upstream_status. File
ingest gains the same versioning, and the legacy positional file format
is removed (no live deployments).

Prometheus exposition is rewritten:

  - nginx_http_bytes_sent and nginx_http_request_duration_seconds gain
    a source_tag label.
  - nginx_http_requests_by_source_total gains status_class.
  - New v2-only metrics: nginx_http_request_bytes,
    nginx_http_upstream_duration_seconds,
    nginx_http_upstream_requests_total{status_class}.
  - Dropped nginx_http_response_body_bytes_by_source (subsumed by the
    dual-labeled bytes_sent metric).

Adds 'make fixstyle' (gofmt -w) and clears all golangci-lint findings
across the repo (errcheck, S1001, ST1005, unused).

Docs in design.md FR-2/FR-8 and user-guide.md are rewritten to present
v2 as the recommended log format.
This commit is contained in:
2026-05-01 15:40:53 +02:00
parent d1a21a7a62
commit 6647f95be4
28 changed files with 931 additions and 724 deletions
+95 -66
View File
@@ -8,87 +8,57 @@ import (
)
// LogRecord holds the dimensions extracted from a single nginx log line.
//
// BytesSent carries $body_bytes_sent for v1 records and $bytes_sent for v2
// records — operators see a small step up when emitters move to v2 because v2
// includes header overhead.
//
// RequestLength, UpstreamResponseTime, UpstreamStatus, HasUpstream are v2-only.
// In v1 records HasUpstream is always false and the related fields are zero.
type LogRecord struct {
	Website      string // $host
	ClientPrefix string // derived from $remote_addr by truncateIP
	URI          string // $request_uri after stripQuery
	Status       string // $status, kept as text
	IsTor        bool   // true when $is_tor is exactly "1"
	ASN          int32  // $asn; stays 0 when the field does not parse as an integer
	Method       string // $request_method

	BytesSent     int64   // $body_bytes_sent (v1) or $bytes_sent (v2)
	RequestLength int64   // $request_length (v2 only)
	RequestTime   float64 // $request_time

	UpstreamResponseTime float64 // last $upstream_response_time entry (v2 only)
	UpstreamStatus       string  // last $upstream_status entry (v2 only)
	HasUpstream          bool    // true only when an upstream response time was parsed

	SourceTag string // $ipng_source_tag
}
// fileSourceTag is the SourceTag assigned to records read from on-disk log
// files, which pre-date the tag concept. Mirrors nginx's fallback label.
//
// NOTE(review): file ingest is documented as going through the versioned
// "v<N>\t" parser, whose payload carries its own source tag — confirm this
// constant is still referenced anywhere before keeping it.
const fileSourceTag = "direct"
// ParseLine parses a versioned nginx-logtail line. Both file ingest and UDP
// ingest funnel through here. Every line MUST start with "v<N>\t"; unknown or
// missing versions return false so operators can ship a parser update before
// the emitter switches.
//
// The text before the first tab selects the payload parser; everything after
// it is handed to that parser together with v4bits and v6bits, which are
// passed through untouched. The returned LogRecord is the zero value whenever
// the boolean is false.
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
	i := strings.IndexByte(line, '\t')
	if i < 0 {
		// No tab means no version prefix, so there is nothing to dispatch on.
		return LogRecord{}, false
	}
	payload := line[i+1:]
	switch line[:i] {
	case "v1":
		return parseV1(payload, v4bits, v6bits)
	case "v2":
		return parseV2(payload, v4bits, v6bits)
	default:
		return LogRecord{}, false
	}
}

// ParseUDPLine parses a line emitted by nginx-ipng-stats-plugin's
// ipng_stats_logtail directive. The wire format is "v<N>\t<payload>", which is
// exactly what ParseLine handles, so this simply forwards to it.
//
// Deprecated: call ParseLine directly.
func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) {
	return ParseLine(line, v4bits, v6bits)
}
// parseUDPLineV1 parses the v1 payload (12 tab-separated fields):
// parseV1 parses the v1 payload (12 tab-separated fields):
//
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
// $body_bytes_sent \t $request_time \t $is_tor \t $asn \t
// $ipng_source_tag \t $server_addr \t $scheme
//
// server_addr and scheme are parsed but discarded.
func parseUDPLineV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
// $server_addr and $scheme are parsed but discarded.
func parseV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
fields := strings.Split(payload, "\t")
if len(fields) != 12 {
return LogRecord{}, false
@@ -102,17 +72,76 @@ func parseUDPLineV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
asn = int32(n)
}
return LogRecord{
Website: fields[0],
ClientPrefix: prefix,
URI: stripQuery(fields[3]),
Status: fields[4],
IsTor: fields[7] == "1",
ASN: asn,
Method: fields[2],
BytesSent: parseInt(fields[5]),
RequestTime: parseFloat(fields[6]),
SourceTag: fields[9],
}, true
}
// parseV2 parses the v2 payload (15 tab-separated fields):
//
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
// $bytes_sent \t $request_length \t $request_time \t
// $upstream_response_time \t $upstream_status \t
// $is_tor \t $asn \t $ipng_source_tag \t $server_addr \t $scheme
//
// $upstream_response_time and $upstream_status are "-" (or empty) when nginx
// served the response directly — HasUpstream is left false in that case.
// When nginx retried across multiple upstreams the fields are comma-separated;
// the parser keeps the last entry, since that's the upstream that actually
// served the response. $server_addr and $scheme are parsed but discarded.
func parseV2(payload string, v4bits, v6bits int) (LogRecord, bool) {
fields := strings.Split(payload, "\t")
if len(fields) != 15 {
return LogRecord{}, false
}
prefix, ok := truncateIP(fields[1], v4bits, v6bits)
if !ok {
return LogRecord{}, false
}
var asn int32
if n, err := strconv.ParseInt(fields[11], 10, 32); err == nil {
asn = int32(n)
}
r := LogRecord{
Website: fields[0],
ClientPrefix: prefix,
URI: stripQuery(fields[3]),
Status: fields[4],
IsTor: fields[7] == "1",
IsTor: fields[10] == "1",
ASN: asn,
Method: fields[2],
BodyBytesSent: parseInt(fields[5]),
RequestTime: parseFloat(fields[6]),
SourceTag: fields[9],
}, true
BytesSent: parseInt(fields[5]),
RequestLength: parseInt(fields[6]),
RequestTime: parseFloat(fields[7]),
SourceTag: fields[12],
}
if fields[8] != "-" && fields[8] != "" {
timeStr := lastCommaPart(fields[8])
statusStr := lastCommaPart(fields[9])
if t, err := strconv.ParseFloat(timeStr, 64); err == nil {
r.UpstreamResponseTime = t
r.UpstreamStatus = statusStr
r.HasUpstream = true
}
}
return r, true
}
// lastCommaPart returns the final entry of an nginx multi-upstream value such
// as $upstream_response_time or $upstream_status.
//
// nginx joins per-attempt values with ", " when it retries inside one upstream
// group and with " : " when an internal redirect moves the request to another
// group. The entry after whichever separator ends last is the one belonging to
// the upstream that produced the response. Values containing neither separator
// pass through unchanged.
func lastCommaPart(s string) string {
	start := 0
	for _, sep := range [...]string{", ", " : "} {
		if i := strings.LastIndex(s, sep); i >= 0 && i+len(sep) > start {
			start = i + len(sep)
		}
	}
	return s[start:]
}
func stripQuery(uri string) string {