RELEASE 1.0.1: v2 log format, source_tag-labeled metrics, lint cleanup
Wire-format and metric overhaul. Both file and UDP ingest now share one
versioned ParseLine that dispatches on the v<N>\t prefix; v1 stays
unchanged, v2 adds $bytes_sent (replacing $body_bytes_sent),
$request_length, $upstream_response_time, and $upstream_status. File
ingest gains the same versioning, and the legacy positional file format
is removed (no live deployments).
Prometheus exposition is rewritten:
- nginx_http_bytes_sent and nginx_http_request_duration_seconds gain
a source_tag label.
- nginx_http_requests_by_source_total gains status_class.
- New v2-only metrics: nginx_http_request_bytes,
nginx_http_upstream_duration_seconds,
nginx_http_upstream_requests_total{status_class}.
- Dropped nginx_http_response_body_bytes_by_source (subsumed by the
dual-labeled bytes_sent metric).
Adds 'make fixstyle' (gofmt -w) and clears all golangci-lint findings
across the repo (errcheck, S1001, ST1005, unused).
Docs in design.md FR-2/FR-8 and user-guide.md are rewritten to present
v2 as the recommended log format.
This commit is contained in:
+95
-66
@@ -8,87 +8,57 @@ import (
|
||||
)
|
||||
|
||||
// LogRecord holds the dimensions extracted from a single nginx log line.
//
// BytesSent carries $body_bytes_sent for v1 records and $bytes_sent for v2
// records — operators see a small step up when emitters move to v2 because v2
// includes header overhead.
//
// RequestLength, UpstreamResponseTime, UpstreamStatus and HasUpstream are
// v2-only. In v1 records HasUpstream is always false and the related fields
// are zero.
type LogRecord struct {
	Website              string  // $host
	ClientPrefix         string  // $remote_addr after truncateIP (v4bits/v6bits)
	URI                  string  // $request_uri after stripQuery
	Status               string  // $status, kept as text
	IsTor                bool    // true when $is_tor is "1"
	ASN                  int32   // $asn; 0 when the field does not parse
	Method               string  // $request_method
	BytesSent            int64   // $body_bytes_sent (v1) or $bytes_sent (v2)
	RequestLength        int64   // $request_length; v2 only
	RequestTime          float64 // $request_time
	UpstreamResponseTime float64 // last $upstream_response_time entry; v2 only
	UpstreamStatus       string  // last $upstream_status entry; v2 only
	HasUpstream          bool    // true when an upstream time was parsed; v2 only
	SourceTag            string  // $ipng_source_tag
}
|
||||
|
||||
// fileSourceTag is the SourceTag assigned to records read from on-disk log
|
||||
// files, which pre-date the tag concept. Mirrors nginx's fallback label.
|
||||
const fileSourceTag = "direct"
|
||||
|
||||
// ParseLine parses a tab-separated logtail log line from a file:
|
||||
//
|
||||
// $host \t $remote_addr \t $msec \t $request_method \t $request_uri \t $status \t $body_bytes_sent \t $request_time \t $is_tor \t $asn
|
||||
//
|
||||
// The is_tor (field 9) and asn (field 10) fields are optional for backward
|
||||
// compatibility with older log files that omit them; they default to false/0
|
||||
// when absent. SourceTag is always set to "direct" (file origin has no tag).
|
||||
// Returns false for lines with fewer than 8 fields.
|
||||
// ParseLine parses a versioned nginx-logtail line. Both file ingest and UDP
|
||||
// ingest funnel through here. Every line MUST start with "v<N>\t"; unknown or
|
||||
// missing versions return false so operators can ship a parser update before
|
||||
// the emitter switches.
|
||||
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
fields := strings.SplitN(line, "\t", 10)
|
||||
if len(fields) < 8 {
|
||||
return LogRecord{}, false
|
||||
}
|
||||
prefix, ok := truncateIP(fields[1], v4bits, v6bits)
|
||||
if !ok {
|
||||
return LogRecord{}, false
|
||||
}
|
||||
isTor := len(fields) >= 9 && fields[8] == "1"
|
||||
var asn int32
|
||||
if len(fields) == 10 {
|
||||
if n, err := strconv.ParseInt(fields[9], 10, 32); err == nil {
|
||||
asn = int32(n)
|
||||
}
|
||||
}
|
||||
return LogRecord{
|
||||
Website: fields[0],
|
||||
ClientPrefix: prefix,
|
||||
URI: stripQuery(fields[4]),
|
||||
Status: fields[5],
|
||||
IsTor: isTor,
|
||||
ASN: asn,
|
||||
Method: fields[3],
|
||||
BodyBytesSent: parseInt(fields[6]),
|
||||
RequestTime: parseFloat(fields[7]),
|
||||
SourceTag: fileSourceTag,
|
||||
}, true
|
||||
}
|
||||
|
||||
// ParseUDPLine dispatches on the version prefix emitted by
|
||||
// nginx-ipng-stats-plugin's ipng_stats_logtail directive. The wire format is
|
||||
// "v<N>\t<payload>", where <payload> is version-specific. Unknown or missing
|
||||
// versions return false so operators can roll out a v2 parser before
|
||||
// upgrading emitters.
|
||||
func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
i := strings.IndexByte(line, '\t')
|
||||
if i < 0 {
|
||||
return LogRecord{}, false
|
||||
}
|
||||
switch line[:i] {
|
||||
case "v1":
|
||||
return parseUDPLineV1(line[i+1:], v4bits, v6bits)
|
||||
return parseV1(line[i+1:], v4bits, v6bits)
|
||||
case "v2":
|
||||
return parseV2(line[i+1:], v4bits, v6bits)
|
||||
default:
|
||||
return LogRecord{}, false
|
||||
}
|
||||
}
|
||||
|
||||
// parseUDPLineV1 parses the v1 payload (12 tab-separated fields):
|
||||
// parseV1 parses the v1 payload (12 tab-separated fields):
|
||||
//
|
||||
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
|
||||
// $body_bytes_sent \t $request_time \t $is_tor \t $asn \t
|
||||
// $ipng_source_tag \t $server_addr \t $scheme
|
||||
//
|
||||
// server_addr and scheme are parsed but discarded.
|
||||
func parseUDPLineV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
// $server_addr and $scheme are parsed but discarded.
|
||||
func parseV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
fields := strings.Split(payload, "\t")
|
||||
if len(fields) != 12 {
|
||||
return LogRecord{}, false
|
||||
@@ -102,17 +72,76 @@ func parseUDPLineV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
asn = int32(n)
|
||||
}
|
||||
return LogRecord{
|
||||
Website: fields[0],
|
||||
ClientPrefix: prefix,
|
||||
URI: stripQuery(fields[3]),
|
||||
Status: fields[4],
|
||||
IsTor: fields[7] == "1",
|
||||
ASN: asn,
|
||||
Method: fields[2],
|
||||
BytesSent: parseInt(fields[5]),
|
||||
RequestTime: parseFloat(fields[6]),
|
||||
SourceTag: fields[9],
|
||||
}, true
|
||||
}
|
||||
|
||||
// parseV2 parses the v2 payload (15 tab-separated fields):
|
||||
//
|
||||
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
|
||||
// $bytes_sent \t $request_length \t $request_time \t
|
||||
// $upstream_response_time \t $upstream_status \t
|
||||
// $is_tor \t $asn \t $ipng_source_tag \t $server_addr \t $scheme
|
||||
//
|
||||
// $upstream_response_time and $upstream_status are "-" (or empty) when nginx
|
||||
// served the response directly — HasUpstream is left false in that case.
|
||||
// When nginx retried across multiple upstreams the fields are comma-separated;
|
||||
// the parser keeps the last entry, since that's the upstream that actually
|
||||
// served the response. $server_addr and $scheme are parsed but discarded.
|
||||
func parseV2(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
||||
fields := strings.Split(payload, "\t")
|
||||
if len(fields) != 15 {
|
||||
return LogRecord{}, false
|
||||
}
|
||||
prefix, ok := truncateIP(fields[1], v4bits, v6bits)
|
||||
if !ok {
|
||||
return LogRecord{}, false
|
||||
}
|
||||
var asn int32
|
||||
if n, err := strconv.ParseInt(fields[11], 10, 32); err == nil {
|
||||
asn = int32(n)
|
||||
}
|
||||
r := LogRecord{
|
||||
Website: fields[0],
|
||||
ClientPrefix: prefix,
|
||||
URI: stripQuery(fields[3]),
|
||||
Status: fields[4],
|
||||
IsTor: fields[7] == "1",
|
||||
IsTor: fields[10] == "1",
|
||||
ASN: asn,
|
||||
Method: fields[2],
|
||||
BodyBytesSent: parseInt(fields[5]),
|
||||
RequestTime: parseFloat(fields[6]),
|
||||
SourceTag: fields[9],
|
||||
}, true
|
||||
BytesSent: parseInt(fields[5]),
|
||||
RequestLength: parseInt(fields[6]),
|
||||
RequestTime: parseFloat(fields[7]),
|
||||
SourceTag: fields[12],
|
||||
}
|
||||
if fields[8] != "-" && fields[8] != "" {
|
||||
timeStr := lastCommaPart(fields[8])
|
||||
statusStr := lastCommaPart(fields[9])
|
||||
if t, err := strconv.ParseFloat(timeStr, 64); err == nil {
|
||||
r.UpstreamResponseTime = t
|
||||
r.UpstreamStatus = statusStr
|
||||
r.HasUpstream = true
|
||||
}
|
||||
}
|
||||
return r, true
|
||||
}
|
||||
|
||||
// lastCommaPart returns the final entry of an nginx multi-valued upstream
// variable such as $upstream_response_time or $upstream_status.
//
// nginx joins the values of retried upstreams with ", " and, when an internal
// redirect (X-Accel-Redirect or error_page) moves the request to another
// server group, joins the groups with " : ". Both separators are honoured, so
// "0.1, 0.2 : 0.3" yields "0.3". Plain values pass through unchanged.
func lastCommaPart(s string) string {
	// Narrow to the last server group first, then to the last attempt in it.
	if i := strings.LastIndex(s, " : "); i >= 0 {
		s = s[i+3:]
	}
	if i := strings.LastIndex(s, ", "); i >= 0 {
		s = s[i+2:]
	}
	return s
}
|
||||
|
||||
func stripQuery(uri string) string {
|
||||
|
||||
Reference in New Issue
Block a user