6647f95be4
Wire-format and metric overhaul. Both file and UDP ingest now share one
versioned ParseLine that dispatches on the v<N>\t prefix; v1 stays
unchanged, v2 adds $bytes_sent (replacing $body_bytes_sent),
$request_length, $upstream_response_time, and $upstream_status. File
ingest gains the same versioning, and the legacy positional file format
is removed (no live deployments).
Prometheus exposition is rewritten:
- nginx_http_bytes_sent and nginx_http_request_duration_seconds gain
a source_tag label.
- nginx_http_requests_by_source_total gains status_class.
- New v2-only metrics: nginx_http_request_bytes,
nginx_http_upstream_duration_seconds,
nginx_http_upstream_requests_total{status_class}.
- Dropped nginx_http_response_body_bytes_by_source (subsumed by the
dual-labeled bytes_sent metric).
Adds 'make fixstyle' (gofmt -w) and clears all golangci-lint findings
across the repo (errcheck, S1001, ST1005, unused).
Docs in design.md FR-2/FR-8 and user-guide.md are rewritten to present
v2 as the recommended log format.
189 lines
5.2 KiB
Go
189 lines
5.2 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"net"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// LogRecord is the parsed form of a single nginx log line: the dimensions and
// measurements extracted by the versioned payload parsers.
//
// BytesSent is filled from $body_bytes_sent in v1 records and from $bytes_sent
// in v2 records. The v2 value includes header overhead, so operators see a
// small step up when emitters move to v2.
//
// RequestLength, UpstreamResponseTime, UpstreamStatus and HasUpstream are
// v2-only. v1 records leave them at their zero values, so HasUpstream is
// always false for v1.
type LogRecord struct {
	// Website is $host; ClientPrefix is $remote_addr masked to a CIDR prefix;
	// URI is $request_uri without its query string; Status is $status.
	Website, ClientPrefix, URI, Status string

	// IsTor is true when the $is_tor field is exactly "1".
	IsTor bool

	// ASN is the $asn field, or 0 when it does not parse as a 32-bit integer.
	ASN int32

	// Method is $request_method.
	Method string

	// BytesSent is described in the type comment; RequestLength is
	// $request_length (v2-only).
	BytesSent, RequestLength int64

	// RequestTime is $request_time. UpstreamResponseTime is taken from
	// $upstream_response_time (v2-only) and is set only when HasUpstream is
	// true.
	RequestTime, UpstreamResponseTime float64

	// UpstreamStatus is taken from $upstream_status (v2-only) and is set only
	// when HasUpstream is true.
	UpstreamStatus string

	// HasUpstream reports whether an upstream response time was present and
	// parsed successfully (v2-only).
	HasUpstream bool

	// SourceTag is $ipng_source_tag.
	SourceTag string
}
|
|
|
|
// ParseLine parses a versioned nginx-logtail line. Both file ingest and UDP
|
|
// ingest funnel through here. Every line MUST start with "v<N>\t"; unknown or
|
|
// missing versions return false so operators can ship a parser update before
|
|
// the emitter switches.
|
|
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
|
|
i := strings.IndexByte(line, '\t')
|
|
if i < 0 {
|
|
return LogRecord{}, false
|
|
}
|
|
switch line[:i] {
|
|
case "v1":
|
|
return parseV1(line[i+1:], v4bits, v6bits)
|
|
case "v2":
|
|
return parseV2(line[i+1:], v4bits, v6bits)
|
|
default:
|
|
return LogRecord{}, false
|
|
}
|
|
}
|
|
|
|
// parseV1 parses the v1 payload (12 tab-separated fields):
|
|
//
|
|
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
|
|
// $body_bytes_sent \t $request_time \t $is_tor \t $asn \t
|
|
// $ipng_source_tag \t $server_addr \t $scheme
|
|
//
|
|
// $server_addr and $scheme are parsed but discarded.
|
|
func parseV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
|
fields := strings.Split(payload, "\t")
|
|
if len(fields) != 12 {
|
|
return LogRecord{}, false
|
|
}
|
|
prefix, ok := truncateIP(fields[1], v4bits, v6bits)
|
|
if !ok {
|
|
return LogRecord{}, false
|
|
}
|
|
var asn int32
|
|
if n, err := strconv.ParseInt(fields[8], 10, 32); err == nil {
|
|
asn = int32(n)
|
|
}
|
|
return LogRecord{
|
|
Website: fields[0],
|
|
ClientPrefix: prefix,
|
|
URI: stripQuery(fields[3]),
|
|
Status: fields[4],
|
|
IsTor: fields[7] == "1",
|
|
ASN: asn,
|
|
Method: fields[2],
|
|
BytesSent: parseInt(fields[5]),
|
|
RequestTime: parseFloat(fields[6]),
|
|
SourceTag: fields[9],
|
|
}, true
|
|
}
|
|
|
|
// parseV2 parses the v2 payload (15 tab-separated fields):
|
|
//
|
|
// $host \t $remote_addr \t $request_method \t $request_uri \t $status \t
|
|
// $bytes_sent \t $request_length \t $request_time \t
|
|
// $upstream_response_time \t $upstream_status \t
|
|
// $is_tor \t $asn \t $ipng_source_tag \t $server_addr \t $scheme
|
|
//
|
|
// $upstream_response_time and $upstream_status are "-" (or empty) when nginx
|
|
// served the response directly — HasUpstream is left false in that case.
|
|
// When nginx retried across multiple upstreams the fields are comma-separated;
|
|
// the parser keeps the last entry, since that's the upstream that actually
|
|
// served the response. $server_addr and $scheme are parsed but discarded.
|
|
func parseV2(payload string, v4bits, v6bits int) (LogRecord, bool) {
|
|
fields := strings.Split(payload, "\t")
|
|
if len(fields) != 15 {
|
|
return LogRecord{}, false
|
|
}
|
|
prefix, ok := truncateIP(fields[1], v4bits, v6bits)
|
|
if !ok {
|
|
return LogRecord{}, false
|
|
}
|
|
var asn int32
|
|
if n, err := strconv.ParseInt(fields[11], 10, 32); err == nil {
|
|
asn = int32(n)
|
|
}
|
|
r := LogRecord{
|
|
Website: fields[0],
|
|
ClientPrefix: prefix,
|
|
URI: stripQuery(fields[3]),
|
|
Status: fields[4],
|
|
IsTor: fields[10] == "1",
|
|
ASN: asn,
|
|
Method: fields[2],
|
|
BytesSent: parseInt(fields[5]),
|
|
RequestLength: parseInt(fields[6]),
|
|
RequestTime: parseFloat(fields[7]),
|
|
SourceTag: fields[12],
|
|
}
|
|
if fields[8] != "-" && fields[8] != "" {
|
|
timeStr := lastCommaPart(fields[8])
|
|
statusStr := lastCommaPart(fields[9])
|
|
if t, err := strconv.ParseFloat(timeStr, 64); err == nil {
|
|
r.UpstreamResponseTime = t
|
|
r.UpstreamStatus = statusStr
|
|
r.HasUpstream = true
|
|
}
|
|
}
|
|
return r, true
|
|
}
|
|
|
|
// lastCommaPart returns the final entry of an nginx multi-valued upstream
// variable such as $upstream_response_time or $upstream_status.
//
// nginx joins the values of retried upstreams with ", " and, when an internal
// redirect moves the request from one server group to another, joins the
// groups with " : " (for example "0.010, 0.020 : 0.030"). The entry after the
// last separator of either kind is returned. Values containing neither
// separator, including the empty string, pass through unchanged.
func lastCommaPart(s string) string {
	if i := strings.LastIndex(s, ", "); i >= 0 {
		s = s[i+2:]
	}
	// What remains can only contain a group separator that came after the
	// last comma, so stripping it second still yields the final entry.
	if i := strings.LastIndex(s, " : "); i >= 0 {
		s = s[i+3:]
	}
	return s
}
|
|
|
|
// stripQuery returns uri up to, but not including, its first '?'. A uri
// without a '?' is returned unchanged.
func stripQuery(uri string) string {
	mark := strings.IndexByte(uri, '?')
	if mark == -1 {
		return uri
	}
	return uri[:mark]
}
|
|
|
|
// parseInt converts a base-10 string to int64 on a best-effort basis. The
// conversion error is deliberately dropped and whatever value
// strconv.ParseInt produced is returned as is; for text that is not a number
// (such as "" or "-") that value is 0.
func parseInt(s string) int64 {
	value, err := strconv.ParseInt(s, 10, 64)
	_ = err // best effort: a bad counter must not reject the whole log line
	return value
}
|
|
|
|
// parseFloat converts a decimal string to float64 on a best-effort basis. The
// conversion error is deliberately dropped and whatever value
// strconv.ParseFloat produced is returned as is; for text that is not a number
// (such as "" or "-") that value is 0.
func parseFloat(s string) float64 {
	value, err := strconv.ParseFloat(s, 64)
	_ = err // best effort: a bad timing must not reject the whole log line
	return value
}
|
|
|
|
// truncateIP masks addr down to a network prefix and renders it in CIDR
// notation, e.g. "1.2.3.0/24".
//
// addr is parsed with net.ParseIP. Addresses that have a 4-byte form are
// masked to v4bits; all others are masked to v6bits.
//
// The second result is false, and the string empty, when addr is not a valid
// IP address or when the applicable prefix length is outside the range the
// address family allows (0-32 for IPv4, 0-128 for IPv6).
func truncateIP(addr string, v4bits, v6bits int) (string, bool) {
	ip := net.ParseIP(addr)
	if ip == nil {
		return "", false
	}

	var bits int
	if v4 := ip.To4(); v4 != nil {
		ip, bits = v4, v4bits
	} else {
		ip, bits = ip.To16(), v6bits
	}

	// CIDRMask returns nil for a negative or over-long prefix length. Reject
	// it here: indexing or applying a nil mask would otherwise panic or
	// produce a meaningless prefix.
	mask := net.CIDRMask(bits, len(ip)*8)
	if mask == nil {
		return "", false
	}

	return fmt.Sprintf("%s/%d", ip.Mask(mask).String(), bits), true
}
|