package main

import (
	"fmt"
	"net"
	"strconv"
	"strings"
)

// LogRecord holds the dimensions extracted from a single nginx log line.
//
// BytesSent carries $body_bytes_sent for v1 records and $bytes_sent for v2
// records — operators see a small step up when emitters move to v2 because v2
// includes header overhead.
//
// RequestLength, UpstreamResponseTime, UpstreamStatus, HasUpstream are v2-only.
// In v1 records HasUpstream is always false and the related fields are zero.
type LogRecord struct {
	Website              string  // $host
	ClientPrefix         string  // $remote_addr masked to a CIDR prefix, e.g. "1.2.3.0/24"
	URI                  string  // $request_uri with everything from the first '?' removed
	Status               string  // $status, kept verbatim
	IsTor                bool    // true only when $is_tor is exactly "1"
	ASN                  int32   // $asn; 0 when the field is not a valid 32-bit integer
	Method               string  // $request_method
	BytesSent            int64   // $body_bytes_sent (v1) or $bytes_sent (v2); 0 when unparsable
	RequestLength        int64   // $request_length (v2 only); 0 when unparsable
	RequestTime          float64 // $request_time; 0 when unparsable
	UpstreamResponseTime float64 // last entry of $upstream_response_time (v2 only)
	UpstreamStatus       string  // last entry of $upstream_status (v2 only)
	HasUpstream          bool    // true only when an upstream response time was parsed
	SourceTag            string  // $ipng_source_tag
}

// ParseLine parses a versioned nginx-logtail line. Both file ingest and UDP
// ingest funnel through here. Every line MUST start with a version tag
// followed by a tab ("v1\t" or "v2\t"); unknown or missing versions return
// false so operators can ship a parser update before the emitter switches.
//
// v4bits and v6bits are the prefix lengths the client address is truncated to
// for IPv4 and IPv6 respectively. A line whose client address cannot be
// parsed, or for which the applicable prefix length is out of range, is
// rejected (false) rather than causing a panic.
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
	i := strings.IndexByte(line, '\t')
	if i < 0 {
		return LogRecord{}, false
	}
	switch line[:i] {
	case "v1":
		return parseV1(line[i+1:], v4bits, v6bits)
	case "v2":
		return parseV2(line[i+1:], v4bits, v6bits)
	default:
		return LogRecord{}, false
	}
}

// parseV1 parses the v1 payload (12 tab-separated fields):
//
//	$host \t $remote_addr \t $request_method \t $request_uri \t $status \t
//	$body_bytes_sent \t $request_time \t $is_tor \t $asn \t
//	$ipng_source_tag \t $server_addr \t $scheme
//
// $server_addr and $scheme are parsed but discarded. It returns false when the
// field count is not exactly 12 or when $remote_addr cannot be truncated.
func parseV1(payload string, v4bits, v6bits int) (LogRecord, bool) {
	fields := strings.Split(payload, "\t")
	if len(fields) != 12 {
		return LogRecord{}, false
	}
	prefix, ok := truncateIP(fields[1], v4bits, v6bits)
	if !ok {
		return LogRecord{}, false
	}
	// Best effort: a missing or malformed ASN leaves the field at 0 instead of
	// rejecting the whole record.
	var asn int32
	if n, err := strconv.ParseInt(fields[8], 10, 32); err == nil {
		asn = int32(n)
	}
	return LogRecord{
		Website:      fields[0],
		ClientPrefix: prefix,
		URI:          stripQuery(fields[3]),
		Status:       fields[4],
		IsTor:        fields[7] == "1",
		ASN:          asn,
		Method:       fields[2],
		BytesSent:    parseInt(fields[5]),
		RequestTime:  parseFloat(fields[6]),
		SourceTag:    fields[9],
	}, true
}

// parseV2 parses the v2 payload (15 tab-separated fields):
//
//	$host \t $remote_addr \t $request_method \t $request_uri \t $status \t
//	$bytes_sent \t $request_length \t $request_time \t
//	$upstream_response_time \t $upstream_status \t
//	$is_tor \t $asn \t $ipng_source_tag \t $server_addr \t $scheme
//
// $upstream_response_time and $upstream_status are "-" (or empty) when nginx
// served the response directly — HasUpstream is left false in that case.
// When nginx contacted several upstreams the fields hold a list (entries
// separated by ", ", server groups by " : "); the parser keeps the last entry,
// since that's the upstream that actually served the response. If that last
// time entry is not a number, HasUpstream stays false and the upstream fields
// stay zero. $server_addr and $scheme are parsed but discarded.
//
// It returns false when the field count is not exactly 15 or when
// $remote_addr cannot be truncated.
func parseV2(payload string, v4bits, v6bits int) (LogRecord, bool) {
	fields := strings.Split(payload, "\t")
	if len(fields) != 15 {
		return LogRecord{}, false
	}
	prefix, ok := truncateIP(fields[1], v4bits, v6bits)
	if !ok {
		return LogRecord{}, false
	}
	// Best effort: a missing or malformed ASN leaves the field at 0 instead of
	// rejecting the whole record.
	var asn int32
	if n, err := strconv.ParseInt(fields[11], 10, 32); err == nil {
		asn = int32(n)
	}
	r := LogRecord{
		Website:       fields[0],
		ClientPrefix:  prefix,
		URI:           stripQuery(fields[3]),
		Status:        fields[4],
		IsTor:         fields[10] == "1",
		ASN:           asn,
		Method:        fields[2],
		BytesSent:     parseInt(fields[5]),
		RequestLength: parseInt(fields[6]),
		RequestTime:   parseFloat(fields[7]),
		SourceTag:     fields[12],
	}
	if fields[8] != "-" && fields[8] != "" {
		timeStr := lastCommaPart(fields[8])
		statusStr := lastCommaPart(fields[9])
		// The upstream fields are only populated together, and only when the
		// response time is a real number.
		if t, err := strconv.ParseFloat(timeStr, 64); err == nil {
			r.UpstreamResponseTime = t
			r.UpstreamStatus = statusStr
			r.HasUpstream = true
		}
	}
	return r, true
}

// lastCommaPart returns the final entry of an nginx upstream list variable.
// nginx separates retried upstreams with ", " and, after an internal redirect
// to another server group, separates the groups with " : "; whichever
// separator occurs last wins. Plain values pass through unchanged.
func lastCommaPart(s string) string {
	comma := strings.LastIndex(s, ", ")
	colon := strings.LastIndex(s, " : ")
	switch {
	case colon > comma:
		return s[colon+len(" : "):]
	case comma >= 0:
		return s[comma+len(", "):]
	default:
		return s
	}
}

// stripQuery returns uri without its query string: everything from the first
// '?' onwards is dropped. A uri without '?' is returned unchanged.
func stripQuery(uri string) string {
	if i := strings.IndexByte(uri, '?'); i >= 0 {
		return uri[:i]
	}
	return uri
}

// parseInt parses s as a base-10 int64. The error is deliberately discarded:
// the value strconv.ParseInt returns alongside an error is used as-is, which
// is 0 for non-numeric input such as "-" or "".
func parseInt(s string) int64 {
	n, _ := strconv.ParseInt(s, 10, 64)
	return n
}

// parseFloat parses s as a float64. The error is deliberately discarded: the
// value strconv.ParseFloat returns alongside an error is used as-is, which is
// 0 for non-numeric input such as "-" or "".
func parseFloat(s string) float64 {
	f, _ := strconv.ParseFloat(s, 64)
	return f
}

// truncateIP masks addr to the given prefix length depending on IP version.
// Returns the CIDR string (e.g. "1.2.3.0/24") and true on success.
//
// Addresses that net.IP.To4 accepts are masked with v4bits; everything else is
// masked with v6bits. It returns false when addr is not a valid IP address or
// when the applicable prefix length is outside the valid range for that
// address family (0-32 for IPv4, 0-128 for IPv6).
func truncateIP(addr string, v4bits, v6bits int) (string, bool) {
	ip := net.ParseIP(addr)
	if ip == nil {
		return "", false
	}
	bits := v6bits
	if v4 := ip.To4(); v4 != nil {
		ip, bits = v4, v4bits
	} else {
		ip = ip.To16()
	}
	// net.CIDRMask returns nil for an out-of-range prefix length; indexing
	// that nil mask would panic, so reject the address instead.
	mask := net.CIDRMask(bits, len(ip)*8)
	if mask == nil {
		return "", false
	}
	masked := ip.Mask(mask)
	return fmt.Sprintf("%s/%d", masked.String(), bits), true
}