Files
nginx-logtail/cmd/collector/parser.go

144 lines
3.7 KiB
Go

package main
import (
"fmt"
"net"
"strconv"
"strings"
)
// LogRecord holds the dimensions extracted from a single nginx log line.
// It is produced by ParseLine (on-disk log files) and ParseUDPLine (UDP
// listener); both fill every field.
type LogRecord struct {
	// Website is the nginx $host value, copied verbatim from the line.
	Website string
	// ClientPrefix is $remote_addr masked down to a CIDR prefix by
	// truncateIP, e.g. "1.2.3.0/24".
	ClientPrefix string
	// URI is $request_uri with everything from the first '?' removed.
	URI string
	// Status is $status, kept as the raw string found in the line.
	Status string
	// IsTor is true only when the $is_tor field is exactly "1".
	IsTor bool
	// ASN is $asn parsed as a 32-bit integer; 0 when the field is absent
	// or does not parse.
	ASN int32
	// Method is $request_method, copied verbatim from the line.
	Method string
	// BodyBytesSent is $body_bytes_sent; 0 when the field is not a valid
	// integer.
	BodyBytesSent int64
	// RequestTime is $request_time parsed as a float; 0 when the field is
	// not a valid number.
	RequestTime float64
	// SourceTag is fileSourceTag ("direct") for records from ParseLine and
	// the $ipng_source_tag field for records from ParseUDPLine.
	SourceTag string
}
// fileSourceTag is the SourceTag assigned to records read from on-disk log
// files, which pre-date the tag concept. Mirrors nginx's fallback label.
// ParseLine stamps it on every record it returns; ParseUDPLine takes the tag
// from the packet instead.
const fileSourceTag = "direct"
// ParseLine parses a tab-separated logtail log line from a file:
//
//	$host \t $remote_addr \t $msec \t $request_method \t $request_uri \t $status \t $body_bytes_sent \t $request_time \t $is_tor \t $asn
//
// The is_tor (field 9) and asn (field 10) fields are optional for backward
// compatibility with older log files that omit them; they default to false/0
// when absent. An asn field that does not parse as a 32-bit integer also
// yields 0. Any fields after the tenth are ignored. SourceTag is always set
// to "direct" (file origin has no tag).
//
// v4bits and v6bits are the prefix lengths applied to $remote_addr for IPv4
// and IPv6 clients respectively.
//
// Returns false for lines with fewer than 8 fields or whose $remote_addr
// cannot be turned into a prefix by truncateIP.
func ParseLine(line string, v4bits, v6bits int) (LogRecord, bool) {
	// Split into at most 11 pieces rather than 10: this keeps field 10 ($asn)
	// isolated even when the line carries extra trailing fields. With a limit
	// of 10 the overflow would be glued onto the asn field and make it
	// unparseable, silently zeroing the ASN.
	fields := strings.SplitN(line, "\t", 11)
	if len(fields) < 8 {
		return LogRecord{}, false
	}
	prefix, ok := truncateIP(fields[1], v4bits, v6bits)
	if !ok {
		return LogRecord{}, false
	}
	isTor := len(fields) >= 9 && fields[8] == "1"
	var asn int32
	if len(fields) >= 10 {
		// Best effort: an unparseable asn leaves the zero default in place.
		if n, err := strconv.ParseInt(fields[9], 10, 32); err == nil {
			asn = int32(n)
		}
	}
	return LogRecord{
		Website:       fields[0],
		ClientPrefix:  prefix,
		URI:           stripQuery(fields[4]),
		Status:        fields[5],
		IsTor:         isTor,
		ASN:           asn,
		Method:        fields[3],
		BodyBytesSent: parseInt(fields[6]),
		RequestTime:   parseFloat(fields[7]),
		SourceTag:     fileSourceTag,
	}, true
}
// ParseUDPLine decodes one tab-separated logtail record received on the UDP
// listener. The wire layout is:
//
//	$host \t $remote_addr \t $request_method \t $request_uri \t $status \t
//	$body_bytes_sent \t $request_time \t $is_tor \t $asn \t
//	$ipng_source_tag \t $server_addr \t $scheme
//
// Exactly 12 fields must be present. server_addr and scheme are part of the
// layout but are not copied into the result. An asn field that does not parse
// as a 32-bit integer yields ASN 0. The second return value is false when the
// field count is wrong or $remote_addr cannot be truncated to a prefix.
func ParseUDPLine(line string, v4bits, v6bits int) (LogRecord, bool) {
	// Positions of each field within the packet.
	const (
		idxHost = iota
		idxRemoteAddr
		idxMethod
		idxURI
		idxStatus
		idxBodyBytes
		idxRequestTime
		idxIsTor
		idxASN
		idxSourceTag
		idxServerAddr // present on the wire, not propagated
		idxScheme     // present on the wire, not propagated
		udpFieldCount
	)

	parts := strings.Split(line, "\t")
	if len(parts) != udpFieldCount {
		return LogRecord{}, false
	}

	clientPrefix, valid := truncateIP(parts[idxRemoteAddr], v4bits, v6bits)
	if !valid {
		return LogRecord{}, false
	}

	rec := LogRecord{
		Website:       parts[idxHost],
		ClientPrefix:  clientPrefix,
		URI:           stripQuery(parts[idxURI]),
		Status:        parts[idxStatus],
		IsTor:         parts[idxIsTor] == "1",
		Method:        parts[idxMethod],
		BodyBytesSent: parseInt(parts[idxBodyBytes]),
		RequestTime:   parseFloat(parts[idxRequestTime]),
		SourceTag:     parts[idxSourceTag],
	}
	// ASN stays at its zero value unless the field is a valid 32-bit integer.
	if parsed, err := strconv.ParseInt(parts[idxASN], 10, 32); err == nil {
		rec.ASN = int32(parsed)
	}
	return rec, true
}
// stripQuery returns uri without its query string: everything from the first
// '?' onward is dropped. A uri containing no '?' is returned unchanged.
func stripQuery(uri string) string {
	queryStart := strings.IndexByte(uri, '?')
	if queryStart < 0 {
		return uri
	}
	return uri[:queryStart]
}
// parseInt converts a base-10 string to int64 on a best-effort basis. The
// conversion error is deliberately discarded and whatever strconv.ParseInt
// produced is returned as-is: 0 for input that is not a valid integer, and
// the nearest int64 bound for input that is out of range.
func parseInt(s string) int64 {
	value, err := strconv.ParseInt(s, 10, 64)
	_ = err // best effort: callers treat malformed numbers as the returned fallback
	return value
}
// parseFloat converts a string to float64 on a best-effort basis. The
// conversion error is deliberately discarded and whatever
// strconv.ParseFloat produced is returned as-is; input that is not a valid
// number yields 0.
func parseFloat(s string) float64 {
	value, err := strconv.ParseFloat(s, 64)
	_ = err // best effort: callers treat malformed numbers as the returned fallback
	return value
}
// truncateIP masks addr to the given prefix length depending on IP version:
// v4bits is applied when addr is an IPv4 address (including the IPv4-mapped
// IPv6 form, which net.IP.To4 also recognises) and v6bits otherwise.
//
// Returns the CIDR string (e.g. "1.2.3.0/24") and true on success. Returns
// "" and false when addr is not a valid IP address, or when the selected
// prefix length is outside the valid range for that address family
// (0-32 for IPv4, 0-128 for IPv6).
func truncateIP(addr string, v4bits, v6bits int) (string, bool) {
	ip := net.ParseIP(addr)
	if ip == nil {
		return "", false
	}
	var bits int
	if v4 := ip.To4(); v4 != nil {
		ip = v4
		bits = v4bits
	} else {
		ip = ip.To16()
		bits = v6bits
	}
	// CIDRMask yields nil for an out-of-range prefix length; indexing into
	// that nil mask would panic, so report failure instead.
	mask := net.CIDRMask(bits, len(ip)*8)
	if mask == nil {
		return "", false
	}
	// ip and mask have the same length here, so Mask is a plain byte-wise AND
	// into a fresh slice.
	masked := ip.Mask(mask)
	return fmt.Sprintf("%s/%d", masked.String(), bits), true
}