Move all heap allocation out of the slab-mutex critical section in render_prom/render_json: snapshot cardinality under a brief lock, allocate aggs/snaps/string tables outside the lock, then re-acquire only to deep-copy strings and walk the LRU into the pre-allocated buffers. A worker crash during output buffer allocation can no longer leave the shared-memory zone locked, and a corrupt cardinality count is caught by a 10k sanity cap rather than causing a runaway ngx_pcalloc. Add build-asan and tests/02-asan/: a full sanitizer-instrumented nginx + module built via apt-source, and a 2-node containerlab Robot suite that drives reload storms, concurrent scrape-during-reload, and intern-table growth, failing if AddressSanitizer or UBSan reports anything on stderr. The two Robot suites now check for their required build artifacts up front so `make robot-test` no longer rebuilds them on every invocation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
62 lines
2.7 KiB
Bash
Executable File
62 lines
2.7 KiB
Bash
Executable File
#!/bin/bash
# SPDX-License-Identifier: Apache-2.0
#
# Server container entrypoint for the ASan test suite. Installs libasan
# runtime (the sanitizer-instrumented binary was linked against host
# gcc's libasan.so.8), wires up the data-plane interface, and execs the
# ASan nginx in the foreground with stderr captured so the Robot suite
# can grep for AddressSanitizer/UBSan findings at teardown.

# Strict mode:
#   -e          abort on the first command that fails
#   -u          treat expansion of an unset variable as an error
#   pipefail    a pipeline fails if any of its stages fails
set -euo pipefail

# Install the sanitizer runtimes plus the tools this script and the test
# suite rely on (ncat, ip, curl).
#
# Only stdout is discarded: if the install fails, `set -e` terminates the
# script right here, and apt's stderr is the only clue as to why, so it
# must stay visible in the container log. DEBIAN_FRONTEND=noninteractive
# keeps debconf from trying to prompt on a terminal that does not exist.
apt-get update -qq
DEBIAN_FRONTEND=noninteractive \
  apt-get install -y -qq libasan8 libubsan1 ncat iproute2 curl > /dev/null

# Wait for containerlab to attach the data-plane veth, then configure
# its IP address.
#
# The wait is bounded so a lab that never attaches eth1 produces a clear
# failure instead of a container that hangs forever. The limit is taken
# from the optional ETH1_WAIT_SECS environment variable (whole seconds,
# default 60).
echo "Waiting for eth1 ..."
eth1_wait_secs=${ETH1_WAIT_SECS:-60}
eth1_deadline=$(( SECONDS + eth1_wait_secs ))
until ip link show eth1 > /dev/null 2>&1; do
  if (( SECONDS >= eth1_deadline )); then
    echo "eth1 did not appear within ${eth1_wait_secs}s" >&2
    exit 1
  fi
  sleep 0.2
done
ip link set eth1 up
# `replace` rather than `add`: `add` fails when the address is already
# present, which would abort the script under `set -e` if this entrypoint
# is ever re-run against an interface that was configured before.
ip addr replace 10.0.1.1/24 dev eth1

# UDP logtail listener.
#
# A background ncat bound to 127.0.0.1:9514 receives the module's
# datagrams so that its sendto() calls have a real destination. Received
# data is appended to a log file; per the original author the test does
# not assert on that file's contents (01-module already covers logtail
# semantics) -- the socket only has to exist so that ASan sees a complete
# write/flush cycle in the module.
logtail_dir=/var/log/nginx
logtail_sink="${logtail_dir}/logtail-udp.log"

mkdir -p "${logtail_dir}"
# --keep-open: keep listening after the first peer; --recv-only: never
# send anything back.
ncat --udp --listen --keep-open 127.0.0.1 9514 --recv-only >> "${logtail_sink}" &

# Sanitizer runtime options. Each list below is joined with ':' into the
# string form expected by ASAN_OPTIONS / UBSAN_OPTIONS.
#
# ASan:
#   detect_odr_violation=0   nginx intentionally duplicates symbols like
#                            ngx_module_names between the main binary and
#                            each dynamic module.
#   abort_on_error=1,
#   halt_on_error=1          fail fast so the Robot suite sees the exit
#                            status.
#   detect_leaks=0           nginx exits without running its pool
#                            destructors in many paths; leak detection is
#                            not the goal here.
#   log_path=/tmp/asan       ASan writes each finding to this prefix + pid,
#                            so the traces survive for post-run inspection
#                            even when nginx wipes its own error log on
#                            reload.
#
# NOTE(review): the sanitizer runtime documents log_path as *replacing*
# the default stderr destination, so with log_path set the reports are
# expected in /tmp/asan.<pid> and /tmp/ubsan.<pid> rather than at the
# tail of /tmp/nginx.stderr -- confirm that the Robot suite inspects
# those files too.
asan_flags=(
  detect_odr_violation=0
  abort_on_error=1
  halt_on_error=1
  detect_leaks=0
  log_path=/tmp/asan
)
ubsan_flags=(
  print_stacktrace=1
  halt_on_error=0
  log_path=/tmp/ubsan
)
# "${array[*]}" joins on the first character of IFS; IFS is changed only
# inside the command-substitution subshell.
ASAN_OPTS=$(IFS=:; printf '%s' "${asan_flags[*]}")
UBSAN_OPTS=$(IFS=:; printf '%s' "${ubsan_flags[*]}")

# Wrapper so every subsequent `docker exec ... ngxasan ...` (e.g. the
# reload signal from the Robot suite) runs nginx with the same sanitizer
# settings; `docker exec` does not carry the master's env. The option
# strings are baked into the wrapper now, while "$@" is written out
# literally so the wrapper forwards its own arguments at run time.
ngxasan_wrapper=/usr/local/bin/ngxasan
{
  printf '%s\n' '#!/bin/bash'
  printf 'export ASAN_OPTIONS="%s"\n' "${ASAN_OPTS}"
  printf 'export UBSAN_OPTIONS="%s"\n' "${UBSAN_OPTS}"
  printf '%s\n' 'exec /opt/nginx-asan/sbin/nginx -p /opt/nginx-asan -c /opt/nginx-asan/conf/nginx.conf "$@"'
} > "${ngxasan_wrapper}"
chmod +x "${ngxasan_wrapper}"

# Same settings for the nginx started directly by this entrypoint.
export ASAN_OPTIONS="${ASAN_OPTS}" UBSAN_OPTIONS="${UBSAN_OPTS}"

# Replace this shell with the ASan nginx. Its stderr is fed through tee,
# which writes a copy to /tmp/nginx.stderr (the file the Robot suite
# inspects) and passes the stream on to the container's own stderr, so
# `docker logs` sees it as well.
#
# NOTE(review): ASAN_OPTIONS in this script sets log_path=/tmp/asan;
# confirm whether sanitizer reports still reach stderr with that setting,
# or whether they only land in the /tmp/asan.<pid> files.
# NOTE(review): nothing on this command line forces foreground mode --
# presumably nginx.conf sets `daemon off;`; verify.
nginx_prefix=/opt/nginx-asan
nginx_argv=(
  "${nginx_prefix}/sbin/nginx"
  -p "${nginx_prefix}"
  -c "${nginx_prefix}/conf/nginx.conf"
)
exec "${nginx_argv[@]}" 2> >(tee /tmp/nginx.stderr >&2)