From 684400ff9e843ae00dd5dc55aa24418fc0631e34 Mon Sep 17 00:00:00 2001
From: Pim van Pelt
Date: Sat, 14 Jan 2023 11:12:06 +0000
Subject: [PATCH] Reduce logging on AgentX connections

Previous logging was very noisy whenever the agent's connection to snmpd
dropped:

  [ERROR ] agentx.network - run : Empty PDU, connection closed!
  [INFO ] agentx.network - disconnect : Disconnecting from localhost:705
  [ERROR ] agentx.agent - run : An exception occurred: Empty PDU, disconnecting
  [ERROR ] agentx.agent - run : Reconnecting
  [INFO ] agentx.agent - run : Opening AgentX connection
  [INFO ] agentx.network - connect : Connecting to localhost:705
  [ERROR ] agentx.network - connect : Failed to connect to localhost:705
  [ERROR ] agentx.agent - run : An exception occurred: Not connected
  [ERROR ] agentx.agent - run : Reconnecting
  [INFO ] agentx.agent - run : Opening AgentX connection
  [INFO ] agentx.network - connect : Connecting to localhost:705
  [ERROR ] agentx.network - connect : Failed to connect to localhost:705
  [ERROR ] agentx.agent - run : An exception occurred: Not connected
  [ERROR ] agentx.agent - run : Reconnecting

Also, reconnects were attempted every 0.1s, but field research shows that
snmpd, if it restarts, takes ~3-5 seconds to come back (note: this is also
due to a systemd delay in restarting it upon failures). Hammering the
connection is not useful.

This change refactors the logging to avoid redundant messages:
- Sleep 1s between attempts (reducing the reconnect rate by 10x)
- Either print 'Connected to' or 'Failed to connect to', not both.
- Remove the superfluous 'Reconnecting' message --- agentx/agent.py | 5 ++--- agentx/network.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/agentx/agent.py b/agentx/agent.py index 706c6dc..cd8233d 100644 --- a/agentx/agent.py +++ b/agentx/agent.py @@ -71,10 +71,9 @@ class Agent(object): try: self._net.run() except Exception as e: - self.logger.error("An exception occurred: %s" % e) - self.logger.error("Reconnecting") + self.logger.error("Disconnecting due to exception: %s" % e) self._net.disconnect() - time.sleep(0.1) + time.sleep(1) def stop(self): self.logger.debug("Stopping") diff --git a/agentx/network.py b/agentx/network.py index 0aff3b5..cda3f1f 100644 --- a/agentx/network.py +++ b/agentx/network.py @@ -44,7 +44,6 @@ class Network: return try: - logger.info("Connecting to %s" % self._server_address) if self._server_address.startswith("/"): self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) self.socket.connect(self._server_address) @@ -55,9 +54,10 @@ class Network: self.socket.connect((host, int(port))) self.socket.settimeout(self._timeout) self._connected = True + logger.info("Connected to %s" % self._server_address) except socket.error: - logger.error("Failed to connect to %s" % self._server_address) self._connected = False + logger.error("Failed to connect to %s" % self._server_address) def disconnect(self): if not self._connected: