From 95d96d5e61341e8aca581c616af549a53fe82177 Mon Sep 17 00:00:00 2001
From: Pim van Pelt
Date: Tue, 10 Jan 2023 11:24:44 +0100
Subject: [PATCH] bugfix: add a control_ping() before each update

If VPP were to disconnect either the Stats Segment or the API endpoint,
for example if it crashes and restarts, vpp-snmp-agent will not detect
this. In such a situation, it will hold on to the stale stats and no
longer receive interface updates.

Before each run, send a control_ping() API request, and if that were to
fail (for example with Broken Pipe, or Connection Refused), disconnect
both API and Stats (in the vpp.disconnect() call, also invalidate the
interface and LCP cache), and then fail the update. The Agent runner
will then retry once per second until the connection (and
control_ping()) succeeds.

TESTED:
- Start vpp-snmp-agent, it connects and starts up per normal.
- Exit / Kill vpp
- Upon the next update(), the control_ping() call will fail, causing the
  agent to disconnect
- The agent will now loop:
[ERROR ] agentx.agent - update : VPP API: [Errno 1] Sendall error: BrokenPipeError(32, 'Broken pipe'), retrying
[WARNING ] agentx.agent - run : Update failed, last successful update was 1673345631.7658572
[INFO ] agentx.vppapi - connect : Connecting to VPP
[ERROR ] agentx.agent - update : VPP API: Not connected, api definitions not available, retrying
- Start VPP again, when its API endpoint is ready:
[INFO ] agentx.vppapi - connect : Connecting to VPP
[INFO ] agentx.vppapi - connect : VPP version is 23.02-rc0~199-gcfaf44020
[INFO ] agentx.vppapi - connect : Enabling VPP API interface events
[DEBUG ] agentx.agent - update : VPP API: control_ping_reply(_0=24, context=12, retval=0, client_index=0, vpe_pid=705326)
[INFO ] agentx.vppapi - get_ifaces : Requesting interfaces from VPP API
[INFO ] agentx.vppapi - get_lcp : Requesting LCPs from VPP API
- The agent resumes where it left off
---
 vpp-snmp-agent.py | 9 +++++++--
 vppapi.py | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/vpp-snmp-agent.py b/vpp-snmp-agent.py
index 1049b4a..fa99c8f 100755
--- a/vpp-snmp-agent.py
+++ b/vpp-snmp-agent.py
@@ -96,9 +96,14 @@ class MyAgent(agentx.Agent):
 
     def update(self):
         try:
+            self.vpp.connect()
+            r = self.vpp.vpp.api.control_ping()
+            self.logger.debug(f"VPP API: {r}")
             self.vppstat.connect()
-        except:
-            self.logger.error("Could not connect to VPPStats segment")
+        except Exception as e:
+            self.logger.error(f"VPP API: {e}, retrying")
+            self.vppstat.disconnect()
+            self.vpp.disconnect()
             return False
 
         ds = agentx.DataSet()
diff --git a/vppapi.py b/vppapi.py
index bf86107..6a80788 100644
--- a/vppapi.py
+++ b/vppapi.py
@@ -78,6 +78,8 @@ class VPPApi:
         if not self.connected:
             return True
         self.vpp.disconnect()
+        self.iface_dict = None
+        self.lcp_dict = None
         self.connected = False
         return True
 