Refactor vpp.go to hold the connection management, and give each vpp_*.go file its own Manager

This commit is contained in:
Pim van Pelt
2025-06-24 07:00:52 +02:00
parent 96b9dd501d
commit bdaa2e366b
4 changed files with 364 additions and 206 deletions

View File

@@ -3,164 +3,126 @@
package vpp
import (
"flag"
"time"
"go.fd.io/govpp/adapter/socketclient"
"go.fd.io/govpp/adapter/statsclient"
"go.fd.io/govpp/api"
"go.fd.io/govpp/binapi/vpe"
"go.fd.io/govpp/core"
"govpp-snmp-agentx/logger"
)
// StatsCallback is called when interface stats are retrieved.
// It receives a pointer to the api.InterfaceStats that was just fetched.
type StatsCallback func(*api.InterfaceStats)
// Global callback for interface events
var interfaceEventCallback InterfaceEventCallback
// Command-line flags for VPP stats configuration. Each variable is the
// pointer returned by the flag package, so it must be dereferenced to read
// the value.
var (
// Flags for VPP stats configuration
// ApiAddr is the VPP API socket path (default /var/run/vpp/api.sock).
ApiAddr = flag.String("vppstats.api.addr", "/var/run/vpp/api.sock", "VPP API socket path")
// StatsAddr is the VPP stats socket path (default /var/run/vpp/stats.sock).
StatsAddr = flag.String("vppstats.stats.addr", "/var/run/vpp/stats.sock", "VPP stats socket path")
// IfIndexOffset is the offset added to VPP interface indices for SNMP (default 1000).
IfIndexOffset = flag.Int("vppstats.ifindex-offset", 1000, "Offset to add to VPP interface indices for SNMP")
// Period is the interval, in seconds, between VPP interface stats queries (default 10).
Period = flag.Int("vppstats.period", 10, "Interval in seconds for querying VPP interface stats")
)
// SetInterfaceEventCallback sets the callback for interface events
func SetInterfaceEventCallback(callback InterfaceEventCallback) {
interfaceEventCallback = callback
// StatsManager handles VPP statistics operations.
// It holds the VPP client, an optional interface manager, the stats callback
// and the state of the background polling routine.
type StatsManager struct {
// client is queried for connection state and for the stats connection.
client *VPPClient
// interfaceManager, when non-nil, is used by the polling loop to start
// interface event watching and fetch initial interface details.
interfaceManager *InterfaceManager
// statsCallback, when non-nil, is invoked with each retrieved stats snapshot.
statsCallback StatsCallback
// period is the polling interval; NewStatsManager derives it from the
// Period flag and SetPeriod overrides it.
period time.Duration
// running is set by StartStatsRoutine, cleared by StopStatsRoutine and
// checked by the polling loop on each iteration.
// NOTE(review): this plain bool is written by callers and read by the
// polling goroutine without visible synchronization — confirm whether it
// needs an atomic or a mutex.
running bool
}
// StartStatsRoutine starts a goroutine that queries VPP interface stats at the configured interval
func StartStatsRoutine(callback StatsCallback) {
period := time.Duration(*Period) * time.Second
go statsRoutine(period, callback)
// NewStatsManager builds a StatsManager bound to the given VPP client and
// interface manager. The polling period is initialised from the Period flag,
// interpreted as a number of seconds. No callback is set and the polling
// routine is not started.
func NewStatsManager(client *VPPClient, interfaceManager *InterfaceManager) *StatsManager {
	pollEvery := time.Duration(*Period) * time.Second

	mgr := new(StatsManager)
	mgr.client = client
	mgr.interfaceManager = interfaceManager
	mgr.period = pollEvery
	return mgr
}
func statsRoutine(period time.Duration, callback StatsCallback) {
logger.Debugf("Starting VPP stats routine with API: %s, Stats: %s, period: %v", *ApiAddr, *StatsAddr, period)
// SetStatsCallback sets the callback for stats updates.
// The stored callback replaces any previous one; a nil callback is allowed
// and simply means no function is invoked after a successful stats query.
func (sm *StatsManager) SetStatsCallback(callback StatsCallback) {
sm.statsCallback = callback
}
var conn *core.Connection
var statsConn *core.StatsConnection
var connected = false
var wasConnected = false
// SetPeriod sets the polling period for stats.
// The polling loop creates its ticker from the period once, when it starts,
// so a new value only applies to a routine started after this call.
func (sm *StatsManager) SetPeriod(period time.Duration) {
sm.period = period
}
ticker := time.NewTicker(period)
// StartStatsRoutine launches the background stats polling goroutine.
// If the manager is already flagged as running, it only emits a debug log
// line and returns without starting a second goroutine.
func (sm *StatsManager) StartStatsRoutine() {
	if !sm.running {
		// Mark as running before spawning so the loop's own check passes.
		sm.running = true
		go sm.statsRoutine()
		return
	}

	logger.Debugf("Stats routine already running")
}
// StopStatsRoutine stops the stats polling routine.
// It only clears the running flag and does not wait for the goroutine to
// exit. The polling loop checks the flag at the top of each iteration, so the
// routine ends the next time it gets there (after the pending tick or
// reconnect sleep).
func (sm *StatsManager) StopStatsRoutine() {
sm.running = false
}
// GetInterfaceStats fetches a fresh snapshot of interface statistics.
//
// It returns a *VPPError when the client reports it is not connected or when
// no stats connection is available, and passes through any error returned by
// the stats connection itself. On success the populated stats are returned
// with a nil error.
func (sm *StatsManager) GetInterfaceStats() (*api.InterfaceStats, error) {
	if connected := sm.client.IsConnected(); !connected {
		return nil, &VPPError{Message: "VPP client not connected"}
	}

	conn := sm.client.GetStatsConnection()
	if conn == nil {
		return nil, &VPPError{Message: "Stats connection not available"}
	}

	// Let the stats connection fill in a zero-valued snapshot.
	var snapshot api.InterfaceStats
	err := conn.GetInterfaceStats(&snapshot)
	if err != nil {
		return nil, err
	}
	return &snapshot, nil
}
// statsRoutine is the main stats polling loop
func (sm *StatsManager) statsRoutine() {
logger.Debugf("Starting VPP stats routine with period: %v", sm.period)
ticker := time.NewTicker(sm.period)
defer ticker.Stop()
defer func() {
// Safely disconnect connections with panic recovery
if conn != nil {
func() {
defer func() {
if r := recover(); r != nil {
logger.Debugf("Recovered from conn.Disconnect panic: %v", r)
}
}()
conn.Disconnect()
}()
}
if statsConn != nil {
func() {
defer func() {
if r := recover(); r != nil {
logger.Debugf("Recovered from statsConn.Disconnect panic: %v", r)
}
}()
statsConn.Disconnect()
}()
}
}()
var wasConnected = false
for {
if !sm.running {
logger.Debugf("Stats routine stopping")
break
}
// Check if we need to connect/reconnect
if !connected {
// Clean up existing connections
if conn != nil {
func() {
defer func() {
if r := recover(); r != nil {
logger.Debugf("Recovered from conn.Disconnect during reconnect: %v", r)
}
}()
conn.Disconnect()
}()
conn = nil
}
if statsConn != nil {
func() {
defer func() {
if r := recover(); r != nil {
logger.Debugf("Recovered from statsConn.Disconnect during reconnect: %v", r)
}
}()
statsConn.Disconnect()
}()
statsConn = nil
if !sm.client.IsConnected() {
if wasConnected {
logger.Printf("VPP connection lost, attempting reconnect...")
wasConnected = false
} else {
logger.Debugf("VPP not connected, attempting connection...")
}
// Create API connection first - only proceed if this succeeds
var err error
conn, err = core.Connect(socketclient.NewVppClient(*ApiAddr))
if err != nil {
if wasConnected {
logger.Printf("VPP API connection lost: %v", err)
wasConnected = false
} else {
logger.Debugf("Failed to connect to VPP API: %v", err)
}
connected = false
if err := sm.client.Connect(); err != nil {
logger.Debugf("Failed to connect to VPP: %v", err)
time.Sleep(time.Second)
continue
}
// Only try stats connection if API connection succeeded
statsClient := statsclient.NewStatsClient(*StatsAddr)
statsConn, err = core.ConnectStats(statsClient)
if err != nil {
logger.Printf("VPP stats connection failed: %v", err)
// Close the API connection since we can't get stats
func() {
defer func() {
if r := recover(); r != nil {
logger.Debugf("Recovered from conn.Disconnect during stats error: %v", r)
}
}()
conn.Disconnect()
}()
conn = nil
connected = false
time.Sleep(time.Second)
continue
}
logger.Printf("Connected to VPP (API: %s, Stats: %s)", *ApiAddr, *StatsAddr)
connected = true
logger.Printf("VPP connection established")
wasConnected = true
// Start watching interface events
logger.Debugf("Creating API channel for interface events...")
ch, err := conn.NewAPIChannel()
if err != nil {
logger.Debugf("Failed to create API channel for interface events: %v", err)
} else {
logger.Debugf("API channel created successfully, calling WatchInterfaceEvents...")
if err := WatchInterfaceEvents(ch, interfaceEventCallback); err != nil {
// Initialize interface event watching
if sm.interfaceManager != nil {
if err := sm.interfaceManager.StartEventWatcher(); err != nil {
logger.Debugf("Failed to start interface event watching: %v", err)
ch.Close()
} else {
logger.Printf("Interface event watching started successfully")
logger.Debugf("Interface event watching started")
// Do initial retrieval of interface details
if interfaceEventCallback != nil {
details, err := GetAllInterfaceDetails(ch)
if err != nil {
logger.Debugf("Failed to get initial interface details: %v", err)
} else {
logger.Debugf("Retrieved initial interface details for %d interfaces", len(details))
interfaceEventCallback(details)
// Get initial interface details
if details, err := sm.interfaceManager.GetAllInterfaceDetails(); err != nil {
logger.Debugf("Failed to get initial interface details: %v", err)
} else {
logger.Debugf("Retrieved initial interface details for %d interfaces", len(details))
if sm.interfaceManager.eventCallback != nil {
sm.interfaceManager.eventCallback(details)
}
}
}
@@ -168,9 +130,10 @@ func statsRoutine(period time.Duration, callback StatsCallback) {
}
// Query stats if connected
if connected {
if !queryInterfaceStats(conn, statsConn, callback) {
connected = false
if sm.client.IsConnected() {
if !sm.queryAndReportStats() {
logger.Printf("Stats query failed, marking connection as lost")
sm.client.Disconnect()
continue
}
}
@@ -178,20 +141,21 @@ func statsRoutine(period time.Duration, callback StatsCallback) {
// Wait for next tick
<-ticker.C
}
logger.Debugf("Stats routine ended")
}
func queryInterfaceStats(conn *core.Connection, statsConn *core.StatsConnection, callback StatsCallback) bool {
// Check VPP liveness using API call
if !checkVPPLiveness(conn) {
logger.Printf("VPP liveness check failed")
// queryAndReportStats queries stats and calls the callback
func (sm *StatsManager) queryAndReportStats() bool {
// Check VPP liveness first
if !sm.client.CheckLiveness() {
logger.Debugf("VPP liveness check failed")
return false
}
// Create the proper struct for interface stats
stats := new(api.InterfaceStats)
// Use the GetInterfaceStats method - this is the correct approach
if err := statsConn.GetInterfaceStats(stats); err != nil {
// Get interface stats
stats, err := sm.GetInterfaceStats()
if err != nil {
logger.Printf("Failed to get interface stats: %v", err)
return false
}
@@ -208,60 +172,9 @@ func queryInterfaceStats(conn *core.Connection, statsConn *core.StatsConnection,
}
// Call the callback to update the MIB
if callback != nil {
callback(stats)
if sm.statsCallback != nil {
sm.statsCallback(stats)
}
return true
}
// checkVPPLiveness reports whether VPP answers a ShowVersion request on conn.
//
// It opens a short-lived API channel, sends vpe.ShowVersion and waits for the
// reply. It returns false when the channel cannot be created or the request
// fails, and true when a reply is received. Once created, the channel is
// closed exactly once on every return path.
func checkVPPLiveness(conn *core.Connection) bool {
	ch, err := conn.NewAPIChannel()
	if err != nil {
		logger.Debugf("Failed to create API channel: %v", err)
		return false
	}

	// One guarded close replaces the previous per-path Close calls and the
	// tracking flag. The recover lives in a nested defer so that a panic
	// raised by Close itself is caught and logged instead of escaping.
	defer func() {
		defer func() {
			if r := recover(); r != nil {
				logger.Debugf("Recovered from channel close panic: %v", r)
			}
		}()
		ch.Close()
	}()

	// Send ShowVersion and block until the reply (or an error) arrives.
	reply := &vpe.ShowVersionReply{}
	if err := ch.SendRequest(&vpe.ShowVersion{}).ReceiveReply(reply); err != nil {
		logger.Debugf("VPP ShowVersion failed: %v", err)
		return false
	}

	// If we got here, VPP is responsive.
	logger.Debugf("VPP liveness check passed (version: %s)", string(reply.Version))
	return true
}