From a3a5f68926d8faf5694679aac40f8be6e1dfa0c1 Mon Sep 17 00:00:00 2001 From: Pim van Pelt Date: Tue, 24 Aug 2021 18:11:51 +0200 Subject: [PATCH] Add newlink/delink processing. - Can up/down a link. - Can set MAC on a link, if it's a phy. - Can set MTU on a link. - Can delete link (including phy). Because link state and mtu changes tend to go around in circles (from netlink -> vpp; and then with lcp-sync on, as well from vpp -> netlink) when we consume a batch of netlink messages, we'll temporarily turn off lcp-sync if it's enabled. TODO (in the next commit), the whole nine yards of creating interfaces in VPP based on NEWLINK vlans that come in. Conceptually not too difficult: if NEWLINK doesn't have a LIP associated with it, but it's a VLAN, and the parent of the VLAN is a link which _does_ have a LIP, then we can create the subint in VPP in the correct way. --- README.md | 10 +-- lcpng_netlink.c | 15 +++++ lcpng_netlink.h | 2 + lcpng_nl_sync.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index cf0762e..bf55227 100644 --- a/README.md +++ b/README.md @@ -44,10 +44,10 @@ column shows changes in LInux that are copied into VPP. 
| Function | VPP -> Linux | Linux -> VPP | | -------------- | ------------- | -------------| -| Up/Down Link | ✅ | 🟠 | -| Change MTU | ✅ | 🟠 | -| Change MAC | ❌ 1) | 🟠 | -| Add/Del IP4/IP6 Address | ✅ | 🟠 | +| Up/Down Link | ✅ | ✅ | +| Change MTU | ✅ | ✅ | +| Change MAC | ❌ 1) | ✅ | +| Add/Del IP4/IP6 Address | ✅ | ✅ | | MPLS | ❌ | ❌ | | Route | ❌ 2) | 🟠 | | Add/Del Tunnel | ❌ | ❌ | @@ -85,6 +85,7 @@ are dis/enabled, by providing the following `startup.conf`: plugins { path ~/src/vpp/build-root/install-vpp_debug-native/vpp/lib/vpp_plugins plugin lcpng_if_plugin.so { enable } + plugin lcpng_nl_plugin.so { enable } plugin linux_cp_plugin.so { disable } } @@ -93,6 +94,7 @@ logging { default-syslog-log-level crit ## Set per-class configuration class linux-cp/if { rate-limit 10000 level debug syslog-level debug } + class linux-cp/nl { rate-limit 10000 level debug syslog-level debug } } lcpng { diff --git a/lcpng_netlink.c b/lcpng_netlink.c index 49b6fef..eabee77 100644 --- a/lcpng_netlink.c +++ b/lcpng_netlink.c @@ -211,6 +211,10 @@ lcp_nl_dispatch (struct nl_object *obj, void *arg) return lcp_nl_addr_add ((struct rtnl_addr *) obj); case RTM_DELADDR: return lcp_nl_addr_del ((struct rtnl_addr *) obj); + case RTM_NEWLINK: + return lcp_nl_link_add ((struct rtnl_link *) obj, arg); + case RTM_DELLINK: + return lcp_nl_link_del ((struct rtnl_link *) obj); default: NL_WARN ("dispatch: ignored %U", format_nl_object, obj); break; @@ -226,6 +230,15 @@ lcp_nl_process_msgs (void) f64 start = vlib_time_now (vlib_get_main ()); u64 usecs = 0; + /* To avoid loops where VPP->LCP sync fights with LCP->VPP + * sync, we turn off the former if it's enabled, while we consume + * the netlink messages in this function, and put it back at the + * end of the function. + */ + lcp_main_t *lcpm = &lcp_main; + u8 old_lcp_sync = lcpm->lcp_sync; + lcpm->lcp_sync = 0; + /* process a batch of messages. break if we hit our batch_size * count limit or batch_delay_ms time limit. 
* @@ -264,6 +277,8 @@ lcp_nl_process_msgs (void) "process_msgs: Processed %u messages in %llu usecs, %u left in queue", n_msgs, usecs, vec_len (nm->nl_ns.nl_msg_queue)); + lcpm->lcp_sync = old_lcp_sync; + return n_msgs; } diff --git a/lcpng_netlink.h b/lcpng_netlink.h index 1aefaf4..faacbad 100644 --- a/lcpng_netlink.h +++ b/lcpng_netlink.h @@ -85,6 +85,8 @@ void lcp_nl_neigh_add (struct rtnl_neigh *rn); void lcp_nl_neigh_del (struct rtnl_neigh *rn); void lcp_nl_addr_add (struct rtnl_addr *ra); void lcp_nl_addr_del (struct rtnl_addr *ra); +void lcp_nl_link_add (struct rtnl_link *rl, void *ctx); +void lcp_nl_link_del (struct rtnl_link *rl); /* * fd.io coding-style-patch-verification: ON diff --git a/lcpng_nl_sync.c b/lcpng_nl_sync.c index 7b87863..afad96b 100644 --- a/lcpng_nl_sync.c +++ b/lcpng_nl_sync.c @@ -52,6 +52,168 @@ lcp_nl_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac) mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna)); } +/* + * Check timestamps on netlink message and interface pair to decide whether + * the message should be applied. See the declaration of nl_msg_info_t for + * an explanation on why this is necessary. + * If timestamps are good (message ts is newer than intf pair ts), return 0. + * Else, return -1. 
+ */
+static int
+lcp_nl_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
+{
+  if (msg_info->ts > lip->lip_create_ts)
+    return 0;
+
+  NL_DBG ("lip_ts_check: Early message for %U", format_lcp_itf_pair, lip);
+  return -1;
+}
+
+/* Handle a netlink DELLINK: delete the interface pair matching rl's ifindex
+ * and, for a VLAN link, also the pair's phy and host sub-interfaces. */
+void
+lcp_nl_link_del (struct rtnl_link *rl)
+{
+  lcp_itf_pair_t *lip;
+  u32 phy_sw_if_index, host_sw_if_index;
+
+  NL_DBG ("link_del: netlink %U", format_nl_object, rl);
+
+  if (!(lip = lcp_itf_pair_get (
+          lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl)))))
+    {
+      NL_WARN ("link_del: no LCP for %U ", format_nl_object, rl);
+      return;
+    }
+
+  /* Save the indices: lip must not be dereferenced once the pair is deleted
+   * (NOTE(review): lcp_itf_pair_delete presumably releases it -- confirm). */
+  phy_sw_if_index = lip->lip_phy_sw_if_index;
+  host_sw_if_index = lip->lip_host_sw_if_index;
+
+  NL_NOTICE ("link_del: Removing %U", format_lcp_itf_pair, lip);
+  vlib_worker_thread_barrier_sync (vlib_get_main ());
+  lcp_itf_pair_delete (phy_sw_if_index);
+  vlib_worker_thread_barrier_release (vlib_get_main ());
+
+  if (rtnl_link_is_vlan (rl))
+    {
+      NL_NOTICE ("link_del: Removing subint %U", format_vnet_sw_if_index_name,
+                 vnet_get_main (), phy_sw_if_index);
+      vlib_worker_thread_barrier_sync (vlib_get_main ());
+      vnet_delete_sub_interface (phy_sw_if_index);
+      vnet_delete_sub_interface (host_sw_if_index);
+      vlib_worker_thread_barrier_release (vlib_get_main ());
+    }
+}
+
+/* Copy the MTU carried by rl onto both interfaces of the pair. An MTU of 0
+ * or a failed interface lookup leaves the pair untouched. */
+static void
+lcp_nl_link_set_mtu (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 mtu = rtnl_link_get_mtu (rl);
+
+  if (!mtu)
+    return;
+  if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index) ||
+      !vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index))
+    return;
+
+  /* Set the MTU on the TAP and sw */
+  vnet_sw_interface_set_mtu (vnm, lip->lip_host_sw_if_index, mtu);
+  vnet_sw_interface_set_mtu (vnm, lip->lip_phy_sw_if_index, mtu);
+}
+
+/* Apply the link-layer address carried by rl to the phy of the pair, provided
+ * the phy is its own super-interface and its hw already has an address. */
+static void
+lcp_nl_link_set_lladdr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  struct nl_addr *mac_addr = rtnl_link_get_addr (rl);
+  vnet_sw_interface_t *sw;
+  vnet_hw_interface_t *hw;
+  void *mac_addr_bytes;
+
+  if (!mac_addr || (nl_addr_get_family (mac_addr) != AF_LLC))
+    return;
+
+  sw = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
+  hw = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
+  if (!sw || !hw)
+    return;
+
+  /* can only change address on hw interface */
+  if (sw->sw_if_index != sw->sup_sw_if_index)
+    return;
+  /* can only change if there's an address present */
+  if (!vec_len (hw->hw_address))
+    return;
+
+  mac_addr_bytes = nl_addr_get_binary_addr (mac_addr);
+  if (clib_memcmp (mac_addr_bytes, hw->hw_address, nl_addr_get_len (mac_addr)))
+    vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+                                          mac_addr_bytes);
+
+  /* mcast adjacencies need to be updated */
+  vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+                                          lip->lip_phy_adjs.adj_index[AF_IP4]);
+  vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+                                          lip->lip_phy_adjs.adj_index[AF_IP6]);
+}
+
+/* Handle a netlink NEWLINK for a link that has an interface pair: mirror its
+ * admin state, MTU and link-layer address into VPP. ctx is the message's
+ * nl_msg_info_t; messages not newer than the pair are dropped. */
+void
+lcp_nl_link_add (struct rtnl_link *rl, void *ctx)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  lcp_itf_pair_t *lip;
+  int admin_state;
+
+  NL_DBG ("link_add: netlink %U", format_nl_object, rl);
+
+  if (!(lip = lcp_itf_pair_get (
+          lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl)))))
+    {
+      NL_WARN ("link_add: no LCP for %U (see TODO in code)", format_nl_object,
+               rl);
+      // TODO(pim) -- here's where auto-creation of sub-int's comes into play
+      // if this is a netlink vlan interface, its parent may have a LIP, and if
+      // so, we can auto-create in VPP.
+      return;
+    }
+
+  if (lcp_nl_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+    return;
+
+  // 0 == unknown; 2 == down; 6 == up; TODO(pim) figure out operstate values
+  // and whether links with operstate 2 should be ignored here.
+
+  admin_state = (IFF_UP & rtnl_link_get_flags (rl));
+  vlib_worker_thread_barrier_sync (vlib_get_main ());
+  if (admin_state)
+    {
+      vnet_sw_interface_admin_up (vnm, lip->lip_host_sw_if_index);
+      vnet_sw_interface_admin_up (vnm, lip->lip_phy_sw_if_index);
+    }
+  else
+    {
+      vnet_sw_interface_admin_down (vnm, lip->lip_phy_sw_if_index);
+      vnet_sw_interface_admin_down (vnm, lip->lip_host_sw_if_index);
+    }
+
+  lcp_nl_link_set_mtu (rl, lip);
+  lcp_nl_link_set_lladdr (rl, lip);
+  vlib_worker_thread_barrier_release (vlib_get_main ());
+
+  NL_NOTICE ("link_add: %U admin %s", format_lcp_itf_pair, lip,
+             admin_state ? "up" : "down");
+}
+
 static const mfib_prefix_t ip4_specials[] = {
   /* ALL prefixes are in network order */
   {