Netlink namespaces!

I have been very careless about using the correct network namespace when
manipulating LCP host devices. Around every netlink write operation, we
must first clib_setns() into the correct namespace. So, wrap every call
to vnet_netlink_*() accordingly, everywhere it occurs.

For consistency, use the naming convention 'curr_ns_fd' (for the namespace
we are coming from) and 'vif_ns_fd' (for the namespace that the netlink
VIF is in).

Be careful as well to enter and exit the namespace on every code path
without leaking file descriptors.
This commit is contained in:
Pim van Pelt
2021-08-13 20:58:28 +02:00
parent 79a395b3c9
commit 72f55fd901
2 changed files with 157 additions and 19 deletions

View File

@ -21,6 +21,8 @@
#include <vnet/vnet.h> #include <vnet/vnet.h>
#include <vnet/plugin/plugin.h> #include <vnet/plugin/plugin.h>
#include <vppinfra/linux/netns.h>
#include <plugins/lcpng/lcpng_interface.h> #include <plugins/lcpng/lcpng_interface.h>
#include <vlibapi/api.h> #include <vlibapi/api.h>
@ -45,6 +47,8 @@ lcp_itf_pair_walk_sync_state_cb (index_t lipi, void *ctx)
lcp_itf_pair_t *lip; lcp_itf_pair_t *lip;
vnet_sw_interface_t *sw; vnet_sw_interface_t *sw;
vnet_sw_interface_t *sup_sw; vnet_sw_interface_t *sup_sw;
int curr_ns_fd = -1;
int vif_ns_fd = -1;
lip = lcp_itf_pair_get (lipi); lip = lcp_itf_pair_get (lipi);
if (!lip) if (!lip)
@ -57,6 +61,14 @@ lcp_itf_pair_walk_sync_state_cb (index_t lipi, void *ctx)
sup_sw = sup_sw =
vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index); vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
LCP_ITF_PAIR_DBG ("walk_sync_state: lip %U flags %u mtu %u sup-mtu %u", LCP_ITF_PAIR_DBG ("walk_sync_state: lip %U flags %u mtu %u sup-mtu %u",
format_lcp_itf_pair, lip, sw->flags, sw->mtu[VNET_MTU_L3], format_lcp_itf_pair, lip, sw->flags, sw->mtu[VNET_MTU_L3],
sup_sw->mtu[VNET_MTU_L3]); sup_sw->mtu[VNET_MTU_L3]);
@ -85,6 +97,15 @@ lcp_itf_pair_walk_sync_state_cb (index_t lipi, void *ctx)
lcp_itf_set_interface_addr (lip); lcp_itf_set_interface_addr (lip);
} }
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
return WALK_CONTINUE; return WALK_CONTINUE;
} }
@ -95,6 +116,9 @@ lcp_itf_admin_state_change (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
vnet_hw_interface_t *hi; vnet_hw_interface_t *hi;
vnet_sw_interface_t *si; vnet_sw_interface_t *si;
int curr_ns_fd = -1;
int vif_ns_fd = -1;
LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u", LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
format_vnet_sw_if_index_name, vnm, sw_if_index, format_vnet_sw_if_index_name, vnm, sw_if_index,
flags); flags);
@ -103,9 +127,25 @@ lcp_itf_admin_state_change (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
if (!lip) return NULL; if (!lip) return NULL;
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair, lip, flags); LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair, lip, flags);
lcp_itf_set_link_state (lip, (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)); lcp_itf_set_link_state (lip, (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP));
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
// Sync PHY carrier changes into TAP // Sync PHY carrier changes into TAP
hi = vnet_get_hw_interface_or_null (vnm, sw_if_index); hi = vnet_get_hw_interface_or_null (vnm, sw_if_index);
si = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index); si = vnet_get_sw_interface_or_null (vnm, lip->lip_host_sw_if_index);
@ -132,6 +172,8 @@ lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
{ {
const lcp_itf_pair_t *lip; const lcp_itf_pair_t *lip;
vnet_sw_interface_t *si; vnet_sw_interface_t *si;
int curr_ns_fd = -1;
int vif_ns_fd = -1;
LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm, LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
sw_if_index, flags); sw_if_index, flags);
@ -145,9 +187,25 @@ lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
if (!si) if (!si)
return NULL; return NULL;
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
LCP_ITF_PAIR_INFO ("mtu_change: %U mtu %u", format_lcp_itf_pair, lip, LCP_ITF_PAIR_INFO ("mtu_change: %U mtu %u", format_lcp_itf_pair, lip,
si->mtu[VNET_MTU_L3]); si->mtu[VNET_MTU_L3]);
vnet_netlink_set_link_mtu (lip->lip_vif_index, si->mtu[VNET_MTU_L3]); vnet_netlink_set_link_mtu (lip->lip_vif_index, si->mtu[VNET_MTU_L3]);
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
// When Linux changes MTU on a master interface, all of its children that // When Linux changes MTU on a master interface, all of its children that
// have a higher MTU are clamped to this value. This is not true in VPP, // have a higher MTU are clamped to this value. This is not true in VPP,
@ -305,6 +363,8 @@ lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
u32 is_del) u32 is_del)
{ {
const lcp_itf_pair_t *lip; const lcp_itf_pair_t *lip;
int curr_ns_fd = -1;
int vif_ns_fd = -1;
LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add", LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
format_vnet_sw_if_index_name, vnet_get_main (), format_vnet_sw_if_index_name, vnet_get_main (),
@ -313,6 +373,15 @@ lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
if (!lip) if (!lip)
return; return;
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
LCP_ITF_PAIR_ERR ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add", LCP_ITF_PAIR_ERR ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
format_lcp_itf_pair, lip, format_ip4_address, address, format_lcp_itf_pair, lip, format_ip4_address, address,
address_length); address_length);
@ -322,6 +391,14 @@ lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
else else
vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length); vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length);
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
return; return;
} }
@ -332,6 +409,8 @@ lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
u32 is_del) u32 is_del)
{ {
const lcp_itf_pair_t *lip; const lcp_itf_pair_t *lip;
int curr_ns_fd = -1;
int vif_ns_fd = -1;
LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add", LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
format_vnet_sw_if_index_name, vnet_get_main (), format_vnet_sw_if_index_name, vnet_get_main (),
@ -340,6 +419,14 @@ lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index)); lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
if (!lip) if (!lip)
return; return;
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
LCP_ITF_PAIR_ERR ("ip6_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add", LCP_ITF_PAIR_ERR ("ip6_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
format_lcp_itf_pair, lip, format_ip6_address, address, format_lcp_itf_pair, lip, format_ip6_address, address,
address_length); address_length);
@ -347,4 +434,13 @@ lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length); vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
else else
vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length); vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
} }

View File

@ -626,6 +626,16 @@ lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
ip_lookup_main_t *lm4 = &im4->lookup_main; ip_lookup_main_t *lm4 = &im4->lookup_main;
ip_lookup_main_t *lm6 = &im6->lookup_main; ip_lookup_main_t *lm6 = &im6->lookup_main;
ip_interface_address_t *ia = 0; ip_interface_address_t *ia = 0;
int vif_ns_fd = -1;
int curr_ns_fd = -1;
if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
/* Display any IP4 addressing info */ /* Display any IP4 addressing info */
foreach_ip_interface_address ( foreach_ip_interface_address (
@ -646,6 +656,15 @@ lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
ia->address_length); ia->address_length);
vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length); vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
})); }));
if (vif_ns_fd != -1)
close (vif_ns_fd);
if (curr_ns_fd != -1)
{
clib_setns (curr_ns_fd);
close (curr_ns_fd);
}
} }
typedef struct typedef struct
@ -676,11 +695,27 @@ lcp_itf_pair_find_walk (vnet_main_t *vnm, u32 sw_if_index, void *arg)
return WALK_CONTINUE; return WALK_CONTINUE;
} }
/* Return the index of the sub-int on thie phy that has the given vlan and proto */ /* Return the index of the sub-int on the phy that has the given vlan and
* proto, optionally in the given 'ns' namespace (which can be NULL, signifying
* the 'self' namespace).
*/
static index_t static index_t
lcp_itf_pair_find_by_outer_vlan (u32 hw_if_index, u16 vlan, bool dot1ad) lcp_itf_pair_find_by_outer_vlan (u32 hw_if_index, u8 *ns, u16 vlan,
bool dot1ad)
{ {
lcp_itf_match_t match; lcp_itf_match_t match;
int orig_ns_fd = -1;
int vif_ns_fd = -1;
index_t ret = INDEX_INVALID;
if (ns && ns[0] != 0)
{
orig_ns_fd = clib_netns_open (NULL /* self */);
vif_ns_fd = clib_netns_open (ns);
if (orig_ns_fd == -1 || vif_ns_fd == -1)
goto exit;
clib_setns (vif_ns_fd);
}
clib_memset (&match, 0, sizeof (match)); clib_memset (&match, 0, sizeof (match));
match.vlan = vlan; match.vlan = vlan;
@ -691,8 +726,20 @@ lcp_itf_pair_find_by_outer_vlan (u32 hw_if_index, u16 vlan, bool dot1ad)
vnet_hw_interface_walk_sw (vnet_get_main(), hw_if_index, lcp_itf_pair_find_walk, &match); vnet_hw_interface_walk_sw (vnet_get_main(), hw_if_index, lcp_itf_pair_find_walk, &match);
if (match.matched_sw_if_index >= vec_len (lip_db_by_phy)) if (match.matched_sw_if_index >= vec_len (lip_db_by_phy))
return INDEX_INVALID; {
return lip_db_by_phy[match.matched_sw_if_index]; goto exit;
}
ret = lip_db_by_phy[match.matched_sw_if_index];
exit:
if (orig_ns_fd != -1)
{
clib_setns (orig_ns_fd);
close (orig_ns_fd);
}
if (vif_ns_fd != -1)
close (vif_ns_fd);
return ret;
} }
int int
@ -738,7 +785,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
{ {
const lcp_itf_pair_t *llip; const lcp_itf_pair_t *llip;
index_t parent_if_index, linux_parent_if_index; index_t parent_if_index, linux_parent_if_index;
int orig_ns_fd, ns_fd; int orig_ns_fd, vif_ns_fd;
clib_error_t *err; clib_error_t *err;
u16 outer_vlan, inner_vlan; u16 outer_vlan, inner_vlan;
u16 outer_proto, inner_proto; u16 outer_proto, inner_proto;
@ -766,7 +813,9 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
vlan=inner_vlan; vlan=inner_vlan;
proto=inner_proto; proto=inner_proto;
parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index); parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan (hw->sw_if_index, sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad); linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan (
hw->sw_if_index, ns, sw->sub.eth.outer_vlan_id,
sw->sub.eth.flags.dot1ad);
if (INDEX_INVALID == linux_parent_if_index) { if (INDEX_INVALID == linux_parent_if_index) {
LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for outer vlan %d proto %s on %U", LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for outer vlan %d proto %s on %U",
outer_vlan, outer_proto==ETH_P_8021AD?"dot1ad":"dot1q", outer_vlan, outer_proto==ETH_P_8021AD?"dot1ad":"dot1q",
@ -805,17 +854,17 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/* /*
* see if the requested host interface has already been created * see if the requested host interface has already been created
*/ */
orig_ns_fd = ns_fd = -1; orig_ns_fd = vif_ns_fd = -1;
err = NULL; err = NULL;
if (ns && ns[0] != 0) if (ns && ns[0] != 0)
{ {
orig_ns_fd = clib_netns_open (NULL /* self */); orig_ns_fd = clib_netns_open (NULL /* self */);
ns_fd = clib_netns_open (ns); vif_ns_fd = clib_netns_open (ns);
if (orig_ns_fd == -1 || ns_fd == -1) if (orig_ns_fd == -1 || vif_ns_fd == -1)
goto socket_close; goto socket_close;
clib_setns (ns_fd); clib_setns (vif_ns_fd);
} }
vif_index = if_nametoindex ((const char *) host_if_name); vif_index = if_nametoindex ((const char *) host_if_name);
@ -832,13 +881,6 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
outer_proto, outer_vlan, inner_proto, inner_vlan, host_if_name); outer_proto, outer_vlan, inner_proto, inner_vlan, host_if_name);
} }
if (!err && -1 != ns_fd) {
err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
if (err != 0) {
LCP_ITF_PAIR_ERR ("pair_create: cannot set link name:'%s' in namespace:'%s'",
host_if_name, ns);
}
}
if (!err) if (!err)
vif_index = if_nametoindex ((char *) host_if_name); vif_index = if_nametoindex ((char *) host_if_name);
} }
@ -860,8 +902,8 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
clib_setns (orig_ns_fd); clib_setns (orig_ns_fd);
close (orig_ns_fd); close (orig_ns_fd);
} }
if (ns_fd != -1) if (vif_ns_fd != -1)
close (ns_fd); close (vif_ns_fd);
if (err) if (err)
return VNET_API_ERROR_INVALID_ARGUMENT; return VNET_API_ERROR_INVALID_ARGUMENT;