Clamp sub-int MTU to parent's MTU

In VPP, a sub-interface can have a larger MTU than its parent.
In Linux, this cannot happen, so the following is problematic:

```
DBGvpp# create sub TenGigabitEthernet3/0/0 10
DBGvpp# set int mtu packet 1500 TenGigabitEthernet3/0/0
DBGvpp# set int mtu packet 9000 TenGigabitEthernet3/0/0.10
694: e0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UNKNOWN mode DEFAULT group default qlen 1000
695: e0.10@e0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000
```

The best way to ensure this works is to clamp the sub-int to a maximum MTU
of that of its parent, and override the user request to change the VPP
sub-int to anything higher than that, perhaps logging an error explaining
why. This means two things:

1.  Any change in VPP of a child MTU to larger than its parent, should be
    reverted.
1   Any change in VPP of a parent MTU should ensure all children are clamped
    to at most that.
This commit is contained in:
Pim van Pelt
2021-08-13 19:59:55 +02:00
parent f7e1bb951d
commit 79a395b3c9

View File

@ -34,33 +34,53 @@
#include <linux/netlink.h> #include <linux/netlink.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
/* walk function to copy forward all sw interface link state flags into /* walk function to copy forward all sw interface link state flags
* their counterpart LIP link state. * MTU, and IP addresses into their counterpart LIP interface.
*
* This is called upon MTU changes and state changes.
*/ */
static walk_rc_t static walk_rc_t
lcp_itf_pair_walk_sync_state_cb (index_t lipi, void *ctx) lcp_itf_pair_walk_sync_state_cb (index_t lipi, void *ctx)
{ {
lcp_itf_pair_t *lip; lcp_itf_pair_t *lip;
vnet_sw_interface_t *phy; vnet_sw_interface_t *sw;
vnet_sw_interface_t *sup_sw;
lip = lcp_itf_pair_get (lipi); lip = lcp_itf_pair_get (lipi);
if (!lip) if (!lip)
return WALK_CONTINUE; return WALK_CONTINUE;
phy = sw =
vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index); vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index);
if (!phy) if (!sw)
return WALK_CONTINUE; return WALK_CONTINUE;
sup_sw =
vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
LCP_ITF_PAIR_DBG ("walk_sync_state: lip %U flags %u", format_lcp_itf_pair, LCP_ITF_PAIR_DBG ("walk_sync_state: lip %U flags %u mtu %u sup-mtu %u",
lip, phy->flags); format_lcp_itf_pair, lip, sw->flags, sw->mtu[VNET_MTU_L3],
lcp_itf_set_link_state (lip, (phy->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)); sup_sw->mtu[VNET_MTU_L3]);
lcp_itf_set_link_state (lip, (sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP));
/* Linux will clamp MTU of children when the parent is lower. VPP is fine
* with differing MTUs. Reconcile any differences
*/
if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
{
LCP_ITF_PAIR_ERR ("walk_sync_state: lip %U flags %u mtu %u sup-mtu %u: "
"clamping to sup-mtu to satisfy netlink",
format_lcp_itf_pair, lip, sw->flags,
sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
vnet_sw_interface_set_mtu (vnet_get_main (), sw->sw_if_index,
sup_sw->mtu[VNET_MTU_L3]);
vnet_netlink_set_link_mtu (lip->lip_vif_index, sup_sw->mtu[VNET_MTU_L3]);
}
/* Linux will remove IPv6 addresses on children when the master state /* Linux will remove IPv6 addresses on children when the master state
* goes down, so we ensure all IPv4/IPv6 addresses are set when the phy * goes down, so we ensure all IPv4/IPv6 addresses are set when the phy
* comes back up. * comes back up.
*/ */
if (phy->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) if (sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
{ {
lcp_itf_set_interface_addr (lip); lcp_itf_set_interface_addr (lip);
} }
@ -129,6 +149,13 @@ lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
si->mtu[VNET_MTU_L3]); si->mtu[VNET_MTU_L3]);
vnet_netlink_set_link_mtu (lip->lip_vif_index, si->mtu[VNET_MTU_L3]); vnet_netlink_set_link_mtu (lip->lip_vif_index, si->mtu[VNET_MTU_L3]);
// When Linux changes MTU on a master interface, all of its children that
// have a higher MTU are clamped to this value. This is not true in VPP,
// so we are forced to undo that change by walking the sub-interfaces of
// a phy and syncing their state back into linux.
// For simplicity, just walk all interfaces.
lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_cb, 0);
return NULL; return NULL;
} }