Add NEWROUTE/DELROUTE handler
This is super complicated work, taken mostly verbatim from the upstream linux-cp Gerrit, with due credit to mgsmith@netgate.com and neale@grafiant.com. First, add main handlers lcp_nl_route_add() and lcp_nl_route_del(). Introduce two FIB sources: one for manual routes, one for dynamic routes. See lcp_nl_proto_fib_source() for details. Add a bunch of helpers that translate Netlink message data into VPP primitives: - lcp_nl_mk_addr46() converts a Netlink nl_addr to a VPP ip46_address_t. - lcp_nl_mk_route_prefix() converts a Netlink rtnl_route to a VPP fib_prefix_t. - lcp_nl_mk_route_mprefix() converts a Netlink rtnl_route to a VPP mfib_prefix_t. - lcp_nl_proto_fib_source() selects the most appropriate fib_src by looking at the rt_proto (see /etc/iproute2/rt_protos for a hint). Anything RTPROT_STATIC or better is 'fib_src', while anything above that becomes fib_src_dynamic. - lcp_nl_mk_route_entry_flags() generates fib_entry_flag_t from the Netlink route type, table and proto metadata. - lcp_nl_route_path_parse() converts a Netlink rtnl_nexthop to VPP fib_route_path_t and adds that to a growing list of paths. - lcp_nl_route_path_add_special() adds a blackhole/unreach/prohibit route to the list of paths, in the special-case there is not yet a path for the destination. Now we're ready to insert FIB entries: - lcp_nl_table_find() selects the matching table-id,protocol(v4/v6) from a hash of tables. - lcp_nl_table_add_or_lock() if a table-id,protocol(v4/v6) hasn't been used yet, create one, otherwise increment a table reference counter so we know how many FIB entries we have in this table. Then, return it. - lcp_nl_table_unlock() Decrease the refcount on a table, and if no more prefixes are in the table, remove it from VPP. - lcp_nl_route_del() Remove a route from the given table-id/protocol. Do this by applying rtnl_route_foreach_nexthop() to the list of Netlink nexthops, converting them into VPP paths in a lcp_nl_route_path_parse_t structure. 
If the route is for unreachable/blackhole/prohibit in Linux, add that path too. Then, remove the VPP paths from the FIB and reduce refcnt or remove the table if it's empty using table_unlock(). - lcp_nl_route_add() Not all routes are relevant for VPP. Those in table 255 are 'local' routes, already set up by ip[46]_address_add(), and some other route types are invalid, skip those. Link-local IPv6 and IPv6 multicast is also skipped. Then, construct lcp_nl_route_path_parse_t by walking the Netlink nexthops, and optionally add a special (in case the route was for unreachable/blackhole/prohibit in Linux -- those won't have a nexthop). Then, insert the VPP paths found in the Netlink message into the FIB or the multicast FIB, respectively. And with that, Bird shoots to life. Both IPv4 and IPv6 OSPF interior gateway protocol and BGP full tables can be consumed, on my bench in about 9 seconds: - A batch of 2048 Netlink messages is handled in 9-11ms, so we can do approx 200K messages/sec at peak (and this will consume 50% CPU due to the yielding logic in lcp_nl_process() (see the 'case NL_EVENT_READ' block that adds a cooldown period of LCP_NL_PROCESS_WAIT milliseconds between batches. - With 3 route reflectors and 2 full BGP peers, at peak I could see 309K messages left in the producer queue. - All IPv4 and IPv6 prefixes made their way into the FIB pim@hippo:~/src/lcpng$ echo -n "IPv6: "; vppctl sh ip6 fib summary | awk '$1~/[0-9]+/ { total += $2 } END { print total }' IPv6: 132506 pim@hippo:~/src/lcpng$ echo -n "IPv4: "; vppctl sh ip fib summary | awk '$1~/[0-9]+/ { total += $2 } END { print total }' IPv4: 869966 - Compared to Bird2's view: pim@hippo:~/src/lcpng$ birdc show route count BIRD 2.0.7 ready. 
3477845 of 3477845 routes for 869942 networks in table master4 527887 of 527887 routes for 132484 networks in table master6 Total: 4005732 of 4005732 routes for 1002426 networks in 2 tables - Flipping one of the full feeds to another, forcing a reconvergence of every prefix in the FIB took about 8 seconds, peaking at 242K messages in the queue, with again an average consumption of 2048 messages per 9-10ms. - All of this was done while iperf'ing 6Gbps to and from the controlplane. --- Because handling full BGP table is O(1M) messages, I will have to make some changes in the logging: - all neigh/route messages become DBG/INFO at best - all addr/link messages become INFO/NOTICE at best - when we overflow time/msgs, turn process_msgs into a WARN, otherwise keep it at INFO so as not to spam. In lcpng_interface.c: - Log NOTICE for pair_add() and pair_del() call; - Log NOTICE for set_interface_addr() call; With this approach, setting the logging level of the linux-cp/nl plugin to 'notice' hits the sweet spot: with things that the operator has ~explicitly done, leaving implicit actions (BGP route adds/dels, ARP/ND) to stay below the NOTICE level.
This commit is contained in:
@ -48,7 +48,7 @@ column shows changes in Linux that are copied into VPP.
|
||||
| Change MTU | ✅ | ✅ |
|
||||
| Change MAC | ❌ 1) | ✅ |
|
||||
| Add/Del IP4/IP6 Address | ✅ | ✅ |
|
||||
| Route | ❌ 2) | 🟠 |
|
||||
| Route | ❌ 2) | ✅ |
|
||||
| Add/Del Tunnel | ❌ | ❌ |
|
||||
| Add/Del Phy | ✅ | 🟠 |
|
||||
| Add/Del .1q | ✅ | ✅ |
|
||||
|
@ -227,15 +227,25 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
|
||||
|
||||
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
|
||||
|
||||
LCP_ITF_PAIR_INFO ("pair_add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
|
||||
if (lipi != INDEX_INVALID)
|
||||
return VNET_API_ERROR_VALUE_EXIST;
|
||||
|
||||
if (host_sw_if_index == ~0) {
|
||||
LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
|
||||
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
|
||||
}
|
||||
|
||||
if (phy_sw_if_index == ~0) {
|
||||
LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid phy");
|
||||
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
|
||||
}
|
||||
|
||||
LCP_ITF_PAIR_NOTICE ("pair_add: Adding LIP for host:%U phy:%U, host_if:%v vif:%d ns:%s",
|
||||
format_vnet_sw_if_index_name, vnet_get_main (),
|
||||
host_sw_if_index, format_vnet_sw_if_index_name,
|
||||
vnet_get_main (), phy_sw_if_index, host_name, host_index,
|
||||
ns);
|
||||
|
||||
if (lipi != INDEX_INVALID)
|
||||
return VNET_API_ERROR_VALUE_EXIST;
|
||||
|
||||
/*
|
||||
* Create a new pair.
|
||||
*/
|
||||
@ -260,9 +270,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
|
||||
if (ns && ns[0] != 0)
|
||||
lip->lip_namespace = (u8 *) strdup ((const char *) ns);
|
||||
|
||||
if (lip->lip_host_sw_if_index == ~0)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* First use of this host interface.
|
||||
* Enable the x-connect feature on the host to send
|
||||
@ -406,7 +413,7 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
|
||||
|
||||
lip = lcp_itf_pair_get (lipi);
|
||||
|
||||
LCP_ITF_PAIR_INFO (
|
||||
LCP_ITF_PAIR_NOTICE (
|
||||
"pair_del: host:%U phy:%U host_if:%s vif:%d ns:%s",
|
||||
format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
|
||||
@ -673,7 +680,7 @@ lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
|
||||
foreach_ip_interface_address (
|
||||
lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
|
||||
ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
|
||||
LCP_ITF_PAIR_INFO ("set_interface_addr: %U add ip4 %U/%d",
|
||||
LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
|
||||
format_lcp_itf_pair, lip, format_ip4_address, r4,
|
||||
ia->address_length);
|
||||
vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length);
|
||||
@ -683,7 +690,7 @@ lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
|
||||
foreach_ip_interface_address (
|
||||
lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
|
||||
ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia);
|
||||
LCP_ITF_PAIR_INFO ("set_interface_addr: %U add ip6 %U/%d",
|
||||
LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
|
||||
format_lcp_itf_pair, lip, format_ip6_address, r6,
|
||||
ia->address_length);
|
||||
vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
|
||||
@ -842,7 +849,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
|
||||
* - if this is an inner VLAN, find the pair from the outer sub-int, which must exist.
|
||||
*/
|
||||
if (inner_vlan) {
|
||||
LCP_ITF_PAIR_INFO ("pair_create: trying to create dot1%s %d inner-dot1q %d on %U",
|
||||
LCP_ITF_PAIR_DBG ("pair_create: trying to create dot1%s %d inner-dot1q %d on %U",
|
||||
sw->sub.eth.flags.dot1ad?"ad":"q", outer_vlan, inner_vlan,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (), hw->sw_if_index);
|
||||
vlan=inner_vlan;
|
||||
@ -858,7 +865,7 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
|
||||
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
|
||||
}
|
||||
} else {
|
||||
LCP_ITF_PAIR_INFO ("pair_create: trying to create dot1%s %d on %U",
|
||||
LCP_ITF_PAIR_DBG ("pair_create: trying to create dot1%s %d on %U",
|
||||
sw->sub.eth.flags.dot1ad?"ad":"q", outer_vlan,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (), hw->sw_if_index);
|
||||
vlan=outer_vlan;
|
||||
|
@ -216,6 +216,10 @@ lcp_nl_dispatch (struct nl_object *obj, void *arg)
|
||||
return lcp_nl_link_add ((struct rtnl_link *) obj, arg);
|
||||
case RTM_DELLINK:
|
||||
return lcp_nl_link_del ((struct rtnl_link *) obj);
|
||||
case RTM_NEWROUTE:
|
||||
return lcp_nl_route_add ((struct rtnl_route *) obj);
|
||||
case RTM_DELROUTE:
|
||||
return lcp_nl_route_del ((struct rtnl_route *) obj);
|
||||
default:
|
||||
NL_WARN ("dispatch: ignored %U", format_nl_object, obj);
|
||||
break;
|
||||
@ -258,13 +262,15 @@ lcp_nl_process_msgs (void)
|
||||
|
||||
if (++n_msgs >= nm->batch_size)
|
||||
{
|
||||
NL_DBG ("process_msgs: batch_size reached");
|
||||
NL_INFO ("process_msgs: batch_size %d reached, yielding",
|
||||
nm->batch_size);
|
||||
break;
|
||||
}
|
||||
usecs = (u64) (1e6 * (vlib_time_now (vlib_get_main ()) - start));
|
||||
if (usecs >= 1e3 * NL_BATCH_DELAY_MS_DEF)
|
||||
if (usecs >= 1e3 * nm->batch_delay_ms)
|
||||
{
|
||||
NL_DBG ("process_msgs: batch_delay_ms reached");
|
||||
NL_INFO ("process_msgs: batch_delay_ms %s reached, yielding",
|
||||
nm->batch_delay_ms);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -274,9 +280,20 @@ lcp_nl_process_msgs (void)
|
||||
vec_delete (nm->nl_ns.nl_msg_queue, n_msgs, 0);
|
||||
|
||||
if (n_msgs > 0)
|
||||
NL_DBG (
|
||||
"process_msgs: Processed %u messages in %llu usecs, %u left in queue",
|
||||
n_msgs, usecs, vec_len (nm->nl_ns.nl_msg_queue));
|
||||
{
|
||||
if (vec_len (nm->nl_ns.nl_msg_queue))
|
||||
{
|
||||
NL_WARN ("process_msgs: Processed %u messages in %llu usecs, %u "
|
||||
"left in queue",
|
||||
n_msgs, usecs, vec_len (nm->nl_ns.nl_msg_queue));
|
||||
}
|
||||
else
|
||||
{
|
||||
NL_INFO ("process_msgs: Processed %u messages in %llu usecs, %u "
|
||||
"left in queue",
|
||||
n_msgs, usecs, vec_len (nm->nl_ns.nl_msg_queue));
|
||||
}
|
||||
}
|
||||
|
||||
lcpm->lcp_sync = old_lcp_sync;
|
||||
|
||||
@ -587,6 +604,13 @@ lcp_nl_init (vlib_main_t *vm)
|
||||
|
||||
lcp_itf_pair_register_vft (&nl_itf_pair_vft);
|
||||
|
||||
/* Add two FIB sources: one for manual routes, one for dynamic routes
|
||||
* See lcp_nl_proto_fib_source() */
|
||||
nm->fib_src =
|
||||
fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
|
||||
nm->fib_src_dynamic = fib_source_allocate (
|
||||
"lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
#include <netlink/route/addr.h>
|
||||
#include <netlink/route/link/vlan.h>
|
||||
|
||||
#include <vnet/fib/fib_table.h>
|
||||
#include <vnet/mfib/mfib_table.h>
|
||||
|
||||
typedef enum nl_event_type_t_
|
||||
{
|
||||
NL_EVENT_READ,
|
||||
@ -61,6 +64,15 @@ typedef struct lcp_nl_netlink_namespace
|
||||
u8 netns_name[LCP_NS_LEN]; // namespace name (can be empty, for 'self')
|
||||
} lcp_nl_netlink_namespace_t;
|
||||
|
||||
typedef struct lcp_nl_table_t_
|
||||
{
|
||||
uint32_t nlt_id;
|
||||
fib_protocol_t nlt_proto;
|
||||
u32 nlt_fib_index;
|
||||
u32 nlt_mfib_index;
|
||||
u32 nlt_refs;
|
||||
} lcp_nl_table_t;
|
||||
|
||||
typedef struct lcp_nl_main
|
||||
{
|
||||
vlib_log_class_t nl_logger;
|
||||
@ -69,14 +81,30 @@ typedef struct lcp_nl_main
|
||||
*/
|
||||
lcp_nl_netlink_namespace_t nl_ns;
|
||||
|
||||
fib_source_t fib_src; // For static routes set manually
|
||||
fib_source_t
|
||||
fib_src_dynamic; // For routes set by routing software (Bird, FRR, etc)
|
||||
uword *table_db[FIB_PROTOCOL_MAX];
|
||||
lcp_nl_table_t *table_pool;
|
||||
|
||||
u32 rx_buf_size;
|
||||
u32 tx_buf_size;
|
||||
u32 batch_size;
|
||||
u32 batch_delay_ms;
|
||||
|
||||
} lcp_nl_main_t;
|
||||
|
||||
extern lcp_nl_main_t lcp_nl_main;
|
||||
|
||||
typedef struct lcp_nl_route_path_parse_t_
|
||||
{
|
||||
fib_route_path_t *paths;
|
||||
fib_protocol_t route_proto;
|
||||
bool is_mcast;
|
||||
fib_route_path_flags_t type_flags;
|
||||
u8 preference;
|
||||
} lcp_nl_route_path_parse_t;
|
||||
|
||||
u8 *format_nl_object (u8 *s, va_list *args);
|
||||
|
||||
/* Functions from lcpng_nl_sync.c
|
||||
@ -87,6 +115,8 @@ void lcp_nl_addr_add (struct rtnl_addr *ra);
|
||||
void lcp_nl_addr_del (struct rtnl_addr *ra);
|
||||
void lcp_nl_link_add (struct rtnl_link *rl, void *ctx);
|
||||
void lcp_nl_link_del (struct rtnl_link *rl);
|
||||
void lcp_nl_route_add (struct rtnl_route *rr);
|
||||
void lcp_nl_route_del (struct rtnl_route *rr);
|
||||
|
||||
/*
|
||||
* fd.io coding-style-patch-verification: ON
|
||||
|
494
lcpng_nl_sync.c
494
lcpng_nl_sync.c
@ -39,6 +39,66 @@
|
||||
NUD_DELAY)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Map of supported route types. Some types are omitted:
|
||||
* RTN_LOCAL - interface address addition creates these automatically
|
||||
* RTN_BROADCAST - same as RTN_LOCAL
|
||||
* RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
|
||||
* There's not a VPP equivalent for these currently.
|
||||
*/
|
||||
const static u8 lcp_nl_route_type_valid[__RTN_MAX] = {
|
||||
[RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
|
||||
[RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
|
||||
};
|
||||
|
||||
/* Map of fib entry flags by route type */
|
||||
const static fib_entry_flag_t lcp_nl_route_type_feflags[__RTN_MAX] = {
|
||||
[RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
|
||||
[RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
|
||||
[RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
|
||||
};
|
||||
|
||||
/* Map of fib route path flags by route type */
|
||||
const static fib_route_path_flags_t lcp_nl_route_type_frpflags[__RTN_MAX] = {
|
||||
[RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
|
||||
[RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
|
||||
[RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
|
||||
};
|
||||
|
||||
const static fib_prefix_t pfx_all1s = {
|
||||
.fp_addr = {
|
||||
.ip4 = {
|
||||
.as_u32 = 0xffffffff,
|
||||
}
|
||||
},
|
||||
.fp_proto = FIB_PROTOCOL_IP4,
|
||||
.fp_len = 32,
|
||||
};
|
||||
|
||||
const static mfib_prefix_t ip4_specials[] = {
|
||||
/* ALL prefixes are in network order */
|
||||
{
|
||||
/* (*,224.0.0.0)/24 - all local subnet */
|
||||
.fp_grp_addr = {
|
||||
.ip4.data_u32 = 0x000000e0,
|
||||
},
|
||||
.fp_len = 24,
|
||||
.fp_proto = FIB_PROTOCOL_IP4,
|
||||
},
|
||||
};
|
||||
|
||||
const static mfib_prefix_t ip6_specials[] = {
|
||||
/* ALL prefixes are in network order */
|
||||
{
|
||||
/* (*,ff00::)/8 - all local subnet */
|
||||
.fp_grp_addr = {
|
||||
.ip6.as_u64[0] = 0x00000000000000ff,
|
||||
},
|
||||
.fp_len = 8,
|
||||
.fp_proto = FIB_PROTOCOL_IP6,
|
||||
},
|
||||
};
|
||||
|
||||
static void
|
||||
lcp_nl_mk_ip_addr (const struct nl_addr *rna, ip_address_t *ia)
|
||||
{
|
||||
@ -85,6 +145,397 @@ vnet_sw_interface_get_available_subid (vnet_main_t *vnm, u32 sw_if_index,
|
||||
return 1;
|
||||
}
|
||||
|
||||
static fib_protocol_t
|
||||
lcp_nl_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
|
||||
{
|
||||
fib_protocol_t fproto;
|
||||
|
||||
fproto =
|
||||
nl_addr_get_family (rna) == AF_INET6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
|
||||
ip46_address_reset (ia);
|
||||
if (FIB_PROTOCOL_IP4 == fproto)
|
||||
memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
|
||||
else
|
||||
memcpy (&ia->ip6, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
|
||||
|
||||
return (fproto);
|
||||
}
|
||||
|
||||
static void
|
||||
lcp_nl_mk_route_prefix (struct rtnl_route *r, fib_prefix_t *p)
|
||||
{
|
||||
const struct nl_addr *addr = rtnl_route_get_dst (r);
|
||||
|
||||
p->fp_len = nl_addr_get_prefixlen (addr);
|
||||
p->fp_proto = lcp_nl_mk_addr46 (addr, &p->fp_addr);
|
||||
}
|
||||
|
||||
static void
|
||||
lcp_nl_mk_route_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
|
||||
{
|
||||
const struct nl_addr *addr;
|
||||
|
||||
addr = rtnl_route_get_dst (r);
|
||||
|
||||
p->fp_len = nl_addr_get_prefixlen (addr);
|
||||
p->fp_proto = lcp_nl_mk_addr46 (addr, &p->fp_grp_addr);
|
||||
|
||||
addr = rtnl_route_get_src (r);
|
||||
if (addr)
|
||||
p->fp_proto = lcp_nl_mk_addr46 (addr, &p->fp_src_addr);
|
||||
}
|
||||
|
||||
static inline fib_source_t
|
||||
lcp_nl_proto_fib_source (u8 rt_proto)
|
||||
{
|
||||
lcp_nl_main_t *nlm = &lcp_nl_main;
|
||||
|
||||
/* See /etc/iproute2/rt_protos for the list */
|
||||
return (rt_proto <= RTPROT_STATIC) ? nlm->fib_src : nlm->fib_src_dynamic;
|
||||
}
|
||||
|
||||
static fib_entry_flag_t
|
||||
lcp_nl_mk_route_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
|
||||
{
|
||||
fib_entry_flag_t fef = FIB_ENTRY_FLAG_NONE;
|
||||
|
||||
fef |= lcp_nl_route_type_feflags[rtype];
|
||||
if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
|
||||
/* kernel proto is interface prefixes, 255 is linux's 'local' table */
|
||||
fef |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
|
||||
|
||||
return (fef);
|
||||
}
|
||||
|
||||
static void
|
||||
lcp_nl_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
|
||||
{
|
||||
lcp_nl_route_path_parse_t *ctx = arg;
|
||||
fib_route_path_t *path;
|
||||
lcp_itf_pair_t *lip;
|
||||
fib_protocol_t fproto;
|
||||
struct nl_addr *addr;
|
||||
|
||||
/* We do not log a warning/error here, because some routes (like
|
||||
* blackhole/unreach) don't have an interface associated with them.
|
||||
*/
|
||||
if (!(lip = lcp_itf_pair_get (
|
||||
lcp_itf_pair_find_by_vif (rtnl_route_nh_get_ifindex (rnh)))))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
vec_add2 (ctx->paths, path, 1);
|
||||
|
||||
path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
|
||||
path->frp_sw_if_index = lip->lip_phy_sw_if_index;
|
||||
path->frp_weight = rtnl_route_nh_get_weight (rnh);
|
||||
path->frp_preference = ctx->preference;
|
||||
|
||||
addr = rtnl_route_nh_get_gateway (rnh);
|
||||
|
||||
if (addr)
|
||||
fproto =
|
||||
lcp_nl_mk_addr46 (rtnl_route_nh_get_gateway (rnh), &path->frp_addr);
|
||||
else
|
||||
fproto = ctx->route_proto;
|
||||
|
||||
path->frp_proto = fib_proto_to_dpo (fproto);
|
||||
|
||||
if (ctx->is_mcast)
|
||||
path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
|
||||
|
||||
NL_DBG ("route_path_parse: path %U", format_fib_route_path, path);
|
||||
}
|
||||
|
||||
/*
|
||||
* blackhole, unreachable, prohibit will not have a next hop in an
|
||||
* RTM_NEWROUTE. Add a path for them.
|
||||
*/
|
||||
static void
|
||||
lcp_nl_route_path_add_special (struct rtnl_route *rr,
|
||||
lcp_nl_route_path_parse_t *ctx)
|
||||
{
|
||||
fib_route_path_t *path;
|
||||
|
||||
if (rtnl_route_get_type (rr) < RTN_BLACKHOLE)
|
||||
return;
|
||||
|
||||
/* if it already has a path, it does not need us to add one */
|
||||
if (vec_len (ctx->paths) > 0)
|
||||
return;
|
||||
|
||||
vec_add2 (ctx->paths, path, 1);
|
||||
|
||||
path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
|
||||
path->frp_sw_if_index = ~0;
|
||||
path->frp_proto = fib_proto_to_dpo (ctx->route_proto);
|
||||
path->frp_preference = ctx->preference;
|
||||
|
||||
NL_DBG ("route_path_add_special: path %U", format_fib_route_path, path);
|
||||
}
|
||||
|
||||
static lcp_nl_table_t *
|
||||
lcp_nl_table_find (uint32_t id, fib_protocol_t fproto)
|
||||
{
|
||||
lcp_nl_main_t *nlm = &lcp_nl_main;
|
||||
uword *p;
|
||||
|
||||
p = hash_get (nlm->table_db[fproto], id);
|
||||
|
||||
if (p)
|
||||
return pool_elt_at_index (nlm->table_pool, p[0]);
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*
 * Map a kernel routing table id to the table id used on the VPP side.
 * Kernel ids 254 and 255 both map to 0; every other id is used unchanged.
 */
static uint32_t
lcp_nl_table_k2f (uint32_t k)
{
  switch (k)
    {
    case 254:
    case 255:
      return 0;
    default:
      return k;
    }
}
|
||||
|
||||
static lcp_nl_table_t *
|
||||
lcp_nl_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
|
||||
{
|
||||
lcp_nl_table_t *nlt;
|
||||
lcp_nl_main_t *nlm = &lcp_nl_main;
|
||||
|
||||
id = lcp_nl_table_k2f (id);
|
||||
nlt = lcp_nl_table_find (id, fproto);
|
||||
|
||||
if (NULL == nlt)
|
||||
{
|
||||
pool_get_zero (nlm->table_pool, nlt);
|
||||
|
||||
nlt->nlt_id = id;
|
||||
nlt->nlt_proto = fproto;
|
||||
|
||||
nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
|
||||
nlt->nlt_proto, nlt->nlt_id, nlm->fib_src);
|
||||
nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
|
||||
nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
|
||||
|
||||
hash_set (nlm->table_db[fproto], nlt->nlt_id, nlt - nlm->table_pool);
|
||||
|
||||
if (FIB_PROTOCOL_IP4 == fproto)
|
||||
{
|
||||
/* Set the all 1s address in this table to punt */
|
||||
fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
|
||||
nlm->fib_src, FIB_ENTRY_FLAG_LOCAL);
|
||||
|
||||
const fib_route_path_t path = {
|
||||
.frp_proto = DPO_PROTO_IP4,
|
||||
.frp_addr = zero_addr,
|
||||
.frp_sw_if_index = ~0,
|
||||
.frp_fib_index = ~0,
|
||||
.frp_weight = 1,
|
||||
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
|
||||
.frp_flags = FIB_ROUTE_PATH_LOCAL,
|
||||
};
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
|
||||
{
|
||||
mfib_table_entry_path_update (nlt->nlt_mfib_index,
|
||||
&ip4_specials[ii],
|
||||
MFIB_SOURCE_PLUGIN_LOW, &path);
|
||||
}
|
||||
}
|
||||
else if (FIB_PROTOCOL_IP6 == fproto)
|
||||
{
|
||||
const fib_route_path_t path = {
|
||||
.frp_proto = DPO_PROTO_IP6,
|
||||
.frp_addr = zero_addr,
|
||||
.frp_sw_if_index = ~0,
|
||||
.frp_fib_index = ~0,
|
||||
.frp_weight = 1,
|
||||
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
|
||||
.frp_flags = FIB_ROUTE_PATH_LOCAL,
|
||||
};
|
||||
int ii;
|
||||
|
||||
for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
|
||||
{
|
||||
mfib_table_entry_path_update (nlt->nlt_mfib_index,
|
||||
&ip6_specials[ii],
|
||||
MFIB_SOURCE_PLUGIN_LOW, &path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nlt->nlt_refs++;
|
||||
|
||||
return (nlt);
|
||||
}
|
||||
|
||||
static void
|
||||
lcp_nl_table_unlock (lcp_nl_table_t *nlt)
|
||||
{
|
||||
lcp_nl_main_t *nlm = &lcp_nl_main;
|
||||
nlt->nlt_refs--;
|
||||
|
||||
if (0 == nlt->nlt_refs)
|
||||
{
|
||||
if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
|
||||
{
|
||||
/* Remove the all 1s address in this table to punt */
|
||||
fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
|
||||
nlm->fib_src);
|
||||
}
|
||||
|
||||
fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, nlm->fib_src);
|
||||
|
||||
hash_unset (nlm->table_db[nlt->nlt_proto], nlt->nlt_id);
|
||||
pool_put (nlm->table_pool, nlt);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lcp_nl_route_del (struct rtnl_route *rr)
|
||||
{
|
||||
uint32_t table_id;
|
||||
fib_prefix_t pfx;
|
||||
lcp_nl_table_t *nlt;
|
||||
uint8_t rtype, rproto;
|
||||
|
||||
NL_DBG ("route_del: netlink %U", format_nl_object, rr);
|
||||
|
||||
rtype = rtnl_route_get_type (rr);
|
||||
table_id = rtnl_route_get_table (rr);
|
||||
rproto = rtnl_route_get_protocol (rr);
|
||||
|
||||
/* skip unsupported route types and local table */
|
||||
if (!lcp_nl_route_type_valid[rtype] || (table_id == 255))
|
||||
return;
|
||||
|
||||
lcp_nl_mk_route_prefix (rr, &pfx);
|
||||
nlt = lcp_nl_table_find (lcp_nl_table_k2f (table_id), pfx.fp_proto);
|
||||
|
||||
if (NULL == nlt)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
lcp_nl_route_path_parse_t np = {
|
||||
.route_proto = pfx.fp_proto,
|
||||
.type_flags = lcp_nl_route_type_frpflags[rtype],
|
||||
};
|
||||
|
||||
rtnl_route_foreach_nexthop (rr, lcp_nl_route_path_parse, &np);
|
||||
lcp_nl_route_path_add_special (rr, &np);
|
||||
|
||||
if (0 != vec_len (np.paths))
|
||||
{
|
||||
fib_source_t fib_src = lcp_nl_proto_fib_source (rproto);
|
||||
fib_entry_flag_t entry_flags;
|
||||
|
||||
entry_flags = lcp_nl_mk_route_entry_flags (rtype, table_id, rproto);
|
||||
NL_INFO ("route_del: table %d prefix %U flags %U",
|
||||
rtnl_route_get_table (rr), format_fib_prefix, &pfx,
|
||||
format_fib_entry_flags, entry_flags);
|
||||
if (pfx.fp_proto == FIB_PROTOCOL_IP6)
|
||||
fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
|
||||
else
|
||||
fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
|
||||
np.paths);
|
||||
}
|
||||
|
||||
vec_free (np.paths);
|
||||
|
||||
lcp_nl_table_unlock (nlt);
|
||||
}
|
||||
|
||||
void
|
||||
lcp_nl_route_add (struct rtnl_route *rr)
|
||||
{
|
||||
fib_entry_flag_t entry_flags;
|
||||
uint32_t table_id;
|
||||
fib_prefix_t pfx;
|
||||
lcp_nl_table_t *nlt;
|
||||
uint8_t rtype, rproto;
|
||||
|
||||
NL_DBG ("route_add: netlink %U", format_nl_object, rr);
|
||||
|
||||
rtype = rtnl_route_get_type (rr);
|
||||
table_id = rtnl_route_get_table (rr);
|
||||
rproto = rtnl_route_get_protocol (rr);
|
||||
|
||||
/* skip unsupported route types and local table */
|
||||
if (!lcp_nl_route_type_valid[rtype] || (table_id == 255))
|
||||
return;
|
||||
|
||||
lcp_nl_mk_route_prefix (rr, &pfx);
|
||||
entry_flags = lcp_nl_mk_route_entry_flags (rtype, table_id, rproto);
|
||||
|
||||
/* link local IPv6 */
|
||||
if (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
|
||||
(ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
|
||||
ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6)))
|
||||
{
|
||||
NL_DBG ("route_add: skip table %d prefix %U flags %U",
|
||||
rtnl_route_get_table (rr), format_fib_prefix, &pfx,
|
||||
format_fib_entry_flags, entry_flags);
|
||||
return;
|
||||
}
|
||||
lcp_nl_route_path_parse_t np = {
|
||||
.route_proto = pfx.fp_proto,
|
||||
.is_mcast = (rtype == RTN_MULTICAST),
|
||||
.type_flags = lcp_nl_route_type_feflags[rtype],
|
||||
.preference = (u8) rtnl_route_get_priority (rr),
|
||||
};
|
||||
|
||||
rtnl_route_foreach_nexthop (rr, lcp_nl_route_path_parse, &np);
|
||||
|
||||
lcp_nl_route_path_add_special (rr, &np);
|
||||
|
||||
if (0 != vec_len (np.paths))
|
||||
{
|
||||
nlt = lcp_nl_table_add_or_lock (table_id, pfx.fp_proto);
|
||||
if (rtype == RTN_MULTICAST)
|
||||
{
|
||||
/* it's not clear to me how linux expresses the RPF paramters
|
||||
* so we'll allow from all interfaces and hope for the best */
|
||||
mfib_prefix_t mpfx = {};
|
||||
|
||||
lcp_nl_mk_route_mprefix (rr, &mpfx);
|
||||
|
||||
NL_INFO ("route_add: mcast table %d prefix %U flags %U",
|
||||
rtnl_route_get_table (rr), format_mfib_prefix, &mpfx,
|
||||
format_fib_entry_flags, entry_flags);
|
||||
mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
|
||||
MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
|
||||
MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
|
||||
|
||||
mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
|
||||
MFIB_SOURCE_PLUGIN_LOW, np.paths);
|
||||
}
|
||||
else
|
||||
{
|
||||
fib_source_t fib_src = lcp_nl_proto_fib_source (rproto);
|
||||
|
||||
NL_INFO ("route_add: table %d prefix %U flags %U",
|
||||
rtnl_route_get_table (rr), format_fib_prefix, &pfx,
|
||||
format_fib_entry_flags, entry_flags);
|
||||
|
||||
if (pfx.fp_proto == FIB_PROTOCOL_IP6)
|
||||
fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
|
||||
entry_flags, np.paths);
|
||||
else
|
||||
fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
|
||||
entry_flags, np.paths);
|
||||
}
|
||||
}
|
||||
else
|
||||
NL_ERROR ("route_add: no paths table %d prefix %U flags %U",
|
||||
rtnl_route_get_table (rr), format_fib_prefix, &pfx,
|
||||
format_fib_entry_flags, entry_flags);
|
||||
vec_free (np.paths);
|
||||
}
|
||||
|
||||
// Returns the LIP for a newly created sub-int pair, or
|
||||
// NULL in case no sub-int could be created.
|
||||
static lcp_itf_pair_t *
|
||||
@ -176,7 +627,7 @@ lcp_nl_link_add_vlan (struct rtnl_link *rl)
|
||||
}
|
||||
|
||||
vlib_worker_thread_barrier_sync (vlib_get_main ());
|
||||
NL_NOTICE (
|
||||
NL_INFO (
|
||||
"link_add_vlan: creating subid %u outer %u inner %u flags %u on phy %U",
|
||||
subid, outer_vlan, inner_vlan, flags, format_vnet_sw_if_index_name, vnm,
|
||||
parent_sw->sup_sw_if_index);
|
||||
@ -206,7 +657,7 @@ lcp_nl_link_add_vlan (struct rtnl_link *rl)
|
||||
lcpm->lcp_auto_subint = old_lcp_auto_subint;
|
||||
return NULL;
|
||||
}
|
||||
NL_NOTICE ("link_add_vlan: creating subid %u outer %u inner %u flags %u on "
|
||||
NL_INFO ("link_add_vlan: creating subid %u outer %u inner %u flags %u on "
|
||||
"host %U phy %U",
|
||||
subid, outer_vlan, inner_vlan, flags,
|
||||
format_vnet_sw_if_index_name, vnm,
|
||||
@ -373,24 +824,12 @@ lcp_nl_link_add (struct rtnl_link *rl, void *ctx)
|
||||
lcp_nl_link_set_lladdr (rl, lip);
|
||||
vlib_worker_thread_barrier_release (vlib_get_main ());
|
||||
|
||||
NL_NOTICE ("link_add: %U admin %s", format_lcp_itf_pair, lip,
|
||||
NL_INFO ("link_add: %U admin %s", format_lcp_itf_pair, lip,
|
||||
admin_state ? "up" : "down");
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static const mfib_prefix_t ip4_specials[] = {
|
||||
/* ALL prefixes are in network order */
|
||||
{
|
||||
/* (*,224.0.0.0)/24 - all local subnet */
|
||||
.fp_grp_addr = {
|
||||
.ip4.data_u32 = 0x000000e0,
|
||||
},
|
||||
.fp_len = 24,
|
||||
.fp_proto = FIB_PROTOCOL_IP4,
|
||||
},
|
||||
};
|
||||
|
||||
static void
|
||||
lcp_nl_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
|
||||
{
|
||||
@ -423,18 +862,6 @@ lcp_nl_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
|
||||
}
|
||||
}
|
||||
|
||||
static const mfib_prefix_t ip6_specials[] = {
|
||||
/* ALL prefixes are in network order */
|
||||
{
|
||||
/* (*,ff00::)/8 - all local subnet */
|
||||
.fp_grp_addr = {
|
||||
.ip6.as_u64[0] = 0x00000000000000ff,
|
||||
},
|
||||
.fp_len = 8,
|
||||
.fp_proto = FIB_PROTOCOL_IP6,
|
||||
},
|
||||
};
|
||||
|
||||
static void
|
||||
lcp_nl_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
|
||||
{
|
||||
@ -577,10 +1004,9 @@ lcp_nl_neigh_add (struct rtnl_neigh *rn)
|
||||
}
|
||||
else
|
||||
{
|
||||
NL_NOTICE ("neigh_add: Added %U lladdr %U iface %U",
|
||||
format_ip_address, &nh, format_mac_address, &mac,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (),
|
||||
lip->lip_phy_sw_if_index);
|
||||
NL_INFO ("neigh_add: Added %U lladdr %U iface %U", format_ip_address,
|
||||
&nh, format_mac_address, &mac, format_vnet_sw_if_index_name,
|
||||
vnet_get_main (), lip->lip_phy_sw_if_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -607,9 +1033,9 @@ lcp_nl_neigh_del (struct rtnl_neigh *rn)
|
||||
|
||||
if (rv == 0 || rv == VNET_API_ERROR_NO_SUCH_ENTRY)
|
||||
{
|
||||
NL_NOTICE ("neigh_del: Deleted %U iface %U", format_ip_address, &nh,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (),
|
||||
lip->lip_phy_sw_if_index);
|
||||
NL_INFO ("neigh_del: Deleted %U iface %U", format_ip_address, &nh,
|
||||
format_vnet_sw_if_index_name, vnet_get_main (),
|
||||
lip->lip_phy_sw_if_index);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Reference in New Issue
Block a user