[Babel-users] [PATCH v2] Use per-table dumps on kernels where this is available
Daniel Gröber
dxld at darkboxed.org
Sat Apr 16 21:31:19 BST 2022
Hi Toke,
That still doesn't seem to work; I get:
Interface eth0 has no link-local address.
so it seems the getaddr stuff is still not working.
--Daniel
On Sat, Apr 16, 2022 at 10:10:30PM +0200, Toke Høiland-Jørgensen wrote:
> Starting with version 4.20, the Linux kernel gained the ability to filter
> route dumps on (among other things) table ID on the kernel side of the
> netlink transaction. This can significantly increase the performance of
> route dumps in cases where babeld is configured to only monitor a subset of
> the kernel's route tables. In particular, it avoids the per-table lock
> contention inside the kernel if another routing daemon is updating a table
> that babeld is not using while babeld issues a dump request.
>
> The filtering works by setting the rtm_table attribute on the netlink dump
> request, but it only works if the NETLINK_GET_STRICT_CHK socket option is
> set on the netlink socket. Older kernels will just ignore the option, so we
> can just always pass it to the kernel, and simply break out of the
> per-table loop if we detected that table filtering is unavailable.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke at toke.dk>
> ---
> v2:
> - Use right struct type for RTM_GETADDR request
>
> kernel_netlink.c | 50 +++++++++++++++++++++++++++++++++---------------
> 1 file changed, 35 insertions(+), 15 deletions(-)
>
> diff --git a/kernel_netlink.c b/kernel_netlink.c
> index 3d17d7149090..d400c1153d74 100644
> --- a/kernel_netlink.c
> +++ b/kernel_netlink.c
> @@ -46,6 +46,10 @@ THE SOFTWARE.
> #define BRCTL_GET_BRIDGES 1
> #endif
>
> +#ifndef NETLINK_GET_STRICT_CHK
> +#define NETLINK_GET_STRICT_CHK 12
> +#endif
> +
> #if(__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 5)
> #define RTA_TABLE 15
> #endif
> @@ -74,6 +78,7 @@ THE SOFTWARE.
> } while(0)
>
> int export_table = -1, import_tables[MAX_IMPORT_TABLES], import_table_count = 0;
> +int per_table_dumps = 0;
>
> struct sysctl_setting {
> char *name;
> @@ -283,7 +288,7 @@ static int nl_setup = 0;
> static int
> netlink_socket(struct netlink *nl, uint32_t groups)
> {
> - int rc;
> + int rc, strict = 1;
> int rcvsize = 512 * 1024;
>
> nl->sock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> @@ -319,6 +324,10 @@ netlink_socket(struct netlink *nl, uint32_t groups)
> }
> }
>
> + rc = setsockopt(nl->sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
> + &strict, sizeof(strict));
> + per_table_dumps = (rc == 0);
> +
> rc = bind(nl->sock, (struct sockaddr *)&nl->sockaddr, nl->socklen);
> if(rc < 0)
> goto fail;
> @@ -1302,9 +1311,8 @@ filter_kernel_routes(struct nlmsghdr *nh, struct kernel_route *route)
> int
> kernel_dump(int operation, struct kernel_filter *filter)
> {
> - int i, rc;
> + int i, j, rc;
> int families[2] = { AF_INET6, AF_INET };
> - struct rtgenmsg g;
>
> if(!nl_setup) {
> fprintf(stderr,"kernel_dump: netlink not initialized.\n");
> @@ -1323,24 +1331,36 @@ kernel_dump(int operation, struct kernel_filter *filter)
> }
>
> for(i = 0; i < 2; i++) {
> - memset(&g, 0, sizeof(g));
> - g.rtgen_family = families[i];
> + struct rtmsg msg = {
> + .rtm_family = families[i]
> + };
> +
> if(operation & CHANGE_ROUTE) {
> - rc = netlink_send_dump(RTM_GETROUTE, &g, sizeof(g));
> - if(rc < 0)
> - return -1;
> + for (j = 0; j < import_table_count; j++) {
> + msg.rtm_table = import_tables[j];
>
> - rc = netlink_read(&nl_command, NULL, 1, filter);
> - if(rc < 0)
> - return -1;
> - }
> + rc = netlink_send_dump(RTM_GETROUTE, &msg, sizeof(msg));
> + if(rc < 0)
> + return -1;
> +
> + rc = netlink_read(&nl_command, NULL, 1, filter);
> + if(rc < 0)
> + return -1;
>
> + /* the filtering on rtm_table above won't work on old kernels,
> + in which case we'll just get routes from all tables in one
> + dump; we detect this on socket setup, so we can just break
> + the loop if we know it won't work */
> + if (!per_table_dumps)
> + break;
> + }
> + }
> }
>
> if(operation & CHANGE_ADDR) {
> - memset(&g, 0, sizeof(g));
> - g.rtgen_family = AF_UNSPEC;
> - rc = netlink_send_dump(RTM_GETADDR, &g, sizeof(g));
> + struct ifaddrmsg msg = {};
> +
> + rc = netlink_send_dump(RTM_GETADDR, &msg, sizeof(msg));
> if(rc < 0)
> return -1;
>
> --
> 2.35.3
>
More information about the Babel-users
mailing list