[Babel-users] [PATCH] Use per-table dumps on kernels where this is available

Daniel Gröber dxld at darkboxed.org
Sat Apr 16 20:21:53 BST 2022


Hi Toke,

I did some deeper testing on this. It seems to me that while STRICT_CHK
enables the route filtering, and that bit does seem to work, it actually
breaks something else: babeld is no longer installing any routes after
applying this patch (when it did before). The xroutes are missing from the
status socket dump too, and all interfaces show as down.

My guess is the RTM_GETADDR dump is breaking, as inet6_dump_addr also changes
behaviour based on the strict_check flag. I'll need to do some more digging
later to see what's going on there.

--Daniel

On Sat, Apr 16, 2022 at 08:00:59PM +0200, Toke Høiland-Jørgensen wrote:
> Starting with version 4.20, the Linux kernel gained the ability to filter
> route dumps on (among other things) table ID on the kernel side of the
> netlink transaction. This can significantly increase the performance of
> route dumps in cases where babeld is configured to only monitor a subset of
> the kernel's route tables. In particular, it avoids the per-table lock
> contention inside the kernel if another routing daemon is updating a table
> that babeld is not using while babeld issues a dump request.
> 
> The filtering works by setting the rtm_table attribute on the netlink dump
> request, but it only works if the NETLINK_GET_STRICT_CHK socket option is
> set on the netlink socket. Older kernels will just ignore the option, so we
> can just always pass it to the kernel, and simply break out of the
> per-table loop if we detected that table filtering is unavailable.
> 
> Signed-off-by: Toke Høiland-Jørgensen <toke at toke.dk>
> ---
>  kernel_netlink.c | 49 +++++++++++++++++++++++++++++++++---------------
>  1 file changed, 34 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel_netlink.c b/kernel_netlink.c
> index 3d17d7149090..3829d933afa9 100644
> --- a/kernel_netlink.c
> +++ b/kernel_netlink.c
> @@ -46,6 +46,10 @@ THE SOFTWARE.
>  #define BRCTL_GET_BRIDGES 1
>  #endif
>  
> +#ifndef NETLINK_GET_STRICT_CHK
> +#define NETLINK_GET_STRICT_CHK 12
> +#endif
> +
>  #if(__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 5)
>  #define RTA_TABLE 15
>  #endif
> @@ -74,6 +78,7 @@ THE SOFTWARE.
>      } while(0)
>  
>  int export_table = -1, import_tables[MAX_IMPORT_TABLES], import_table_count = 0;
> +int per_table_dumps = 0;
>  
>  struct sysctl_setting {
>      char *name;
> @@ -283,7 +288,7 @@ static int nl_setup = 0;
>  static int
>  netlink_socket(struct netlink *nl, uint32_t groups)
>  {
> -    int rc;
> +    int rc, strict = 1;
>      int rcvsize = 512 * 1024;
>  
>      nl->sock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> @@ -319,6 +324,10 @@ netlink_socket(struct netlink *nl, uint32_t groups)
>          }
>      }
>  
> +    rc = setsockopt(nl->sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
> +                    &strict, sizeof(strict));
> +    per_table_dumps = (rc == 0);
> +
>      rc = bind(nl->sock, (struct sockaddr *)&nl->sockaddr, nl->socklen);
>      if(rc < 0)
>          goto fail;
> @@ -1302,9 +1311,9 @@ filter_kernel_routes(struct nlmsghdr *nh, struct kernel_route *route)
>  int
>  kernel_dump(int operation, struct kernel_filter *filter)
>  {
> -    int i, rc;
> +    int i, j, rc;
>      int families[2] = { AF_INET6, AF_INET };
> -    struct rtgenmsg g;
> +    struct rtmsg rtm;
>  
>      if(!nl_setup) {
>          fprintf(stderr,"kernel_dump: netlink not initialized.\n");
> @@ -1323,24 +1332,34 @@ kernel_dump(int operation, struct kernel_filter *filter)
>      }
>  
>      for(i = 0; i < 2; i++) {
> -        memset(&g, 0, sizeof(g));
> -        g.rtgen_family = families[i];
> +        memset(&rtm, 0, sizeof(rtm));
> +        rtm.rtm_family = families[i];
>          if(operation & CHANGE_ROUTE) {
> -            rc = netlink_send_dump(RTM_GETROUTE, &g, sizeof(g));
> -            if(rc < 0)
> -                return -1;
> +            for (j = 0; j < import_table_count; j++) {
> +                rtm.rtm_table = import_tables[j];
>  
> -            rc = netlink_read(&nl_command, NULL, 1, filter);
> -            if(rc < 0)
> -                return -1;
> -        }
> +                rc = netlink_send_dump(RTM_GETROUTE, &rtm, sizeof(rtm));
> +                if(rc < 0)
> +                    return -1;
>  
> +                rc = netlink_read(&nl_command, NULL, 1, filter);
> +                if(rc < 0)
> +                    return -1;
> +
> +                /* the filtering on rtm_table above won't work on old kernels,
> +                   in which case we'll just get routes from all tables in one
> +                   dump; we detect this on socket setup, so we can just break
> +                   the loop if we know it won't work */
> +                if (!per_table_dumps)
> +                    break;
> +            }
> +        }
>      }
>  
>      if(operation & CHANGE_ADDR) {
> -        memset(&g, 0, sizeof(g));
> -        g.rtgen_family = AF_UNSPEC;
> -        rc = netlink_send_dump(RTM_GETADDR, &g, sizeof(g));
> +        memset(&rtm, 0, sizeof(rtm));
> +        rtm.rtm_family = AF_UNSPEC;
> +        rc = netlink_send_dump(RTM_GETADDR, &rtm, sizeof(rtm));
>          if(rc < 0)
>              return -1;
>  
> -- 
> 2.35.3
> 



More information about the Babel-users mailing list