[Babel-users] [PATCH v2] Use per-table dumps on kernels where this is available

Daniel Gröber dxld at darkboxed.org
Sat Apr 16 21:31:19 BST 2022


Hi Toke,

That still doesn't seem to work, I get

    Interface eth0 has no link-local address.

so it seems the getaddr stuff is still not working.

--Daniel

On Sat, Apr 16, 2022 at 10:10:30PM +0200, Toke Høiland-Jørgensen wrote:
> Starting with version 4.20, the Linux kernel gained the ability to filter
> route dumps on (among other things) table ID on the kernel side of the
> netlink transaction. This can significantly increase the performance of
> route dumps in cases where babeld is configured to only monitor a subset of
> the kernel's route tables. In particular, it avoids the per-table lock
> contention inside the kernel if another routing daemon is updating a table
> that babeld is not using while babeld issues a dump request.
> 
> The filtering works by setting the rtm_table attribute on the netlink dump
> request, but it only works if the NETLINK_GET_STRICT_CHK socket option is
> set on the netlink socket. Older kernels will just ignore the option, so we
> can just always pass it to the kernel, and simply break out of the
> per-table loop if we detect that table filtering is unavailable.
> 
> Signed-off-by: Toke Høiland-Jørgensen <toke at toke.dk>
> ---
> v2:
> - Use right struct type for RTM_GETADDR request
> 
>  kernel_netlink.c | 50 +++++++++++++++++++++++++++++++++---------------
>  1 file changed, 35 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel_netlink.c b/kernel_netlink.c
> index 3d17d7149090..d400c1153d74 100644
> --- a/kernel_netlink.c
> +++ b/kernel_netlink.c
> @@ -46,6 +46,10 @@ THE SOFTWARE.
>  #define BRCTL_GET_BRIDGES 1
>  #endif
>  
> +#ifndef NETLINK_GET_STRICT_CHK
> +#define NETLINK_GET_STRICT_CHK 12
> +#endif
> +
>  #if(__GLIBC__ < 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ <= 5)
>  #define RTA_TABLE 15
>  #endif
> @@ -74,6 +78,7 @@ THE SOFTWARE.
>      } while(0)
>  
>  int export_table = -1, import_tables[MAX_IMPORT_TABLES], import_table_count = 0;
> +int per_table_dumps = 0;
>  
>  struct sysctl_setting {
>      char *name;
> @@ -283,7 +288,7 @@ static int nl_setup = 0;
>  static int
>  netlink_socket(struct netlink *nl, uint32_t groups)
>  {
> -    int rc;
> +    int rc, strict = 1;
>      int rcvsize = 512 * 1024;
>  
>      nl->sock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> @@ -319,6 +324,10 @@ netlink_socket(struct netlink *nl, uint32_t groups)
>          }
>      }
>  
> +    rc = setsockopt(nl->sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
> +                    &strict, sizeof(strict));
> +    per_table_dumps = (rc == 0);
> +
>      rc = bind(nl->sock, (struct sockaddr *)&nl->sockaddr, nl->socklen);
>      if(rc < 0)
>          goto fail;
> @@ -1302,9 +1311,8 @@ filter_kernel_routes(struct nlmsghdr *nh, struct kernel_route *route)
>  int
>  kernel_dump(int operation, struct kernel_filter *filter)
>  {
> -    int i, rc;
> +    int i, j, rc;
>      int families[2] = { AF_INET6, AF_INET };
> -    struct rtgenmsg g;
>  
>      if(!nl_setup) {
>          fprintf(stderr,"kernel_dump: netlink not initialized.\n");
> @@ -1323,24 +1331,36 @@ kernel_dump(int operation, struct kernel_filter *filter)
>      }
>  
>      for(i = 0; i < 2; i++) {
> -        memset(&g, 0, sizeof(g));
> -        g.rtgen_family = families[i];
> +        struct rtmsg msg = {
> +            .rtm_family = families[i]
> +        };
> +
>          if(operation & CHANGE_ROUTE) {
> -            rc = netlink_send_dump(RTM_GETROUTE, &g, sizeof(g));
> -            if(rc < 0)
> -                return -1;
> +            for (j = 0; j < import_table_count; j++) {
> +                msg.rtm_table = import_tables[j];
>  
> -            rc = netlink_read(&nl_command, NULL, 1, filter);
> -            if(rc < 0)
> -                return -1;
> -        }
> +                rc = netlink_send_dump(RTM_GETROUTE, &msg, sizeof(msg));
> +                if(rc < 0)
> +                    return -1;
> +
> +                rc = netlink_read(&nl_command, NULL, 1, filter);
> +                if(rc < 0)
> +                    return -1;
>  
> +                /* the filtering on rtm_table above won't work on old kernels,
> +                   in which case we'll just get routes from all tables in one
> +                   dump; we detect this on socket setup, so we can just break
> +                   the loop if we know it won't work */
> +                if (!per_table_dumps)
> +                    break;
> +            }
> +        }
>      }
>  
>      if(operation & CHANGE_ADDR) {
> -        memset(&g, 0, sizeof(g));
> -        g.rtgen_family = AF_UNSPEC;
> -        rc = netlink_send_dump(RTM_GETADDR, &g, sizeof(g));
> +        struct ifaddrmsg msg = {};
> +
> +        rc = netlink_send_dump(RTM_GETADDR, &msg, sizeof(msg));
>          if(rc < 0)
>              return -1;
>  
> -- 
> 2.35.3
> 



More information about the Babel-users mailing list