linuxdebug/drivers/net/wireguard/ratelimiter.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include "ratelimiter.h"
#include <linux/siphash.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <net/ip.h>

static struct kmem_cache *entry_cache;
static hsiphash_key_t key;
static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
static DEFINE_MUTEX(init_lock);
static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
static atomic_t total_entries = ATOMIC_INIT(0);
static unsigned int max_entries, table_size;
static void wg_ratelimiter_gc_entries(struct work_struct *);
static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
static struct hlist_head *table_v4;
#if IS_ENABLED(CONFIG_IPV6)
static struct hlist_head *table_v6;
#endif

struct ratelimiter_entry {
	u64 last_time_ns, tokens, ip;
	void *net;
	spinlock_t lock;
	struct hlist_node hash;
	struct rcu_head rcu;
};

enum {
	PACKETS_PER_SECOND = 20,
	PACKETS_BURSTABLE = 5,
	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
};

static void entry_free(struct rcu_head *rcu)
{
	kmem_cache_free(entry_cache,
			container_of(rcu, struct ratelimiter_entry, rcu));
	atomic_dec(&total_entries);
}

static void entry_uninit(struct ratelimiter_entry *entry)
{
	hlist_del_rcu(&entry->hash);
	call_rcu(&entry->rcu, entry_free);
}

/* Calling this function with a NULL work uninits all entries. */
static void wg_ratelimiter_gc_entries(struct work_struct *work)
{
	const u64 now = ktime_get_coarse_boottime_ns();
	struct ratelimiter_entry *entry;
	struct hlist_node *temp;
	unsigned int i;

	for (i = 0; i < table_size; ++i) {
		spin_lock(&table_lock);
		hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#if IS_ENABLED(CONFIG_IPV6)
		hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
			if (unlikely(!work) ||
			    now - entry->last_time_ns > NSEC_PER_SEC)
				entry_uninit(entry);
		}
#endif
		spin_unlock(&table_lock);
		if (likely(work))
			cond_resched();
	}
	if (likely(work))
		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
}

bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
{
	/* We only take the bottom half of the net pointer, so that we can hash
	 * 3 words in the end. This way, siphash's len param fits into the final
	 * u32, and we don't incur an extra round.
	 */
	const u32 net_word = (unsigned long)net;
	struct ratelimiter_entry *entry;
	struct hlist_head *bucket;
	u64 ip;

	if (skb->protocol == htons(ETH_P_IP)) {
		ip = (u64 __force)ip_hdr(skb)->saddr;
		bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
				   (table_size - 1)];
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		/* Only use 64 bits, so as to ratelimit the whole /64. */
		memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
		bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
				   (table_size - 1)];
	}
#endif
	else
		return false;
	rcu_read_lock();
	hlist_for_each_entry_rcu(entry, bucket, hash) {
		if (entry->net == net && entry->ip == ip) {
			u64 now, tokens;
			bool ret;
			/* Quasi-inspired by nft_limit.c, but this is actually a
			 * slightly different algorithm. Namely, we incorporate
			 * the burst as part of the maximum tokens, rather than
			 * as part of the rate.
			 */
			spin_lock(&entry->lock);
			now = ktime_get_coarse_boottime_ns();
			tokens = min_t(u64, TOKEN_MAX,
				       entry->tokens + now -
					       entry->last_time_ns);
			entry->last_time_ns = now;
			ret = tokens >= PACKET_COST;
			entry->tokens = ret ? tokens - PACKET_COST : tokens;
			spin_unlock(&entry->lock);
			rcu_read_unlock();
			return ret;
		}
	}
	rcu_read_unlock();

	if (atomic_inc_return(&total_entries) > max_entries)
		goto err_oom;

	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
	if (unlikely(!entry))
		goto err_oom;

	entry->net = net;
	entry->ip = ip;
	INIT_HLIST_NODE(&entry->hash);
	spin_lock_init(&entry->lock);
	entry->last_time_ns = ktime_get_coarse_boottime_ns();
	entry->tokens = TOKEN_MAX - PACKET_COST;
	spin_lock(&table_lock);
	hlist_add_head_rcu(&entry->hash, bucket);
	spin_unlock(&table_lock);
	return true;

err_oom:
	atomic_dec(&total_entries);
	return false;
}

int wg_ratelimiter_init(void)
{
	mutex_lock(&init_lock);
	if (++init_refcnt != 1)
		goto out;

	entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
	if (!entry_cache)
		goto err;

	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
	 * but what it shares in common is that it uses a massive hashtable. So,
	 * we borrow their wisdom about good table sizes on different systems
	 * dependent on RAM. This calculation here comes from there.
	 */
	table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
		max_t(unsigned long, 16, roundup_pow_of_two(
			(totalram_pages() << PAGE_SHIFT) /
			(1U << 14) / sizeof(struct hlist_head)));
	max_entries = table_size * 8;

	table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);
	if (unlikely(!table_v4))
		goto err_kmemcache;

#if IS_ENABLED(CONFIG_IPV6)
	table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);
	if (unlikely(!table_v6)) {
		kvfree(table_v4);
		goto err_kmemcache;
	}
#endif

	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
	get_random_bytes(&key, sizeof(key));
out:
	mutex_unlock(&init_lock);
	return 0;

err_kmemcache:
	kmem_cache_destroy(entry_cache);
err:
	--init_refcnt;
	mutex_unlock(&init_lock);
	return -ENOMEM;
}

void wg_ratelimiter_uninit(void)
{
	mutex_lock(&init_lock);
	if (!init_refcnt || --init_refcnt)
		goto out;

	cancel_delayed_work_sync(&gc_work);
	wg_ratelimiter_gc_entries(NULL);
	rcu_barrier();
	kvfree(table_v4);
#if IS_ENABLED(CONFIG_IPV6)
	kvfree(table_v6);
#endif
	kmem_cache_destroy(entry_cache);
out:
	mutex_unlock(&init_lock);
}

#include "selftest/ratelimiter.c"
null pointer error 2024-07-16 15:50:57 +02:00			`// SPDX-License-Identifier: GPL-2.0`
			`/*`
			`* Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.`
			`*/`

			`#include "ratelimiter.h"`
			`#include <linux/siphash.h>`
			`#include <linux/mm.h>`
			`#include <linux/slab.h>`
			`#include <net/ip.h>`

			`static struct kmem_cache *entry_cache;`
			`static hsiphash_key_t key;`
			`static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");`
			`static DEFINE_MUTEX(init_lock);`
			`static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */`
			`static atomic_t total_entries = ATOMIC_INIT(0);`
			`static unsigned int max_entries, table_size;`
			`static void wg_ratelimiter_gc_entries(struct work_struct *);`
			`static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);`
			`static struct hlist_head *table_v4;`
			`#if IS_ENABLED(CONFIG_IPV6)`
			`static struct hlist_head *table_v6;`
			`#endif`

			`struct ratelimiter_entry {`
			`u64 last_time_ns, tokens, ip;`
			`void *net;`
			`spinlock_t lock;`
			`struct hlist_node hash;`
			`struct rcu_head rcu;`
			`};`

			`enum {`
			`PACKETS_PER_SECOND = 20,`
			`PACKETS_BURSTABLE = 5,`
			`PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,`
			`TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE`
			`};`

			`static void entry_free(struct rcu_head *rcu)`
			`{`
			`kmem_cache_free(entry_cache,`
			`container_of(rcu, struct ratelimiter_entry, rcu));`
			`atomic_dec(&total_entries);`
			`}`

			`static void entry_uninit(struct ratelimiter_entry *entry)`
			`{`
			`hlist_del_rcu(&entry->hash);`
			`call_rcu(&entry->rcu, entry_free);`
			`}`

			`/* Calling this function with a NULL work uninits all entries. */`
			`static void wg_ratelimiter_gc_entries(struct work_struct *work)`
			`{`
			`const u64 now = ktime_get_coarse_boottime_ns();`
			`struct ratelimiter_entry *entry;`
			`struct hlist_node *temp;`
			`unsigned int i;`

			`for (i = 0; i < table_size; ++i) {`
			`spin_lock(&table_lock);`
			`hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {`
			`if (unlikely(!work) \|\|`
			`now - entry->last_time_ns > NSEC_PER_SEC)`
			`entry_uninit(entry);`
			`}`
			`#if IS_ENABLED(CONFIG_IPV6)`
			`hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {`
			`if (unlikely(!work) \|\|`
			`now - entry->last_time_ns > NSEC_PER_SEC)`
			`entry_uninit(entry);`
			`}`
			`#endif`
			`spin_unlock(&table_lock);`
			`if (likely(work))`
			`cond_resched();`
			`}`
			`if (likely(work))`
			`queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);`
			`}`

			`bool wg_ratelimiter_allow(struct sk_buff skb, struct net net)`
			`{`
			`/* We only take the bottom half of the net pointer, so that we can hash`
			`* 3 words in the end. This way, siphash's len param fits into the final`
			`* u32, and we don't incur an extra round.`
			`*/`
			`const u32 net_word = (unsigned long)net;`
			`struct ratelimiter_entry *entry;`
			`struct hlist_head *bucket;`
			`u64 ip;`

			`if (skb->protocol == htons(ETH_P_IP)) {`
			`ip = (u64 __force)ip_hdr(skb)->saddr;`
			`bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &`
			`(table_size - 1)];`
			`}`
			`#if IS_ENABLED(CONFIG_IPV6)`
			`else if (skb->protocol == htons(ETH_P_IPV6)) {`
			`/* Only use 64 bits, so as to ratelimit the whole /64. */`
			`memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));`
			`bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &`
			`(table_size - 1)];`
			`}`
			`#endif`
			`else`
			`return false;`
			`rcu_read_lock();`
			`hlist_for_each_entry_rcu(entry, bucket, hash) {`
			`if (entry->net == net && entry->ip == ip) {`
			`u64 now, tokens;`
			`bool ret;`
			`/* Quasi-inspired by nft_limit.c, but this is actually a`
			`* slightly different algorithm. Namely, we incorporate`
			`* the burst as part of the maximum tokens, rather than`
			`* as part of the rate.`
			`*/`
			`spin_lock(&entry->lock);`
			`now = ktime_get_coarse_boottime_ns();`
			`tokens = min_t(u64, TOKEN_MAX,`
			`entry->tokens + now -`
			`entry->last_time_ns);`
			`entry->last_time_ns = now;`
			`ret = tokens >= PACKET_COST;`
			`entry->tokens = ret ? tokens - PACKET_COST : tokens;`
			`spin_unlock(&entry->lock);`
			`rcu_read_unlock();`
			`return ret;`
			`}`
			`}`
			`rcu_read_unlock();`

			`if (atomic_inc_return(&total_entries) > max_entries)`
			`goto err_oom;`

			`entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);`
			`if (unlikely(!entry))`
			`goto err_oom;`

			`entry->net = net;`
			`entry->ip = ip;`
			`INIT_HLIST_NODE(&entry->hash);`
			`spin_lock_init(&entry->lock);`
			`entry->last_time_ns = ktime_get_coarse_boottime_ns();`
			`entry->tokens = TOKEN_MAX - PACKET_COST;`
			`spin_lock(&table_lock);`
			`hlist_add_head_rcu(&entry->hash, bucket);`
			`spin_unlock(&table_lock);`
			`return true;`

			`err_oom:`
			`atomic_dec(&total_entries);`
			`return false;`
			`}`

			`int wg_ratelimiter_init(void)`
			`{`
			`mutex_lock(&init_lock);`
			`if (++init_refcnt != 1)`
			`goto out;`

			`entry_cache = KMEM_CACHE(ratelimiter_entry, 0);`
			`if (!entry_cache)`
			`goto err;`

			`/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,`
			`* but what it shares in common is that it uses a massive hashtable. So,`
			`* we borrow their wisdom about good table sizes on different systems`
			`* dependent on RAM. This calculation here comes from there.`
			`*/`
			`table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :`
			`max_t(unsigned long, 16, roundup_pow_of_two(`
			`(totalram_pages() << PAGE_SHIFT) /`
			`(1U << 14) / sizeof(struct hlist_head)));`
			`max_entries = table_size * 8;`

			`table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL);`
			`if (unlikely(!table_v4))`
			`goto err_kmemcache;`

			`#if IS_ENABLED(CONFIG_IPV6)`
			`table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL);`
			`if (unlikely(!table_v6)) {`
			`kvfree(table_v4);`
			`goto err_kmemcache;`
			`}`
			`#endif`

			`queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);`
			`get_random_bytes(&key, sizeof(key));`
			`out:`
			`mutex_unlock(&init_lock);`
			`return 0;`

			`err_kmemcache:`
			`kmem_cache_destroy(entry_cache);`
			`err:`
			`--init_refcnt;`
			`mutex_unlock(&init_lock);`
			`return -ENOMEM;`
			`}`

			`void wg_ratelimiter_uninit(void)`
			`{`
			`mutex_lock(&init_lock);`
			`if (!init_refcnt \|\| --init_refcnt)`
			`goto out;`

			`cancel_delayed_work_sync(&gc_work);`
			`wg_ratelimiter_gc_entries(NULL);`
			`rcu_barrier();`
			`kvfree(table_v4);`
			`#if IS_ENABLED(CONFIG_IPV6)`
			`kvfree(table_v6);`
			`#endif`
			`kmem_cache_destroy(entry_cache);`
			`out:`
			`mutex_unlock(&init_lock);`
			`}`

			`#include "selftest/ratelimiter.c"`