/*
 * livepatch_bsc1233072
 *
 * Fix for CVE-2024-50154, bsc#1233072
 *
 *  Copyright (c) 2025 SUSE
 *  Author: Lidong Zhong <lidong.zhong@suse.com>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */


/* klp-ccp: from net/ipv4/inet_connection_sock.c */
#include <linux/module.h>
#include <linux/jhash.h>

#include <net/inet_connection_sock.h>

/* klp-ccp: from net/ipv4/inet_connection_sock.c */
#include <net/inet_hashtables.h>

/* klp-ccp: from include/net/inet_hashtables.h */
static bool (*klpe_inet_ehash_insert)(struct sock *sk, struct sock *osk, bool *found_dup_sk);

/* klp-ccp: from net/ipv4/inet_connection_sock.c */
#include <net/inet_timewait_sock.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>

static void syn_ack_recalc(struct request_sock *req,
			   const int max_syn_ack_retries,
			   const u8 rskq_defer_accept,
			   int *expire, int *resend)
{
	if (!rskq_defer_accept) {
		*expire = req->num_timeout >= max_syn_ack_retries;
		*resend = 1;
		return;
	}
	*expire = req->num_timeout >= max_syn_ack_retries &&
		  (!inet_rsk(req)->acked || req->num_timeout >= rskq_defer_accept);
	/* Do not resend while waiting for data after ACK,
	 * start to resend on end of deferring period to give
	 * last chance for data or ACK to create established socket.
	 */
	*resend = !inet_rsk(req)->acked ||
		  req->num_timeout >= rskq_defer_accept - 1;
}

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req);

extern typeof(inet_rtx_syn_ack) inet_rtx_syn_ack;

static struct request_sock *(*klpe_inet_reqsk_clone)(struct request_sock *req,
					     struct sock *sk);

static void reqsk_queue_migrated(struct request_sock_queue *queue,
				 const struct request_sock *req)
{
	if (req->num_timeout == 0)
		atomic_inc(&queue->young);
	atomic_inc(&queue->qlen);
}

static void reqsk_migrate_reset(struct request_sock *req)
{
	req->saved_syn = NULL;
#if IS_ENABLED(CONFIG_IPV6)
	inet_rsk(req)->ipv6_opt = NULL;
	inet_rsk(req)->pktopts = NULL;
#else
#error "klp-ccp: non-taken branch"
#endif
}

static bool reqsk_queue_unlink(struct request_sock *req)
{
	struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
	bool found = false;

	if (sk_hashed(req_to_sk(req))) {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);

		spin_lock(lock);
		found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
		spin_unlock(lock);
	}

	return found;
}

static bool __inet_csk_reqsk_queue_drop(struct sock *sk,
					struct request_sock *req,
					bool from_timer)
{
	bool unlinked = reqsk_queue_unlink(req);

	if (!from_timer && timer_delete_sync(&req->rsk_timer))
		reqsk_put(req);

	if (unlinked) {
		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
		reqsk_put(req);
	}

	return unlinked;
}

bool klpp_inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
	return __inet_csk_reqsk_queue_drop(sk, req, false);
}

typeof(klpp_inet_csk_reqsk_queue_drop) klpp_inet_csk_reqsk_queue_drop;

void klpp_reqsk_timer_handler(struct timer_list *t)
{
	struct request_sock *req = from_timer(req, t, rsk_timer);
	struct request_sock *nreq = NULL, *oreq = req;
	struct sock *sk_listener = req->rsk_listener;
	struct inet_connection_sock *icsk;
	struct request_sock_queue *queue;
	struct net *net;
	int max_syn_ack_retries, qlen, expire = 0, resend = 0;

	if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
		struct sock *nsk;

		nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
		if (!nsk)
			goto drop;

		nreq = (*klpe_inet_reqsk_clone)(req, nsk);
		if (!nreq)
			goto drop;

		/* The new timer for the cloned req can decrease the 2
		 * by calling inet_csk_reqsk_queue_drop_and_put(), so
		 * hold another count to prevent use-after-free and
		 * call reqsk_put() just before return.
		 */
		refcount_set(&nreq->rsk_refcnt, 2 + 1);
		timer_setup(&nreq->rsk_timer, klpp_reqsk_timer_handler, TIMER_PINNED);
		reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);

		req = nreq;
		sk_listener = nsk;
	}

	icsk = inet_csk(sk_listener);
	net = sock_net(sk_listener);
	max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
	/* Normally all the openreqs are young and become mature
	 * (i.e. converted to established socket) for first timeout.
	 * If synack was not acknowledged for 1 second, it means
	 * one of the following things: synack was lost, ack was lost,
	 * rtt is high or nobody planned to ack (i.e. synflood).
	 * When server is a bit loaded, queue is populated with old
	 * open requests, reducing effective size of queue.
	 * When server is well loaded, queue size reduces to zero
	 * after several minutes of work. It is not synflood,
	 * it is normal operation. The solution is pruning
	 * too old entries overriding normal timeout, when
	 * situation becomes dangerous.
	 *
	 * Essentially, we reserve half of room for young
	 * embrions; and abort old ones without pity, if old
	 * ones are about to clog our table.
	 */
	queue = &icsk->icsk_accept_queue;
	qlen = reqsk_queue_len(queue);
	if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
		int young = reqsk_queue_len_young(queue) << 1;

		while (max_syn_ack_retries > 2) {
			if (qlen < young)
				break;
			max_syn_ack_retries--;
			young <<= 1;
		}
	}
	syn_ack_recalc(req, max_syn_ack_retries, READ_ONCE(queue->rskq_defer_accept),
		       &expire, &resend);
	req->rsk_ops->syn_ack_timeout(req);
	if (!expire &&
	    (!resend ||
	     !inet_rtx_syn_ack(sk_listener, req) ||
	     inet_rsk(req)->acked)) {
		unsigned long timeo;

		if (req->num_timeout++ == 0)
			atomic_dec(&queue->young);
		timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
		mod_timer(&req->rsk_timer, jiffies + timeo);

		if (!nreq)
			return;

		if (!(*klpe_inet_ehash_insert)(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
			/* delete timer */
			__inet_csk_reqsk_queue_drop(sk_listener, nreq, true);
			goto no_ownership;
		}

		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQSUCCESS);
		reqsk_migrate_reset(oreq);
		reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
		reqsk_put(oreq);

		reqsk_put(nreq);
		return;
	}

	/* Even if we can clone the req, we may need not retransmit any more
	 * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
	 * CPU may win the "own_req" race so that inet_ehash_insert() fails.
	 */
	if (nreq) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMIGRATEREQFAILURE);
no_ownership:
		reqsk_migrate_reset(nreq);
		reqsk_queue_removed(queue, nreq);
		__reqsk_free(nreq);
	}

drop:
	__inet_csk_reqsk_queue_drop(sk_listener, oreq, true);
	reqsk_put(oreq);
}


#include "livepatch_bsc1233072.h"

#include <linux/kernel.h>
#include "../kallsyms_relocs.h"

static struct klp_kallsyms_reloc klp_funcs[] = {
	{ "inet_ehash_insert", (void *)&klpe_inet_ehash_insert },
	{ "inet_reqsk_clone", (void *)&klpe_inet_reqsk_clone },
};

int livepatch_bsc1233072_init(void)
{
	return klp_resolve_kallsyms_relocs(klp_funcs, ARRAY_SIZE(klp_funcs));
}

