/*
 * livepatch_bsc1228585
 *
 * Fix for CVE-2024-40956, bsc#1228585
 *
 *  Upstream commit:
 *  e3215deca452 ("dmaengine: idxd: Fix possible Use-After-Free in irq_process_work_list")
 *
 *  SLE12-SP5 commit:
 *  Not affected
 *
 *  SLE15-SP3 commit:
 *  26f1077906e5901a4dc6aa055864b2810237fba1
 *
 *  SLE15-SP4 and -SP5 commit:
 *  3632d87c54841cfe0e62cead09f2efaeab96f60b
 *
 *  SLE15-SP6 commit:
 *  36cedd66a94c171cc3001ffa27aa1372ddb85ed1
 *
 *  SLE MICRO-6-0 commit:
 *  36cedd66a94c171cc3001ffa27aa1372ddb85ed1
 *
 *  Copyright (c) 2025 SUSE
 *  Author: Fernando Gonzalez <fernando.gonzalez@suse.com>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if IS_ENABLED(CONFIG_INTEL_IDXD)

#if !IS_MODULE(CONFIG_INTEL_IDXD)
#error "Live patch supports only CONFIG=m"
#endif

/* klp-ccp: from drivers/dma/idxd/irq.c */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci.h>

#include <linux/dmaengine.h>
#include <uapi/linux/idxd.h>

/* klp-ccp: from drivers/dma/dmaengine.h */
#include <linux/bug.h>
#include <linux/dmaengine.h>

/* klp-ccp: from drivers/dma/idxd/idxd.h */
#include <linux/sbitmap.h>
#include <linux/dmaengine.h>
#include <linux/percpu-rwsem.h>
#include <linux/wait.h>

/* klp-ccp: from drivers/dma/idxd/registers.h */
/*
 * 64-bit register image: the bit-fields of the anonymous struct add up to
 * exactly 64 bits and alias the raw value in @bits.
 *
 * Copied from the driver's registers.h.  The size of this union feeds into
 * struct idxd_hw and therefore into the offsets of later struct idxd_device
 * members (e.g. pdev, which match_fault() dereferences), so do not alter it.
 * NOTE(review): the name suggests the general-capabilities register -- confirm.
 */
union gen_cap_reg {
	struct {
		u64 block_on_fault:1;
		u64 overlap_copy:1;
		u64 cache_control_mem:1;
		u64 cache_control_cache:1;
		u64 rsvd:3;
		u64 int_handle_req:1;
		u64 dest_readback:1;
		u64 drain_readback:1;
		u64 rsvd2:6;
		u64 max_xfer_shift:5;
		u64 max_batch_shift:4;
		u64 max_ims_mult:6;
		u64 config_en:1;
		u64 max_descs_per_engine:8;
		u64 rsvd3:24;
	};
	u64 bits;	/* raw 64-bit view of the same storage */
} __packed;

/*
 * 64-bit register image: the bit-fields below total 64 bits and overlay
 * the raw value in @bits.
 *
 * Copied from the driver's registers.h; it is embedded in struct idxd_hw,
 * so its size must stay as is to keep struct idxd_device offsets intact.
 * NOTE(review): the name suggests the work-queue capability register -- confirm.
 */
union wq_cap_reg {
	struct {
		u64 total_wq_size:16;
		u64 num_wqs:8;
		u64 wqcfg_size:4;
		u64 rsvd:20;
		u64 shared_mode:1;
		u64 dedicated_mode:1;
		u64 wq_ats_support:1;
		u64 priority:1;
		u64 occupancy:1;
		u64 occupancy_int:1;
		u64 rsvd3:10;
	};
	u64 bits;	/* raw 64-bit view of the same storage */
} __packed;

/*
 * 64-bit register image: the bit-fields below total 64 bits and overlay
 * the raw value in @bits.
 *
 * Copied from the driver's registers.h; it is embedded in struct idxd_hw,
 * so its size must stay as is to keep struct idxd_device offsets intact.
 * NOTE(review): the name suggests the group capability register -- confirm.
 */
union group_cap_reg {
	struct {
		u64 num_groups:8;
		u64 total_tokens:8;
		u64 token_en:1;
		u64 token_limit:1;
		u64 rsvd:46;
	};
	u64 bits;	/* raw 64-bit view of the same storage */
} __packed;

/*
 * 64-bit register image: an 8-bit num_engines field plus 56 reserved bits,
 * overlaying the raw value in @bits.
 *
 * Copied from the driver's registers.h; it is embedded in struct idxd_hw,
 * so its size must stay as is to keep struct idxd_device offsets intact.
 */
union engine_cap_reg {
	struct {
		u64 num_engines:8;
		u64 rsvd:56;
	};
	u64 bits;	/* raw 64-bit view of the same storage */
} __packed;

/*
 * Four 64-bit words (256 bits) stored as part of struct idxd_hw.
 * NOTE(review): presumably a bitmap of supported operations, going by the
 * name only -- confirm against the driver.
 */
struct opcap {
	u64 bits[4];
};

/*
 * Four-word (4 x 64-bit) register image.  The anonymous struct splits the
 * words into named fields; @bits gives raw access to the same 32 bytes.
 *
 * Copied from the driver's registers.h.  It is embedded by value in
 * struct idxd_device (member sw_err), so its size affects the offsets of
 * the members that follow it there.
 */
union sw_err_reg {
	struct {
		/* word 0: the bit-fields below total 64 bits */
		u64 valid:1;
		u64 overflow:1;
		u64 desc_valid:1;
		u64 wq_idx_valid:1;
		u64 batch:1;
		u64 fault_rw:1;
		u64 priv:1;
		u64 rsvd:1;
		u64 error:8;
		u64 wq_idx:8;
		u64 rsvd2:8;
		u64 operation:8;
		u64 pasid:20;
		u64 rsvd3:4;

		/* word 1: 16 + 16 + 32 bits */
		u64 batch_idx:16;
		u64 rsvd4:16;
		u64 invalid_flags:32;

		/* word 2 */
		u64 fault_addr;

		/* word 3 */
		u64 rsvd5;
	};
	u64 bits[4];	/* raw view of all four words */
} __packed;

/* klp-ccp: from drivers/dma/idxd/idxd.h */
/*
 * Device type tag stored in struct idxd_device::type.  Not consulted by the
 * code in this file; it is declared so that struct idxd_device has the same
 * layout as in the driver this was copied from.
 */
enum idxd_type {
	IDXD_TYPE_UNKNOWN = -1,
	IDXD_TYPE_DSA = 0,
	IDXD_TYPE_IAX,
	IDXD_TYPE_MAX,
};

/*
 * Per-interrupt bookkeeping handed to klpp_irq_process_work_list().
 *
 * Of the members below, this file only touches @work_list (the list of
 * struct idxd_desc linked through idxd_desc::list) and @list_lock.  The
 * remaining members are kept so the layout matches the driver's definition.
 */
struct idxd_irq_entry {
	struct idxd_device *idxd;
	int id;
	int vector;
	struct llist_head pending_llist;
	struct list_head work_list;
	/*
	 * Serializes access to work_list between the irq thread that
	 * processes completed descriptors and the irq thread that processes
	 * error descriptors.
	 */
	spinlock_t list_lock;
};

/*
 * Work-queue state stored in struct idxd_wq::state.  Not consulted by the
 * code in this file; declared so struct idxd_wq keeps the driver's layout.
 */
enum idxd_wq_state {
	IDXD_WQ_DISABLED = 0,
	IDXD_WQ_ENABLED,
};

/*
 * Work-queue type stored in struct idxd_wq::type.  Not consulted by the
 * code in this file; declared so struct idxd_wq keeps the driver's layout.
 */
enum idxd_wq_type {
	IDXD_WQT_NONE = 0,
	IDXD_WQT_KERNEL,
	IDXD_WQT_USER,
};

/*
 * Sizes struct idxd_wq::name, which is declared with WQ_NAME_SIZE + 1 bytes.
 * Changing it would shift every idxd_wq member that follows the name buffer.
 */
#define WQ_NAME_SIZE   1024

/*
 * Completion reason passed by value to idxd_dma_complete_txd() in the idxd
 * module, so the numeric values must match the module's own definition.
 *
 * klpp_irq_process_work_list() uses IDXD_COMPLETE_NORMAL for
 * IRQ_WORK_NORMAL and IDXD_COMPLETE_DEV_FAIL for every other work type;
 * IDXD_COMPLETE_ABORT is not used in this file.
 */
enum idxd_complete_type {
	IDXD_COMPLETE_NORMAL = 0,
	IDXD_COMPLETE_ABORT,
	IDXD_COMPLETE_DEV_FAIL,
};

/*
 * Work-queue object, copied from the driver's idxd.h.
 *
 * This file only reads @idxd (in match_fault()) and passes pointers to the
 * whole structure on to idxd_free_desc().  All other members exist to keep
 * the offset of @idxd, and the overall layout, identical to the driver's
 * definition; do not add, remove or reorder members.
 */
struct idxd_wq {
	void __iomem *portal;
	struct device conf_dev;
	struct idxd_cdev *idxd_cdev;
	struct wait_queue_head err_queue;
	struct idxd_device *idxd;	/* owning device; read by match_fault() */
	int id;
	enum idxd_wq_type type;
	struct idxd_group *group;
	int client_count;
	struct mutex wq_lock;	/* mutex for workqueue */
	u32 size;
	u32 threshold;
	u32 priority;
	enum idxd_wq_state state;
	unsigned long flags;
	union wqcfg *wqcfg;
	u32 vec_ptr;		/* interrupt steering */
	struct dsa_hw_desc **hw_descs;
	int num_descs;
	union {
		struct dsa_completion_record *compls;
		struct iax_completion_record *iax_compls;
	};
	void *compls_raw;
	dma_addr_t compls_addr;
	dma_addr_t compls_addr_raw;
	int compls_size;
	struct idxd_desc **descs;
	struct sbitmap_queue sbq;
	struct idxd_dma_chan *idxd_chan;
	char name[WQ_NAME_SIZE + 1];
	u64 max_xfer_bytes;
	u32 max_batch_size;
	bool ats_dis;
};

/*
 * Aggregate of a version word, four 64-bit register images and the opcap
 * words.  Embedded by value in struct idxd_device (member hw) ahead of
 * pdev, so its size determines where pdev lives in that structure.
 */
struct idxd_hw {
	u32 version;
	union gen_cap_reg gen_cap;
	union wq_cap_reg wq_cap;
	union group_cap_reg group_cap;
	union engine_cap_reg engine_cap;
	struct opcap opcap;
};

/*
 * Device state stored in struct idxd_device::state.  Not consulted by the
 * code in this file; declared so struct idxd_device keeps the driver's
 * layout.
 */
enum idxd_device_state {
	IDXD_DEV_HALTED = -1,
	IDXD_DEV_DISABLED = 0,
	IDXD_DEV_CONF_READY,
	IDXD_DEV_ENABLED,
};

/*
 * Device object, copied from the driver's idxd.h.
 *
 * The only member this file dereferences is @pdev (match_fault() uses
 * &pdev->dev as the device for its warning).  Everything else is present so
 * that @pdev sits at the same offset as in the driver's definition; do not
 * add, remove or reorder members.
 */
struct idxd_device {
	enum idxd_type type;
	struct device conf_dev;
	struct list_head list;
	struct idxd_hw hw;
	enum idxd_device_state state;
	unsigned long flags;
	int id;
	int major;
	u8 cmd_status;

	struct pci_dev *pdev;	/* read by match_fault() */
	void __iomem *reg_base;

	spinlock_t dev_lock;	/* spinlock for device */
	struct completion *cmd_done;
	struct idxd_group *groups;
	struct idxd_wq *wqs;
	struct idxd_engine *engines;

	struct iommu_sva *sva;
	unsigned int pasid;

	int num_groups;

	u32 msix_perm_offset;
	u32 wqcfg_offset;
	u32 grpcfg_offset;
	u32 perfmon_offset;

	u64 max_xfer_bytes;
	u32 max_batch_size;
	int max_groups;
	int max_engines;
	int max_tokens;
	int max_wqs;
	int max_wq_size;
	int token_limit;
	int nr_tokens;		/* non-reserved tokens */
	unsigned int wqcfg_size;
	int compl_size;

	union sw_err_reg sw_err;
	wait_queue_head_t cmd_waitq;
	int num_wq_irqs;
	struct idxd_irq_entry *irq_entries;

	struct idxd_dma_dev *idxd_dma;
	struct workqueue_struct *wq;
	struct work_struct work;
};

/*
 * Descriptor object, copied from the driver's idxd.h.
 *
 * Members used in this file:
 *   @hw / @completion - compared with the fault address in match_fault();
 *                       @completion->status is also tested in
 *                       klpp_irq_process_work_list()
 *   @list             - linkage on idxd_irq_entry::work_list and on the
 *                       local list built by klpp_irq_process_work_list()
 *   @wq               - passed to idxd_free_desc() and followed to reach
 *                       the owning device
 * The remaining members keep the layout identical to the driver's.
 */
struct idxd_desc {
	union {
		struct dsa_hw_desc *hw;
		struct iax_hw_desc *iax_hw;
	};
	dma_addr_t desc_dma;
	union {
		struct dsa_completion_record *completion;
		struct iax_completion_record *iax_completion;
	};
	dma_addr_t compl_dma;
	struct dma_async_tx_descriptor txd;
	struct llist_node llnode;
	struct list_head list;
	int id;
	int cpu;
	struct idxd_wq *wq;
};

/*
 * Pointers to functions that live in the idxd module.  They are listed in
 * klp_funcs[] and filled in by __klp_resolve_kallsyms_relocs(), either from
 * livepatch_bsc1228585_init() or from module_notify(); they must not be
 * called before that lookup has succeeded.
 */
static void (*klpe_idxd_free_desc)(struct idxd_wq *wq, struct idxd_desc *desc);

static void (*klpe_idxd_dma_complete_txd)(struct idxd_desc *desc,
			   enum idxd_complete_type comp_type);

/* klp-ccp: from drivers/dma/idxd/irq.c */
/*
 * Kind of work requested from klpp_irq_process_work_list().
 * IRQ_WORK_NORMAL makes it complete descriptors with IDXD_COMPLETE_NORMAL;
 * any other value (i.e. IRQ_WORK_PROCESS_FAULT) selects
 * IDXD_COMPLETE_DEV_FAIL.
 */
enum irq_work_type {
	IRQ_WORK_NORMAL = 0,
	IRQ_WORK_PROCESS_FAULT,
};

/*
 * match_fault - check whether @desc is associated with a fault address.
 * @desc:       descriptor to examine
 * @fault_addr: address to compare against
 *
 * The completion address can be bad as well as the descriptor address, so
 * both the descriptor pointer and the completion pointer are compared with
 * @fault_addr.  On a match a warning naming the address is logged against
 * the owning device's PCI device.
 *
 * Return: true if either pointer equals @fault_addr, false otherwise.
 */
static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr)
{
	struct device *dev;

	if ((u64)desc->hw != fault_addr && (u64)desc->completion != fault_addr)
		return false;

	dev = &desc->wq->idxd->pdev->dev;
	dev_warn(dev, "desc with fault address: %#llx\n", fault_addr);

	return true;
}

/*
 * klpr_complete_desc - complete a descriptor, then hand it back for freeing.
 * @desc:   descriptor to finish
 * @reason: completion type forwarded to idxd_dma_complete_txd()
 *
 * Both calls go through the klpe_* pointers, i.e. into the idxd module.
 * The order matters: @desc->wq is read for the second call, so the
 * descriptor is only given to idxd_free_desc() after completion ran.
 * Callers should not touch @desc afterwards.
 */
static inline void klpr_complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
{
	klpe_idxd_dma_complete_txd(desc, reason);
	klpe_idxd_free_desc(desc->wq, desc);
}

/*
 * klpp_irq_process_work_list - livepatched irq_process_work_list().
 * @irq_entry: interrupt entry whose work_list is scanned
 * @wtype:     IRQ_WORK_NORMAL completes with IDXD_COMPLETE_NORMAL, anything
 *             else with IDXD_COMPLETE_DEV_FAIL
 * @processed: out parameter; set to the number of descriptors taken off
 *             work_list (always written, 0 if the list was empty)
 * @data:      fault address handed to match_fault() for descriptors whose
 *             status is not DSA_COMP_SUCCESS
 *
 * Works in two phases.  Under list_lock, every descriptor with a non-zero
 * completion status is moved from work_list to a private list.  After the
 * lock is dropped, each moved descriptor is unlinked and completed.
 *
 * Return: number of descriptors left on work_list because their completion
 * status was still zero.
 */
int klpp_irq_process_work_list(struct idxd_irq_entry *irq_entry,
				 enum irq_work_type wtype,
				 int *processed, u64 data)
{
	enum idxd_complete_type reason = (wtype == IRQ_WORK_NORMAL) ?
			IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_DEV_FAIL;
	struct idxd_desc *desc, *next;
	unsigned long irqflags;
	int pending = 0;
	LIST_HEAD(done);

	*processed = 0;

	/*
	 * list_lock guards work_list against users outside of the irq
	 * handler thread while entries are being moved.
	 */
	spin_lock_irqsave(&irq_entry->list_lock, irqflags);
	if (list_empty(&irq_entry->work_list)) {
		spin_unlock_irqrestore(&irq_entry->list_lock, irqflags);
		return 0;
	}

	list_for_each_entry_safe(desc, next, &irq_entry->work_list, list) {
		if (!desc->completion->status) {
			/* Not finished yet: leave it queued. */
			pending++;
			continue;
		}

		list_del(&desc->list);
		(*processed)++;
		list_add_tail(&desc->list, &done);
	}
	spin_unlock_irqrestore(&irq_entry->list_lock, irqflags);

	/*
	 * The _safe iterator plus the list_del() before completion are the
	 * substance of the fix: klpr_complete_desc() gives the descriptor
	 * away, so neither the iterator nor the list may reference it
	 * afterwards.
	 */
	list_for_each_entry_safe(desc, next, &done, list) {
		list_del(&desc->list);
		if ((desc->completion->status & DSA_COMP_STATUS_MASK) != DSA_COMP_SUCCESS)
			match_fault(desc, data);
		klpr_complete_desc(desc, reason);
	}

	return pending;
}

#include "livepatch_bsc1228585.h"

#include <linux/kernel.h>
#include <linux/module.h>
#include "../kallsyms_relocs.h"

/*
 * Name of the module being livepatched.  Used for the find_module() check
 * in livepatch_bsc1228585_init() and for the name comparison in
 * module_notify().
 */
#define LP_MODULE "idxd"

/*
 * Symbols to look up in the "idxd" module.  Each entry names a symbol, the
 * klpe_* pointer that receives its address, and the module providing it.
 * The table is passed to __klp_resolve_kallsyms_relocs().
 */
static struct klp_kallsyms_reloc klp_funcs[] = {
	{ "idxd_dma_complete_txd", (void *)&klpe_idxd_dma_complete_txd, "idxd" },
	{ "idxd_free_desc",        (void *)&klpe_idxd_free_desc,        "idxd" },
};

/*
 * module_notify - module notifier callback resolving klp_funcs[] lazily.
 * @nb:     notifier block (unused)
 * @action: module state being announced
 * @data:   the struct module the notification is about
 *
 * Ignores everything except MODULE_STATE_COMING for the module named
 * LP_MODULE.  For that event the klpe_* pointers are resolved under
 * module_mutex, and a WARN is emitted if the lookup fails.
 *
 * Return: 0 for ignored events, otherwise the result of
 * __klp_resolve_kallsyms_relocs().
 * NOTE(review): the raw lookup result is returned rather than a NOTIFY_*
 * code -- confirm this is what the notifier chain caller expects.
 */
static int module_notify(struct notifier_block *nb,
			unsigned long action, void *data)
{
	struct module *mod = data;
	int ret;

	if (action != MODULE_STATE_COMING)
		return 0;
	if (strcmp(mod->name, LP_MODULE) != 0)
		return 0;

	mutex_lock(&module_mutex);
	ret = __klp_resolve_kallsyms_relocs(klp_funcs, ARRAY_SIZE(klp_funcs));
	mutex_unlock(&module_mutex);

	WARN(ret, "%s: delayed kallsyms lookup failed. System is broken and can crash.\n",
		__func__);

	return ret;
}

/*
 * Notifier block registered by livepatch_bsc1228585_init() and removed by
 * livepatch_bsc1228585_cleanup().  The priority is one above INT_MIN.
 * NOTE(review): presumably chosen so this callback runs late in the module
 * notifier chain -- confirm against the notifier ordering rules.
 */
static struct notifier_block module_nb = {
	.notifier_call	= module_notify,
	.priority	= INT_MIN + 1,
};

/*
 * livepatch_bsc1228585_init - set up symbol resolution for this livepatch.
 *
 * With module_mutex held: if the LP_MODULE module is already loaded, the
 * klpe_* pointers in klp_funcs[] are resolved right away.  If that lookup
 * fails, its error is returned and no notifier is registered.  Otherwise
 * module_nb is registered, and module_notify() then performs the lookup
 * whenever the module is loaded later.
 *
 * Return: 0 on success, otherwise the non-zero result of the failed lookup
 * or of register_module_notifier().
 */
int livepatch_bsc1228585_init(void)
{
	int ret = 0;

	mutex_lock(&module_mutex);

	if (find_module(LP_MODULE))
		ret = __klp_resolve_kallsyms_relocs(klp_funcs,
						    ARRAY_SIZE(klp_funcs));

	if (!ret)
		ret = register_module_notifier(&module_nb);

	mutex_unlock(&module_mutex);

	return ret;
}

/*
 * livepatch_bsc1228585_cleanup - undo livepatch_bsc1228585_init().
 *
 * Removes the module notifier registered at init time.  The return value
 * of unregister_module_notifier() is not checked.
 */
void livepatch_bsc1228585_cleanup(void)
{
	unregister_module_notifier(&module_nb);
}

#endif /* IS_ENABLED(CONFIG_INTEL_IDXD) */
