/*
 * livepatch_bsc1235815
 *
 * Fix for CVE-2024-57849, bsc#1235815
 *
 *  Copyright (c) 2025 SUSE
 *  Author: Vincenzo Mezzela <vincenzo.mezzela@suse.com>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if defined(CONFIG_S390)

#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <asm/cpu_mf.h>
#include <asm/irq.h>
#include <asm/debug.h>
#include <asm/timex.h>
#include <asm-generic/io.h>

/*
 * Bookkeeping for one sampling buffer.
 *
 * Embedded in struct cpu_hw_sf, which this file uses to access an object
 * that lives in vmlinux; keep member order and types unchanged.
 */
struct sf_buffer {
	/* Sample-data-block-table origin */
	unsigned long *sdbt;

	/* Buffer characteristics (required for buffer increments) */
	unsigned long num_sdb;		/* Number of sample-data-blocks */
	unsigned long num_sdbt;		/* Number of sample-data-block-tables */

	/* Last sample-data-block-table */
	unsigned long *tail;
};

/*
 * Per-CPU sampling-facility state.
 *
 * This file declares the per-CPU object cpu_hw_sf with this type and has it
 * resolved against vmlinux, so member order and types must stay exactly
 * as they are.
 */
struct cpu_hw_sf {
	/* CPU-measurement sampling information block */
	struct hws_qsi_info_block qsi;

	/* CPU-measurement sampling control block */
	struct hws_lsctl_request_block lsctl;

	/* Sampling buffer */
	struct sf_buffer sfb;

	/* Status flags */
	unsigned int flags;

	/* Scheduled perf event */
	struct perf_event *event;

	/* AUX buffer output handle */
	struct perf_output_handle handle;
};

/*
 * Symbols used by the functions below but not defined in this file.
 * Each of them is redeclared with a KLP_RELOC_SYMBOL(vmlinux, vmlinux, ...)
 * annotation at the end of this file.
 */
extern struct cpu_hw_sf __percpu cpu_hw_sf;	/* per-CPU sampling state */
extern void perf_pmu_disable(struct pmu *pmu);
extern void perf_pmu_enable(struct pmu *pmu);
/* Return value is stored as the event-overflow indicator by hw_collect_samples() */
extern int perf_push_sample(struct perf_event *event, struct hws_basic_entry *basic);
extern debug_info_t *sfdbg;			/* target of debug_sprintf_event() calls */

/*
 * Follow a table link entry: strip bit 0 (the bit is_link_entry() tests)
 * from the entry value and translate the result with phys_to_virt().
 */
static inline unsigned long *get_next_sdbt(unsigned long *s)
{
	unsigned long next_origin = *s & ~0x1UL;

	return phys_to_virt(next_origin);
}

/*
 * Return 1 if bit 0 of the table entry at @s is set (entry is a link to
 * the next table, see get_next_sdbt()), 0 otherwise.
 */
static inline int is_link_entry(unsigned long *s)
{
	return (*s & 0x1UL) != 0;
}

/*
 * Locate the trailer entry for the block starting at address @v: it is the
 * last sizeof(struct hws_trailer_entry) bytes of the PAGE_SIZE region
 * beginning at @v.
 */
static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v)
{
	unsigned long trailer_addr;

	trailer_addr = v + PAGE_SIZE - sizeof(struct hws_trailer_entry);

	return (struct hws_trailer_entry *)trailer_addr;
}


/*
 * Read the timestamp stored in a trailer entry as an unsigned long long.
 *
 * If header.t is set the TOD is in STCKE format and is read starting at
 * timestamp[1]; otherwise it is in STCK format and read from timestamp[0].
 */
static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
{
	unsigned int start = te->header.t ? 1 : 0;

	return *((unsigned long long *)&te->timestamp[start]);
}


/*
 * Add @count to the counter of @event (event->count) via local64_add().
 * hw_collect_samples() calls this once per non-empty sample entry, passing
 * the event's current sampling rate.
 */
static void perf_event_count_update(struct perf_event *event, u64 count)
{
	local64_add(count, &event->count);
}

/*
 * hw_collect_samples() - process the sample entries of one block
 * @event:	perf event the samples are accounted to
 * @sdbt:	start address of the block to scan; despite the name, the
 *		caller in this file passes a sample-data-block address
 * @overflow:	in/out event-overflow state.  While it is zero, consistent
 *		samples are handed to perf_push_sample() and its return value
 *		is stored here.  Once it is non-zero, every further valid
 *		sample is only counted by incrementing it.
 *
 * Entries are visited from the start of the block up to its trailer entry.
 * Scanning stops early at an empty entry (def == 0 or LS set), or at an
 * entry of unknown format when the block is not marked full.  Every entry
 * that was processed has its def field cleared.
 */
static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
			       unsigned long long *overflow)
{
	struct hws_trailer_entry *trailer = trailer_entry_ptr((unsigned long)sdbt);
	struct hws_basic_entry *entry;

	for (entry = (struct hws_basic_entry *)sdbt;
	     (unsigned long *)entry < (unsigned long *)trailer;
	     entry++) {
		/* An empty slot ends the scan */
		if (!entry->def || entry->LS)
			break;

		/* Account one sampling period to the event */
		perf_event_count_update(event, SAMPL_RATE(&event->hw));

		if (entry->def != 0x0001) {
			debug_sprintf_event(sfdbg, 4,
					    "%s: Found unknown"
					    " sampling data entry: te->f %i"
					    " basic.def %#4x (%p)\n", __func__,
					    trailer->header.f, entry->def, entry);
			/* Slot is not yet written or holds another record
			 * type.  This can happen when the buffer was reused
			 * from combined basic- and diagnostic-sampling and
			 * only basic-sampling is active now, typically for
			 * blocks that are not full.  Give up on the first
			 * invalid format in a block that is not full.
			 */
			if (!trailer->header.f)
				break;
		} else if (*overflow) {
			/* An event overflow already occurred: the remaining
			 * sample data is discarded, only count it.
			 */
			*overflow += 1;
		} else if (entry->I == 0 && entry->W == 0) {
			/* Consistent sample: deliver it to perf */
			*overflow = perf_push_sample(event, entry);
		}

		/* Mark the slot as consumed before moving on */
		entry->def = 0;
	}
}

/*
 * __cdsg() - wrapper around the s390 "cdsg" instruction (the
 *	      compare-double-and-swap used to reset the trailer header)
 * @ptr: 128-bit memory location; read and written by the instruction
 * @old: value expected at @ptr; in/out register operand
 * @new: value to store at @ptr
 *
 * Returns the content of the @old operand after the instruction.  The
 * caller in this file treats "return value == the @old it passed in" as a
 * successful swap and otherwise retries with the returned value.
 */
static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new)
{
	/* Declares the condition code and memory as clobbered */
	asm volatile(
		"	cdsg	%[old],%[new],%[ptr]\n"
		: [old] "+d" (old), [ptr] "+QS" (*ptr)
		: [new] "d" (new)
		: "memory", "cc");
	return old;
}

/*
 * hw_perf_event_update() - process sample-data-blocks for an event
 * @event:	perf event; its event->hw state (TEAR_REG, OVERFLOW_REG,
 *		SAMPL_RATE) is read and updated
 * @flush_all:	if zero, stop at the first block whose trailer is not marked
 *		full, without processing it; if non-zero, process that first
 *		non-full block as well and stop afterwards
 *
 * Starting at the table position held in TEAR_REG, each block is handed to
 * hw_collect_samples(), its trailer header is reset (f = 0, a = 1,
 * overflow = 0) and TEAR_REG is advanced to the next table entry, following
 * link entries.  Afterwards sample overflows are folded into OVERFLOW_REG
 * and, if an event overflow was reported, SAMPL_RATE is increased.
 *
 * Does nothing when the event is in diagnostic sampling mode.
 */
static void hw_perf_event_update(struct perf_event *event, int flush_all)
{
	unsigned long long event_overflow, sampl_overflow, num_sdb;
	union hws_trailer_header old, prev, new;
	struct hw_perf_event *hwc = &event->hw;
	struct hws_trailer_entry *te;
	unsigned long *sdbt, sdb;
	int done;

	/*
	 * AUX buffer is used when in diagnostic sampling mode.
	 * No perf events/samples are created.
	 */
	if (SAMPL_DIAG_MODE(&event->hw))
		return;

	/* Continue at the table entry stored by the loop below */
	sdbt = (unsigned long *) TEAR_REG(hwc);
	done = event_overflow = sampl_overflow = num_sdb = 0;
	while (!done) {
		/* Get the trailer entry of the sample-data-block */
		sdb = (unsigned long)phys_to_virt(*sdbt);
		te = trailer_entry_ptr(sdb);

		/* Leave loop if no more work to do (block full indicator) */
		if (!te->header.f) {
			done = 1;
			if (!flush_all)
				break;
		}

		/* Check the sample overflow count */
		if (te->header.overflow)
			/* Account sample overflows and, if a particular limit
			 * is reached, extend the sampling buffer.
			 * For details, see sfb_account_overflows().
			 */
			sampl_overflow += te->header.overflow;

		/* Timestamps are valid for full sample-data-blocks only */
		debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx "
				    "overflow %llu timestamp %#llx\n",
				    __func__, sdb, (unsigned long)sdbt,
				    te->header.overflow,
				    (te->header.f) ? trailer_timestamp(te) : 0ULL);

		/* Collect all samples from a single sample-data-block and
		 * flag if an (perf) event overflow happened.  If so, the PMU
		 * is stopped and remaining samples will be discarded.
		 */
		hw_collect_samples(event, (unsigned long *)sdb, &event_overflow);
		num_sdb++;

		/* Reset trailer (using compare-double-and-swap) */
		prev.val = READ_ONCE_ALIGNED_128(te->header.val);
		/*
		 * Retry until the swap succeeds: __cdsg() returns the header
		 * value it found, and if that differs from 'old' the new
		 * value is rebuilt from it.  NOTE(review): presumably the
		 * header can change concurrently because the sampling
		 * facility updates it - confirm.
		 */
		do {
			old.val = prev.val;
			new.val = prev.val;
			new.f = 0;
			new.a = 1;
			new.overflow = 0;
			prev.val = __cdsg(&te->header.val, old.val, new.val);
		} while (prev.val != old.val);

		/* Advance to next sample-data-block */
		sdbt++;
		if (is_link_entry(sdbt))
			sdbt = get_next_sdbt(sdbt);

		/* Update event hardware registers */
		TEAR_REG(hwc) = (unsigned long) sdbt;

		/* Stop processing sample-data if all samples of the current
		 * sample-data-block were flushed even if it was not full.
		 */
		if (flush_all && done)
			break;
	}

	/* Account sample overflows in the event hardware structure */
	if (sampl_overflow)
		/* (previous value + new overflows) / (1 + blocks processed), rounded up */
		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
						 sampl_overflow, 1 + num_sdb);

	/* Perf_event_overflow() and perf_event_account_interrupt() limit
	 * the interrupt rate to an upper limit. Roughly 1000 samples per
	 * task tick.
	 * Hitting this limit results in a large number
	 * of throttled REF_REPORT_THROTTLE entries and the samples
	 * are dropped.
	 * Slightly increase the interval to avoid hitting this limit.
	 */
	if (event_overflow) {
		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
		/*
		 * NOTE(review): the value logged below is recomputed from the
		 * already increased SAMPL_RATE, so it is not the amount that
		 * was just added above - confirm whether this is intended.
		 */
		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
				    __func__,
				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
	}

	if (sampl_overflow || event_overflow)
		debug_sprintf_event(sfdbg, 4, "%s: "
				    "overflows: sample %llu event %llu"
				    " total %llu num_sdb %llu\n",
				    __func__, sampl_overflow, event_overflow,
				    OVERFLOW_REG(hwc), num_sdb);
}

void klpp_cpumsf_pmu_stop(struct perf_event *event, int flags)
{
	struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);

	if (event->hw.state & PERF_HES_STOPPED)
		return;

	perf_pmu_disable(event->pmu);
	cpuhw->lsctl.cs = 0;
	cpuhw->lsctl.cd = 0;
	event->hw.state |= PERF_HES_STOPPED;

	if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) {
		/* CPU hotplug off removes SDBs. No samples to extract. */
		if (cpuhw->flags & PMU_F_RESERVED)
			hw_perf_event_update(event, 1);
		event->hw.state |= PERF_HES_UPTODATE;
	}
	perf_pmu_enable(event->pmu);
}

#include <linux/livepatch.h>

/*
 * Redeclare every external symbol used above with a KLP_RELOC_SYMBOL
 * annotation naming vmlinux.  typeof() keeps each redeclaration in sync
 * with the earlier declaration of the same name.
 * NOTE(review): the exact meaning of the KLP_RELOC_SYMBOL arguments is
 * defined by the livepatch build tooling, not in this file - confirm there
 * before adding or changing entries.
 */
extern typeof(perf_pmu_disable) perf_pmu_disable
	 KLP_RELOC_SYMBOL(vmlinux, vmlinux, perf_pmu_disable);
extern typeof(perf_pmu_enable) perf_pmu_enable
	 KLP_RELOC_SYMBOL(vmlinux, vmlinux, perf_pmu_enable);
extern typeof(perf_push_sample) perf_push_sample
	 KLP_RELOC_SYMBOL(vmlinux, vmlinux, perf_push_sample);
extern typeof(cpu_hw_sf) cpu_hw_sf
	 KLP_RELOC_SYMBOL(vmlinux, vmlinux, cpu_hw_sf);
extern typeof(sfdbg) sfdbg
	 KLP_RELOC_SYMBOL(vmlinux, vmlinux, sfdbg);

#endif
