/*
 * livepatch_bsc1189418
 *
 * Fix for CVE-2021-3656, bsc#1189418
 *
 * Fix for CVE-2021-3656, bsc#1189418 and for CVE-2021-3653, bsc#1189420
 *
 *  Upstream commits:
 *  c7dfa4009965 ("KVM: nSVM: always intercept VMLOAD/VMSAVE when nested
 *                 (CVE-2021-3656)")
 *  0f923e07124d ("KVM: nSVM: avoid picking up unsupported bits from L2 in
 *                 int_ctl (CVE-2021-3653)")
 *
 *  SLE12-SP3 commits:
 *  0f83408195dd943fde0b6a63e8c962af157a3627
 *
 *  SLE12-SP4, SLE15 and SLE15-SP1 commits:
 *  f4931e99860c4a52d4f57ff6ce7f83419638c565
 *  9c35f8d4e13594b6cf0357e3d5aa5f4dbdf379b7
 *
 *  SLE12-SP4, SLE15 and SLE15-SP1 commits:
 *  f4931e99860c4a52d4f57ff6ce7f83419638c565
 *  9c35f8d4e13594b6cf0357e3d5aa5f4dbdf379b7
 *  a1c39b14d6b74df84321ae691c82b622f343e68a
 *
 *  SLE15-SP2 and -SP3 commits:
 *  89ee512871d4be84dbcb3776e57fab48c6a9d489
 *  790261594034490f018538690ab890fad71a62f4
 *
 *
 *  Copyright (c) 2021 SUSE
 *  Author: Nicolai Stange <nstange@suse.de>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if IS_ENABLED(CONFIG_KVM_AMD)

#if !IS_MODULE(CONFIG_KVM_AMD)
#error "Live patch supports only CONFIG_KVM_AMD=m"
#endif

/* klp-ccp: from arch/x86/kvm/svm.c */
#define pr_fmt(fmt) "SVM: " fmt



#include <linux/kernel.h>
#include <linux/tracepoint.h>

/* from include/linux/tracepoint.h */
/*
 * Livepatch variant of __DECLARE_TRACE(): for tracepoint <name> it declares
 * a pointer klpe___tracepoint_<name> to the tracepoint object and defines an
 * inline klpr_trace_<name>() that tests the static key and invokes
 * __DO_TRACE() through that pointer rather than through the tracepoint
 * symbol itself. With CONFIG_LOCKDEP and a true 'cond', the funcs pointer is
 * additionally dereferenced under rcu_read_lock_sched_notrace().
 * NOTE(review): the pointer is presumably filled in elsewhere in this file
 * before the first use -- not visible in this chunk.
 */
#define KLPR___DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \
	static struct tracepoint (*klpe___tracepoint_##name);		\
	static inline void klpr_trace_##name(proto)			\
	{								\
		if (unlikely(static_key_enabled(&(*klpe___tracepoint_##name).key))) \
			__DO_TRACE(&(*klpe___tracepoint_##name),	\
				TP_PROTO(data_proto),			\
				TP_ARGS(data_args),			\
				TP_CONDITION(cond), 0);		\
		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
			rcu_read_lock_sched_notrace();			\
			rcu_dereference_sched((*klpe___tracepoint_##name).funcs); \
			rcu_read_unlock_sched_notrace();		\
		}							\
	}								\

/*
 * Wrapper around KLPR___DECLARE_TRACE() that supplies the trace condition
 * (the current CPU must be online) and prepends the 'void *__data' argument
 * to the probe prototype and argument list.
 */
#define KLPR_DECLARE_TRACE(name, proto, args)				\
	KLPR___DECLARE_TRACE(name, PARAMS(proto), PARAMS(args),		\
			cpu_online(raw_smp_processor_id()),		\
			PARAMS(void *__data, proto),			\
			PARAMS(__data, args))

/* Forwards its arguments unchanged to KLPR_DECLARE_TRACE(). */
#define KLPR_TRACE_EVENT(name, proto, args)	\
	KLPR_DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))



/* klp-ccp: from arch/x86/kvm/svm.c */
#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_host.h>

/* klp-ccp: from arch/x86/include/asm/kvm_host.h */
/*
 * Pointers through which the code below reaches KVM core objects and
 * functions. Every use in this file dereferences the pointer first, e.g.
 * (*klpe_kvm_x86_ops)->cache_reg(...) or (*klpe_kvm_set_rflags)(...).
 * NOTE(review): nothing in this chunk assigns them; presumably they are
 * resolved during livepatch initialization -- confirm against the rest of
 * the file.
 */
static struct kvm_x86_ops *(*klpe_kvm_x86_ops);

static void (*klpe_kvm_mmu_reset_context)(struct kvm_vcpu *vcpu);

static int (*klpe_kvm_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);

static int (*klpe_kvm_set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long val);
static int (*klpe_kvm_get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *val);

static unsigned long (*klpe_kvm_get_rflags)(struct kvm_vcpu *vcpu);
static void (*klpe_kvm_set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);

static bool (*klpe_kvm_require_dr)(struct kvm_vcpu *vcpu, int dr);

static int (*klpe_kvm_skip_emulated_instruction)(struct kvm_vcpu *vcpu);

/* klp-ccp: from include/linux/kvm_host.h */
/* Called with dirty == true at the end of klpp_enter_svm_guest_mode(). */
static void (*klpe_kvm_vcpu_unmap)(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);

/* klp-ccp: from arch/x86/kvm/irq.h */
#include <linux/mm_types.h>
#include <linux/hrtimer.h>
#include <linux/kvm_host.h>
#include <linux/spinlock.h>

/* klp-ccp: from include/kvm/iodev.h */
#define __KVM_IODEV_H__

/* klp-ccp: from arch/x86/kvm/ioapic.h */
#include <linux/kvm_host.h>
#include <kvm/iodev.h>
/* klp-ccp: from arch/x86/kvm/lapic.h */
#include <kvm/iodev.h>
#include <linux/kvm_host.h>

static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu)
{
	return vcpu->arch.apic && vcpu->arch.apicv_active;
}

/* klp-ccp: from arch/x86/kvm/mmu.h */
#include <linux/kvm_host.h>
/* klp-ccp: from arch/x86/kvm/kvm_cache_regs.h */
#include <linux/kvm_host.h>

/*
 * CR0 bits for which klpr_kvm_read_cr0_bits() consults
 * cr0_guest_owned_bits before trusting the cached vcpu->arch.cr0.
 */
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS

/* klp-ccp: from arch/x86/kvm/kvm_cache_regs.h */
/*
 * Store @val into the cached RAX slot of @vcpu. Unlike
 * kvm_register_write(), no regs_dirty/regs_avail bit is touched.
 */
static void kvm_rax_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	unsigned long *regs = vcpu->arch.regs;

	regs[VCPU_REGS_RAX] = val;
}

/* klp-ccp: from arch/x86/kvm/kvm_cache_regs.h */
/*
 * Return the cached value of register @reg. If the register is not marked
 * in vcpu->arch.regs_avail, the cache_reg() callback of kvm_x86_ops is
 * invoked first.
 */
static inline unsigned long klpr_kvm_register_read(struct kvm_vcpu *vcpu,
					      enum kvm_reg reg)
{
	unsigned long *avail = (unsigned long *)&vcpu->arch.regs_avail;

	if (test_bit(reg, avail))
		return vcpu->arch.regs[reg];

	(*klpe_kvm_x86_ops)->cache_reg(vcpu, reg);

	return vcpu->arch.regs[reg];
}

/*
 * Store @val into the cached slot of register @reg and flag the register
 * in both the regs_dirty and the regs_avail bitmaps.
 */
static inline void kvm_register_write(struct kvm_vcpu *vcpu,
				      enum kvm_reg reg,
				      unsigned long val)
{
	unsigned long *dirty = (unsigned long *)&vcpu->arch.regs_dirty;
	unsigned long *avail = (unsigned long *)&vcpu->arch.regs_avail;

	vcpu->arch.regs[reg] = val;
	__set_bit(reg, dirty);
	__set_bit(reg, avail);
}

/* Read RIP through klpr_kvm_register_read(). */
static inline unsigned long klpr_kvm_rip_read(struct kvm_vcpu *vcpu)
{
	unsigned long rip = klpr_kvm_register_read(vcpu, VCPU_REGS_RIP);

	return rip;
}

static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}

static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val)
{
	kvm_register_write(vcpu, VCPU_REGS_RSP, val);
}

/*
 * Return the bits of the cached CR0 selected by @mask. If any requested
 * bit is both in KVM_POSSIBLE_CR0_GUEST_BITS and in
 * vcpu->arch.cr0_guest_owned_bits, the decache_cr0_guest_bits() callback
 * is run before the cached value is read.
 */
static inline ulong klpr_kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
	const ulong guest_owned = mask & KVM_POSSIBLE_CR0_GUEST_BITS &
				  vcpu->arch.cr0_guest_owned_bits;

	if (guest_owned)
		(*klpe_kvm_x86_ops)->decache_cr0_guest_bits(vcpu);

	return vcpu->arch.cr0 & mask;
}

/* Flag @vcpu as running a nested guest by setting HF_GUEST_MASK in hflags. */
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hflags = vcpu->arch.hflags | HF_GUEST_MASK;
}

/* True when HF_GUEST_MASK is set in the hflags of @vcpu. */
static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
{
	return (vcpu->arch.hflags & HF_GUEST_MASK) != 0;
}

/* klp-ccp: from arch/x86/kvm/mmu.h */
/*
 * MMU helpers reached through pointers; both are called from
 * klpr_nested_svm_init_mmu_context().
 */
static void
(*klpe_reset_shadow_zero_bits_mask)(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

static void (*klpe_kvm_init_shadow_mmu)(struct kvm_vcpu *vcpu);

/* klp-ccp: from arch/x86/kvm/x86.h */
#include <linux/kvm_host.h>

/* True when the walk MMU of @vcpu currently points at its nested_mmu. */
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
{
	return &vcpu->arch.nested_mmu == vcpu->arch.walk_mmu;
}

static inline int klpr_is_paging(struct kvm_vcpu *vcpu)
{
	return likely(klpr_kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}

/* A quirk is in effect unless its bit is set in kvm->arch.disabled_quirks. */
static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk)
{
	return (kvm->arch.disabled_quirks & quirk) == 0;
}

/*
 * Pointer to a bool; when the pointed-to value is true, klpp_init_vmcb()
 * additionally intercepts #GP.
 */
static bool (*klpe_enable_vmware_backdoor);

/* Accessor for the per-VM mwait_in_guest setting. */
static inline bool kvm_mwait_in_guest(struct kvm *kvm)
{
	bool in_guest = kvm->arch.mwait_in_guest;

	return in_guest;
}

/* Accessor for the per-VM hlt_in_guest setting. */
static inline bool kvm_hlt_in_guest(struct kvm *kvm)
{
	bool in_guest = kvm->arch.hlt_in_guest;

	return in_guest;
}

/* klp-ccp: from arch/x86/kvm/cpuid.h */
#include <asm/processor.h>
/* klp-ccp: from arch/x86/kvm/pmu.h */
#include <linux/nospec.h>
/* klp-ccp: from arch/x86/kvm/svm.c */
#include <linux/module.h>
#include <linux/mod_devicetable.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/frame.h>
#include <linux/file.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/kvm_para.h>
#include <asm/virtext.h>

/* klp-ccp: from arch/x86/kvm/trace.h */
#include <linux/tracepoint.h>
#include <asm/vmx.h>
#include <asm/svm.h>
#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>

/*
 * Declares klpe___tracepoint_kvm_nested_intr_vmexit and defines
 * klpr_trace_kvm_nested_intr_vmexit(__u64 rip), which is called from
 * klpr_nested_svm_intr().
 */
KLPR_TRACE_EVENT(kvm_nested_intr_vmexit,
	    TP_PROTO(__u64 rip),
	    TP_ARGS(rip)
);

/* klp-ccp: from arch/x86/kvm/svm.c */
/* System segment types handed to init_sys_seg() by klpp_init_vmcb(). */
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3

/*
 * Mask applied by avic_init_vmcb() to the AVIC page addresses: clears the
 * top twelve bits (63..52) and the low twelve bits (11..0). The expansion
 * is fully parenthesized so it stays a single operand in any expression
 * context.
 */
#define AVIC_HPA_MASK	(~((0xFFFULL << 52) | 0xFFF))

/* OR-ed into the low bits of avic_physical_id by avic_init_vmcb(). */
#define AVIC_MAX_PHYSICAL_ID_COUNT	255

/*
 * Only the element count of this array is used in the visible code: it
 * sizes vcpu_svm.host_user_msrs via NR_HOST_SAVE_USER_MSRS. The contents
 * are never read here.
 * NOTE(review): the count of 10 presumably has to match the array of the
 * same name in the patched svm.c -- confirm.
 */
static const u32 host_save_user_msrs[10];

#define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)

/*
 * Per-VM SEV state, embedded in struct kvm_svm. Only ->active is read in
 * the visible code (by sev_guest()).
 * NOTE(review): the layout presumably has to match the definition in the
 * patched module -- do not reorder members.
 */
struct kvm_sev_info {
	bool active;		/* SEV enabled guest */
	unsigned int asid;	/* ASID used for this guest */
	unsigned int handle;	/* SEV firmware handle */
	int fd;			/* SEV device fd */
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
};

/*
 * SVM-specific VM container. It embeds the generic struct kvm, which lets
 * to_kvm_svm() recover the container from a struct kvm pointer via
 * container_of().
 * NOTE(review): the layout presumably has to match the definition in the
 * patched module -- do not reorder members.
 */
struct kvm_svm {
	struct kvm kvm;

	/* Struct members for AVIC */
	u32 avic_vm_id;
	struct page *avic_logical_id_table_page;
	struct page *avic_physical_id_table_page;
	struct hlist_node hnode;

	struct kvm_sev_info sev_info;
};

/*
 * Nested-virtualization state of one vCPU (vcpu_svm.nested).
 * klpp_enter_svm_guest_mode() fills in the cached intercepts, the
 * MSR/IO permission map addresses, nested_cr3 and ->vmcb from the nested
 * VMCB; klpp_recalc_intercepts() merges the cached intercepts with those
 * found in ->hsave.
 */
struct nested_state {
	struct vmcb *hsave;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb;

	/* These are the merged vectors */
	u32 *msrpm;

	/* gpa pointers to the real vectors */
	u64 vmcb_msrpm;
	u64 vmcb_iopm;

	/* A VMEXIT is required but not yet emulated */
	bool exit_required;

	/* cache for intercepts of the guest */
	u32 intercept_cr;
	u32 intercept_dr;
	u32 intercept_exceptions;
	u64 intercept;

	/* Nested Paging related state */
	u64 nested_cr3;
};

/*
 * SVM-specific vCPU container. It embeds the generic struct kvm_vcpu,
 * which lets to_svm() recover the container from a struct kvm_vcpu pointer
 * via container_of(). ->vmcb is the control block every helper in this file
 * operates on.
 * NOTE(review): the layout presumably has to match the definition in the
 * patched module -- do not reorder or resize members.
 */
struct vcpu_svm {
	struct kvm_vcpu vcpu;
	struct vmcb *vmcb;
	unsigned long vmcb_pa;
	struct svm_cpu_data *svm_data;
	uint64_t asid_generation;
	uint64_t sysenter_esp;
	uint64_t sysenter_eip;
	uint64_t tsc_aux;

	u64 msr_decfg;

	/* Set by the stgi/clgi handlers to the current RIP + 3. */
	u64 next_rip;

	u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
	struct {
		u16 fs;
		u16 gs;
		u16 ldt;
		u64 gs_base;
	} host;

	u64 spec_ctrl;
	/*
	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
	 * translated into the appropriate L2_CFG bits on the host to
	 * perform speculative control.
	 */
	u64 virt_spec_ctrl;

	u32 *msrpm;

	/* RIP recorded by klpp_iret_interception(). */
	ulong nmi_iret_rip;

	struct nested_state nested;

	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	unsigned int3_injected;
	unsigned long int3_rip;

	/* cached guest cpuid flags for faster access */
	bool nrips_enabled	: 1;

	u32 ldr_reg;
	u32 dfr_reg;
	struct page *avic_backing_page;
	u64 *avic_physical_id_cache;
	bool avic_is_running;

	/*
	 * Per-vcpu list of struct amd_svm_iommu_ir:
	 * This is used mainly to store interrupt remapping information used
	 * when update the vcpu affinity. This avoids the need to scan for
	 * IRTE and try to match ga_tag in the IOMMU driver.
	 */
	struct list_head ir_list;
	spinlock_t ir_list_lock;

	/* which host CPU was used for running this vcpu */
	unsigned int last_cpu;
};

/*
 * Pointers to svm.c-internal variables and functions. klpp_init_vmcb()
 * reads npt_enabled, pause_filter_count/thresh, vls and vgif through them
 * to decide which intercepts and VMCB features to enable.
 * NOTE(review): nothing in this chunk assigns these pointers; presumably
 * they are resolved during livepatch initialization.
 */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
static bool (*klpe_npt_enabled);
#else
#error "klp-ccp: non-taken branch"
#endif

static unsigned short (*klpe_pause_filter_thresh);

static unsigned short (*klpe_pause_filter_count);

static int (*klpe_vls);

static int (*klpe_vgif);

/* Forward declaration; defined further down and used by klpp_init_vmcb(). */
void klpp_svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
static int (*klpe_svm_set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
static void (*klpe_svm_flush_tlb)(struct kvm_vcpu *vcpu, bool invalidate_gpa);

/*
 * Bit positions within vmcb->control.clean. mark_dirty() clears the bit
 * for one group; mark_all_dirty() zeroes the whole field.
 */
enum {
	VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
			    pause filter count */
	VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
	VMCB_ASID,	 /* ASID */
	VMCB_INTR,	 /* int_ctl, int_vector */
	VMCB_NPT,        /* npt_en, nCR3, gPAT */
	VMCB_CR,	 /* CR0, CR3, CR4, EFER */
	VMCB_DR,         /* DR6, DR7 */
	VMCB_DT,         /* GDT, IDT */
	VMCB_SEG,        /* CS, DS, SS, ES, CPL */
	VMCB_CR2,        /* CR2 only */
	VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_AVIC,       /* AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
			  * AVIC PHYSICAL_TABLE pointer,
			  * AVIC LOGICAL_TABLE pointer
			  */
	VMCB_DIRTY_MAX,
};

/* klp-ccp: from arch/x86/kvm/svm.c */
/* Map a struct kvm pointer to the struct kvm_svm that embeds it. */
static inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
{
	struct kvm_svm *kvm_svm = container_of(kvm, struct kvm_svm, kvm);

	return kvm_svm;
}

/* True when the sev_info.active flag of the VM owning @kvm is set. */
static inline bool sev_guest(struct kvm *kvm)
{
#ifdef CONFIG_KVM_AMD_SEV
	return to_kvm_svm(kvm)->sev_info.active;
#else
#error "klp-ccp: non-taken branch"
#endif
}

static inline void mark_all_dirty(struct vmcb *vmcb)
{
	vmcb->control.clean = 0;
}

/*
 * Flag one VMCB state group as dirty by clearing its bit in
 * vmcb->control.clean.
 *
 * @vmcb: control block to update
 * @bit:  one of the VMCB_* enumerators
 *
 * The mask is built from an unsigned constant, matching the 1U/1ULL shifts
 * used by the intercept helpers in this file, so the shift stays well
 * defined for every bit position of a 32-bit field.
 */
static inline void mark_dirty(struct vmcb *vmcb, int bit)
{
	vmcb->control.clean &= ~(1U << bit);
}

/* Map a struct kvm_vcpu pointer to the struct vcpu_svm that embeds it. */
static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = container_of(vcpu, struct vcpu_svm, vcpu);

	return svm;
}

/*
 * Patched intercept recalculation.
 *
 * Always marks the VMCB_INTERCEPTS group dirty. Outside of guest mode
 * nothing else happens. In guest mode the active VMCB's intercept vectors
 * are rebuilt as the union of the intercepts stored in nested.hsave and
 * the intercepts cached from the nested VMCB; on top of that the VMLOAD,
 * VMSAVE, STGI and CLGI intercepts are forced on unconditionally (the
 * actual fixes, see the comments below).
 */
static void klpp_recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr | g->intercept_cr;
	c->intercept_dr = h->intercept_dr | g->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
	c->intercept = h->intercept | g->intercept;

	/*
	 * Fix CVE-2021-3656
	 *  +2 lines
	 */
	c->intercept |= (1ULL << INTERCEPT_VMLOAD);
	c->intercept |= (1ULL << INTERCEPT_VMSAVE);

	/*
	 * Fix for CVE-2021-3653
	 *  +2 lines
	 *
	 * Livepatch specific deviation from upstream to also
	 * mitigate against another issue not directly related to the
	 * CVE, but still fixed silently by the stable backports of
	 * upstream commit 0f923e07124d ("KVM: nSVM: avoid picking up
	 * unsupported bits from L2 in int_ctl (CVE-2021-3653)") by
	 * means of folding in commit 91b7130cb660 ("KVM: SVM:
	 * preserve VGIF across VMCB switch"). Rationale:
	 * VGIF is not virtualized and not announced as a capability
	 * to L1. Hence, the vmcb12's ->int_ctl is expected to have
	 * V_GIF_*_MASK cleared. The vmcb02 will also have
	 * V_GIF_*_MASK clear, independent of whether
	 * V_GIF_ENABLE_MASK is set in vmcb01,
	 * c.f. enter_svm_guest_mode(). In conclusion, VGIF will not
	 * be active when L2 is being run and if stgi/clgi was not
	 * intercepted, then a clgi from L2 would operate directly on
	 * the HW GIF bit. This would enable L2 to block external
	 * IRQs, NMIs and the like for forever and thus hog the CPU
	 * and cause a DOS. We (L0) don't enable stgi/clgi intercepts
	 * if vgif is enabled, i.e. if V_GIF_ENABLE_MASK has been set
	 * for vmcb01. Any real-world L1 hypervisor *is* expected to
	 * have stgi/clgi intercepts enabled, but might fail to do so
	 * by mistake or with malicious intent. In this case,
	 * stgi/clgi intercepts are forced on below. If the L1 does
	 * not have stgi/clgi intercepts enabled, then any stgi/clgi
	 * event from L2 would cause a NESTED_EXIT_HOST return of
	 * handle_exit()->nested_svm_exit_handled() and handle_exit()
	 * would then proceed to invoke stgi_interception() resp.
	 * clgi_interception(). These would then clobber L0's view of
	 * L1's GIF status, but it's still better to mess up a
	 * broken/malicious L1 rather than a well-behaving L0.
	 */
	c->intercept |= (1ULL << INTERCEPT_STGI);
	c->intercept |= (1ULL << INTERCEPT_CLGI);
}

/*
 * Pick the VMCB holding the host-side intercept settings: nested.hsave
 * while the vCPU is in guest mode, the regular svm->vmcb otherwise.
 */
static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
{
	return is_guest_mode(&svm->vcpu) ? svm->nested.hsave : svm->vmcb;
}

static inline void klpp_set_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr |= (1U << bit);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_clr_cr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_cr &= ~(1U << bit);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_set_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
		| (1 << INTERCEPT_DR1_READ)
		| (1 << INTERCEPT_DR2_READ)
		| (1 << INTERCEPT_DR3_READ)
		| (1 << INTERCEPT_DR4_READ)
		| (1 << INTERCEPT_DR5_READ)
		| (1 << INTERCEPT_DR6_READ)
		| (1 << INTERCEPT_DR7_READ)
		| (1 << INTERCEPT_DR0_WRITE)
		| (1 << INTERCEPT_DR1_WRITE)
		| (1 << INTERCEPT_DR2_WRITE)
		| (1 << INTERCEPT_DR3_WRITE)
		| (1 << INTERCEPT_DR4_WRITE)
		| (1 << INTERCEPT_DR5_WRITE)
		| (1 << INTERCEPT_DR6_WRITE)
		| (1 << INTERCEPT_DR7_WRITE);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_clr_dr_intercepts(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_dr = 0;

	klpp_recalc_intercepts(svm);
}

static inline void klpp_set_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions |= (1U << bit);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_clr_exception_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept_exceptions &= ~(1U << bit);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_set_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept |= (1ULL << bit);

	klpp_recalc_intercepts(svm);
}

static inline void klpp_clr_intercept(struct vcpu_svm *svm, int bit)
{
	struct vmcb *vmcb = get_host_vmcb(svm);

	vmcb->control.intercept &= ~(1ULL << bit);

	klpp_recalc_intercepts(svm);
}

/* True when V_GIF_ENABLE_MASK is set in the int_ctl of the active VMCB. */
static inline bool vgif_enabled(struct vcpu_svm *svm)
{
	return (svm->vmcb->control.int_ctl & V_GIF_ENABLE_MASK) != 0;
}

/*
 * Set the global interrupt flag: in the VMCB's int_ctl (V_GIF_MASK) when
 * VGIF is enabled, otherwise in the vCPU's hflags (HF_GIF_MASK).
 */
static inline void enable_gif(struct vcpu_svm *svm)
{
	if (!vgif_enabled(svm)) {
		svm->vcpu.arch.hflags |= HF_GIF_MASK;
		return;
	}

	svm->vmcb->control.int_ctl |= V_GIF_MASK;
}

/*
 * Clear the global interrupt flag: in the VMCB's int_ctl (V_GIF_MASK)
 * when VGIF is enabled, otherwise in the vCPU's hflags (HF_GIF_MASK).
 */
static inline void disable_gif(struct vcpu_svm *svm)
{
	if (!vgif_enabled(svm)) {
		svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
		return;
	}

	svm->vmcb->control.int_ctl &= ~V_GIF_MASK;
}

/*
 * Query the global interrupt flag from the location enable_gif() and
 * disable_gif() maintain: int_ctl when VGIF is enabled, hflags otherwise.
 */
static inline bool gif_set(struct vcpu_svm *svm)
{
	if (!vgif_enabled(svm))
		return (svm->vcpu.arch.hflags & HF_GIF_MASK) != 0;

	return (svm->vmcb->control.int_ctl & V_GIF_MASK) != 0;
}

/*
 * Further svm.c-internal objects reached through pointers:
 * - iopm_base: value written (via __sme_set()) to control->iopm_base_pa
 *   by klpp_init_vmcb();
 * - get_npt_level(): supplies shadow_root_level in
 *   klpr_nested_svm_init_mmu_context();
 * - svm_set_efer(): called by klpp_init_vmcb() and
 *   klpp_enter_svm_guest_mode().
 */
static unsigned long (*klpe_iopm_base);

static int (*klpe_get_npt_level)(struct kvm_vcpu *vcpu);

static void (*klpe_svm_set_efer)(struct kvm_vcpu *vcpu, u64 efer);

/*
 * Reset @seg to a present, read/write data segment with selector 0,
 * base 0 and a 0xffff limit.
 */
static void init_seg(struct vmcb_seg *seg)
{
	seg->base = 0;
	seg->limit = 0xffff;
	seg->selector = 0;
	/* Read/Write Data Segment */
	seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
		      SVM_SELECTOR_WRITE_MASK;
}

/*
 * Reset @seg to a present system segment of the given @type with
 * selector 0, base 0 and a 0xffff limit.
 */
static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
{
	seg->base = 0;
	seg->limit = 0xffff;
	seg->selector = 0;
	seg->attrib = SVM_SELECTOR_P_MASK | type;
}

static void avic_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
	vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
}

/*
 * Patched VMCB initialization for @svm.
 *
 * Programs the default set of CR, DR, exception and instruction intercepts,
 * the I/O and MSR permission map addresses, the initial segment, control
 * and debug register state, and then adjusts the setup for the optional
 * features in use (nested paging, pause filtering, AVIC, virtual
 * VMLOAD/VMSAVE, VGIF, SEV). Finishes by marking the whole VMCB dirty and
 * setting GIF.
 *
 * Every intercept helper used here ends in klpp_recalc_intercepts(), which
 * is why this function is part of the livepatch.
 */
void klpp_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct vmcb_save_area *save = &svm->vmcb->save;

	/* Clears HF_GUEST_MASK among others, i.e. leaves guest mode. */
	svm->vcpu.arch.hflags = 0;

	klpp_set_cr_intercept(svm, INTERCEPT_CR0_READ);
	klpp_set_cr_intercept(svm, INTERCEPT_CR3_READ);
	klpp_set_cr_intercept(svm, INTERCEPT_CR4_READ);
	klpp_set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	klpp_set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
	klpp_set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
	if (!kvm_vcpu_apicv_active(&svm->vcpu))
		klpp_set_cr_intercept(svm, INTERCEPT_CR8_WRITE);

	klpp_set_dr_intercepts(svm);

	klpp_set_exception_intercept(svm, PF_VECTOR);
	klpp_set_exception_intercept(svm, UD_VECTOR);
	klpp_set_exception_intercept(svm, MC_VECTOR);
	klpp_set_exception_intercept(svm, AC_VECTOR);
	klpp_set_exception_intercept(svm, DB_VECTOR);
	/*
	 * Guest access to VMware backdoor ports could legitimately
	 * trigger #GP because of TSS I/O permission bitmap.
	 * We intercept those #GP and allow access to them anyway
	 * as VMware does.
	 */
	if ((*klpe_enable_vmware_backdoor))
		klpp_set_exception_intercept(svm, GP_VECTOR);

	klpp_set_intercept(svm, INTERCEPT_INTR);
	klpp_set_intercept(svm, INTERCEPT_NMI);
	klpp_set_intercept(svm, INTERCEPT_SMI);
	klpp_set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
	klpp_set_intercept(svm, INTERCEPT_RDPMC);
	klpp_set_intercept(svm, INTERCEPT_CPUID);
	klpp_set_intercept(svm, INTERCEPT_INVD);
	klpp_set_intercept(svm, INTERCEPT_INVLPG);
	klpp_set_intercept(svm, INTERCEPT_INVLPGA);
	klpp_set_intercept(svm, INTERCEPT_IOIO_PROT);
	klpp_set_intercept(svm, INTERCEPT_MSR_PROT);
	klpp_set_intercept(svm, INTERCEPT_TASK_SWITCH);
	klpp_set_intercept(svm, INTERCEPT_SHUTDOWN);
	klpp_set_intercept(svm, INTERCEPT_VMRUN);
	klpp_set_intercept(svm, INTERCEPT_VMMCALL);
	klpp_set_intercept(svm, INTERCEPT_VMLOAD);
	klpp_set_intercept(svm, INTERCEPT_VMSAVE);
	klpp_set_intercept(svm, INTERCEPT_STGI);
	klpp_set_intercept(svm, INTERCEPT_CLGI);
	klpp_set_intercept(svm, INTERCEPT_SKINIT);
	klpp_set_intercept(svm, INTERCEPT_WBINVD);
	klpp_set_intercept(svm, INTERCEPT_XSETBV);
	klpp_set_intercept(svm, INTERCEPT_RSM);

	if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
		klpp_set_intercept(svm, INTERCEPT_MONITOR);
		klpp_set_intercept(svm, INTERCEPT_MWAIT);
	}

	if (!kvm_hlt_in_guest(svm->vcpu.kvm))
		klpp_set_intercept(svm, INTERCEPT_HLT);

	control->iopm_base_pa = __sme_set((*klpe_iopm_base));
	control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
	control->int_ctl = V_INTR_MASKING_MASK;

	init_seg(&save->es);
	init_seg(&save->ss);
	init_seg(&save->ds);
	init_seg(&save->fs);
	init_seg(&save->gs);

	save->cs.selector = 0xf000;
	save->cs.base = 0xffff0000;
	/* Executable/Readable Code Segment */
	save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
		SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
	save->cs.limit = 0xffff;

	save->gdtr.limit = 0xffff;
	save->idtr.limit = 0xffff;

	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);

	(*klpe_svm_set_cr4)(&svm->vcpu, 0);
	(*klpe_svm_set_efer)(&svm->vcpu, 0);
	save->dr6 = 0xffff0ff0;
	(*klpe_kvm_set_rflags)(&svm->vcpu, 2);
	save->rip = 0x0000fff0;
	/* Keep the cached RIP in step with the value just put in the VMCB. */
	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;

	/*
	 * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
	 * It also updates the guest-visible cr0 value.
	 */
	klpp_svm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
	(*klpe_kvm_mmu_reset_context)(&svm->vcpu);

	save->cr4 = X86_CR4_PAE;
	/* rdx = ?? */

	if ((*klpe_npt_enabled)) {
		/* Setup VMCB for Nested Paging */
		control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
		klpp_clr_intercept(svm, INTERCEPT_INVLPG);
		klpp_clr_exception_intercept(svm, PF_VECTOR);
		klpp_clr_cr_intercept(svm, INTERCEPT_CR3_READ);
		klpp_clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
		save->g_pat = svm->vcpu.arch.pat;
		save->cr3 = 0;
		save->cr4 = 0;
	}
	svm->asid_generation = 0;

	svm->nested.vmcb = 0;
	svm->vcpu.arch.hflags = 0;

	/* PAUSE is only intercepted when a pause filter count is configured. */
	if ((*klpe_pause_filter_count)) {
		control->pause_filter_count = (*klpe_pause_filter_count);
		if ((*klpe_pause_filter_thresh))
			control->pause_filter_thresh = (*klpe_pause_filter_thresh);
		klpp_set_intercept(svm, INTERCEPT_PAUSE);
	} else {
		klpp_clr_intercept(svm, INTERCEPT_PAUSE);
	}

	if (kvm_vcpu_apicv_active(&svm->vcpu))
		avic_init_vmcb(svm);

	/*
	 * If hardware supports Virtual VMLOAD VMSAVE then enable it
	 * in VMCB and clear intercepts to avoid #VMEXIT.
	 */
	if ((*klpe_vls)) {
		klpp_clr_intercept(svm, INTERCEPT_VMLOAD);
		klpp_clr_intercept(svm, INTERCEPT_VMSAVE);
		svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
	}

	/* With VGIF, STGI/CLGI need no intercept and V_GIF_ENABLE is set. */
	if ((*klpe_vgif)) {
		klpp_clr_intercept(svm, INTERCEPT_STGI);
		klpp_clr_intercept(svm, INTERCEPT_CLGI);
		svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
	}

	if (sev_guest(svm->vcpu.kvm)) {
		svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
		klpp_clr_exception_intercept(svm, UD_VECTOR);
	}

	mark_all_dirty(svm->vmcb);

	enable_gif(svm);

}

/*
 * Function pointer with the signature of an rflags getter; not called in
 * the part of the file visible here.
 */
static unsigned long (*klpe_svm_get_rflags)(struct kvm_vcpu *vcpu);

/* Turn on the VINTR intercept for @svm. */
static void klpp_svm_set_vintr(struct vcpu_svm *svm)
{
	const int bit = INTERCEPT_VINTR;

	klpp_set_intercept(svm, bit);
}

/* Turn off the VINTR intercept for @svm. */
static void klpp_svm_clear_vintr(struct vcpu_svm *svm)
{
	const int bit = INTERCEPT_VINTR;

	klpp_clr_intercept(svm, bit);
}

static void klpp_update_cr0_intercept(struct vcpu_svm *svm)
{
	ulong gcr0 = svm->vcpu.arch.cr0;
	u64 *hcr0 = &svm->vmcb->save.cr0;

	*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
		| (gcr0 & SVM_CR0_SELECTIVE_MASK);

	mark_dirty(svm->vmcb, VMCB_CR);

	if (gcr0 == *hcr0) {
		klpp_clr_cr_intercept(svm, INTERCEPT_CR0_READ);
		klpp_clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	} else {
		klpp_set_cr_intercept(svm, INTERCEPT_CR0_READ);
		klpp_set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
	}
}

/*
 * Patched CR0 setter.
 *
 * @vcpu: vCPU whose CR0 is changed
 * @cr0:  new guest-visible CR0 value
 *
 * With EFER.LME set, EFER.LMA is switched on or off (in both vcpu->arch.efer
 * and the VMCB) when paging gets enabled or disabled. The unmodified @cr0
 * is stored as the guest-visible value; the value written to the VMCB
 * additionally has PG and WP forced on when nested paging is off, and CD/NW
 * cleared when the KVM_X86_QUIRK_CD_NW_CLEARED quirk is in effect. Finally
 * VMCB_CR is marked dirty and the CR0 intercepts are re-evaluated.
 */
void klpp_svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	struct vcpu_svm *svm = to_svm(vcpu);

#ifdef CONFIG_X86_64
	if (vcpu->arch.efer & EFER_LME) {
		/*
		 * klpr_is_paging() still sees the old CR0 here: vcpu->arch.cr0
		 * is only overwritten after this block.
		 */
		if (!klpr_is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
			vcpu->arch.efer |= EFER_LMA;
			svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
		}

		if (klpr_is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
			vcpu->arch.efer &= ~EFER_LMA;
			svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
		}
	}
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif
	vcpu->arch.cr0 = cr0;

	if (!(*klpe_npt_enabled))
		cr0 |= X86_CR0_PG | X86_CR0_WP;

	/*
	 * re-enable caching here because the QEMU bios
	 * does not do it - this results in some delay at
	 * reboot
	 */
	if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
	svm->vmcb->save.cr0 = cr0;
	mark_dirty(svm->vmcb, VMCB_CR);
	klpp_update_cr0_intercept(svm);
}

/*
 * Repeats the declaration of klpe_svm_set_cr4 that already appears before
 * klpp_init_vmcb(); both declare the same file-scope pointer.
 */
static int (*klpe_svm_set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);

/*
 * Re-evaluate the #BP intercept for @vcpu: it is always cleared first and
 * re-enabled only when guest debugging is enabled with software
 * breakpoints. If guest debugging is not enabled at all,
 * vcpu->guest_debug is reset to 0.
 */
void klpp_update_bp_intercept(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	klpp_clr_exception_intercept(svm, BP_VECTOR);

	if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)) {
		vcpu->guest_debug = 0;
		return;
	}

	if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
		klpp_set_exception_intercept(svm, BP_VECTOR);
}

/* Supplies vcpu->arch.dr6 in klpp_svm_sync_dirty_debug_regs(). */
static u64 (*klpe_svm_get_dr6)(struct kvm_vcpu *vcpu);

/*
 * Pull the current debug register state into vcpu->arch: DR0..DR3 via
 * get_debugreg(), DR6 via the svm_get_dr6 helper and DR7 from the VMCB.
 * Afterwards KVM_DEBUGREG_WONT_EXIT is cleared and all DR accesses are
 * intercepted again (the counterpart of the intercept removal done in
 * klpp_dr_interception()).
 */
void klpp_svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	get_debugreg(vcpu->arch.db[0], 0);
	get_debugreg(vcpu->arch.db[1], 1);
	get_debugreg(vcpu->arch.db[2], 2);
	get_debugreg(vcpu->arch.db[3], 3);
	vcpu->arch.dr6 = (*klpe_svm_get_dr6)(vcpu);
	vcpu->arch.dr7 = svm->vmcb->save.dr7;

	vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
	klpp_set_dr_intercepts(svm);
}

/*
 * Nested-paging callbacks; klpr_nested_svm_init_mmu_context() installs
 * the pointed-to functions as get_cr3/get_pdptr/set_cr3/inject_page_fault
 * of vcpu->arch.mmu.
 */
static unsigned long (*klpe_nested_svm_get_tdp_cr3)(struct kvm_vcpu *vcpu);

static u64 (*klpe_nested_svm_get_tdp_pdptr)(struct kvm_vcpu *vcpu, int index);

static void (*klpe_nested_svm_set_tdp_cr3)(struct kvm_vcpu *vcpu,
				   unsigned long root);

static void (*klpe_nested_svm_inject_npf_exit)(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault);

/*
 * Switch @vcpu to the MMU setup used while a nested guest with nested
 * paging runs: vcpu->arch.mmu is pointed at guest_mmu, initialized as a
 * shadow MMU and equipped with the nested_svm_* callbacks, and the walk
 * MMU is pointed at nested_mmu. Warns if the walk MMU already is the
 * nested one on entry.
 */
static void klpr_nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	(*klpe_kvm_init_shadow_mmu)(vcpu);
	/* Callbacks are installed after the shadow MMU initialization above. */
	vcpu->arch.mmu->set_cr3           = (*klpe_nested_svm_set_tdp_cr3);
	vcpu->arch.mmu->get_cr3           = (*klpe_nested_svm_get_tdp_cr3);
	vcpu->arch.mmu->get_pdptr         = (*klpe_nested_svm_get_tdp_pdptr);
	vcpu->arch.mmu->inject_page_fault = (*klpe_nested_svm_inject_npf_exit);
	vcpu->arch.mmu->shadow_root_level = (*klpe_get_npt_level)(vcpu);
	(*klpe_reset_shadow_zero_bits_mask)(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
}

/*
 * Gate used by the STGI/CLGI handlers: a non-zero return makes them
 * return 1 immediately without emulating the instruction.
 */
static int (*klpe_nested_svm_check_permissions)(struct vcpu_svm *svm);

static inline bool klpr_nested_svm_intr(struct vcpu_svm *svm)
{
	if (!is_guest_mode(&svm->vcpu))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		return true;

	if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
		return false;

	/*
	 * if vmexit was already requested (by intercepted exception
	 * for instance) do not overwrite it with "external interrupt"
	 * vmexit.
	 */
	if (svm->nested.exit_required)
		return false;

	svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	if (svm->nested.intercept & 1ULL) {
		/*
		 * The #vmexit can't be emulated here directly because this
		 * code path runs with irqs and preemption disabled. A
		 * #vmexit emulation might sleep. Only signal request for
		 * the #vmexit here.
		 */
		svm->nested.exit_required = true;
		klpr_trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
		return false;
	}

	return true;
}

/*
 * Fix CVE-2021-3653
 *  +1 line
 *
 * Together with V_TPR_MASK, these are the only int_ctl bits that
 * klpp_enter_svm_guest_mode() takes over from the nested VMCB.
 */
#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK)

/*
 * Patched switch of @svm into nested guest mode.
 *
 * @svm:         vCPU entering the nested guest
 * @vmcb_gpa:    value recorded in svm->nested.vmcb once the switch is done
 * @nested_vmcb: nested VMCB whose save and control state is loaded
 * @map:         mapping handed to kvm_vcpu_unmap() (marked dirty) after the
 *               last read of @nested_vmcb
 *
 * Loads the nested guest's register state into the vCPU and the active
 * VMCB, caches the nested intercepts, copies a filtered int_ctl (the
 * CVE-2021-3653 fix), enters guest mode and merges the intercepts via
 * klpp_recalc_intercepts().
 */
void klpp_enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
				 struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	/* Remember in hflags whether EFLAGS.IF was set before the switch. */
	if ((*klpe_kvm_get_rflags)(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		klpr_nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	(*klpe_kvm_set_rflags)(&svm->vcpu, nested_vmcb->save.rflags);
	(*klpe_svm_set_efer)(&svm->vcpu, nested_vmcb->save.efer);
	klpp_svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	(*klpe_svm_set_cr4)(&svm->vcpu, nested_vmcb->save.cr4);
	if ((*klpe_npt_enabled)) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)(*klpe_kvm_set_cr3)(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	(*klpe_kvm_mmu_reset_context)(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	/* Permission map addresses are kept with the low 12 bits cleared. */
	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept            = nested_vmcb->control.intercept;

	(*klpe_svm_flush_tlb)(&svm->vcpu, true);
	/*
	 * Fix CVE-2021-3653
	 *  -1 line, +4 lines
	 *
	 * Livepatch specific deviation from upstream / resp. the
	 * stable backports: as V_GIF is not being virtualized and the
	 * capability not announced to L1, V_GIF_ENABLE_MASK and V_GIF
	 * are expected to always be unset in nested_vmcb. Thus, the
	 * additional mask below doesn't change anything
	 * wrt. V_GIF. Unlike upstream, don't take the V_GIF bits from
	 * L0, i.e. vmcb01, either: instead enable stgi/clgi
	 * intercepts in case the L1 hypervisor failed to do so,
	 * c.f. klpp_recalc_intercepts() from above.
	 */
	svm->vmcb->control.int_ctl = (nested_vmcb->control.int_ctl &
				      (V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK));
	svm->vmcb->control.int_ctl |= V_INTR_MASKING_MASK;

	/* HF_VINTR_MASK mirrors the V_INTR_MASKING bit requested by the guest. */
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of the guest */
		klpp_clr_cr_intercept(svm, INTERCEPT_CR8_READ);
		klpp_clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	klpp_clr_intercept(svm, INTERCEPT_VMMCALL);

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	/* nested_vmcb is not accessed any more beyond this point. */
	(*klpe_kvm_vcpu_unmap)(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here
	 */
	klpp_recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	enable_gif(svm);

	mark_all_dirty(svm->vmcb);
}

/*
 * Exit handler for an intercepted STGI.
 *
 * Returns 1 immediately when the nested SVM permission check yields a
 * non-zero result. Otherwise the instruction is skipped, an event
 * re-evaluation is requested, GIF is enabled and the result of
 * kvm_skip_emulated_instruction() is returned.
 */
int klpp_stgi_interception(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	int skipped;

	if ((*klpe_nested_svm_check_permissions)(svm))
		return 1;

	/*
	 * With VGIF in use, the STGI intercept is only installed to notice
	 * the SMI/NMI window opening, so it can be dropped again here.
	 */
	if (vgif_enabled(svm))
		klpp_clr_intercept(svm, INTERCEPT_STGI);

	/* NOTE(review): 3 is presumably the STGI instruction length - confirm. */
	svm->next_rip = klpr_kvm_rip_read(vcpu) + 3;
	skipped = (*klpe_kvm_skip_emulated_instruction)(vcpu);
	kvm_make_request(KVM_REQ_EVENT, vcpu);

	enable_gif(svm);

	return skipped;
}

/*
 * Exit handler for an intercepted CLGI.
 *
 * Returns 1 immediately when the nested SVM permission check yields a
 * non-zero result. Otherwise the instruction is skipped, GIF is disabled
 * and, unless APICv is active, any pending virtual interrupt is withdrawn.
 * The result of kvm_skip_emulated_instruction() is returned.
 */
int klpp_clgi_interception(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	int skipped;

	if ((*klpe_nested_svm_check_permissions)(svm))
		return 1;

	/* NOTE(review): 3 is presumably the CLGI instruction length - confirm. */
	svm->next_rip = klpr_kvm_rip_read(vcpu) + 3;
	skipped = (*klpe_kvm_skip_emulated_instruction)(vcpu);

	disable_gif(svm);

	if (kvm_vcpu_apicv_active(vcpu))
		return skipped;

	/* No interrupts should arrive once CLGI has been executed. */
	klpp_svm_clear_vintr(svm);
	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
	mark_dirty(svm->vmcb, VMCB_INTR);

	return skipped;
}

int klpp_iret_interception(struct vcpu_svm *svm)
{
	++svm->vcpu.stat.nmi_window_exits;
	klpp_clr_intercept(svm, INTERCEPT_IRET);
	svm->vcpu.arch.hflags |= HF_IRET_MASK;
	svm->nmi_iret_rip = klpr_kvm_rip_read(&svm->vcpu);
	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
	return 1;
}

static int (*klpe_emulate_on_interception)(struct vcpu_svm *svm);

int klpp_dr_interception(struct vcpu_svm *svm)
{
	int reg, dr;
	unsigned long val;

	if (svm->vcpu.guest_debug == 0) {
		/*
		 * No more DR vmexits; force a reload of the debug registers
		 * and reenter on this instruction.  The next vmexit will
		 * retrieve the full state of the debug registers.
		 */
		klpp_clr_dr_intercepts(svm);
		svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
		return 1;
	}

	if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
		return (*klpe_emulate_on_interception)(svm);

	reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;

	if (dr >= 16) { /* mov to DRn */
		if (!(*klpe_kvm_require_dr)(&svm->vcpu, dr - 16))
			return 1;
		val = klpr_kvm_register_read(&svm->vcpu, reg);
		(*klpe_kvm_set_dr)(&svm->vcpu, dr - 16, val);
	} else {
		if (!(*klpe_kvm_require_dr)(&svm->vcpu, dr))
			return 1;
		(*klpe_kvm_get_dr)(&svm->vcpu, dr, &val);
		kvm_register_write(&svm->vcpu, reg, val);
	}

	return (*klpe_kvm_skip_emulated_instruction)(&svm->vcpu);
}

int klpp_interrupt_window_interception(struct vcpu_svm *svm)
{
	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
	klpp_svm_clear_vintr(svm);
	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
	mark_dirty(svm->vmcb, VMCB_INTR);
	++svm->vcpu.stat.irq_window_exits;
	return 1;
}

void klpp_svm_inject_nmi(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
	vcpu->arch.hflags |= HF_NMI_MASK;
	klpp_set_intercept(svm, INTERCEPT_IRET);
	++vcpu->stat.nmi_injections;
}

/*
 * Program a pending virtual interrupt with vector @irq into the VMCB and
 * mark the interrupt control state dirty.
 */
static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
{
	/* The following fields are ignored when AVIC is enabled */
	struct vmcb_control_area *ctl = &svm->vmcb->control;

	ctl->int_vector = irq;

	/*
	 * The priority field is unconditionally set to 0xf rather than
	 * being derived from the upper nibble of the vector.
	 */
	ctl->int_ctl = (ctl->int_ctl & ~V_INTR_PRIO_MASK) |
		       V_IRQ_MASK | (0xf << V_INTR_PRIO_SHIFT);

	mark_dirty(svm->vmcb, VMCB_INTR);
}

static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu)
{
	return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK);
}

/*
 * Re-evaluate the CR8 write intercept.
 *
 * Nothing is touched when the TPR is virtualized for a nested guest or
 * when APICv is active. Otherwise the intercept is cleared and set again
 * only if @irr is not -1 and @tpr >= @irr.
 */
void klpp_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm_nested_virtualize_tpr(vcpu) || kvm_vcpu_apicv_active(vcpu))
		return;

	klpp_clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);

	if (irr != -1 && tpr >= irr)
		klpp_set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}

/*
 * Set or clear the NMI-masked state: HF_NMI_MASK in hflags and the IRET
 * intercept are always switched together.
 */
void klpp_svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!masked) {
		svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
		klpp_clr_intercept(svm, INTERCEPT_IRET);
		return;
	}

	svm->vcpu.arch.hflags |= HF_NMI_MASK;
	klpp_set_intercept(svm, INTERCEPT_IRET);
}

/*
 * Request an interrupt window exit by arming the VINTR intercept and
 * injecting a dummy virtual interrupt (vector 0). Does nothing when APICv
 * is active.
 */
void klpp_enable_irq_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (kvm_vcpu_apicv_active(vcpu))
		return;

	/*
	 * With GIF=0 the CPU cannot be relied upon to report GIF becoming
	 * 1, as that is a separate STGI/VMRUN intercept. This function is
	 * called again on that intercept, and the vintr intercept is then
	 * obtained. With the vGIF feature enabled, however, no STGI
	 * interception happens, so the irq window is enabled on the
	 * assumption that the hardware will set the GIF.
	 */
	if (!vgif_enabled(svm) && !gif_set(svm))
		return;

	if (!klpr_nested_svm_intr(svm))
		return;

	klpp_svm_set_vintr(svm);
	svm_inject_irq(svm, 0x0);
}

/*
 * Arrange for a vm exit once an NMI can be injected.
 *
 * Relies on the IRET exit or on the STGI exit where one of those is
 * expected; otherwise single-steps the guest by setting TF and RF in the
 * saved rflags, after remembering the guest's own rflags.
 */
void klpp_enable_nmi_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* NMI masked, no IRET seen yet: IRET will cause a vm exit */
	if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) ==
	    HF_NMI_MASK)
		return;

	if (!gif_set(svm)) {
		/* STGI will cause a vm exit */
		if (vgif_enabled(svm))
			klpp_set_intercept(svm, INTERCEPT_STGI);
		return;
	}

	/* The guest is not going to be run yet */
	if (svm->nested.exit_required)
		return;

	/*
	 * Something prevents the NMI from being injected. Single-step over
	 * the possible problem (IRET, exception injection or interrupt
	 * shadow).
	 */
	svm->nmi_singlestep_guest_rflags = (*klpe_svm_get_rflags)(vcpu);
	svm->nmi_singlestep = true;
	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
}

/* Filled in at runtime from the "svm_flush_tlb" entry in klp_funcs. */
static void (*klpe_svm_flush_tlb)(struct kvm_vcpu *vcpu, bool invalidate_gpa);

/*
 * Returns 0 when GIF is already set. Otherwise returns 1, after arming
 * the STGI intercept if VGIF is enabled.
 */
int klpp_enable_smi_window(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (gif_set(svm))
		return 0;

	/* STGI will cause a vm exit */
	if (vgif_enabled(svm))
		klpp_set_intercept(svm, INTERCEPT_STGI);

	return 1;
}



#include <linux/kernel.h>
#include <linux/module.h>
#include "livepatch_bsc1189418.h"
#include "../kallsyms_relocs.h"

/*
 * Name of the module whose presence (at init time) or arrival (via the
 * module notifier) triggers resolution of the klp_funcs table.
 */
#define LIVEPATCHED_MODULE "kvm_amd"

/*
 * Table of symbols to be looked up at runtime by
 * __klp_resolve_kallsyms_relocs(). Each entry names a symbol, gives the
 * address of the klpe_* variable associated with it, and names a module
 * ("kvm" or "kvm_amd").
 * NOTE(review): the exact field semantics are defined by
 * struct klp_kallsyms_reloc in ../kallsyms_relocs.h - presumably symbol
 * name, destination pointer and owning object; confirm there.
 */
static struct klp_kallsyms_reloc klp_funcs[] = {
	{ "kvm_x86_ops", (void *)&klpe_kvm_x86_ops, "kvm" },
	{ "enable_vmware_backdoor", (void *)&klpe_enable_vmware_backdoor,
	  "kvm" },
	{ "__tracepoint_kvm_nested_intr_vmexit",
	  (void *)&klpe___tracepoint_kvm_nested_intr_vmexit, "kvm" },
	{ "pause_filter_count", (void *)&klpe_pause_filter_count, "kvm_amd" },
	{ "pause_filter_thresh", (void *)&klpe_pause_filter_thresh, "kvm_amd" },
	{ "npt_enabled", (void *)&klpe_npt_enabled, "kvm_amd" },
	{ "iopm_base", (void *)&klpe_iopm_base, "kvm_amd" },
	{ "vgif", (void *)&klpe_vgif, "kvm_amd" },
	{ "vls", (void *)&klpe_vls, "kvm_amd" },
	{ "kvm_mmu_reset_context", (void *)&klpe_kvm_mmu_reset_context, "kvm" },
	{ "kvm_set_cr3", (void *)&klpe_kvm_set_cr3, "kvm" },
	{ "kvm_set_dr", (void *)&klpe_kvm_set_dr, "kvm" },
	{ "kvm_get_dr", (void *)&klpe_kvm_get_dr, "kvm" },
	{ "kvm_get_rflags", (void *)&klpe_kvm_get_rflags, "kvm" },
	{ "kvm_set_rflags", (void *)&klpe_kvm_set_rflags, "kvm" },
	{ "kvm_require_dr", (void *)&klpe_kvm_require_dr, "kvm" },
	{ "kvm_skip_emulated_instruction",
	  (void *)&klpe_kvm_skip_emulated_instruction, "kvm" },
	{ "kvm_vcpu_unmap", (void *)&klpe_kvm_vcpu_unmap, "kvm" },
	{ "reset_shadow_zero_bits_mask",
	  (void *)&klpe_reset_shadow_zero_bits_mask, "kvm" },
	{ "kvm_init_shadow_mmu", (void *)&klpe_kvm_init_shadow_mmu, "kvm" },
	{ "svm_set_cr4", (void *)&klpe_svm_set_cr4, "kvm_amd" },
	{ "svm_flush_tlb", (void *)&klpe_svm_flush_tlb, "kvm_amd" },
	{ "get_npt_level", (void *)&klpe_get_npt_level, "kvm_amd" },
	{ "svm_set_efer", (void *)&klpe_svm_set_efer, "kvm_amd" },
	{ "svm_get_rflags", (void *)&klpe_svm_get_rflags, "kvm_amd" },
	{ "emulate_on_interception", (void *)&klpe_emulate_on_interception,
	  "kvm_amd" },
	{ "svm_get_dr6", (void *)&klpe_svm_get_dr6, "kvm_amd" },
	{ "nested_svm_set_tdp_cr3", (void *)&klpe_nested_svm_set_tdp_cr3,
	  "kvm_amd" },
	{ "nested_svm_check_permissions",
	  (void *)&klpe_nested_svm_check_permissions, "kvm_amd" },
	{ "nested_svm_inject_npf_exit",
	  (void *)&klpe_nested_svm_inject_npf_exit, "kvm_amd" },
	{ "nested_svm_get_tdp_cr3", (void *)&klpe_nested_svm_get_tdp_cr3,
	  "kvm_amd" },
	{ "nested_svm_get_tdp_pdptr", (void *)&klpe_nested_svm_get_tdp_pdptr,
	  "kvm_amd" },
};

/*
 * Module notifier callback: once LIVEPATCHED_MODULE reaches
 * MODULE_STATE_COMING, resolve the klp_funcs table under module_mutex.
 *
 * Returns 0 for every other event or module, otherwise the result of
 * __klp_resolve_kallsyms_relocs(); a non-zero result also triggers a
 * WARN.
 */
static int livepatch_bsc1189418_module_notify(struct notifier_block *nb,
					      unsigned long action, void *data)
{
	struct module *mod = data;
	int err;

	if (action != MODULE_STATE_COMING)
		return 0;

	if (strcmp(mod->name, LIVEPATCHED_MODULE) != 0)
		return 0;

	mutex_lock(&module_mutex);
	err = __klp_resolve_kallsyms_relocs(klp_funcs, ARRAY_SIZE(klp_funcs));
	mutex_unlock(&module_mutex);

	WARN(err, "livepatch: delayed kallsyms lookup failed. System is broken and can crash.\n");

	return err;
}

/*
 * Notifier block registered by livepatch_bsc1189418_init() and removed by
 * livepatch_bsc1189418_cleanup().
 * NOTE(review): the INT_MIN+1 priority presumably places this callback
 * near the end of the module notifier chain - confirm the intended
 * ordering against the other registered notifiers.
 */
static struct notifier_block livepatch_bsc1189418_module_nb = {
	.notifier_call = livepatch_bsc1189418_module_notify,
	.priority = INT_MIN+1,
};

/*
 * Set up the live patch's runtime symbol resolution.
 *
 * Under module_mutex: if LIVEPATCHED_MODULE is already present, resolve
 * the klp_funcs table right away; on success (or if the module is not
 * there yet) register the module notifier so that resolution happens
 * when the module shows up.
 *
 * Returns 0 on success, or the non-zero result of the failing step.
 */
int livepatch_bsc1189418_init(void)
{
	int err = 0;

	mutex_lock(&module_mutex);

	if (find_module(LIVEPATCHED_MODULE))
		err = __klp_resolve_kallsyms_relocs(klp_funcs,
						    ARRAY_SIZE(klp_funcs));

	/* Only hook the notifier if nothing has failed so far. */
	if (!err)
		err = register_module_notifier(&livepatch_bsc1189418_module_nb);

	mutex_unlock(&module_mutex);

	return err;
}

/*
 * Undo livepatch_bsc1189418_init(): remove the module notifier again.
 */
void livepatch_bsc1189418_cleanup(void)
{
	unregister_module_notifier(&livepatch_bsc1189418_module_nb);
}

#endif /* IS_ENABLED(CONFIG_KVM_AMD) */
