/*
 * kgraft_patch_bsc1099306_kvm_intel
 *
 * Fix for CVE-2018-3646 (kvm_intel.ko part), bsc#1099306
 *
 *  Copyright (c) 2018 SUSE
 *  Author: Nicolai Stange <nstange@suse.de>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#if IS_ENABLED(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM)

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/kvm_host.h>
#include <asm/vmx.h>
#include <linux/highmem.h>
#include <asm/segment.h>
#include "kgr_patch_bsc1099306_kvm_intel.h"
#include "bsc1099306.h"

#if !IS_MODULE(CONFIG_KVM_INTEL)
#error "Live patch supports only CONFIG_KVM_INTEL=m"
#endif

#define KGR_PATCHED_MODULE "kvm_intel"


struct vcpu_vmx;
struct vmcs12;

/*
 * Pointers to symbols living in kvm.ko. Each one is paired with its
 * "kvm:<symbol>" name in the kgr_funcs[] table; the code in this file only
 * ever reaches these symbols through the pointers, which avoids a link-time
 * dependency on kvm.ko.
 *
 * kgr_kvm_rebooting and kgr_kvm_spurious_fault are additionally referenced
 * by symbol name from the inline assembly in
 * kgr____kvm_handle_fault_on_reboot(), so their names must not change.
 *
 * All pointers are file-local (static) so that they cannot clash with
 * identically named objects in other translation units.
 */
static bool *kgr_kvm_rebooting;
static asmlinkage void (*kgr_kvm_spurious_fault)(void);
static struct page* (*kgr_kvm_vcpu_gfn_to_page)(struct kvm_vcpu *vcpu, gfn_t gfn);
static void (*kgr_kvm_release_page_dirty)(struct page *page);
static void (*kgr_kvm_release_page_clean)(struct page *page);
static bool (*kgr_kvm_valid_efer)(struct kvm_vcpu *vcpu, u64 efer);
static int (*kgr_kvm_vcpu_halt)(struct kvm_vcpu *vcpu);
static int (*kgr_kvm_set_shared_msr)(unsigned slot, u64 value, u64 mask);

/*
 * Pointers to symbols living in kvm_intel.ko itself, paired with their
 * "kvm_intel:<symbol>" names in kgr_funcs[].
 */
static bool *kgr_enable_ept;
static bool *kgr_enable_shadow_vmcs;
static int (*kgr_nested_vmx_check_permission)(struct kvm_vcpu *vcpu);
static void (*kgr_nested_vmx_failInvalid)(struct kvm_vcpu *vcpu);
static void (*kgr_skip_emulated_instruction)(struct kvm_vcpu *vcpu);
static void (*kgr_copy_shadow_to_vmcs12)(struct vcpu_vmx *vmx);
static void (*kgr_nested_vmx_failValid)(struct kvm_vcpu *vcpu,
					u32 vm_instruction_error);
static int (*kgr_nested_vmx_check_msr_switch)(struct kvm_vcpu *vcpu,
					      unsigned long count_field,
					      unsigned long addr_field);
static void (*kgr_nested_vmx_entry_failure)(struct kvm_vcpu *vcpu,
					    struct vmcs12 *vmcs12,
					    u32 reason,
					    unsigned long qualification);
static struct vmcs* (*kgr_alloc_vmcs_cpu)(int cpu);
static void (*kgr_vmcs_clear)(struct vmcs *vmcs);
static void (*kgr_vmx_vcpu_put)(struct kvm_vcpu *vcpu);
static void (*kgr_vmx_vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
static void (*kgr_prepare_vmcs02)(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12);
static u32 (*kgr_nested_vmx_load_msr)(struct kvm_vcpu *vcpu, u64 gpa,
				      u32 count);
static void (*kgr_vmx_load_vmcs01)(struct kvm_vcpu *vcpu);
static void (*kgr_vmcs_writel)(unsigned long field, unsigned long value);


/*
 * Table pairing each "<module>:<symbol>" name with the address of the
 * kgr_<symbol> pointer variable associated with it.
 *
 * KGR_FUNC(mod, sym) expands to { mod ":sym", (void *)&kgr_sym }. The
 * symbol argument is only used with # and ##, so it is never
 * macro-expanded.
 */
#define KGR_FUNC(mod, sym) { mod ":" #sym, (void *)&kgr_##sym }

static struct {
	char *name;
	void **addr;
} kgr_funcs[] = {
	/* symbols looked up in kvm */
	KGR_FUNC("kvm", kvm_rebooting),
	KGR_FUNC("kvm", kvm_spurious_fault),
	KGR_FUNC("kvm", kvm_vcpu_gfn_to_page),
	KGR_FUNC("kvm", kvm_release_page_dirty),
	KGR_FUNC("kvm", kvm_release_page_clean),
	KGR_FUNC("kvm", kvm_valid_efer),
	KGR_FUNC("kvm", kvm_vcpu_halt),
	KGR_FUNC("kvm", kvm_set_shared_msr),

	/* symbols looked up in kvm_intel */
	KGR_FUNC("kvm_intel", enable_ept),
	KGR_FUNC("kvm_intel", enable_shadow_vmcs),
	KGR_FUNC("kvm_intel", nested_vmx_check_permission),
	KGR_FUNC("kvm_intel", nested_vmx_failInvalid),
	KGR_FUNC("kvm_intel", skip_emulated_instruction),
	KGR_FUNC("kvm_intel", copy_shadow_to_vmcs12),
	KGR_FUNC("kvm_intel", nested_vmx_failValid),
	KGR_FUNC("kvm_intel", nested_vmx_check_msr_switch),
	KGR_FUNC("kvm_intel", nested_vmx_entry_failure),
	KGR_FUNC("kvm_intel", alloc_vmcs_cpu),
	KGR_FUNC("kvm_intel", vmcs_clear),
	KGR_FUNC("kvm_intel", vmx_vcpu_put),
	KGR_FUNC("kvm_intel", vmx_vcpu_load),
	KGR_FUNC("kvm_intel", prepare_vmcs02),
	KGR_FUNC("kvm_intel", nested_vmx_load_msr),
	KGR_FUNC("kvm_intel", vmx_load_vmcs01),
	KGR_FUNC("kvm_intel", vmcs_writel),
};

#undef KGR_FUNC


/* from arch/x86/include/asm/kvm_host.h */
/*
 * Avoid module dependency on kvm.ko.
 *
 * This is the original macro but with the load from kvm_rebooting
 * replaced by an indirect load from *kgr_kvm_rebooting and the call
 * to kvm_spurious_fault() replaced by an indirect call to
 * *kgr_kvm_spurious_fault.
 *
 * Layout of the generated assembly:
 *   666: the wrapped instruction 'insn'
 *   668: the point execution continues at after 'insn'
 *   667: fixup code, placed in the .fixup section and registered through
 *        _ASM_EXTABLE(666b, 667b) as the handler for a fault at 666.
 *
 * The fixup first runs 'cleanup_insn', then tests the byte that
 * kgr_kvm_rebooting points to (preserving %rax around the test). If that
 * byte is non-zero it jumps back to 668, i.e. the faulting instruction is
 * skipped. Otherwise it pushes the address of label 666 and calls through
 * the kgr_kvm_spurious_fault pointer.
 *
 * Both kgr_kvm_rebooting and kgr_kvm_spurious_fault are referenced by
 * symbol name here, so they must be defined in this translation unit.
 */
#define kgr____kvm_handle_fault_on_reboot(insn, cleanup_insn)	\
	"666: " insn "\n\t"				\
	"668: \n\t"					\
	".pushsection .fixup, \"ax\" \n"		\
	"667: \n\t"					\
	cleanup_insn "\n\t"				\
	"pushq %%rax \n\t"				\
	"movq kgr_kvm_rebooting, %%rax\n\t"		\
	"cmpb $0, (%%rax) \n\t"			\
	"popq %%rax \n\t"				\
	"jne 668b \n\t"				\
	__ASM_SIZE(push) " $666b \n\t"			\
	"movq kgr_kvm_spurious_fault, %%rax \n\t"	\
	"call *%%rax\n\t"				\
	".popsection \n\t"				\
	_ASM_EXTABLE(666b, 667b)


/* from arch/x86/kvm/x86.h */
/* inlined */
/*
 * Report whether the vcpu has EFER.LMA set.
 *
 * Returns the masked EFER value (non-zero when the bit is set) on 64-bit
 * builds and always 0 otherwise.
 */
static inline int kgr_is_long_mode(struct kvm_vcpu *vcpu)
{
#ifndef CONFIG_X86_64
	return 0;
#else
	return vcpu->arch.efer & EFER_LMA;
#endif
}


/* from arch/x86/kvm/cpuid.h */
/* inlined */
/* Return the maxphyaddr value stored in the vcpu's arch state. */
static inline int kgr_cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
{
	int phys_bits = vcpu->arch.maxphyaddr;

	return phys_bits;
}


/* from arch/x86/kvm/kvm_cache_regs.h */
/* Set HF_GUEST_MASK in the vcpu's hflags. */
static inline void kgr_enter_guest_mode(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hflags = vcpu->arch.hflags | HF_GUEST_MASK;
}

/* Clear HF_GUEST_MASK in the vcpu's hflags. */
static inline void kgr_leave_guest_mode(struct kvm_vcpu *vcpu)
{
	vcpu->arch.hflags = vcpu->arch.hflags & ~HF_GUEST_MASK;
}


/* from arch/x86/kvm/vmx.c */
/*
 * Wrap instruction string x in kgr____kvm_handle_fault_on_reboot(), using
 * "xor reg, reg" as the cleanup instruction: if x faults, reg is zeroed
 * before the fixup code decides how to continue.
 */
#define kgr__ex_clear(x, reg) \
	kgr____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg)

/* Size of the guest[] and host[] arrays in struct msr_autoload. */
#define KGR_NR_AUTOLOAD_MSRS 8
/* Limit on the number of vmcs02_list entries kept per vcpu. */
#define KGR_VMCS02_POOL_SIZE 1

/*
 * Bookkeeping for one VMCS.
 *
 * NOTE(review): this is a local copy of a vmx.c-private structure; it
 * presumably has to match the layout used by the running kvm_intel module
 * exactly -- do not reorder or resize fields.
 */
struct loaded_vmcs {
	struct vmcs *vmcs;
	int cpu;	/* set to -1 by kgr_loaded_vmcs_init() */
	int launched;	/* set to 0 by kgr_loaded_vmcs_init() */
	struct list_head loaded_vmcss_on_cpu_link;
};

/*
 * One guest MSR entry; index, data and mask are handed as-is to
 * kvm_set_shared_msr() (slot, value, mask) by kgr_vmx_save_host_state().
 *
 * NOTE(review): local copy of a vmx.c-private structure; keep the layout
 * unchanged.
 */
struct shared_msr_entry {
	unsigned index;
	u64 data;
	u64 mask;
};

/* Fixed 64-bit stand-in for "natural width" VMCS fields, see below. */
typedef u64 natural_width;
/*
 * Software representation of the VMCS that L1 maintains for its L2 guest.
 * The structure is packed and its fields are grouped by width (64-bit,
 * natural width, 32-bit, 16-bit), each group followed by padding reserved
 * for future expansion.
 *
 * NOTE(review): local copy of a vmx.c-private structure; field order and
 * sizes presumably have to match the running kvm_intel module exactly.
 */
struct __packed vmcs12 {
	/* According to the Intel spec, a VMCS region must start with the
	 * following two fields. Then follow implementation-specific data.
	 */
	u32 revision_id;
	u32 abort;

	u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
	u32 padding[7]; /* room for future expansion */

	u64 io_bitmap_a;
	u64 io_bitmap_b;
	u64 msr_bitmap;
	u64 vm_exit_msr_store_addr;
	u64 vm_exit_msr_load_addr;
	u64 vm_entry_msr_load_addr;
	u64 tsc_offset;
	u64 virtual_apic_page_addr;
	u64 apic_access_addr;
	u64 posted_intr_desc_addr;
	u64 ept_pointer;
	u64 eoi_exit_bitmap0;
	u64 eoi_exit_bitmap1;
	u64 eoi_exit_bitmap2;
	u64 eoi_exit_bitmap3;
	u64 xss_exit_bitmap;
	u64 guest_physical_address;
	u64 vmcs_link_pointer;
	u64 guest_ia32_debugctl;
	u64 guest_ia32_pat;
	u64 guest_ia32_efer;
	u64 guest_ia32_perf_global_ctrl;
	u64 guest_pdptr0;
	u64 guest_pdptr1;
	u64 guest_pdptr2;
	u64 guest_pdptr3;
	u64 guest_bndcfgs;
	u64 host_ia32_pat;
	u64 host_ia32_efer;
	u64 host_ia32_perf_global_ctrl;
	u64 padding64[8]; /* room for future expansion */
	/*
	 * To allow migration of L1 (complete with its L2 guests) between
	 * machines of different natural widths (32 or 64 bit), we cannot have
	 * unsigned long fields with no explicit size. We use u64 (aliased
	 * natural_width) instead. Luckily, x86 is little-endian.
	 */
	natural_width cr0_guest_host_mask;
	natural_width cr4_guest_host_mask;
	natural_width cr0_read_shadow;
	natural_width cr4_read_shadow;
	natural_width cr3_target_value0;
	natural_width cr3_target_value1;
	natural_width cr3_target_value2;
	natural_width cr3_target_value3;
	natural_width exit_qualification;
	natural_width guest_linear_address;
	natural_width guest_cr0;
	natural_width guest_cr3;
	natural_width guest_cr4;
	natural_width guest_es_base;
	natural_width guest_cs_base;
	natural_width guest_ss_base;
	natural_width guest_ds_base;
	natural_width guest_fs_base;
	natural_width guest_gs_base;
	natural_width guest_ldtr_base;
	natural_width guest_tr_base;
	natural_width guest_gdtr_base;
	natural_width guest_idtr_base;
	natural_width guest_dr7;
	natural_width guest_rsp;
	natural_width guest_rip;
	natural_width guest_rflags;
	natural_width guest_pending_dbg_exceptions;
	natural_width guest_sysenter_esp;
	natural_width guest_sysenter_eip;
	natural_width host_cr0;
	natural_width host_cr3;
	natural_width host_cr4;
	natural_width host_fs_base;
	natural_width host_gs_base;
	natural_width host_tr_base;
	natural_width host_gdtr_base;
	natural_width host_idtr_base;
	natural_width host_ia32_sysenter_esp;
	natural_width host_ia32_sysenter_eip;
	natural_width host_rsp;
	natural_width host_rip;
	natural_width paddingl[8]; /* room for future expansion */
	u32 pin_based_vm_exec_control;
	u32 cpu_based_vm_exec_control;
	u32 exception_bitmap;
	u32 page_fault_error_code_mask;
	u32 page_fault_error_code_match;
	u32 cr3_target_count;
	u32 vm_exit_controls;
	u32 vm_exit_msr_store_count;
	u32 vm_exit_msr_load_count;
	u32 vm_entry_controls;
	u32 vm_entry_msr_load_count;
	u32 vm_entry_intr_info_field;
	u32 vm_entry_exception_error_code;
	u32 vm_entry_instruction_len;
	u32 tpr_threshold;
	u32 secondary_vm_exec_control;
	u32 vm_instruction_error;
	u32 vm_exit_reason;
	u32 vm_exit_intr_info;
	u32 vm_exit_intr_error_code;
	u32 idt_vectoring_info_field;
	u32 idt_vectoring_error_code;
	u32 vm_exit_instruction_len;
	u32 vmx_instruction_info;
	u32 guest_es_limit;
	u32 guest_cs_limit;
	u32 guest_ss_limit;
	u32 guest_ds_limit;
	u32 guest_fs_limit;
	u32 guest_gs_limit;
	u32 guest_ldtr_limit;
	u32 guest_tr_limit;
	u32 guest_gdtr_limit;
	u32 guest_idtr_limit;
	u32 guest_es_ar_bytes;
	u32 guest_cs_ar_bytes;
	u32 guest_ss_ar_bytes;
	u32 guest_ds_ar_bytes;
	u32 guest_fs_ar_bytes;
	u32 guest_gs_ar_bytes;
	u32 guest_ldtr_ar_bytes;
	u32 guest_tr_ar_bytes;
	u32 guest_interruptibility_info;
	u32 guest_activity_state;
	u32 guest_sysenter_cs;
	u32 host_ia32_sysenter_cs;
	u32 vmx_preemption_timer_value;
	u32 padding32[7]; /* room for future expansion */
	u16 virtual_processor_id;
	u16 posted_intr_nv;
	u16 guest_es_selector;
	u16 guest_cs_selector;
	u16 guest_ss_selector;
	u16 guest_ds_selector;
	u16 guest_fs_selector;
	u16 guest_gs_selector;
	u16 guest_ldtr_selector;
	u16 guest_tr_selector;
	u16 guest_intr_status;
	u16 host_es_selector;
	u16 host_cs_selector;
	u16 host_ss_selector;
	u16 host_ds_selector;
	u16 host_fs_selector;
	u16 host_gs_selector;
	u16 host_tr_selector;
};

/*
 * One entry of the per-vcpu vmcs02 pool (nested_vmx.vmcs02_pool): a
 * loaded_vmcs keyed by the vmptr it was set up for.
 *
 * NOTE(review): local copy of a vmx.c-private structure; keep the layout
 * unchanged.
 */
struct vmcs02_list {
	struct list_head list;	/* link in nested_vmx.vmcs02_pool */
	gpa_t vmptr;		/* compared against nested.current_vmptr */
	struct loaded_vmcs vmcs02;
};

/*
 * Per-vcpu nested VMX state, embedded in struct vcpu_vmx as 'nested'.
 *
 * NOTE(review): local copy of a vmx.c-private structure; field order and
 * sizes presumably have to match the running kvm_intel module exactly.
 */
struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	gpa_t vmxon_ptr;

	/* The guest-physical address of the current VMCS L1 keeps for L2 */
	gpa_t current_vmptr;
	/* The host-usable pointer to the above */
	struct page *current_vmcs12_page;
	struct vmcs12 *current_vmcs12;
	struct vmcs *current_shadow_vmcs;
	/*
	 * Indicates if the shadow vmcs must be updated with the
	 * data held by vmcs12
	 */
	bool sync_shadow_vmcs;

	/* vmcs02_list cache of VMCSs recently used to run L2 guests */
	struct list_head vmcs02_pool;
	int vmcs02_num;
	u64 vmcs01_tsc_offset;
	bool change_vmcs01_virtual_x2apic_mode;
	/* L2 must run next, and mustn't decide to exit to L1. */
	bool nested_run_pending;
	/*
	 * Guest pages referred to in vmcs02 with host-physical pointers, so
	 * we must keep them pinned while L2 runs.
	 */
	struct page *apic_access_page;
	struct page *virtual_apic_page;
	struct page *pi_desc_page;
	struct pi_desc *pi_desc;
	bool pi_pending;
	u16 posted_intr_nv;
	u64 msr_ia32_feature_control;

	unsigned long *msr_bitmap;

	struct hrtimer preemption_timer;
	bool preemption_timer_expired;

	/* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */
	u64 vmcs01_debugctl;

	u16 vpid02;
	u16 last_vpid;

	u32 nested_vmx_procbased_ctls_low;
	u32 nested_vmx_procbased_ctls_high;
	u32 nested_vmx_true_procbased_ctls_low;
	u32 nested_vmx_secondary_ctls_low;
	u32 nested_vmx_secondary_ctls_high;
	u32 nested_vmx_pinbased_ctls_low;
	u32 nested_vmx_pinbased_ctls_high;
	u32 nested_vmx_exit_ctls_low;
	u32 nested_vmx_exit_ctls_high;
	u32 nested_vmx_true_exit_ctls_low;
	u32 nested_vmx_entry_ctls_low;
	u32 nested_vmx_entry_ctls_high;
	u32 nested_vmx_true_entry_ctls_low;
	u32 nested_vmx_misc_low;
	u32 nested_vmx_misc_high;
	u32 nested_vmx_ept_caps;
	u32 nested_vmx_vpid_caps;
};

/*
 * Posted-interrupt descriptor, 64-byte aligned. The bit positions quoted
 * on the individual fields are relative to the start of the descriptor.
 *
 * NOTE(review): local copy of a vmx.c-private structure; keep the layout
 * unchanged.
 */
struct pi_desc {
	u32 pir[8];     /* Posted interrupt requested */
	union {
		struct {
				/* bit 256 - Outstanding Notification */
			u16	on	: 1,
				/* bit 257 - Suppress Notification */
				sn	: 1,
				/* bit 271:258 - Reserved */
				rsvd_1	: 14;
				/* bit 279:272 - Notification Vector */
			u8	nv;
				/* bit 287:280 - Reserved */
			u8	rsvd_2;
				/* bit 319:288 - Notification Destination */
			u32	ndst;
		};
		/* the same 64 bits, accessible as a single word */
		u64 control;
	};
	u32 rsvd[6];
} __aligned(64);

/*
 * VMX-specific vcpu state. The generic struct kvm_vcpu is embedded as the
 * 'vcpu' member, which is what kgr_to_vmx() relies on to get from a
 * struct kvm_vcpu pointer back to the containing vcpu_vmx.
 *
 * NOTE(review): local copy of a vmx.c-private structure; field order and
 * sizes presumably have to match the running kvm_intel module exactly.
 */
struct vcpu_vmx {
	struct kvm_vcpu       vcpu;
	unsigned long         host_rsp;
	u8                    fail;
	bool                  nmi_known_unmasked;
	u32                   exit_intr_info;
	u32                   idt_vectoring_info;
	ulong                 rflags;
	struct shared_msr_entry *guest_msrs;
	int                   nmsrs;
	int                   save_nmsrs;
	unsigned long	      host_idt_base;
#ifdef CONFIG_X86_64
	u64 		      msr_host_kernel_gs_base;
	u64 		      msr_guest_kernel_gs_base;
#endif
	u32 vm_entry_controls_shadow;
	u32 vm_exit_controls_shadow;
	/*
	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
	 * non-nested (L1) guest, it always points to vmcs01. For a nested
	 * guest (L2), it points to a different VMCS.
	 */
	struct loaded_vmcs    vmcs01;
	struct loaded_vmcs   *loaded_vmcs;
	bool                  __launched; /* temporary, used in vmx_vcpu_run */
	struct msr_autoload {
		unsigned nr;
		struct vmx_msr_entry guest[KGR_NR_AUTOLOAD_MSRS];
		struct vmx_msr_entry host[KGR_NR_AUTOLOAD_MSRS];
	} msr_autoload;
	/* Host segment/MSR state captured by kgr_vmx_save_host_state(). */
	struct {
		int           loaded;
		u16           fs_sel, gs_sel, ldt_sel;
#ifdef CONFIG_X86_64
		u16           ds_sel, es_sel;
#endif
		int           gs_ldt_reload_needed;
		int           fs_reload_needed;
		u64           msr_host_bndcfgs;
		unsigned long vmcs_host_cr4;	/* May not match real cr4 */
	} host_state;
	struct {
		int vm86_active;
		ulong save_rflags;
		struct kvm_segment segs[8];
	} rmode;
	struct {
		u32 bitmask; /* 4 bits per segment (1 bit per field) */
		struct kvm_save_segment {
			u16 selector;
			unsigned long base;
			u32 limit;
			u32 ar;
		} seg[8];
	} segment_cache;
	int vpid;
	bool emulation_required;

	/* Support for vnmi-less CPUs */
	int soft_vnmi_blocked;
	ktime_t entry_time;
	s64 vnmi_blocked_time;
	u32 exit_reason;

	/* Posted interrupt descriptor */
	struct pi_desc pi_desc;

	/* Support for a guest hypervisor (nested VMX) */
	struct nested_vmx nested;

	/* Dynamic PLE window. */
	int ple_window;
	bool ple_window_dirty;

	/* Support for PML */
#define PML_ENTITY_NUM		512
	struct page *pml_pg;

	u64 current_tsc_ratio;

	u64 spec_ctrl;
};

/* inlined */
/* Map a struct kvm_vcpu pointer to the struct vcpu_vmx that embeds it. */
static inline struct vcpu_vmx *kgr_to_vmx(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *outer = container_of(vcpu, struct vcpu_vmx, vcpu);

	return outer;
}

/* inlined */
static inline struct vmcs12 *kgr_get_vmcs12(struct kvm_vcpu *vcpu)
{
	return kgr_to_vmx(vcpu)->nested.current_vmcs12;
}

/* inlined */
/*
 * Look up the page backing guest-physical address addr through
 * kvm_vcpu_gfn_to_page().
 *
 * Returns the page, or NULL if the lookup yielded an error page.
 */
static struct page *kgr_nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
{
	gfn_t frame = addr >> PAGE_SHIFT;
	struct page *pg = kgr_kvm_vcpu_gfn_to_page(vcpu, frame);

	return is_error_page(pg) ? NULL : pg;
}

/* inlined */
static void kgr_nested_release_page(struct page *page)
{
	kgr_kvm_release_page_dirty(page);
}

/* inlined */
static void kgr_nested_release_page_clean(struct page *page)
{
	kgr_kvm_release_page_clean(page);
}

/* inlined */
/*
 * Test whether any of the bits in 'bit' is set in vmcs12's
 * cpu_based_vm_exec_control field.
 */
static inline bool kgr_nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
	return (vmcs12->cpu_based_vm_exec_control & bit) != 0;
}

/* inlined */
/*
 * Test a secondary execution control: true only if vmcs12 has
 * CPU_BASED_ACTIVATE_SECONDARY_CONTROLS set and any of the bits in 'bit'
 * is set in secondary_vm_exec_control.
 */
static inline bool kgr_nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
{
	if (!(vmcs12->cpu_based_vm_exec_control &
	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
		return false;

	return (vmcs12->secondary_vm_exec_control & bit) != 0;
}

/* inlined */
static inline bool kgr_nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
{
	return kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
}

/* inlined */
static inline bool kgr_nested_cpu_has_vpid(struct vmcs12 *vmcs12)
{
	return kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VPID);
}

/* inlined */
static inline bool kgr_nested_cpu_has_apic_reg_virt(struct vmcs12 *vmcs12)
{
	return kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_APIC_REGISTER_VIRT);
}

/* inlined */
static inline bool kgr_nested_cpu_has_vid(struct vmcs12 *vmcs12)
{
	return kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}

/* inlined */
/* Is PIN_BASED_POSTED_INTR set in vmcs12's pin-based controls? */
static inline bool kgr_nested_cpu_has_posted_intr(struct vmcs12 *vmcs12)
{
	return (vmcs12->pin_based_vm_exec_control & PIN_BASED_POSTED_INTR) != 0;
}

/* inlined */
/*
 * Reset a loaded_vmcs: run vmcs_clear() on its VMCS, then mark it as not
 * launched and not associated with any cpu (-1).
 */
static inline void kgr_loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs)
{
	struct vmcs *region = loaded_vmcs->vmcs;

	kgr_vmcs_clear(region);
	loaded_vmcs->launched = 0;
	loaded_vmcs->cpu = -1;
}

/* inlined */
/*
 * Read one VMCS field.
 *
 * The field encoding is passed in the "d" register and the result comes
 * back in the "a" register, matching the operands implied by the name
 * ASM_VMX_VMREAD_RDX_RAX. The instruction is wrapped in kgr__ex_clear()
 * with "%0" as the register to clear, so a faulting read goes through the
 * fault-on-reboot fixup with the output register zeroed first.
 *
 * NOTE(review): the output operand carries the symbolic name
 * [thunk_target], but the visible template only refers to it as %0 --
 * the name looks unused; confirm before removing it.
 */
static __always_inline unsigned long kgr_vmcs_readl(unsigned long field)
{
	unsigned long value;

	asm volatile (kgr__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0")
		      : [thunk_target] "=a"(value) : "d"(field) : "cc");
	return value;
}

/* inlined */
/* Read a VMCS field and return it truncated to 32 bits. */
static __always_inline u32 kgr_vmcs_read32(unsigned long field)
{
	unsigned long raw = kgr_vmcs_readl(field);

	return raw;
}

/* inlined */
/*
 * Read a 64-bit VMCS field. On 64-bit builds this is a single read; on
 * other builds the value is assembled from 'field' (low half) and
 * 'field + 1' (high half).
 */
static __always_inline u64 kgr_vmcs_read64(unsigned long field)
{
#ifndef CONFIG_X86_64
	return kgr_vmcs_readl(field) | ((u64)kgr_vmcs_readl(field+1) << 32);
#else
	return kgr_vmcs_readl(field);
#endif
}

/* inlined */
/* Write a 16-bit value to a VMCS field via vmcs_writel(). */
static void kgr_vmcs_write16(unsigned long field, u16 value)
{
	unsigned long widened = value;

	kgr_vmcs_writel(field, widened);
}

/* inlined */
static void kgr_vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
	vmx->segment_cache.bitmask = 0;
}

/* inlined */
static inline bool kgr_vmx_control_verify(u32 control, u32 low, u32 high)
{
	/*
	 * Bits 0 in high must be 0, and bits 1 in low must be 1.
	 */
	return ((control & high) | low) == control;
}

/*
 * CR0 bits kgr_nested_cr0_valid() requires to be set (PE and PG are
 * waived there when unrestricted guest is in use).
 */
#define KGR_VMXON_CR0_ALWAYSON	(X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
/* CR4 counterpart of the above: the VMXE bit. */
#define KGR_VMXON_CR4_ALWAYSON	X86_CR4_VMXE

/* inlined */
/*
 * Allocate a VMCS via alloc_vmcs_cpu() for the cpu reported by
 * raw_smp_processor_id(). Returns whatever alloc_vmcs_cpu() returns.
 */
static struct vmcs *kgr_alloc_vmcs(void)
{
	int this_cpu = raw_smp_processor_id();

	return kgr_alloc_vmcs_cpu(this_cpu);
}

/* inlined */
/*
 * A guest-physical address is accepted if it is page aligned and has no
 * bits set at or above the vcpu's maxphyaddr.
 */
static bool kgr_page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	if (!PAGE_ALIGNED(gpa))
		return false;

	return (gpa >> kgr_cpuid_maxphyaddr(vcpu)) == 0;
}

/* inlined */
static bool kgr_nested_exit_on_intr(struct kvm_vcpu *vcpu)
{
	return kgr_get_vmcs12(vcpu)->pin_based_vm_exec_control &
		PIN_BASED_EXT_INTR_MASK;
}

/* inlined */
static bool kgr_nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
{
	return kgr_get_vmcs12(vcpu)->vm_exit_controls &
		VM_EXIT_ACK_INTR_ON_EXIT;
}

/* inlined */
static bool kgr_nested_cr0_valid(struct kvm_vcpu *vcpu, unsigned long val)
{
	unsigned long always_on = KGR_VMXON_CR0_ALWAYSON;
	struct vmcs12 *vmcs12 = kgr_get_vmcs12(vcpu);

	if (kgr_to_vmx(vcpu)->nested.nested_vmx_secondary_ctls_high &
		SECONDARY_EXEC_UNRESTRICTED_GUEST &&
	    kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
		always_on &= ~(X86_CR0_PE | X86_CR0_PG);
	return (val & always_on) == always_on;
}

/* inlined */
static struct loaded_vmcs *kgr_nested_get_current_vmcs02(struct vcpu_vmx *vmx)
{
	struct vmcs02_list *item;
	list_for_each_entry(item, &vmx->nested.vmcs02_pool, list)
		if (item->vmptr == vmx->nested.current_vmptr) {
			list_move(&item->list, &vmx->nested.vmcs02_pool);
			return &item->vmcs02;
		}

	if (vmx->nested.vmcs02_num >= max(KGR_VMCS02_POOL_SIZE, 1)) {
		/* Recycle the least recently used VMCS. */
		item = list_entry(vmx->nested.vmcs02_pool.prev,
			struct vmcs02_list, list);
		item->vmptr = vmx->nested.current_vmptr;
		list_move(&item->list, &vmx->nested.vmcs02_pool);
		return &item->vmcs02;
	}

	/* Create a new VMCS */
	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
	if (!item)
		return NULL;
	item->vmcs02.vmcs = kgr_alloc_vmcs();
	if (!item->vmcs02.vmcs) {
		kfree(item);
		return NULL;
	}
	kgr_loaded_vmcs_init(&item->vmcs02);
	item->vmptr = vmx->nested.current_vmptr;
	list_add(&(item->list), &(vmx->nested.vmcs02_pool));
	vmx->nested.vmcs02_num++;
	return &item->vmcs02;
}

/* inlined */
static int kgr_nested_vmx_check_vmcs12(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = kgr_to_vmx(vcpu);
	if (vmx->nested.current_vmptr == -1ull) {
		kgr_nested_vmx_failInvalid(vcpu);
		kgr_skip_emulated_instruction(vcpu);
		return 0;
	}
	return 1;
}

/* inlined */
static bool kgr_nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
					struct vmcs12 *vmcs12)
{
	struct vcpu_vmx *vmx = kgr_to_vmx(vcpu);
	int maxphyaddr = kgr_cpuid_maxphyaddr(vcpu);

	if (kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
		if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
		    vmcs12->apic_access_addr >> maxphyaddr)
			return false;

		/*
		 * Translate L1 physical address to host physical
		 * address for vmcs02. Keep the page pinned, so this
		 * physical address remains valid. We keep a reference
		 * to it so we can release it later.
		 */
		if (vmx->nested.apic_access_page) /* shouldn't happen */
			kgr_nested_release_page(vmx->nested.apic_access_page);
		vmx->nested.apic_access_page =
			kgr_nested_get_page(vcpu, vmcs12->apic_access_addr);
	}

	if (kgr_nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
		if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
		    vmcs12->virtual_apic_page_addr >> maxphyaddr)
			return false;

		if (vmx->nested.virtual_apic_page) /* shouldn't happen */
			kgr_nested_release_page(vmx->nested.virtual_apic_page);
		vmx->nested.virtual_apic_page =
			kgr_nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);

		/*
		 * Failing the vm entry is _not_ what the processor does
		 * but it's basically the only possibility we have.
		 * We could still enter the guest if CR8 load exits are
		 * enabled, CR8 store exits are enabled, and virtualize APIC
		 * access is disabled; in this case the processor would never
		 * use the TPR shadow and we could simply clear the bit from
		 * the execution control.  But such a configuration is useless,
		 * so let's keep the code simple.
		 */
		if (!vmx->nested.virtual_apic_page)
			return false;
	}

	if (kgr_nested_cpu_has_posted_intr(vmcs12)) {
		if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
		    vmcs12->posted_intr_desc_addr >> maxphyaddr)
			return false;

		if (vmx->nested.pi_desc_page) { /* shouldn't happen */
			kunmap(vmx->nested.pi_desc_page);
			kgr_nested_release_page(vmx->nested.pi_desc_page);
		}
		vmx->nested.pi_desc_page =
			kgr_nested_get_page(vcpu, vmcs12->posted_intr_desc_addr);
		if (!vmx->nested.pi_desc_page)
			return false;

		vmx->nested.pi_desc =
			(struct pi_desc *)kmap(vmx->nested.pi_desc_page);
		if (!vmx->nested.pi_desc) {
			kgr_nested_release_page_clean(vmx->nested.pi_desc_page);
			return false;
		}
		vmx->nested.pi_desc =
			(struct pi_desc *)((void *)vmx->nested.pi_desc +
			(unsigned long)(vmcs12->posted_intr_desc_addr &
			(PAGE_SIZE - 1)));
	}

	return true;
}

/* inlined */
/*
 * If vmcs12 enables CPU_BASED_USE_MSR_BITMAPS, its msr_bitmap address
 * must pass kgr_page_address_valid().
 *
 * Returns 0 if the check passes or does not apply, -EINVAL otherwise.
 */
static int kgr_nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
						    struct vmcs12 *vmcs12)
{
	if (kgr_nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) &&
	    !kgr_page_address_valid(vcpu, vmcs12->msr_bitmap))
		return -EINVAL;

	return 0;
}

/* inlined */
static int kgr_nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	if (!kgr_nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    !kgr_nested_cpu_has_apic_reg_virt(vmcs12) &&
	    !kgr_nested_cpu_has_vid(vmcs12) &&
	    !kgr_nested_cpu_has_posted_intr(vmcs12))
		return 0;

	/*
	 * If virtualize x2apic mode is enabled,
	 * virtualize apic access must be disabled.
	 */
	if (kgr_nested_cpu_has_virt_x2apic_mode(vmcs12) &&
	    kgr_nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
		return -EINVAL;

	/*
	 * If virtual interrupt delivery is enabled,
	 * we must exit on external interrupts.
	 */
	if (kgr_nested_cpu_has_vid(vmcs12) &&
	   !kgr_nested_exit_on_intr(vcpu))
		return -EINVAL;

	/*
	 * bits 15:8 should be zero in posted_intr_nv,
	 * the descriptor address has been already checked
	 * in nested_get_vmcs12_pages.
	 */
	if (kgr_nested_cpu_has_posted_intr(vmcs12) &&
	   (!kgr_nested_cpu_has_vid(vmcs12) ||
	    !kgr_nested_exit_intr_ack_set(vcpu) ||
	    vmcs12->posted_intr_nv & 0xff00))
		return -EINVAL;

	/* tpr shadow is needed by all apicv features. */
	if (!kgr_nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
		return -EINVAL;

	return 0;
}

/* inlined */
static int kgr_nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
						    struct vmcs12 *vmcs12)
{
	if (vmcs12->vm_exit_msr_load_count == 0 &&
	    vmcs12->vm_exit_msr_store_count == 0 &&
	    vmcs12->vm_entry_msr_load_count == 0)
		return 0; /* Fast path */
	if (kgr_nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
					    VM_EXIT_MSR_LOAD_ADDR) ||
	    kgr_nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
					    VM_EXIT_MSR_STORE_ADDR) ||
	    kgr_nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
					    VM_ENTRY_MSR_LOAD_ADDR))
		return -EINVAL;
	return 0;
}



/* patched */
/*
 * Patched replacement for vmx_save_host_state().
 *
 * Records the host's segment selectors and related MSRs in
 * vmx->host_state, programs the host FS/GS selector and base fields of
 * the VMCS, and pushes the guest's shared MSR values through
 * kvm_set_shared_msr(). Apart from the CVE-2018-3646 hook at the top,
 * everything is skipped if host_state.loaded is already set.
 */
void kgr_vmx_save_host_state(struct kvm_vcpu *vcpu)
{
	struct vcpu_vmx *vmx = kgr_to_vmx(vcpu);
	int i;

	/*
	 * Fix CVE-2018-3646
	 *  +2 lines
	 *
	 * Note: we can't rely on the kgr_enable_ept pointer being
	 * valid when executing in the context of kvm.ko. Hence we
	 * must either check it here or live with L1D flushes on
	 * !enable_ept hosts.
	 *
	 * This runs on every call, i.e. before the host_state.loaded
	 * early return below.
	 */
	if (!*kgr_enable_ept)
		kgr_get_and_clear_vcpu_unconfined(vcpu, GFP_ATOMIC);

	if (vmx->host_state.loaded)
		return;

	vmx->host_state.loaded = 1;
	/*
	 * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
	 * allow segment selectors with cpl > 0 or ti == 1.
	 */
	vmx->host_state.ldt_sel = kvm_read_ldt();
	/* A non-zero LDT selector already forces the gs/ldt reload. */
	vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel;
	savesegment(fs, vmx->host_state.fs_sel);
	/* The low three selector bits must be clear to be usable in the VMCS. */
	if (!(vmx->host_state.fs_sel & 7)) {
		kgr_vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel);
		vmx->host_state.fs_reload_needed = 0;
	} else {
		/* Unusable selector: load 0 and reload the real one later. */
		kgr_vmcs_write16(HOST_FS_SELECTOR, 0);
		vmx->host_state.fs_reload_needed = 1;
	}
	savesegment(gs, vmx->host_state.gs_sel);
	if (!(vmx->host_state.gs_sel & 7))
		kgr_vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel);
	else {
		kgr_vmcs_write16(HOST_GS_SELECTOR, 0);
		vmx->host_state.gs_ldt_reload_needed = 1;
	}

#ifdef CONFIG_X86_64
	savesegment(ds, vmx->host_state.ds_sel);
	savesegment(es, vmx->host_state.es_sel);
#endif

	/* Host FS/GS base: taken from the MSRs on 64-bit builds. */
#ifdef CONFIG_X86_64
	kgr_vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
	kgr_vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
#else
	kgr_vmcs_writel(HOST_FS_BASE, segment_base(vmx->host_state.fs_sel));
	kgr_vmcs_writel(HOST_GS_BASE, segment_base(vmx->host_state.gs_sel));
#endif

#ifdef CONFIG_X86_64
	/*
	 * Save the host's MSR_KERNEL_GS_BASE; install the guest's value
	 * only if the guest is in long mode.
	 */
	rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
	if (kgr_is_long_mode(&vmx->vcpu))
		wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
#endif
	if (boot_cpu_has(X86_FEATURE_MPX))
		rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
	/* Hand the first save_nmsrs guest MSR entries to kvm.ko. */
	for (i = 0; i < vmx->save_nmsrs; ++i)
		kgr_kvm_set_shared_msr(vmx->guest_msrs[i].index,
				       vmx->guest_msrs[i].data,
				       vmx->guest_msrs[i].mask);
}

/* patched */
/*
 * Patched replacement for vmx_handle_external_intr().
 *
 * If the exit interruption info recorded in the VMCS describes a valid
 * external interrupt, dispatch it by calling the handler found in the
 * host IDT for that vector, then mark the vcpu as unconfined (the
 * CVE-2018-3646 addition). Otherwise simply re-enable interrupts.
 */
void kgr_vmx_handle_external_intr(struct kvm_vcpu *vcpu)
{
	u32 exit_intr_info = kgr_vmcs_read32(VM_EXIT_INTR_INFO);

	/*
	 * If external interrupt exists, IF bit is set in rflags/eflags on the
	 * interrupt stack frame, and interrupt will be enabled on a return
	 * from interrupt handler.
	 */
	if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
			== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
		unsigned int vector;
		unsigned long entry;
		gate_desc *desc;
		struct vcpu_vmx *vmx = kgr_to_vmx(vcpu);
#ifdef CONFIG_X86_64
		unsigned long tmp;
#endif

		/* Locate the handler entry point in the host IDT. */
		vector =  exit_intr_info & INTR_INFO_VECTOR_MASK;
		desc = (gate_desc *)vmx->host_idt_base + vector;
		entry = gate_offset(*desc);
		/*
		 * Build an interrupt-style stack frame by hand and call the
		 * handler: on 64-bit the stack pointer is saved, aligned to
		 * 16 bytes, and SS plus the saved stack pointer are pushed;
		 * then the flags are pushed with bit 0x200 (IF) forced on,
		 * followed by CS. The call itself pushes the return address.
		 */
		asm volatile(
#ifdef CONFIG_X86_64
			"mov %%" _ASM_SP ", %[sp]\n\t"
			"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
			"push $%c[ss]\n\t"
			"push %[sp]\n\t"
#endif
			"pushf\n\t"
			"orl $0x200, (%%" _ASM_SP ")\n\t"
			__ASM_SIZE(push) " $%c[cs]\n\t"
			"call *%[entry]\n\t"
			:
#ifdef CONFIG_X86_64
			[sp]"=&r"(tmp)
#endif
			:
			[entry]"r"(entry),
			[ss]"i"(__KERNEL_DS),
			[cs]"i"(__KERNEL_CS)
			);

		/*
		 * Fix CVE-2018-3646
		 *  +1 line
		 */
		kgr_set_vcpu_unconfined(vcpu, GFP_ATOMIC);

	} else
		local_irq_enable();
}

/* patched, optimized */
/*
 * Common implementation behind kgr_handle_vmlaunch() and
 * kgr_handle_vmresume(): validate vmcs12 and, if it passes, switch the
 * vCPU to a vmcs02 and enter guest mode for the nested guest.
 *
 * @vcpu:   vCPU that executed the instruction.
 * @launch: true when called for VMLAUNCH, false for VMRESUME.
 *
 * Returns 1 on all paths handled completely here (including failures
 * reported through kgr_nested_vmx_failValid() or
 * kgr_nested_vmx_entry_failure()), -ENOMEM when no vmcs02 could be
 * obtained, or the result of kgr_kvm_vcpu_halt() when vmcs12 asks for
 * the HLT activity state.
 *
 * Compared to the unpatched function, the only addition is the call to
 * kgr_set_vcpu_unconfined() shortly before the end (CVE-2018-3646).
 */
static int kgr_nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
{
	struct vmcs12 *vmcs12;
	struct vcpu_vmx *vmx = kgr_to_vmx(vcpu);
	int cpu;
	struct loaded_vmcs *vmcs02;
	bool ia32e;
	u32 msr_entry_idx;

	if (!kgr_nested_vmx_check_permission(vcpu) ||
	    !kgr_nested_vmx_check_vmcs12(vcpu))
		return 1;

	kgr_skip_emulated_instruction(vcpu);
	vmcs12 = kgr_get_vmcs12(vcpu);

	if (*kgr_enable_shadow_vmcs)
		kgr_copy_shadow_to_vmcs12(vmx);

	/*
	 * The nested entry process starts with enforcing various prerequisites
	 * on vmcs12 as required by the Intel SDM, and act appropriately when
	 * they fail: As the SDM explains, some conditions should cause the
	 * instruction to fail, while others will cause the instruction to seem
	 * to succeed, but return an EXIT_REASON_INVALID_STATE.
	 * To speed up the normal (success) code path, we should avoid checking
	 * for misconfigurations which will anyway be caught by the processor
	 * when using the merged vmcs02.
	 */
	/*
	 * VMLAUNCH on an already launched vmcs12, or VMRESUME on one that has
	 * not been launched yet, fails with the matching VMX error.
	 */
	if (vmcs12->launch_state == launch) {
		kgr_nested_vmx_failValid(vcpu,
			launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
			       : VMXERR_VMRESUME_NONLAUNCHED_VMCS);
		return 1;
	}

	/* Only the ACTIVE and HLT guest activity states are accepted. */
	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	if (!kgr_nested_get_vmcs12_pages(vcpu, vmcs12)) {
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	/* The three control checks below signal a problem by returning non-zero. */
	if (kgr_nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) {
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	if (kgr_nested_vmx_check_apicv_controls(vcpu, vmcs12)) {
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	if (kgr_nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) {
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	/*
	 * Verify each vmcs12 control word against the low/high limits kept in
	 * vmx->nested for the corresponding control class.
	 */
	if (!kgr_vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
				    vmx->nested.nested_vmx_true_procbased_ctls_low,
				    vmx->nested.nested_vmx_procbased_ctls_high) ||
	    !kgr_vmx_control_verify(vmcs12->secondary_vm_exec_control,
				    vmx->nested.nested_vmx_secondary_ctls_low,
				    vmx->nested.nested_vmx_secondary_ctls_high) ||
	    !kgr_vmx_control_verify(vmcs12->pin_based_vm_exec_control,
				    vmx->nested.nested_vmx_pinbased_ctls_low,
				    vmx->nested.nested_vmx_pinbased_ctls_high) ||
	    !kgr_vmx_control_verify(vmcs12->vm_exit_controls,
				    vmx->nested.nested_vmx_true_exit_ctls_low,
				    vmx->nested.nested_vmx_exit_ctls_high) ||
	    !kgr_vmx_control_verify(vmcs12->vm_entry_controls,
				    vmx->nested.nested_vmx_true_entry_ctls_low,
				    vmx->nested.nested_vmx_entry_ctls_high))
	{
		kgr_nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
		return 1;
	}

	/* Host CR0/CR4 in vmcs12 must have all always-on bits set. */
	if (((vmcs12->host_cr0 & KGR_VMXON_CR0_ALWAYSON) != KGR_VMXON_CR0_ALWAYSON) ||
	    ((vmcs12->host_cr4 & KGR_VMXON_CR4_ALWAYSON) != KGR_VMXON_CR4_ALWAYSON)) {
		kgr_nested_vmx_failValid(vcpu,
			VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
		return 1;
	}

	/*
	 * From here on, problems are reported through
	 * kgr_nested_vmx_entry_failure() with an exit reason instead of
	 * kgr_nested_vmx_failValid().
	 */
	if (!kgr_nested_cr0_valid(vcpu, vmcs12->guest_cr0) ||
	    ((vmcs12->guest_cr4 & KGR_VMXON_CR4_ALWAYSON) != KGR_VMXON_CR4_ALWAYSON)) {
		kgr_nested_vmx_entry_failure(vcpu, vmcs12,
			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
		return 1;
	}
	/* Any VMCS link pointer other than all-ones is rejected. */
	if (vmcs12->vmcs_link_pointer != -1ull) {
		kgr_nested_vmx_entry_failure(vcpu, vmcs12,
			EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR);
		return 1;
	}

	/*
	 * If the load IA32_EFER VM-entry control is 1, the following checks
	 * are performed on the field for the IA32_EFER MSR:
	 * - Bits reserved in the IA32_EFER MSR must be 0.
	 * - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
	 *   the IA-32e mode guest VM-exit control. It must also be identical
	 *   to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
	 *   CR0.PG) is 1.
	 */
	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) {
		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
		if (!kgr_kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) ||
		    ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) ||
		    ((vmcs12->guest_cr0 & X86_CR0_PG) &&
		     ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) {
			kgr_nested_vmx_entry_failure(vcpu, vmcs12,
				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
			return 1;
		}
	}

	/*
	 * If the load IA32_EFER VM-exit control is 1, bits reserved in the
	 * IA32_EFER MSR must be 0 in the field for that register. In addition,
	 * the values of the LMA and LME bits in the field must each be that of
	 * the host address-space size VM-exit control.
	 */
	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
		ia32e = (vmcs12->vm_exit_controls &
			 VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0;
		if (!kgr_kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) ||
		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) ||
		    ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) {
			kgr_nested_vmx_entry_failure(vcpu, vmcs12,
				EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT);
			return 1;
		}
	}

	/*
	 * We're finally done with prerequisite checking, and can start with
	 * the nested entry.
	 */

	vmcs02 = kgr_nested_get_current_vmcs02(vmx);
	if (!vmcs02)
		return -ENOMEM;

	kgr_enter_guest_mode(vcpu);

	/* Remember vmcs01 values before switching away from it. */
	vmx->nested.vmcs01_tsc_offset = kgr_vmcs_read64(TSC_OFFSET);

	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
		vmx->nested.vmcs01_debugctl = kgr_vmcs_read64(GUEST_IA32_DEBUGCTL);

	/*
	 * Make vmcs02 the loaded VMCS: between get_cpu() and put_cpu() the
	 * vCPU is put and then loaded again on the current CPU.
	 */
	cpu = get_cpu();
	vmx->loaded_vmcs = vmcs02;
	kgr_vmx_vcpu_put(vcpu);
	kgr_vmx_vcpu_load(vcpu, cpu);
	vcpu->cpu = cpu;
	put_cpu();

	kgr_vmx_segment_cache_clear(vmx);

	kgr_prepare_vmcs02(vcpu, vmcs12);

	/*
	 * A non-zero result from loading the VM-entry MSR list is treated as
	 * a failure: undo the guest-mode switch, go back to vmcs01 and report
	 * EXIT_REASON_MSR_LOAD_FAIL together with the returned value.
	 */
	msr_entry_idx = kgr_nested_vmx_load_msr(vcpu,
						vmcs12->vm_entry_msr_load_addr,
						vmcs12->vm_entry_msr_load_count);
	if (msr_entry_idx) {
		kgr_leave_guest_mode(vcpu);
		kgr_vmx_load_vmcs01(vcpu);
		kgr_nested_vmx_entry_failure(vcpu, vmcs12,
				EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
		return 1;
	}

	/* Mark vmcs12 as launched (cf. the launch_state check above). */
	vmcs12->launch_state = 1;

	/*
	 * Fix CVE-2018-3646
	 *  +3 lines
	 */
	/* Hide L1D cache contents from the nested guest. */
	kgr_set_vcpu_unconfined(vcpu, GFP_KERNEL);

	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
		return kgr_kvm_vcpu_halt(vcpu);

	vmx->nested.nested_run_pending = 1;

	/*
	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
	 * returned as far as L1 is concerned. It will only return (and set
	 * the success flag) when L2 exits (see nested_vmx_vmexit()).
	 */
	return 1;
}

/* patched, calls nested_vmx_run() */
int kgr_handle_vmlaunch(struct kvm_vcpu *vcpu)
{
	return kgr_nested_vmx_run(vcpu, true);
}

/* patched, calls nested_vmx_run() */
int kgr_handle_vmresume(struct kvm_vcpu *vcpu)
{
	return kgr_nested_vmx_run(vcpu, false);
}


/*
 * Resolve every entry of kgr_funcs[] by name through kallsyms and store
 * the address found through the entry's ->addr pointer.
 *
 * Returns 0 when all symbols were resolved, or -ENOENT as soon as one
 * lookup fails.  Entries handled before the failing one keep the
 * addresses already stored.
 */
static int kgr_patch_bsc1099306_kvm_intel_kallsyms(void)
{
	int idx = 0;

	while (idx < ARRAY_SIZE(kgr_funcs)) {
		/* mod_find_symname would be nice, but it is not exported */
		unsigned long sym = kallsyms_lookup_name(kgr_funcs[idx].name);

		if (sym == 0) {
			pr_err("kgraft-patch: symbol %s not resolved\n",
				kgr_funcs[idx].name);
			return -ENOENT;
		}

		*(kgr_funcs[idx].addr) = (void *)sym;
		idx++;
	}

	return 0;
}

/*
 * Module notifier callback: once KGR_PATCHED_MODULE reaches
 * MODULE_STATE_COMING, perform the kallsyms lookups that could not be
 * done while the module was absent.
 *
 * @nb:     notifier block this callback was registered with (unused)
 * @action: module state being announced
 * @data:   the struct module the notification is about
 *
 * Returns 0 for notifications that are not of interest, otherwise the
 * result of the symbol lookup.  A failed lookup additionally triggers a
 * WARN.
 */
static int
kgr_patch_bsc1099306_kvm_intel_module_notify(struct notifier_block *nb,
					     unsigned long action, void *data)
{
	struct module *coming = data;
	int err;

	/* Ignore every state change except "coming"... */
	if (action != MODULE_STATE_COMING)
		return 0;

	/* ...and every module except the one being patched. */
	if (strcmp(coming->name, KGR_PATCHED_MODULE) != 0)
		return 0;

	err = kgr_patch_bsc1099306_kvm_intel_kallsyms();
	WARN(err, "kgraft-patch: delayed kallsyms lookup failed. System is broken and can crash.\n");

	return err;
}

/*
 * Notifier block routing module state changes to
 * kgr_patch_bsc1099306_kvm_intel_module_notify().  Its priority is one
 * above the minimum int value.
 * NOTE(review): presumably chosen so the callback runs after most other
 * module notifiers -- confirm.
 */
static struct notifier_block kgr_patch_bsc1099306_kvm_intel_module_nb = {
	.priority	= INT_MIN + 1,
	.notifier_call	= kgr_patch_bsc1099306_kvm_intel_module_notify,
};

/*
 * Set up the kvm_intel part of the live patch.
 *
 * With module_mutex held: if KGR_PATCHED_MODULE is found, resolve the
 * required symbols right away; then, unless that failed, register the
 * module notifier that performs the lookup when the module is loaded
 * later.
 *
 * Returns 0 on success, the lookup error if symbol resolution failed
 * (the notifier is not registered in that case), or the result of
 * register_module_notifier().
 */
int __kgr_patch_bsc1099306_kvm_intel_init(void)
{
	int err = 0;

	mutex_lock(&module_mutex);

	if (find_module(KGR_PATCHED_MODULE))
		err = kgr_patch_bsc1099306_kvm_intel_kallsyms();

	if (!err)
		err = register_module_notifier(&kgr_patch_bsc1099306_kvm_intel_module_nb);

	mutex_unlock(&module_mutex);
	return err;
}

/*
 * Tear down the kvm_intel part of the live patch by unregistering the
 * module notifier registered in __kgr_patch_bsc1099306_kvm_intel_init().
 */
void __kgr_patch_bsc1099306_kvm_intel_cleanup(void)
{
	struct notifier_block *nb = &kgr_patch_bsc1099306_kvm_intel_module_nb;

	unregister_module_notifier(nb);
}

#endif /* IS_ENABLED(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM) */
