/*
 * livepatch_bsc1203624
 *
 * Fix for CVE-2022-41222, bsc#1203624
 *
 *  Upstream commit:
 *  97113eb39fa7 ("mm/mremap: hold the rmap lock in write mode when moving page
 *                table entries.")
 *
 *  SLE12-SP4, SLE12-SP5, SLE15 and SLE15-SP1 commit:
 *  not affected
 *
 *  SLE15-SP2 and -SP3 commit:
 *  07909f0fb7f2ce54d63319186784604fb4c46754
 *
 *  SLE15-SP4 commit:
 *  not affected
 *
 *
 *  Copyright (c) 2022 SUSE
 *  Author: Nicolai Stange <nstange@suse.de>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */


#if IS_ENABLED(CONFIG_HAVE_MOVE_PMD)

#if !IS_ENABLED(CONFIG_X86_64)
#error "Unexpected architecture, support not implemented"
#endif

/* klp-ccp: from mm/mremap.c */
#include <linux/mm.h>

/* klp-ccp: from include/asm-generic/pgtable.h */
/*
 * Pointers to the kernel's {pgd,p4d,pud,pmd}_clear_bad() functions. They are
 * resolved by symbol name through klp_funcs[] in livepatch_bsc1203624_init()
 * and must not be called before that has succeeded.
 */
static void (*klpe_pgd_clear_bad)(pgd_t *);
static void (*klpe_p4d_clear_bad)(p4d_t *);
static void (*klpe_pud_clear_bad)(pud_t *);
static void (*klpe_pmd_clear_bad)(pmd_t *);

/*
 * Check whether the PGD entry at @pgd can be descended into.
 *
 * Returns 1 if the entry is empty or bad, 0 if it is usable. A bad entry is
 * passed to pgd_clear_bad() (via klpe_pgd_clear_bad) before returning.
 */
static inline int klpr_pgd_none_or_clear_bad(pgd_t *pgd)
{
	int unusable = 1;

	if (!pgd_none(*pgd)) {
		if (unlikely(pgd_bad(*pgd)))
			(*klpe_pgd_clear_bad)(pgd);
		else
			unusable = 0;
	}

	return unusable;
}

/*
 * Check whether the P4D entry at @p4d can be descended into.
 *
 * Returns 1 if the entry is empty or bad, 0 if it is usable. A bad entry is
 * passed to p4d_clear_bad() (via klpe_p4d_clear_bad) before returning.
 */
static inline int klpr_p4d_none_or_clear_bad(p4d_t *p4d)
{
	int unusable = 1;

	if (!p4d_none(*p4d)) {
		if (unlikely(p4d_bad(*p4d)))
			(*klpe_p4d_clear_bad)(p4d);
		else
			unusable = 0;
	}

	return unusable;
}

/*
 * Check whether the PUD entry at @pud can be descended into.
 *
 * Returns 1 if the entry is empty or bad, 0 if it is usable. A bad entry is
 * passed to pud_clear_bad() (via klpe_pud_clear_bad) before returning.
 */
static inline int klpr_pud_none_or_clear_bad(pud_t *pud)
{
	int unusable = 1;

	if (!pud_none(*pud)) {
		if (unlikely(pud_bad(*pud)))
			(*klpe_pud_clear_bad)(pud);
		else
			unusable = 0;
	}

	return unusable;
}

/*
 * Classify the PMD entry at @pmd from a single snapshot of its value.
 *
 * Returns 1 if the snapshot is empty, is a transparent huge page, is
 * non-present (checked only with CONFIG_ARCH_ENABLE_THP_MIGRATION), or is
 * bad; a bad entry is additionally passed to pmd_clear_bad() (via
 * klpe_pmd_clear_bad). Returns 0 otherwise.
 */
static inline int klpr_pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
{
	/* Read the entry once; every check below uses this local copy. */
	pmd_t pmdval = pmd_read_atomic(pmd);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	/*
	 * NOTE(review): presumably prevents the compiler from re-reading
	 * *pmd in place of the snapshot -- confirm against the upstream
	 * pmd_none_or_trans_huge_or_clear_bad().
	 */
	barrier();
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif
	if (pmd_none(pmdval) || pmd_trans_huge(pmdval) ||
		(IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION) && !pmd_present(pmdval)))
		return 1;
	if (unlikely(pmd_bad(pmdval))) {
		/* The clear operates on the live entry, not on the snapshot. */
		(*klpe_pmd_clear_bad)(pmd);
		return 1;
	}
	return 0;
}

/*
 * With CONFIG_TRANSPARENT_HUGEPAGE (the only configuration this file builds
 * for) this simply forwards to klpr_pmd_none_or_trans_huge_or_clear_bad() and
 * returns its result: 1 if @pmd should not be treated as a regular page
 * table, 0 otherwise.
 */
static inline int klpr_pmd_trans_unstable(pmd_t *pmd)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return klpr_pmd_none_or_trans_huge_or_clear_bad(pmd);
#else
#error "klp-ccp: non-taken branch"
#endif
}

/* klp-ccp: from include/linux/huge_mm.h */
/*
 * Pointer to the kernel's move_huge_pmd(); resolved by symbol name through
 * klp_funcs[] in livepatch_bsc1203624_init().
 */
static bool (*klpe_move_huge_pmd)(struct vm_area_struct *vma, unsigned long old_addr,
			 unsigned long new_addr, unsigned long old_end,
			 pmd_t *old_pmd, pmd_t *new_pmd);

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

/*
 * Pointer to the kernel's __split_huge_pmd(); resolved by symbol name through
 * klp_funcs[] in livepatch_bsc1203624_init().
 */
static void (*klpe___split_huge_pmd)(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long address, bool freeze, struct page *page);

/*
 * Split the huge PMD at @__pmd, if it is one.
 *
 * __split_huge_pmd() is invoked (through klpe___split_huge_pmd, with
 * freeze == false and page == NULL) only when the entry is a swap PMD, a
 * transparent huge PMD or a devmap PMD; otherwise this is a no-op.
 *
 * @__pmd is evaluated exactly once: the cached pointer is used both for the
 * checks and for the call, so an argument with side effects is safe. @__vma
 * and @__address are parenthesized and evaluated at most once.
 */
#define klpr_split_huge_pmd(__vma, __pmd, __address)				\
	do {								\
		pmd_t *____pmd = (__pmd);				\
		if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)	\
					|| pmd_devmap(*____pmd))	\
			(*klpe___split_huge_pmd)((__vma), ____pmd,	\
						(__address), false, NULL); \
	}  while (0)

#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#error "klp-ccp: non-taken branch"
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/* klp-ccp: from include/linux/mm.h */
/*
 * Forward declaration of the patched function defined further down in this
 * file; it has external linkage.
 */
unsigned long klpp_move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks);

/*
 * Pointers to the kernel's page table allocators for the P4D, PUD and PMD
 * levels. Each is resolved by symbol name through klp_funcs[] in
 * livepatch_bsc1203624_init(). Configurations with a folded level are
 * rejected at build time by the #error branches.
 */
#ifdef __PAGETABLE_P4D_FOLDED
#error "klp-ccp: non-taken branch"
#else
/* Resolved from "__p4d_alloc". */
static int (*klpe___p4d_alloc)(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
#endif

#if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU)
#error "klp-ccp: non-taken branch"
#else
/* Resolved from "__pud_alloc". */
static int (*klpe___pud_alloc)(struct mm_struct *mm, p4d_t *p4d, unsigned long address);

#endif

#if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU)
#error "klp-ccp: non-taken branch"
#else
/* Resolved from "__pmd_alloc". */
static int (*klpe___pmd_alloc)(struct mm_struct *mm, pud_t *pud, unsigned long address);

#endif

/*
 * Pointer to the kernel's __pte_alloc(); resolved by symbol name through
 * klp_funcs[] in livepatch_bsc1203624_init().
 */
static int (*klpe___pte_alloc)(struct mm_struct *mm, pmd_t *pmd);

/*
 * Make sure a PTE page table exists below @pmd.
 *
 * __pte_alloc() is called only when *pmd is empty. The expression is
 * non-zero only if that call was made and returned non-zero, and zero
 * otherwise. Note that @pmd is expanded twice and @mm is not parenthesized,
 * so pass simple, side-effect-free expressions.
 */
#define klpr_pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && (*klpe___pte_alloc)(mm, pmd))

#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)

#ifndef __ARCH_HAS_5LEVEL_HACK
/*
 * Return the P4D entry for @address below @pgd, allocating the P4D table
 * first if *pgd is empty.
 *
 * Returns NULL if the allocation (via klpe___p4d_alloc) reported failure
 * with a non-zero return value.
 */
static inline p4d_t *klpr_p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
		unsigned long address)
{
	if (unlikely(pgd_none(*pgd)) && (*klpe___p4d_alloc)(mm, pgd, address))
		return NULL;

	return p4d_offset(pgd, address);
}

/*
 * Return the PUD entry for @address below @p4d, allocating the PUD table
 * first if *p4d is empty.
 *
 * Returns NULL if the allocation (via klpe___pud_alloc) reported failure
 * with a non-zero return value.
 */
static inline pud_t *klpr_pud_alloc(struct mm_struct *mm, p4d_t *p4d,
		unsigned long address)
{
	if (unlikely(p4d_none(*p4d)) && (*klpe___pud_alloc)(mm, p4d, address))
		return NULL;

	return pud_offset(p4d, address);
}
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif /* !__ARCH_HAS_5LEVEL_HACK */

/*
 * Return the PMD entry for @address below @pud, allocating the PMD table
 * first if *pud is empty.
 *
 * Returns NULL if the allocation (via klpe___pmd_alloc) reported failure
 * with a non-zero return value.
 */
static inline pmd_t *klpr_pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
	if (unlikely(pud_none(*pud)) && (*klpe___pmd_alloc)(mm, pud, address))
		return NULL;

	return pmd_offset(pud, address);
}
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */

/* klp-ccp: from mm/mremap.c */
#include <linux/hugetlb.h>

/* klp-ccp: from include/linux/hugetlb.h */
#ifdef CONFIG_HUGETLB_PAGE

/* klp-ccp: from arch/x86/include/asm/tlbflush.h */
/*
 * Pointer to the kernel's flush_tlb_mm_range(); resolved by symbol name
 * through klp_funcs[] in livepatch_bsc1203624_init().
 */
static void (*klpe_flush_tlb_mm_range)(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift,
				bool freed_tables);

/*
 * Flush the TLB for the range @start..@end of @vma's mm.
 *
 * The stride passed to flush_tlb_mm_range() is the hstate's huge page shift
 * for VM_HUGETLB VMAs and PAGE_SHIFT otherwise; freed_tables is always
 * false. @vma is expanded several times, so pass a side-effect-free
 * expression.
 */
#define klpr_flush_tlb_range(vma, start, end)				\
	(*klpe_flush_tlb_mm_range)((vma)->vm_mm, start, end,		\
			   ((vma)->vm_flags & VM_HUGETLB)		\
				? huge_page_shift(hstate_vma(vma))	\
				: PAGE_SHIFT, false)

/* klp-ccp: from include/linux/hugetlb.h */
#else /* !CONFIG_HUGETLB_PAGE */
#error "klp-ccp: non-taken branch"
#endif /* !CONFIG_HUGETLB_PAGE */

/* klp-ccp: from mm/mremap.c */
#include <linux/shm.h>
#include <linux/ksm.h>
#include <linux/swap.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/swapops.h>
#include <linux/highmem.h>
#include <linux/mmu_notifier.h>

/* klp-ccp: from include/linux/mmu_notifier.h */
#ifdef CONFIG_MMU_NOTIFIER

/*
 * Pointers to the kernel's __mmu_notifier_invalidate_range_start() and
 * __mmu_notifier_invalidate_range_end(); resolved by symbol name through
 * klp_funcs[] in livepatch_bsc1203624_init().
 */
static int (*klpe___mmu_notifier_invalidate_range_start)(struct mmu_notifier_range *r);
static void (*klpe___mmu_notifier_invalidate_range_end)(struct mmu_notifier_range *r,
				  bool only_end);

/*
 * Announce the start of an invalidation of @range to MMU notifiers.
 *
 * May sleep. When mm_has_notifiers(range->mm) holds, the range is marked
 * MMU_NOTIFIER_RANGE_BLOCKABLE and __mmu_notifier_invalidate_range_start()
 * is called through its resolved pointer; its return value is ignored.
 */
static inline void
klpr_mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
	might_sleep();

	/* Lockdep annotation bracketing the notifier call below. */
	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	if (mm_has_notifiers(range->mm)) {
		range->flags |= MMU_NOTIFIER_RANGE_BLOCKABLE;
		(*klpe___mmu_notifier_invalidate_range_start)(range);
	}
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}

/*
 * Announce the end of an invalidation of @range to MMU notifiers.
 *
 * May sleep if the range is blockable. Does nothing further unless
 * mm_has_notifiers(range->mm) holds, in which case
 * __mmu_notifier_invalidate_range_end() is called through its resolved
 * pointer with only_end == false.
 */
static inline void
klpr_mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range)
{
	if (mmu_notifier_range_blockable(range))
		might_sleep();

	if (!mm_has_notifiers(range->mm))
		return;

	(*klpe___mmu_notifier_invalidate_range_end)(range, false);
}

#else /* CONFIG_MMU_NOTIFIER */
#error "klp-ccp: non-taken branch"
#endif /* CONFIG_MMU_NOTIFIER */

/* klp-ccp: from mm/mremap.c */
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
/* klp-ccp: from mm/internal.h */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/tracepoint-defs.h>

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH

/*
 * Pointer to the kernel's flush_tlb_batched_pending(); resolved by symbol
 * name through klp_funcs[] in livepatch_bsc1203624_init().
 */
static void (*klpe_flush_tlb_batched_pending)(struct mm_struct *mm);
#else
#error "klp-ccp: non-taken branch"
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */

/* klp-ccp: from mm/mremap.c */
/*
 * Walk @mm's page tables down to the PMD entry covering @addr without
 * allocating anything.
 *
 * Returns NULL if any upper level (PGD, P4D, PUD) is empty or bad, or if
 * the PMD entry itself is empty; otherwise returns a pointer to the PMD
 * entry.
 */
static pmd_t *klpr_get_old_pmd(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	if (klpr_pgd_none_or_clear_bad(pgdp))
		return NULL;

	p4dp = p4d_offset(pgdp, addr);
	if (klpr_p4d_none_or_clear_bad(p4dp))
		return NULL;

	pudp = pud_offset(p4dp, addr);
	if (klpr_pud_none_or_clear_bad(pudp))
		return NULL;

	pmdp = pmd_offset(pudp, addr);

	return pmd_none(*pmdp) ? NULL : pmdp;
}

/*
 * Walk @mm's page tables down to the PMD entry covering @addr, allocating
 * missing P4D, PUD and PMD tables on the way.
 *
 * @vma is not used. Returns NULL if any allocation fails, otherwise a
 * pointer to the PMD entry, which is asserted (VM_BUG_ON) not to be a
 * transparent huge PMD.
 */
static pmd_t *klpr_alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
			    unsigned long addr)
{
	pgd_t *pgdp = pgd_offset(mm, addr);
	p4d_t *p4dp;
	pud_t *pudp;
	pmd_t *pmdp;

	p4dp = klpr_p4d_alloc(mm, pgdp, addr);
	if (p4dp == NULL)
		return NULL;

	pudp = klpr_pud_alloc(mm, p4dp, addr);
	if (pudp == NULL)
		return NULL;

	pmdp = klpr_pmd_alloc(mm, pudp, addr);
	if (pmdp == NULL)
		return NULL;

	VM_BUG_ON(pmd_trans_huge(*pmdp));

	return pmdp;
}

/*
 * Take @vma's reverse-mapping locks in write mode: first the i_mmap lock of
 * the backing file's mapping (if the VMA is file-backed), then the anon_vma
 * lock (if the VMA has an anon_vma). drop_rmap_locks() releases them in the
 * opposite order.
 */
static void take_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->vm_file)
		i_mmap_lock_write(vma->vm_file->f_mapping);
	if (vma->anon_vma)
		anon_vma_lock_write(vma->anon_vma);
}

/*
 * Release the locks taken by take_rmap_locks(), in reverse order: the
 * anon_vma lock first (if the VMA has an anon_vma), then the i_mmap lock of
 * the backing file's mapping (if the VMA is file-backed).
 */
static void drop_rmap_locks(struct vm_area_struct *vma)
{
	if (vma->anon_vma)
		anon_vma_unlock_write(vma->anon_vma);
	if (vma->vm_file)
		i_mmap_unlock_write(vma->vm_file->f_mapping);
}

/*
 * Return @pte with its soft-dirty bit set: the regular soft-dirty bit for a
 * present PTE, the swap variant for a swap PTE. Any other PTE is returned
 * unchanged. Only built with CONFIG_MEM_SOFT_DIRTY.
 */
static pte_t move_soft_dirty_pte(pte_t pte)
{

#ifdef CONFIG_MEM_SOFT_DIRTY
	if (pte_present(pte))
		return pte_mksoft_dirty(pte);
	if (is_swap_pte(pte))
		return pte_swp_mksoft_dirty(pte);
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif
	return pte;
}

/*
 * Move the PTEs that map [old_addr, old_end) below @old_pmd to the page
 * table below @new_pmd, starting at @new_addr.
 *
 * @vma:             source VMA; its mm is used for both page tables.
 * @new_vma:         destination VMA; only its vm_page_prot is read here.
 * @need_rmap_locks: when true, @vma's rmap locks are held in write mode
 *                   for the whole move.
 *
 * Every non-empty source PTE is cleared, passed through move_pte() and
 * move_soft_dirty_pte(), and installed at the destination. If any moved PTE
 * was present, the source range is TLB-flushed before the page table locks
 * are released.
 */
static void klpr_move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
		unsigned long old_addr, unsigned long old_end,
		struct vm_area_struct *new_vma, pmd_t *new_pmd,
		unsigned long new_addr, bool need_rmap_locks)
{
	struct mm_struct *mm = vma->vm_mm;
	pte_t *old_pte, *new_pte, pte;
	spinlock_t *old_ptl, *new_ptl;
	bool force_flush = false;
	/* Captured up front because old_addr is advanced by the loop below. */
	unsigned long len = old_end - old_addr;

	/*
	 * When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
	 * locks to ensure that rmap will always observe either the old or the
	 * new ptes. This is the easiest way to avoid races with
	 * truncate_pagecache(), page migration, etc...
	 *
	 * When need_rmap_locks is false, we use other ways to avoid
	 * such races:
	 *
	 * - During exec() shift_arg_pages(), we use a specially tagged vma
	 *   which rmap call sites look for using is_vma_temporary_stack().
	 *
	 * - During mremap(), new_vma is often known to be placed after vma
	 *   in rmap traversal order. This ensures rmap will always observe
	 *   either the old pte, or the new pte, or both (the page table locks
	 *   serialize access to individual ptes, but only rmap traversal
	 *   order guarantees that we won't miss both the old and new ptes).
	 */
	if (need_rmap_locks)
		take_rmap_locks(vma);

	/*
	 * We don't have to worry about the ordering of src and dst
	 * pte locks because exclusive mmap_sem prevents deadlock.
	 */
	old_pte = pte_offset_map_lock(mm, old_pmd, old_addr, &old_ptl);
	new_pte = pte_offset_map(new_pmd, new_addr);
	new_ptl = pte_lockptr(mm, new_pmd);
	/* Source and destination may share one lock; take it only once. */
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
	(*klpe_flush_tlb_batched_pending)(vma->vm_mm);
	arch_enter_lazy_mmu_mode();

	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
				   new_pte++, new_addr += PAGE_SIZE) {
		if (pte_none(*old_pte))
			continue;

		pte = ptep_get_and_clear(mm, old_addr, old_pte);
		/*
		 * If we are remapping a valid PTE, make sure
		 * to flush TLB before we drop the PTL for the
		 * PTE.
		 *
		 * NOTE! Both old and new PTL matter: the old one
		 * for racing with page_mkclean(), the new one to
		 * make sure the physical page stays valid until
		 * the TLB entry for the old mapping has been
		 * flushed.
		 */
		if (pte_present(pte))
			force_flush = true;
		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
		pte = move_soft_dirty_pte(pte);
		set_pte_at(mm, new_addr, new_pte, pte);
	}

	arch_leave_lazy_mmu_mode();
	/* old_end - len is the value old_addr had on entry. */
	if (force_flush)
		klpr_flush_tlb_range(vma, old_end - len, old_end);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	/*
	 * Both pointers were advanced one entry past the last one handled;
	 * step back by one before unmapping.
	 */
	pte_unmap(new_pte - 1);
	pte_unmap_unlock(old_pte - 1, old_ptl);
	if (need_rmap_locks)
		drop_rmap_locks(vma);
}

#ifdef CONFIG_HAVE_MOVE_PMD
/*
 * Try to move a whole PMD entry from @old_pmd to @new_pmd instead of moving
 * the PTEs below it one by one.
 *
 * Returns false, without modifying anything, if @old_addr or @new_addr is
 * not PMD-aligned, if fewer than PMD_SIZE bytes remain up to @old_end, or if
 * the destination PMD is already populated (which also triggers a WARN).
 * Returns true after the entry has been moved and the old PMD_SIZE range
 * has been TLB-flushed.
 *
 * Only the PMD page table locks are taken here; no rmap lock is taken by
 * this function itself.
 */
static bool klpr_move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
		  unsigned long new_addr, unsigned long old_end,
		  pmd_t *old_pmd, pmd_t *new_pmd)
{
	spinlock_t *old_ptl, *new_ptl;
	struct mm_struct *mm = vma->vm_mm;
	pmd_t pmd;

	if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK)
	    || old_end - old_addr < PMD_SIZE)
		return false;

	/*
	 * The destination pmd shouldn't be established, free_pgtables()
	 * should have released it.
	 */
	if (WARN_ON(!pmd_none(*new_pmd)))
		return false;

	/*
	 * We don't have to worry about the ordering of src and dst
	 * ptlocks because exclusive mmap_sem prevents deadlock.
	 */
	old_ptl = pmd_lock(vma->vm_mm, old_pmd);
	new_ptl = pmd_lockptr(mm, new_pmd);
	/* Source and destination may share one lock; take it only once. */
	if (new_ptl != old_ptl)
		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);

	/* Clear the pmd */
	pmd = *old_pmd;
	pmd_clear(old_pmd);

	/* Re-check the destination now that its lock is held. */
	VM_BUG_ON(!pmd_none(*new_pmd));

	/* Set the new pmd */
	set_pmd_at(mm, new_addr, new_pmd, pmd);
	/* Flush the old range while both page table locks are still held. */
	klpr_flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
	if (new_ptl != old_ptl)
		spin_unlock(new_ptl);
	spin_unlock(old_ptl);

	return true;
}
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif

/*
 * Patched page table mover carrying the fix for CVE-2022-41222
 * (presumably installed as the replacement for the kernel's
 * move_page_tables() -- see the livepatch registration, which is not part
 * of this file).
 *
 * Moves the page table entries covering [old_addr, old_addr + len) of @vma
 * to @new_addr in @new_vma, one extent at a time. An extent never crosses a
 * PMD boundary in either the source or the destination.
 *
 * @need_rmap_locks: forwarded to klpr_move_ptes() and honoured for huge PMD
 *                   moves. For normal PMD moves the rmap locks are always
 *                   taken; that is the CVE fix.
 *
 * Returns the number of bytes processed: @len on completion, less if a page
 * table allocation failed part-way.
 */
unsigned long klpp_move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
		bool need_rmap_locks)
{
	unsigned long extent, next, old_end;
	struct mmu_notifier_range range;
	pmd_t *old_pmd, *new_pmd;

	old_end = old_addr + len;
	flush_cache_range(vma, old_addr, old_end);

	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
				old_addr, old_end);
	klpr_mmu_notifier_invalidate_range_start(&range);

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		cond_resched();
		next = (old_addr + PMD_SIZE) & PMD_MASK;
		/* even if next overflowed, extent below will be ok */
		extent = next - old_addr;
		if (extent > old_end - old_addr)
			extent = old_end - old_addr;
		old_pmd = klpr_get_old_pmd(vma->vm_mm, old_addr);
		/* Nothing mapped at the source: skip this extent. */
		if (!old_pmd)
			continue;
		new_pmd = klpr_alloc_new_pmd(vma->vm_mm, vma, new_addr);
		/* Allocation failure: stop and report what was done so far. */
		if (!new_pmd)
			break;
		if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) || pmd_devmap(*old_pmd)) {
			if (extent == HPAGE_PMD_SIZE) {
				bool moved;
				/* See comment in move_ptes() */
				if (need_rmap_locks)
					take_rmap_locks(vma);
				moved = (*klpe_move_huge_pmd)(vma, old_addr, new_addr,
						    old_end, old_pmd, new_pmd);
				if (need_rmap_locks)
					drop_rmap_locks(vma);
				if (moved)
					continue;
			}
			/* Could not move it as a whole: split and fall back to PTEs. */
			klpr_split_huge_pmd(vma, old_pmd, old_addr);
			if (klpr_pmd_trans_unstable(old_pmd))
				continue;
		} else if (extent == PMD_SIZE) {
#ifdef CONFIG_HAVE_MOVE_PMD
			bool moved;

			/*
			 * Fix CVE-2022-41222
			 *  -2 lines, +1 line
			 *
			 * The rmap locks are taken unconditionally here,
			 * i.e. even when need_rmap_locks is false.
			 */
			take_rmap_locks(vma);
			moved = klpr_move_normal_pmd(vma, old_addr, new_addr,
					old_end, old_pmd, new_pmd);
			/*
			 * Fix CVE-2022-41222
			 *  -2 lines, +1 line
			 *
			 * Matching unconditional release.
			 */
			drop_rmap_locks(vma);
			if (moved)
				continue;
#else
#error "klp-ccp: a preceeding branch should have been taken"
#endif
		}

		if (klpr_pte_alloc(new_vma->vm_mm, new_pmd))
			break;
		/* Also keep the extent within one PMD on the destination side. */
		next = (new_addr + PMD_SIZE) & PMD_MASK;
		if (extent > next - new_addr)
			extent = next - new_addr;
		klpr_move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
			  new_pmd, new_addr, need_rmap_locks);
	}

	klpr_mmu_notifier_invalidate_range_end(&range);

	return len + old_addr - old_end;	/* how much done */
}



#include <linux/kernel.h>
#include <linux/module.h>
#include "livepatch_bsc1203624.h"
#include "../kallsyms_relocs.h"

/*
 * Table pairing each kernel symbol name with the address of the klpe_*
 * function pointer that is to receive it. Handed to
 * __klp_resolve_kallsyms_relocs() by livepatch_bsc1203624_init(). Every
 * klpe_* pointer declared in this file has an entry; entries are kept
 * sorted by symbol name.
 */
static struct klp_kallsyms_reloc klp_funcs[] = {
	{ "__mmu_notifier_invalidate_range_end",
	  (void *)&klpe___mmu_notifier_invalidate_range_end },
	{ "__mmu_notifier_invalidate_range_start",
	  (void *)&klpe___mmu_notifier_invalidate_range_start },
	{ "__p4d_alloc", (void *)&klpe___p4d_alloc },
	{ "__pmd_alloc", (void *)&klpe___pmd_alloc },
	{ "__pte_alloc", (void *)&klpe___pte_alloc },
	{ "__pud_alloc", (void *)&klpe___pud_alloc },
	{ "__split_huge_pmd", (void *)&klpe___split_huge_pmd },
	{ "flush_tlb_batched_pending",
	  (void *)&klpe_flush_tlb_batched_pending },
	{ "flush_tlb_mm_range", (void *)&klpe_flush_tlb_mm_range },
	{ "move_huge_pmd", (void *)&klpe_move_huge_pmd },
	{ "p4d_clear_bad", (void *)&klpe_p4d_clear_bad },
	{ "pgd_clear_bad", (void *)&klpe_pgd_clear_bad },
	{ "pmd_clear_bad", (void *)&klpe_pmd_clear_bad },
	{ "pud_clear_bad", (void *)&klpe_pud_clear_bad },
};

/*
 * Resolve every symbol listed in klp_funcs[] so that the klpe_* pointers
 * used by klpp_move_page_tables() become valid.
 *
 * Returns the result of __klp_resolve_kallsyms_relocs() unchanged.
 * NOTE(review): presumably 0 on success and a negative errno otherwise --
 * confirm against kallsyms_relocs.h.
 */
int livepatch_bsc1203624_init(void)
{
	return __klp_resolve_kallsyms_relocs(klp_funcs, ARRAY_SIZE(klp_funcs));
}

#endif /* IS_ENABLED(CONFIG_HAVE_MOVE_PMD) */
