/*
 * kgraft_patch_bsc1070307
 *
 * Fix for CVE-2017-1000405, bsc#1070307
 *
 *  Upstream commits:
 *  a8f97366452e ("mm, thp: Do not make page table dirty unconditionally
 *                 in touch_p[mu]d()")
 *  152e93af3cfe ("mm, thp: Do not make pmd/pud dirty without a reason")
 *  f55e1014f9e5 ('Revert "mm, thp: Do not make pmd/pud dirty without a
 *                 reason"')
 *
 *  SLE12(-SP1) commit:
 *  not affected
 *
 *  SLE12-SP2 commits:
 *  8d8387fc5c88b4e2c0c4d84f0bd1ded9eac90deb
 *  6a6cb781163176165923726222ff1646e3322a66
 *  4f024a3f7cf2511529b7aa12e7b6311d3b0bf584
 *
 *  SLE12-SP3 commits:
 *  613f0f05e7ef5549a89d36d3150fd17ef7ea6840
 *  deef723eb671b12a5318ffa47f58c2029c3f0681
 *  36319500a07cf99d643f53d7cf4f5e8a4c3da7eb
 *
 *  Copyright (c) 2017 SUSE
 *  Author: Nicolai Stange <nstange@suse.de>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/kallsyms.h>
#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include "kgr_patch_bsc1070307.h"

/*
 * Build-time guard: refuse to compile this patch for a kernel that was
 * configured without transparent hugepage support.
 */
#if !IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
#error "KGR patch supports only CONFIG_TRANSPARENT_HUGEPAGE=y."
#endif

/*
 * Address of the kernel's huge_zero_page variable. Like all the kgr_*
 * pointers declared in this section it is listed in kgr_funcs[] and
 * filled in by kgr_patch_bsc1070307_kallsyms(); it is dereferenced by
 * kgr_is_huge_zero_page().
 */
static struct page **kgr_huge_zero_page;

/*
 * This is declared in arch/.../include/asm headers.
 * Redeclare it here to make sure our declaration of
 * kgr_pmdp_set_access_flags below is compatible.
 */
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
				 unsigned long address, pmd_t *pmdp,
				 pmd_t entry, int dirty);
static int (*kgr_pmdp_set_access_flags)(struct vm_area_struct *vma,
					unsigned long address, pmd_t *pmdp,
					pmd_t entry, int dirty);

/*
 * find_dev_pagemap() is only looked up when CONFIG_ZONE_DEVICE is
 * enabled; otherwise a stub with the same name that returns NULL is
 * defined further down in this file.
 */
#if IS_ENABLED(CONFIG_ZONE_DEVICE)
static struct dev_pagemap* (*kgr_find_dev_pagemap)(resource_size_t phys);
#endif
static void (*kgr_lru_add_drain)(void);
static void (*kgr_mlock_vma_page)(struct page *page);
static int (*kgr_PageHeadHuge)(struct page *page_head);

/*
 * On powerpc64 pmd_page() and update_mmu_cache_pmd() are resolved by
 * name like the symbols above; on every other architecture the kgr_*
 * names simply alias the kernel's own definitions.
 * NOTE(review): presumably these are out-of-line, non-EXPORTed functions
 * on powerpc64 only -- confirm against the arch headers.
 */
#if defined(__powerpc64__)
static struct page* (*kgr_pmd_page)(pmd_t pmd);
static void (*kgr_update_mmu_cache_pmd)(struct vm_area_struct *vma,
					unsigned long addr,
					pmd_t *pmd);
#else
#define kgr_pmd_page pmd_page
#define kgr_update_mmu_cache_pmd update_mmu_cache_pmd
#endif


/*
 * Table of kernel symbols this patch cannot reference directly and
 * therefore resolves by name when the patch is initialised.
 *
 * @name: symbol name handed to kallsyms_lookup_name()
 * @addr: location of the kgr_* pointer that receives the resolved address
 *
 * Despite the table's name not every entry is a function:
 * "huge_zero_page" refers to a variable.
 *
 * The names are string literals, hence const char *. The slots hold
 * pointers of unrelated types, hence the generic void ** rather than
 * char **. The table itself is never modified (only the pointers it
 * refers to are written), so it is const.
 */
static const struct {
	const char *name;
	void **addr;
} kgr_funcs[] = {
	{ "huge_zero_page", (void *)&kgr_huge_zero_page },
	{ "pmdp_set_access_flags", (void *)&kgr_pmdp_set_access_flags },
#if IS_ENABLED(CONFIG_ZONE_DEVICE)
	{ "find_dev_pagemap", (void *)&kgr_find_dev_pagemap },
#endif
	{ "lru_add_drain", (void *)&kgr_lru_add_drain },
	{ "mlock_vma_page", (void *)&kgr_mlock_vma_page },
	{ "PageHeadHuge", (void *)&kgr_PageHeadHuge },
#if defined(__powerpc64__)
	{ "pmd_page", (void *)&kgr_pmd_page },
	{ "update_mmu_cache_pmd", (void *)&kgr_update_mmu_cache_pmd },
#endif
};



/* from include/linux/huge_mm.h */
/*
 * Copy of is_huge_zero_page(); needed because the original reads the
 * non-EXPORTed huge_zero_page, which is reached here through the
 * kgr_huge_zero_page pointer instead.
 *
 * Returns true if @page is the page currently stored in huge_zero_page.
 */
static inline bool kgr_is_huge_zero_page(struct page *page)
{
	/* Take a single snapshot of the current huge zero page pointer. */
	struct page *zero_page = ACCESS_ONCE(*kgr_huge_zero_page);

	return zero_page == page;
}

/*
 * Copy of is_huge_zero_pmd(); needed because the original calls
 * is_huge_zero_page().
 *
 * Returns true if the page @pmd refers to is the huge zero page.
 */
static inline bool kgr_is_huge_zero_pmd(pmd_t pmd)
{
	struct page *mapped = kgr_pmd_page(pmd);

	return kgr_is_huge_zero_page(mapped);
}



/* from include/linux/memremap.h */
/*
 * Without CONFIG_ZONE_DEVICE no kgr_find_dev_pagemap function pointer is
 * declared above. Provide a stub of the same name that never finds a
 * mapping, so that kgr_get_dev_pagemap() compiles unchanged in both
 * configurations.
 */
#if !IS_ENABLED(CONFIG_ZONE_DEVICE)
static inline struct dev_pagemap *kgr_find_dev_pagemap(resource_size_t phys)
{
	return NULL;
}
#endif

/*
 * Copy of get_dev_pagemap(); needed because the original calls the
 * non-EXPORTed find_dev_pagemap().
 *
 * @pfn:   page frame number to look up
 * @pgmap: optional cached mapping from a previous lookup, may be NULL
 *
 * Returns the dev_pagemap covering @pfn with a reference taken on its
 * percpu ref, or NULL if no live mapping was found.
 */
static inline struct dev_pagemap *kgr_get_dev_pagemap(unsigned long pfn,
		struct dev_pagemap *pgmap)
{
	resource_size_t phys = PFN_PHYS(pfn);
	const struct resource *res = NULL;

	if (pgmap)
		res = pgmap->res;

	/*
	 * Cached case: the caller already holds a live reference on
	 * @pgmap, so if it covers the address a plain increment is enough.
	 */
	if (res && res->start <= phys && phys <= res->end) {
		percpu_ref_get(pgmap->ref);
		return pgmap;
	}

	/* Slow path: look the mapping up and try to pin it while it is live. */
	rcu_read_lock();
	pgmap = kgr_find_dev_pagemap(phys);
	if (pgmap) {
		if (!percpu_ref_tryget_live(pgmap->ref))
			pgmap = NULL;
	}
	rcu_read_unlock();

	return pgmap;
}



/* from include/linux/mm.h */
/* reproduced here because it calls the non-EXPORTed PageHeadHuge() */
static inline bool kgr__compound_tail_refcounted(struct page *page)
{
	return PageAnon(page) && !PageSlab(page) && !kgr_PageHeadHuge(page);
}

/*
 * Copy of compound_tail_refcounted(); needed because the original calls
 * __compound_tail_refcounted().
 *
 * @page is expected to be the head page of a compound page.
 */
static inline bool kgr_compound_tail_refcounted(struct page *page)
{
	bool refcounted;

	/* Anything but a head page is a caller bug. */
	VM_BUG_ON_PAGE(!PageHead(page), page);
	refcounted = kgr__compound_tail_refcounted(page);

	return refcounted;
}

/*
 * Copy of get_huge_page_tail(); needed because the original calls
 * compound_tail_refcounted().
 *
 * Increments the _mapcount of the tail page @page if its compound head
 * is of a kind whose tails are refcounted.
 */
static inline void kgr_get_huge_page_tail(struct page *page)
{
	struct page *head;

	/*
	 * __split_huge_page_refcount() cannot run from under us.
	 */
	VM_BUG_ON_PAGE(!PageTail(page), page);
	VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
	VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);

	head = compound_head(page);
	if (!kgr_compound_tail_refcounted(head))
		return;

	atomic_inc(&page->_mapcount);
}



/* from mm/internal.h */
/*
 * Copy of __get_page_tail_foll(), which is inlined into its callers in
 * the kernel.
 *
 * @page:          tail page to pin
 * @get_page_head: also increment the head page's _count when true
 */
static inline void kgr__get_page_tail_foll(struct page *page,
					   bool get_page_head)
{
	/*
	 * For a tail page only the head page carries the elevated
	 * page->_count; the tail itself is pinned through its _mapcount.
	 *
	 * Raising page_tail->_mapcount instead of page_tail->_count keeps
	 * the tail's _count at zero at all times, so that speculative
	 * page accesses using get_page_unless_zero() (like in
	 * page_cache_get_speculative()) never see a false positive on a
	 * tail page.
	 */
	VM_BUG_ON_PAGE(atomic_read(&compound_head(page)->_count) <= 0, page);

	if (get_page_head) {
		struct page *head = compound_head(page);

		atomic_inc(&head->_count);
	}

	kgr_get_huge_page_tail(page);
}

/*
 * Copy of get_page_foll(), which is inlined into its callers in the
 * kernel. Takes a reference on @page on behalf of a follow_page-style
 * lookup.
 */
static inline void kgr_get_page_foll(struct page *page)
{
	if (unlikely(PageTail(page))) {
		/*
		 * This is safe only because
		 * __split_huge_page_refcount() can't run under
		 * get_page_foll() because we hold the proper PT lock.
		 */
		kgr__get_page_tail_foll(page, true);
		return;
	}

	/*
	 * A normal page, or the head of a compound page, must already
	 * have an elevated page->_count at this point.
	 */
	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);
	atomic_inc(&page->_count);
}



/* from mm/huge_memory.c */
/*
 * Copy of can_follow_write_pmd(), which is inlined into its caller in
 * the kernel.
 *
 * A write lookup may proceed if @pmd is writable, or if @flags has both
 * FOLL_FORCE and FOLL_COW set and @pmd is dirty.
 */
static inline bool kgr_can_follow_write_pmd(pmd_t pmd, unsigned int flags)
{
	if (pmd_write(pmd))
		return true;

	if (!(flags & FOLL_FORCE))
		return false;

	if (!(flags & FOLL_COW))
		return false;

	return pmd_dirty(pmd);
}


/* patched, all callers also patched */
/*
 * Update the accessed/dirty state of the huge pmd at @pmd on behalf of a
 * FOLL_TOUCH lookup.
 *
 * @vma:   VMA the mapping belongs to
 * @addr:  faulting address; rounded down with HPAGE_PMD_MASK before it
 *         is handed to pmdp_set_access_flags()
 * @pmd:   pmd entry to update
 * @flags: FOLL_* flags of the lookup; only FOLL_WRITE is examined here
 *
 * This is the core of the CVE-2017-1000405 fix: the entry is marked
 * dirty only for FOLL_WRITE lookups (the upstream commit title is "Do
 * not make page table dirty unconditionally in touch_p[mu]d()").
 * kgr_can_follow_write_pmd() in this file consults pmd_dirty() when
 * deciding whether a forced write may proceed, so a lookup without
 * FOLL_WRITE must not set that bit.
 */
static void kgr_touch_pmd(struct vm_area_struct *vma, unsigned long addr,
			  /*
			   * Fix CVE-2017-1000405
			   *  -1 line, +1 line
			   */
			  pmd_t *pmd, int flags)
{
	pmd_t _pmd;

	/*
	 * Fix CVE-2017-1000405
	 *  -8 lines, +3 lines
	 */
	/*
	 * Always mark the entry young, but mark it dirty only when the
	 * lookup was made with FOLL_WRITE.
	 */
	_pmd = pmd_mkyoung(*pmd);
	if (flags & FOLL_WRITE)
		_pmd = pmd_mkdirty(_pmd);
	/*
	 * The "dirty" argument is likewise non-zero only for FOLL_WRITE.
	 * The MMU cache is refreshed only if pmdp_set_access_flags()
	 * returns non-zero.
	 */
	if (kgr_pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
				      /*
				       * Fix CVE-2017-1000405
				       *  -1 line, +1 line
				       */
				      pmd, _pmd, flags & FOLL_WRITE))
		kgr_update_mmu_cache_pmd(vma, addr, pmd);
}

/* patched */
/*
 * Replacement for follow_devmap_pmd(): look up the page backing @addr in
 * a device-mapped huge pmd.
 *
 * @vma:   VMA containing @addr
 * @addr:  user virtual address being looked up
 * @pmd:   pmd entry covering @addr; its page table lock must be held
 *         (checked with assert_spin_locked())
 * @flags: FOLL_* lookup flags
 *
 * The only change with respect to the original is that @flags is passed
 * on to kgr_touch_pmd().
 *
 * Returns:
 *   NULL              - FOLL_WRITE was requested on a non-writable pmd,
 *                       or the pmd is not a present devmap entry
 *   ERR_PTR(-EEXIST)  - FOLL_GET was not requested
 *   ERR_PTR(-EFAULT)  - no live dev_pagemap covers the target pfn
 *   otherwise the page for @addr, with a reference taken by get_page()
 */
struct page *kgr_follow_devmap_pmd(struct vm_area_struct *vma,
				   unsigned long addr,
				   pmd_t *pmd, int flags)
{
	unsigned long pfn = pmd_pfn(*pmd);
	struct mm_struct *mm = vma->vm_mm;
	struct dev_pagemap *pgmap;
	struct page *page;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	if (flags & FOLL_WRITE && !pmd_write(*pmd))
		return NULL;

	if (pmd_present(*pmd) && pmd_devmap(*pmd))
		/* pass */;
	else
		return NULL;

	/* Note: the pmd is touched before the FOLL_GET check below. */
	if (flags & FOLL_TOUCH)
		/*
		 * Fix CVE-2017-1000405
		 *  -1 line, +1 line
		 */
		kgr_touch_pmd(vma, addr, pmd, flags);

	/*
	 * device mapped pages can only be returned if the
	 * caller will manage the page reference count.
	 */
	if (!(flags & FOLL_GET))
		return ERR_PTR(-EEXIST);

	/* Advance from the first pfn of the huge mapping to that of @addr. */
	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
	pgmap = kgr_get_dev_pagemap(pfn, NULL);
	if (!pgmap)
		return ERR_PTR(-EFAULT);
	page = pfn_to_page(pfn);
	get_page(page);
	/* The pagemap reference is only held across the get_page() above. */
	put_dev_pagemap(pgmap);

	return page;
}

/* patched */
/*
 * Replacement for follow_trans_huge_pmd(): look up the page backing
 * @addr in a transparent huge pmd.
 *
 * @vma:   VMA containing @addr
 * @addr:  user virtual address being looked up
 * @pmd:   pmd entry covering @addr; its page table lock must be held
 *         (checked with assert_spin_locked())
 * @flags: FOLL_* lookup flags
 *
 * The only change with respect to the original is that @flags is passed
 * on to kgr_touch_pmd().
 *
 * Returns:
 *   NULL              - a FOLL_WRITE lookup is not permitted by
 *                       kgr_can_follow_write_pmd(), or FOLL_NUMA was
 *                       requested on a protnone pmd
 *   ERR_PTR(-EFAULT)  - FOLL_DUMP was requested on the huge zero page
 *   otherwise the subpage for @addr; a reference is taken through
 *   kgr_get_page_foll() only if FOLL_GET was requested
 */
struct page *kgr_follow_trans_huge_pmd(struct vm_area_struct *vma,
				       unsigned long addr,
				       pmd_t *pmd,
				       unsigned int flags)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page = NULL;

	assert_spin_locked(pmd_lockptr(mm, pmd));

	if (flags & FOLL_WRITE && !kgr_can_follow_write_pmd(*pmd, flags))
		goto out;

	/* Avoid dumping huge zero page */
	if ((flags & FOLL_DUMP) && kgr_is_huge_zero_pmd(*pmd))
		return ERR_PTR(-EFAULT);

	/* Full NUMA hinting faults to serialise migration in fault paths */
	if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
		goto out;

	page = kgr_pmd_page(*pmd);
	VM_BUG_ON_PAGE(!PageHead(page), page);
	if (flags & FOLL_TOUCH)
		/*
		 * Fix CVE-2017-1000405
		 *  -1 line, +1 line
		 */
		kgr_touch_pmd(vma, addr, pmd, flags);
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/*
		 * Best effort: the page is mlocked only if the page lock
		 * can be taken without blocking. page->mapping is checked
		 * again once the lock is held.
		 */
		if (page->mapping && trylock_page(page)) {
			kgr_lru_add_drain();
			if (page->mapping)
				kgr_mlock_vma_page(page);
			unlock_page(page);
		}
	}
	/* Step from the head page to the subpage that backs @addr. */
	page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT;
	VM_BUG_ON_PAGE(!PageCompound(page), page);
	if (flags & FOLL_GET)
		kgr_get_page_foll(page);

out:
	return page;
}



/*
 * Resolve every symbol listed in kgr_funcs[] through
 * kallsyms_lookup_name() and store its address in the associated kgr_*
 * pointer.
 *
 * Returns 0 on success. On the first symbol that cannot be resolved an
 * error is logged and -ENOENT is returned; pointers stored for earlier
 * entries are left as they are.
 */
static int kgr_patch_bsc1070307_kallsyms(void)
{
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(kgr_funcs); idx++) {
		const char *sym = kgr_funcs[idx].name;
		unsigned long resolved = kallsyms_lookup_name(sym);

		if (resolved) {
			*(kgr_funcs[idx].addr) = (void *)resolved;
			continue;
		}

		pr_err("kgraft-patch: symbol %s not resolved\n", sym);
		return -ENOENT;
	}

	return 0;
}

/*
 * Initialise the patch: all that is needed is to resolve the kernel
 * symbols it depends on.
 *
 * Returns 0 on success or the error from
 * kgr_patch_bsc1070307_kallsyms().
 */
int kgr_patch_bsc1070307_init(void)
{
	int ret = kgr_patch_bsc1070307_kallsyms();

	return ret;
}
