/*
 * livepatch_bsc1234885
 *
 * Fix for CVE-2024-53166, bsc#1234885
 *
 *  Upstream commit:
 *  e8b8344de398 ("block, bfq: fix bfqq uaf in bfq_limit_depth()")
 *
 *  SLE12-SP5 commit:
 *  Not affected
 *
 *  SLE15-SP3 commit:
 *  Not affected
 *
 *  SLE15-SP4 and -SP5 commit:
 *  76a96c4c5dfff3dd24aef67b5ca5ae0e7055b15e
 *
 *  SLE15-SP6 commit:
 *  261dfc3b8778ac09f27d5f8002f72274ac256960
 *
 *  SLE MICRO-6-0 commit:
 *  261dfc3b8778ac09f27d5f8002f72274ac256960
 *
 *  Copyright (c) 2025 SUSE
 *  Author: Ali Abdallah <ali.abdallah@suse.de>
 *
 *  Based on the original Linux kernel code. Other copyrights apply.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */


/* klp-ccp: from block/bfq-iosched.c */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/cgroup.h>
#include <linux/elevator.h>
#include <linux/ktime.h>
#include <linux/rbtree.h>
#include <linux/ioprio.h>
#include <linux/sbitmap.h>

#include <linux/backing-dev.h>

#include <trace/events/block.h>

/* klp-ccp: from block/blk.h */
#include <linux/idr.h>

/* klp-ccp: from include/linux/blk-mq.h */
#define BLK_MQ_H

/* klp-ccp: from block/blk-crypto-internal.h */
#include <linux/bio.h>
#include <linux/blkdev.h>
/* klp-ccp: from block/blk-stat.h */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/ktime.h>
#include <linux/rcupdate.h>
#include <linux/timer.h>

/* klp-ccp: from block/blk-mq.h */
struct blk_mq_alloc_data {
	/* input parameter */
	struct request_queue *q;
	blk_mq_req_flags_t flags;
	unsigned int shallow_depth;
	unsigned int cmd_flags;

	/* input & output parameter */
	struct blk_mq_ctx *ctx;
	struct blk_mq_hw_ctx *hctx;
};

/* klp-ccp: from block/blk-mq-sched.h */
struct io_cq *blk_mq_sched_get_icq(struct request_queue *q);

/* klp-ccp: from block/bfq-iosched.h */
#include <linux/blktrace_api.h>
#include <linux/hrtimer.h>
#include <linux/blk-cgroup.h>

/* klp-ccp: from block/blk-cgroup-rwstat.h */
#include <linux/blk-cgroup.h>

/* klp-ccp: from block/bfq-iosched.h */
#define BFQ_IOPRIO_CLASSES	3

struct bfq_service_tree {
	/* tree for active entities (i.e., those backlogged) */
	struct rb_root active;
	/* tree for idle entities (i.e., not backlogged, with V < F_i)*/
	struct rb_root idle;

	/* idle entity with minimum F_i */
	struct bfq_entity *first_idle;
	/* idle entity with maximum F_i */
	struct bfq_entity *last_idle;

	/* scheduler virtual time */
	u64 vtime;
	/* scheduler weight sum; active and idle entities contribute to it */
	unsigned long wsum;
};

struct bfq_sched_data {
	/* entity in service */
	struct bfq_entity *in_service_entity;
	/* head-of-line entity (see comments above) */
	struct bfq_entity *next_in_service;
	/* array of service trees, one per ioprio_class */
	struct bfq_service_tree service_tree[BFQ_IOPRIO_CLASSES];
	/* last time CLASS_IDLE was served */
	unsigned long bfq_class_idle_last_service;

};

struct bfq_entity {
	/* service_tree member */
	struct rb_node rb_node;

	/*
	 * Flag, true if the entity is on a tree (either the active or
	 * the idle one of its service_tree) or is in service.
	 */
	bool on_st_or_in_serv;

	/* B-WF2Q+ start and finish timestamps [sectors/weight] */
	u64 start, finish;

	/* tree the entity is enqueued into; %NULL if not on a tree */
	struct rb_root *tree;

	/*
	 * minimum start time of the (active) subtree rooted at this
	 * entity; used for O(log N) lookups into active trees
	 */
	u64 min_start;

	/* amount of service received during the last service slot */
	int service;

	/* budget, used also to calculate F_i: F_i = S_i + @budget / @weight */
	int budget;

	/* Number of requests allocated in the subtree of this entity */
	int allocated;

	/* device weight, if non-zero, it overrides the default weight of
	 * bfq_group_data */
	int dev_weight;
	/* weight of the queue */
	int weight;
	/* next weight if a change is in progress */
	int new_weight;

	/* original weight, used to implement weight boosting */
	int orig_weight;

	/* parent entity, for hierarchical scheduling */
	struct bfq_entity *parent;

	/*
	 * For non-leaf nodes in the hierarchy, the associated
	 * scheduler queue, %NULL on leaf nodes.
	 */
	struct bfq_sched_data *my_sched_data;
	/* the scheduler queue this entity belongs to */
	struct bfq_sched_data *sched_data;

	/* flag, set to request a weight, ioprio or ioprio_class change  */
	int prio_changed;

	/* flag, set if the entity is counted in groups_with_pending_reqs */
	bool in_groups_with_pending_reqs;

	/* last child queue of entity created (for non-leaf entities) */
	struct bfq_queue *last_bfqq_created;
};

struct bfq_ttime {
	/* completion time of the last request */
	u64 last_end_request;

	/* total process thinktime */
	u64 ttime_total;
	/* number of thinktime samples */
	unsigned long ttime_samples;
	/* average process thinktime */
	u64 ttime_mean;
};

struct bfq_queue {
	/* reference counter */
	int ref;
	/* counter of references from other queues for delayed stable merge */
	int stable_ref;
	/* parent bfq_data */
	struct bfq_data *bfqd;

	/* current ioprio and ioprio class */
	unsigned short ioprio, ioprio_class;
	/* next ioprio and ioprio class if a change is in progress */
	unsigned short new_ioprio, new_ioprio_class;

	/* last total-service-time sample, see bfq_update_inject_limit() */
	u64 last_serv_time_ns;
	/* limit for request injection */
	unsigned int inject_limit;
	/* last time the inject limit has been decreased, in jiffies */
	unsigned long decrease_time_jif;

	/*
	 * Shared bfq_queue if queue is cooperating with one or more
	 * other queues.
	 */
	struct bfq_queue *new_bfqq;
	/* request-position tree member (see bfq_group's @rq_pos_tree) */
	struct rb_node pos_node;
	/* request-position tree root (see bfq_group's @rq_pos_tree) */
	struct rb_root *pos_root;

	/* sorted list of pending requests */
	struct rb_root sort_list;
	/* if fifo isn't expired, next request to serve */
	struct request *next_rq;
	/* number of sync and async requests queued */
	int queued[2];
	/* number of pending metadata requests */
	int meta_pending;
	/* fifo list of requests in sort_list */
	struct list_head fifo;

	/* entity representing this queue in the scheduler */
	struct bfq_entity entity;

	/* pointer to the weight counter associated with this entity */
	struct bfq_weight_counter *weight_counter;

	/* maximum budget allowed from the feedback mechanism */
	int max_budget;
	/* budget expiration (in jiffies) */
	unsigned long budget_timeout;

	/* number of requests on the dispatch list or inside driver */
	int dispatched;

	/* status flags */
	unsigned long flags;

	/* node for active/idle bfqq list inside parent bfqd */
	struct list_head bfqq_list;

	/* associated @bfq_ttime struct */
	struct bfq_ttime ttime;

	/* when bfqq started to do I/O within the last observation window */
	u64 io_start_time;
	/* how long bfqq has remained empty during the last observ. window */
	u64 tot_idle_time;

	/* bit vector: a 1 for each seeky requests in history */
	u32 seek_history;

	/* node for the device's burst list */
	struct hlist_node burst_list_node;

	/* position of the last request enqueued */
	sector_t last_request_pos;

	/* Number of consecutive pairs of request completion and
	 * arrival, such that the queue becomes idle after the
	 * completion, but the next request arrives within an idle
	 * time slice; used only if the queue's IO_bound flag has been
	 * cleared.
	 */
	unsigned int requests_within_timer;

	/* pid of the process owning the queue, used for logging purposes */
	pid_t pid;

	/*
	 * Pointer to the bfq_io_cq owning the bfq_queue, set to %NULL
	 * if the queue is shared.
	 */
	struct bfq_io_cq *bic;

	/* current maximum weight-raising time for this queue */
	unsigned long wr_cur_max_time;
	/*
	 * Minimum time instant such that, only if a new request is
	 * enqueued after this time instant in an idle @bfq_queue with
	 * no outstanding requests, then the task associated with the
	 * queue it is deemed as soft real-time (see the comments on
	 * the function bfq_bfqq_softrt_next_start())
	 */
	unsigned long soft_rt_next_start;
	/*
	 * Start time of the current weight-raising period if
	 * the @bfq-queue is being weight-raised, otherwise
	 * finish time of the last weight-raising period.
	 */
	unsigned long last_wr_start_finish;
	/* factor by which the weight of this queue is multiplied */
	unsigned int wr_coeff;
	/*
	 * Time of the last transition of the @bfq_queue from idle to
	 * backlogged.
	 */
	unsigned long last_idle_bklogged;
	/*
	 * Cumulative service received from the @bfq_queue since the
	 * last transition from idle to backlogged.
	 */
	unsigned long service_from_backlogged;
	/*
	 * Cumulative service received from the @bfq_queue since its
	 * last transition to weight-raised state.
	 */
	unsigned long service_from_wr;

	/*
	 * Value of wr start time when switching to soft rt
	 */
	unsigned long wr_start_at_switch_to_srt;

	unsigned long split_time; /* time of last split */

	unsigned long first_IO_time; /* time of first I/O for this queue */

	unsigned long creation_time; /* when this queue is created */

	/* max service rate measured so far */
	u32 max_service_rate;

	/*
	 * Pointer to the waker queue for this queue, i.e., to the
	 * queue Q such that this queue happens to get new I/O right
	 * after some I/O request of Q is completed. For details, see
	 * the comments on the choice of the queue for injection in
	 * bfq_select_queue().
	 */
	struct bfq_queue *waker_bfqq;
	/* pointer to the curr. tentative waker queue, see bfq_check_waker() */
	struct bfq_queue *tentative_waker_bfqq;
	/* number of times the same tentative waker has been detected */
	unsigned int num_waker_detections;
	/* time when we started considering this waker */
	u64 waker_detection_started;

	/* node for woken_list, see below */
	struct hlist_node woken_list_node;
	/*
	 * Head of the list of the woken queues for this queue, i.e.,
	 * of the list of the queues for which this queue is a waker
	 * queue. This list is used to reset the waker_bfqq pointer in
	 * the woken queues when this queue exits.
	 */
	struct hlist_head woken_list;
};

struct bfq_io_cq {
	/* associated io_cq structure */
	struct io_cq icq; /* must be the first member */
	/* array of two process queues, the sync and the async */
	struct bfq_queue *bfqq[2];
	/* per (request_queue, blkcg) ioprio */
	int ioprio;
#ifdef CONFIG_BFQ_GROUP_IOSCHED
	uint64_t blkcg_serial_nr; /* the current blkcg serial */
#endif
	bool saved_has_short_ttime;
	/*
	 * Same purpose as the previous two fields for the I/O bound
	 * classification of a queue.
	 */
	bool saved_IO_bound;

	u64 saved_io_start_time;
	u64 saved_tot_idle_time;

	/*
	 * Same purpose as the previous fields for the value of the
	 * field keeping the queue's belonging to a large burst
	 */
	bool saved_in_large_burst;
	/*
	 * True if the queue belonged to a burst list before its merge
	 * with another cooperating queue.
	 */
	bool was_in_burst_list;

	/*
	 * Save the weight when a merge occurs, to be able
	 * to restore it in case of split. If the weight is not
	 * correctly resumed when the queue is recycled,
	 * then the weight of the recycled queue could differ
	 * from the weight of the original queue.
	 */
	unsigned int saved_weight;

	/*
	 * Similar to previous fields: save wr information.
	 */
	unsigned long saved_wr_coeff;
	unsigned long saved_last_wr_start_finish;
	unsigned long saved_service_from_wr;
	unsigned long saved_wr_start_at_switch_to_srt;
	unsigned int saved_wr_cur_max_time;
	struct bfq_ttime saved_ttime;

	/* Save also injection state */
	u64 saved_last_serv_time_ns;
	unsigned int saved_inject_limit;
	unsigned long saved_decrease_time_jif;

	/* candidate queue for a stable merge (due to close creation time) */
	struct bfq_queue *stable_merge_bfqq;

	bool stably_merged;	/* non splittable if true */
	unsigned int requests;	/* Number of requests this process has in flight */
};

struct bfq_data {
	/* device request queue */
	struct request_queue *queue;
	/* dispatch queue */
	struct list_head dispatch;

	/* root bfq_group for the device */
	struct bfq_group *root_group;

	/*
	 * rbtree of weight counters of @bfq_queues, sorted by
	 * weight. Used to keep track of whether all @bfq_queues have
	 * the same weight. The tree contains one counter for each
	 * distinct weight associated to some active and not
	 * weight-raised @bfq_queue (see the comments to the functions
	 * bfq_weights_tree_[add|remove] for further details).
	 */
	struct rb_root_cached queue_weights_tree;

	/*
	 * Number of groups with at least one descendant process that
	 * has at least one request waiting for completion. Note that
	 * this accounts for also requests already dispatched, but not
	 * yet completed. Therefore this number of groups may differ
	 * (be larger) than the number of active groups, as a group is
	 * considered active only if its corresponding entity has
	 * descendant queues with at least one request queued. This
	 * number is used to decide whether a scenario is symmetric.
	 * For a detailed explanation see comments on the computation
	 * of the variable asymmetric_scenario in the function
	 * bfq_better_to_idle().
	 *
	 * However, it is hard to compute this number exactly, for
	 * groups with multiple descendant processes. Consider a group
	 * that is inactive, i.e., that has no descendant process with
	 * pending I/O inside BFQ queues. Then suppose that
	 * num_groups_with_pending_reqs is still accounting for this
	 * group, because the group has descendant processes with some
	 * I/O request still in flight. num_groups_with_pending_reqs
	 * should be decremented when the in-flight request of the
	 * last descendant process is finally completed (assuming that
	 * nothing else has changed for the group in the meantime, in
	 * terms of composition of the group and active/inactive state of child
	 * groups and processes). To accomplish this, an additional
	 * pending-request counter must be added to entities, and must
	 * be updated correctly. To avoid this additional field and operations,
	 * we resort to the following tradeoff between simplicity and
	 * accuracy: for an inactive group that is still counted in
	 * num_groups_with_pending_reqs, we decrement
	 * num_groups_with_pending_reqs when the first descendant
	 * process of the group remains with no request waiting for
	 * completion.
	 *
	 * Even this simpler decrement strategy requires a little
	 * carefulness: to avoid multiple decrements, we flag a group,
	 * more precisely an entity representing a group, as still
	 * counted in num_groups_with_pending_reqs when it becomes
	 * inactive. Then, when the first descendant queue of the
	 * entity remains with no request waiting for completion,
	 * num_groups_with_pending_reqs is decremented, and this flag
	 * is reset. After this flag is reset for the entity,
	 * num_groups_with_pending_reqs won't be decremented any
	 * longer in case a new descendant queue of the entity remains
	 * with no request waiting for completion.
	 */
	unsigned int num_groups_with_pending_reqs;

	/*
	 * Per-class (RT, BE, IDLE) number of bfq_queues containing
	 * requests (including the queue in service, even if it is
	 * idling).
	 */
	unsigned int busy_queues[3];
	/* number of weight-raised busy @bfq_queues */
	int wr_busy_queues;
	/* number of queued requests */
	int queued;
	/* number of requests dispatched and waiting for completion */
	int rq_in_driver;

	/* true if the device is non rotational and performs queueing */
	bool nonrot_with_queueing;

	/*
	 * Maximum number of requests in driver in the last
	 * @hw_tag_samples completed requests.
	 */
	int max_rq_in_driver;
	/* number of samples used to calculate hw_tag */
	int hw_tag_samples;
	/* flag set to one if the driver is showing a queueing behavior */
	int hw_tag;

	/* number of budgets assigned */
	int budgets_assigned;

	/*
	 * Timer set when idling (waiting) for the next request from
	 * the queue in service.
	 */
	struct hrtimer idle_slice_timer;

	/* bfq_queue in service */
	struct bfq_queue *in_service_queue;

	/* on-disk position of the last served request */
	sector_t last_position;

	/* position of the last served request for the in-service queue */
	sector_t in_serv_last_pos;

	/* time of last request completion (ns) */
	u64 last_completion;

	/* bfqq owning the last completed rq */
	struct bfq_queue *last_completed_rq_bfqq;

	/* last bfqq created, among those in the root group */
	struct bfq_queue *last_bfqq_created;

	/* time of last transition from empty to non-empty (ns) */
	u64 last_empty_occupied_ns;

	/*
	 * Flag set to activate the sampling of the total service time
	 * of a just-arrived first I/O request (see
	 * bfq_update_inject_limit()). This will cause the setting of
	 * waited_rq when the request is finally dispatched.
	 */
	bool wait_dispatch;
	/*
	 *  If set, then bfq_update_inject_limit() is invoked when
	 *  waited_rq is eventually completed.
	 */
	struct request *waited_rq;
	/*
	 * True if some request has been injected during the last service hole.
	 */
	bool rqs_injected;

	/* time of first rq dispatch in current observation interval (ns) */
	u64 first_dispatch;
	/* time of last rq dispatch in current observation interval (ns) */
	u64 last_dispatch;

	/* beginning of the last budget */
	ktime_t last_budget_start;
	/* beginning of the last idle slice */
	ktime_t last_idling_start;
	unsigned long last_idling_start_jiffies;

	/* number of samples in current observation interval */
	int peak_rate_samples;
	/* num of samples of seq dispatches in current observation interval */
	u32 sequential_samples;
	/* total num of sectors transferred in current observation interval */
	u64 tot_sectors_dispatched;
	/* max rq size seen during current observation interval (sectors) */
	u32 last_rq_max_size;
	/* time elapsed from first dispatch in current observ. interval (us) */
	u64 delta_from_first;
	/*
	 * Current estimate of the device peak rate, measured in
	 * [(sectors/usec) / 2^BFQ_RATE_SHIFT]. The left-shift by
	 * BFQ_RATE_SHIFT is performed to increase precision in
	 * fixed-point calculations.
	 */
	u32 peak_rate;

	/* maximum budget allotted to a bfq_queue before rescheduling */
	int bfq_max_budget;

	/* list of all the bfq_queues active on the device */
	struct list_head active_list;
	/* list of all the bfq_queues idle on the device */
	struct list_head idle_list;

	/*
	 * Timeout for async/sync requests; when it fires, requests
	 * are served in fifo order.
	 */
	u64 bfq_fifo_expire[2];
	/* weight of backward seeks wrt forward ones */
	unsigned int bfq_back_penalty;
	/* maximum allowed backward seek */
	unsigned int bfq_back_max;
	/* maximum idling time */
	u32 bfq_slice_idle;

	/* user-configured max budget value (0 for auto-tuning) */
	int bfq_user_max_budget;
	/*
	 * Timeout for bfq_queues to consume their budget; used to
	 * prevent seeky queues from imposing long latencies to
	 * sequential or quasi-sequential ones (this also implies that
	 * seeky queues cannot receive guarantees in the service
	 * domain; after a timeout they are charged for the time they
	 * have been in service, to preserve fairness among them, but
	 * without service-domain guarantees).
	 */
	unsigned int bfq_timeout;

	/*
	 * Force device idling whenever needed to provide accurate
	 * service guarantees, without caring about throughput
	 * issues. CAVEAT: this may even increase latencies, in case
	 * of useless idling for processes that did stop doing I/O.
	 */
	bool strict_guarantees;

	/*
	 * Last time at which a queue entered the current burst of
	 * queues being activated shortly after each other; for more
	 * details about this and the following parameters related to
	 * a burst of activations, see the comments on the function
	 * bfq_handle_burst.
	 */
	unsigned long last_ins_in_burst;
	/*
	 * Reference time interval used to decide whether a queue has
	 * been activated shortly after @last_ins_in_burst.
	 */
	unsigned long bfq_burst_interval;
	/* number of queues in the current burst of queue activations */
	int burst_size;

	/* common parent entity for the queues in the burst */
	struct bfq_entity *burst_parent_entity;
	/* Maximum burst size above which the current queue-activation
	 * burst is deemed as 'large'.
	 */
	unsigned long bfq_large_burst_thresh;
	/* true if a large queue-activation burst is in progress */
	bool large_burst;
	/*
	 * Head of the burst list (as for the above fields, more
	 * details in the comments on the function bfq_handle_burst).
	 */
	struct hlist_head burst_list;

	/* if set to true, low-latency heuristics are enabled */
	bool low_latency;
	/*
	 * Maximum factor by which the weight of a weight-raised queue
	 * is multiplied.
	 */
	unsigned int bfq_wr_coeff;
	/* maximum duration of a weight-raising period (jiffies) */
	unsigned int bfq_wr_max_time;

	/* Maximum weight-raising duration for soft real-time processes */
	unsigned int bfq_wr_rt_max_time;
	/*
	 * Minimum idle period after which weight-raising may be
	 * reactivated for a queue (in jiffies).
	 */
	unsigned int bfq_wr_min_idle_time;
	/*
	 * Minimum period between request arrivals after which
	 * weight-raising may be reactivated for an already busy async
	 * queue (in jiffies).
	 */
	unsigned long bfq_wr_min_inter_arr_async;

	/* Max service-rate for a soft real-time queue, in sectors/sec */
	unsigned int bfq_wr_max_softrt_rate;
	/*
	 * Cached value of the product ref_rate*ref_wr_duration, used
	 * for computing the maximum duration of weight raising
	 * automatically.
	 */
	u64 rate_dur_prod;

	/* fallback dummy bfqq for extreme OOM conditions */
	struct bfq_queue oom_bfqq;

	spinlock_t lock;

	/*
	 * bic associated with the task issuing current bio for
	 * merging. This and the next field are used as a support to
	 * be able to perform the bic lookup, needed by bio-merge
	 * functions, before the scheduler lock is taken, and thus
	 * avoid taking the request-queue lock while the scheduler
	 * lock is being held.
	 */
	struct bfq_io_cq *bio_bic;
	/* bfqq associated with the task issuing current bio for merging */
	struct bfq_queue *bio_bfqq;

	/*
	 * Depth limits used in bfq_limit_depth (see comments on the
	 * function)
	 */
	unsigned int word_depths[2][2];
	unsigned int full_depth_shift;
};

static int (*klpe_bfq_bfqq_sync)(const struct bfq_queue *bfqq);

static struct bfq_queue *(*klpe_bic_to_bfqq)(struct bfq_io_cq *bic, bool is_sync);

static struct blkcg_gq *(*klpe_bfqg_to_blkg)(struct bfq_group *bfqg);

#define for_each_entity(entity)	\
	for (; entity ; entity = entity->parent)

static struct bfq_service_tree *(*klpe_bfq_entity_service_tree)(struct bfq_entity *entity);

static inline void bfq_pid_to_str(int pid, char *str, int len)
{
	if (pid != -1)
		snprintf(str, len, "%d", pid);
	else
		snprintf(str, len, "SHARED-");
}

#ifdef CONFIG_BFQ_GROUP_IOSCHED
static struct bfq_group *(*klpe_bfqq_group)(struct bfq_queue *bfqq);
#endif /* CONFIG_BFQ_GROUP_IOSCHED */

#define bfq_log(bfqd, fmt, args...) \
	blk_add_trace_msg((bfqd)->queue, "bfq " fmt, ##args)

/* klp-ccp: from block/blk-wbt.h */
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/ktime.h>

/* klp-ccp: from block/blk-rq-qos.h */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blk_types.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/blk-mq.h>

/* klp-ccp: from block/blk-mq-debugfs.h */
#ifdef CONFIG_BLK_DEBUG_FS

#include <linux/seq_file.h>

#else
#error "klp-ccp: non-taken branch"
#endif

static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
{
	/* bic->icq is the first member, %NULL will convert to %NULL */
	return container_of(icq, struct bfq_io_cq, icq);
}

#define BFQ_LIMIT_INLINE_DEPTH 16

#ifdef CONFIG_BFQ_GROUP_IOSCHED
bool klpp_bfqq_request_over_limit(struct bfq_data *bfqd,
				  struct bfq_io_cq *bic, unsigned int op,
				  int limit)
{
	struct bfq_entity *inline_entities[BFQ_LIMIT_INLINE_DEPTH];
	struct bfq_entity **entities = inline_entities;
	int alloc_depth = BFQ_LIMIT_INLINE_DEPTH;
	struct bfq_sched_data *sched_data;
	struct bfq_entity *entity;
	struct bfq_queue *bfqq;
	unsigned long wsum;
	bool ret = false;
	int depth;
	int level;

retry:
	spin_lock_irq(&bfqd->lock);
	bfqq = (*klpe_bic_to_bfqq)(bic, op_is_sync(op));
	if (!bfqq)
		goto out;

	entity = &bfqq->entity;
	if (!entity->on_st_or_in_serv)
		goto out;

	/* +1 for bfqq entity, root cgroup not included */
	depth = (*klpe_bfqg_to_blkg)((*klpe_bfqq_group)(bfqq))->blkcg->css.cgroup->level + 1;
	if (depth > alloc_depth) {
		spin_unlock_irq(&bfqd->lock);
		if (entities != inline_entities)
			kfree(entities);
		entities = kmalloc_array(depth, sizeof(*entities), GFP_NOIO);
		if (!entities)
			return false;
		alloc_depth = depth;
		goto retry;
	}

	sched_data = entity->sched_data;
	/* Gather our ancestors as we need to traverse them in reverse order */
	level = 0;
	for_each_entity(entity) {
		/*
		 * If at some level entity is not even active, allow request
		 * queueing so that BFQ knows there's work to do and activate
		 * entities.
		 */
		if (!entity->on_st_or_in_serv)
			goto out;
		/* Uh, more parents than cgroup subsystem thinks? */
		if (WARN_ON_ONCE(level >= depth))
			break;
		entities[level++] = entity;
	}
	WARN_ON_ONCE(level != depth);
	for (level--; level >= 0; level--) {
		entity = entities[level];
		if (level > 0) {
			wsum = (*klpe_bfq_entity_service_tree)(entity)->wsum;
		} else {
			int i;
			/*
			 * For bfqq itself we take into account service trees
			 * of all higher priority classes and multiply their
			 * weights so that low prio queue from higher class
			 * gets more requests than high prio queue from lower
			 * class.
			 */
			wsum = 0;
			for (i = 0; i <= bfqq->ioprio_class - 1; i++) {
				wsum = wsum * IOPRIO_BE_NR +
					sched_data->service_tree[i].wsum;
			}
		}
		if (!wsum)
			continue;
		limit = DIV_ROUND_CLOSEST(limit * entity->weight, wsum);
		if (entity->allocated >= limit) {
			do { char pid_str[12]; if (__builtin_expect(!!(!blk_trace_note_message_enabled((bfqq->bfqd)->queue)), 1)) break; bfq_pid_to_str((bfqq)->pid, pid_str, 12); do { struct blk_trace *bt; rcu_read_lock(); bt = ({ typeof(*(((bfqq->bfqd)->queue)->blk_trace)) *________p1 = (typeof(*(((bfqq->bfqd)->queue)->blk_trace)) *)({ do { extern void __compiletime_assert_466(void) __attribute__((__error__("Unsupported access size for {READ,WRITE}_ONCE()."))); if (!((sizeof((((bfqq->bfqd)->queue)->blk_trace)) == sizeof(char) || sizeof((((bfqq->bfqd)->queue)->blk_trace)) == sizeof(short) || sizeof((((bfqq->bfqd)->queue)->blk_trace)) == sizeof(int) || sizeof((((bfqq->bfqd)->queue)->blk_trace)) == sizeof(long)) || sizeof((((bfqq->bfqd)->queue)->blk_trace)) == sizeof(long long))) __compiletime_assert_466(); } while (0); (*(const volatile typeof( _Generic(((((bfqq->bfqd)->queue)->blk_trace)), char: (char)0, unsigned char: (unsigned char)0, signed char: (signed char)0, unsigned short: (unsigned short)0, signed short: (signed short)0, unsigned int: (unsigned int)0, signed int: (signed int)0, unsigned long: (unsigned long)0, signed long: (signed long)0, unsigned long long: (unsigned long long)0, signed long long: (signed long long)0, default: ((((bfqq->bfqd)->queue)->blk_trace)))) *)&((((bfqq->bfqd)->queue)->blk_trace))); }); do { } while (0 && (!((0) || rcu_read_lock_held()))); ; ((typeof(*(((bfqq->bfqd)->queue)->blk_trace)) *)(________p1)); }); if (__builtin_expect(!!(bt), 0)) __trace_note_message(bt, (*klpe_bfqg_to_blkg)((*klpe_bfqq_group)(bfqq))->blkcg, "bfq%s%c " "too many requests: allocated %d limit %d level %d",pid_str, (*klpe_bfq_bfqq_sync)((bfqq)) ? 'S' : 'A',entity->allocated, limit, level); rcu_read_unlock(); } while (0); } while (0);
			ret = true;
			break;
		}
	}
out:
	spin_unlock_irq(&bfqd->lock);
	if (entities != inline_entities)
		kfree(entities);
	return ret;
}
#else
bool klpp_bfqq_request_over_limit(struct bfq_data *bfqd,
				  struct bfq_io_cq *bic, unsigned int op,
				  int limit) {
	return false;
}
#endif

void klpp_bfq_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
{
	struct bfq_data *bfqd = data->q->elevator->elevator_data;
	struct bfq_io_cq *bic = icq_to_bic(blk_mq_sched_get_icq(data->q));
	int depth;
	unsigned limit = data->q->nr_requests;

	/* Sync reads have full depth available */
	if (op_is_sync(op) && !op_is_write(op)) {
		depth = 0;
	} else {
		depth = bfqd->word_depths[!!bfqd->wr_busy_queues][op_is_sync(op)];
		limit = (limit * depth) >> bfqd->full_depth_shift;
	}

	/*
	 * Does queue (or any parent entity) exceed number of requests that
	 * should be available to it? Heavily limit depth so that it cannot
	 * consume more available requests and thus starve other entities.
	 */
	if (bic) {
		/* Fast path to check if bfqq is already allocated. */
		if ((*klpe_bic_to_bfqq)(bic, op_is_sync(op)))
			if (klpp_bfqq_request_over_limit(bfqd, bic, op, limit))
				depth = 1;
	}

	bfq_log(bfqd, "[%s] wr_busy %d sync %d depth %u",
		__func__, bfqd->wr_busy_queues, op_is_sync(op), depth);
	if (depth)
		data->shallow_depth = depth;
}


#include "livepatch_bsc1234885.h"

#include <linux/kernel.h>
#include "../kallsyms_relocs.h"

static struct klp_kallsyms_reloc klp_funcs[] = {
	{ "bfq_bfqq_sync", (void *)&klpe_bfq_bfqq_sync },
	{ "bfq_entity_service_tree", (void *)&klpe_bfq_entity_service_tree },
	{ "bfqg_to_blkg", (void *)&klpe_bfqg_to_blkg },
	{ "bfqq_group", (void *)&klpe_bfqq_group },
	{ "bic_to_bfqq", (void *)&klpe_bic_to_bfqq },
};

int livepatch_bsc1234885_init(void)
{
	return klp_resolve_kallsyms_relocs(klp_funcs, ARRAY_SIZE(klp_funcs));
}

