From 1f25d291e36b84c7f7f8687b5a28dbffb9884b94 Mon Sep 17 00:00:00 2001
From: Rohit Gupta <rohgup@codeaurora.org>
Date: Fri, 14 Mar 2014 18:56:14 -0700
Subject: [PATCH] cpufreq: cpu-boost: Introduce scheduler-assisted load-based
 syncs

Previously, on receiving a migration notification, cpu-boost changed the
scaling minimum frequency of the destination CPU to match the source
CPU's current frequency or sync_threshold, whichever was lower.

If the scheduler migration notification is extended with task load
(CPU demand) information, the cpu-boost driver can use this load to
compute a suitable frequency for the migrating task. The required
frequency is calculated as the task's load percentage of the
destination CPU's maximum frequency, and no sync is performed if the
load is below a threshold (migration_load_threshold). This change
benefits both performance and power, since the task's demand is taken
into account when making cpufreq decisions and unnecessary syncs for
lightweight tasks are avoided.
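
As a rough illustration, the frequency selection added to
boost_mig_sync_thread() boils down to the arithmetic below. This is a
standalone sketch, not the driver code itself: the helper name and the
example numbers in main() are made up, and locking, notifier and policy
lookup details are omitted.

  /* Illustrative userspace sketch of the sync-frequency calculation */
  #include <stdio.h>

  static unsigned int pick_sync_freq(unsigned int dest_max,
                                     unsigned int dest_min,
                                     unsigned int src_cur,
                                     unsigned int task_load,
                                     unsigned int sync_threshold)
  {
      /* Frequency needed to serve the task's load on the destination CPU */
      unsigned int req_freq = (dest_max * task_load) / 100;

      /* Never sync below what the source CPU was already running at */
      if (req_freq < src_cur)
          req_freq = src_cur;

      /* Task is light enough to run at the destination's minimum frequency */
      if (req_freq <= dest_min)
          return 0; /* 0 means "no sync" in this sketch */

      /* Cap the boost at sync_threshold when one is configured */
      if (sync_threshold && req_freq > sync_threshold)
          req_freq = sync_threshold;

      return req_freq;
  }

  int main(void)
  {
      /* e.g. 1958400 KHz max, 300000 KHz min, source CPU at 960000 KHz,
       * migrating task load of 40% -> boost_min = 960000 KHz */
      printf("boost_min = %u KHz\n",
             pick_sync_freq(1958400, 300000, 960000, 40, 0));
      return 0;
  }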

The task load information provided by the scheduler comes from a
window-based load collection mechanism, which also normalizes the
collected load to the maximum possible frequency across all CPUs.
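
As a worked example (the 10ms window size is only an assumed typical
value), a migrating task that accumulated 1ms of demand in the current
window is reported as

  load = demand * 100 / sched_ravg_window = 1ms * 100 / 10ms = 10

which is below the default migration_load_threshold of 15, so cpu-boost
skips the sync for it entirely.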

Change-Id: Id2ba91cc4139c90602557f9b3801fb06b3c38992
Signed-off-by: Rohit Gupta <rohgup@codeaurora.org>
---
 drivers/cpufreq/cpu-boost.c | 43 +++++++++++++++++++++++++++----------
 include/linux/sched.h       |  5 +++++
 kernel/sched/core.c         | 27 ++++++++++++++++++-----
 kernel/sched/fair.c         | 39 ++++++++++++++++++++++-----------
 kernel/sched/sched.h        |  1 +
 5 files changed, 87 insertions(+), 28 deletions(-)

diff --git a/drivers/cpufreq/cpu-boost.c b/drivers/cpufreq/cpu-boost.c
index 6d0fd2def7e5..1e41e79b155e 100644
--- a/drivers/cpufreq/cpu-boost.c
+++ b/drivers/cpufreq/cpu-boost.c
@@ -37,6 +37,7 @@ struct cpu_sync {
 	int src_cpu;
 	unsigned int boost_min;
 	unsigned int input_boost_min;
+	unsigned int task_load;
 };
 
 static DEFINE_PER_CPU(struct cpu_sync, sync_info);
@@ -56,6 +57,12 @@ module_param(input_boost_freq, uint, 0644);
 static unsigned int input_boost_ms = 40;
 module_param(input_boost_ms, uint, 0644);
 
+static unsigned int migration_load_threshold = 15;
+module_param(migration_load_threshold, uint, 0644);
+
+static bool load_based_syncs;
+module_param(load_based_syncs, bool, 0644);
+
 static u64 last_input_time;
 #define MIN_INPUT_INTERVAL (150 * USEC_PER_MSEC)
 
@@ -139,6 +146,7 @@ static int boost_mig_sync_thread(void *data)
 	struct cpufreq_policy dest_policy;
 	struct cpufreq_policy src_policy;
 	unsigned long flags;
+	unsigned int req_freq;
 
 	while (1) {
 		wait_event(s->sync_wq, s->pending || kthread_should_stop());
@@ -159,17 +167,20 @@ static int boost_mig_sync_thread(void *data)
 		if (ret)
 			continue;
 
-		if (src_policy.cur == src_policy.cpuinfo.min_freq) {
-			pr_debug("No sync. Source CPU%d@%dKHz at min freq\n",
-				 src_cpu, src_policy.cur);
+		req_freq = max((dest_policy.max * s->task_load) / 100,
+							src_policy.cur);
+
+		if (req_freq <= dest_policy.cpuinfo.min_freq) {
+			pr_debug("No sync. Sync Freq:%u\n", req_freq);
 			continue;
 		}
 
-		cancel_delayed_work_sync(&s->boost_rem);
 		if (sync_threshold)
-			s->boost_min = min(sync_threshold, src_policy.cur);
-		else
-			s->boost_min = src_policy.cur;
+			req_freq = min(sync_threshold, req_freq);
+
+		cancel_delayed_work_sync(&s->boost_rem);
+
+		s->boost_min = req_freq;
 
 		/* Force policy re-evaluation to trigger adjust notifier. */
 		get_online_cpus();
@@ -198,10 +209,19 @@ static int boost_mig_sync_thread(void *data)
 }
 
 static int boost_migration_notify(struct notifier_block *nb,
-				unsigned long dest_cpu, void *arg)
+				unsigned long unused, void *arg)
 {
+	struct migration_notify_data *mnd = arg;
 	unsigned long flags;
-	struct cpu_sync *s = &per_cpu(sync_info, dest_cpu);
+	struct cpu_sync *s = &per_cpu(sync_info, mnd->dest_cpu);
+
+	if (load_based_syncs && (mnd->load <= migration_load_threshold))
+		return NOTIFY_OK;
+
+	if (load_based_syncs && ((mnd->load < 0) || (mnd->load > 100))) {
+		pr_err("cpu-boost:Invalid load: %d\n", mnd->load);
+		return NOTIFY_OK;
+	}
 
 	if (!boost_ms)
 		return NOTIFY_OK;
@@ -210,10 +230,11 @@ static int boost_migration_notify(struct notifier_block *nb,
 	if (s->thread == current)
 		return NOTIFY_OK;
 
-	pr_debug("Migration: CPU%d --> CPU%d\n", (int) arg, (int) dest_cpu);
+	pr_debug("Migration: CPU%d --> CPU%d\n", mnd->src_cpu, mnd->dest_cpu);
 	spin_lock_irqsave(&s->lock, flags);
 	s->pending = true;
-	s->src_cpu = (int) arg;
+	s->src_cpu = mnd->src_cpu;
+	s->task_load = load_based_syncs ? mnd->load : 0;
 	spin_unlock_irqrestore(&s->lock, flags);
 	wake_up(&s->sync_wq);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a78d877c5f25..484c090e9a81 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2599,6 +2599,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif /* CONFIG_SMP */
 
 extern struct atomic_notifier_head migration_notifier_head;
+struct migration_notify_data {
+	int src_cpu;
+	int dest_cpu;
+	int load;
+};
 
 extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
 extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 701626c1d66e..76754b30f4fb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1658,9 +1658,19 @@ stat:
 out:
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
-	if (src_cpu != cpu && task_notify_on_migrate(p))
+	if (src_cpu != cpu && task_notify_on_migrate(p)) {
+		struct migration_notify_data mnd;
+
+		mnd.src_cpu = src_cpu;
+		mnd.dest_cpu = cpu;
+		if (sysctl_sched_ravg_window)
+			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
+				(u64)(sysctl_sched_ravg_window));
+		else
+			mnd.load = 0;
 		atomic_notifier_call_chain(&migration_notifier_head,
-					   cpu, (void *)src_cpu);
+					   0, (void *)&mnd);
+	}
 	return success;
 }
 
@@ -5038,10 +5048,17 @@ fail:
 	double_rq_unlock(rq_src, rq_dest);
 	raw_spin_unlock(&p->pi_lock);
 	if (moved && task_notify_on_migrate(p)) {
-		unsigned long _src_cpu;
-		_src_cpu = src_cpu;
+		struct migration_notify_data mnd;
+
+		mnd.src_cpu = src_cpu;
+		mnd.dest_cpu = dest_cpu;
+		if (sysctl_sched_ravg_window)
+			mnd.load = div64_u64((u64)p->se.ravg.demand * 100,
+				(u64)(sysctl_sched_ravg_window));
+		else
+			mnd.load = 0;
 		atomic_notifier_call_chain(&migration_notifier_head,
-					   dest_cpu, (void *)_src_cpu);
+					   0, (void *)&mnd);
 	}
 	return ret;
 }
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 35b8595ebce2..bc68ac7f95c3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3984,7 +3984,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  *
  * Called with both runqueues locked.
  */
-static int move_one_task(struct lb_env *env)
+static int move_one_task(struct lb_env *env, int *total_run_moved)
 {
 	struct task_struct *p, *n;
 
@@ -3999,6 +3999,10 @@ static int move_one_task(struct lb_env *env)
 		 * stats here rather than inside move_task().
 		 */
 		schedstat_inc(env->sd, lb_gained[env->idle]);
+		if (sysctl_sched_ravg_window)
+			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
+					100, (u64)(sysctl_sched_ravg_window));
+
 		return 1;
 	}
 	return 0;
@@ -4015,7 +4019,7 @@ static const unsigned int sched_nr_migrate_break = 32;
  *
  * Called with both runqueues locked.
  */
-static int move_tasks(struct lb_env *env)
+static int move_tasks(struct lb_env *env, int *total_run_moved)
 {
 	struct list_head *tasks = &env->src_rq->cfs_tasks;
 	struct task_struct *p;
@@ -4054,6 +4058,9 @@ static int move_tasks(struct lb_env *env)
 		move_task(p, env);
 		pulled++;
 		env->imbalance -= load;
+		if (sysctl_sched_ravg_window)
+			*total_run_moved += div64_u64((u64)p->se.ravg.demand *
+					100, (u64)(sysctl_sched_ravg_window));
 
 #ifdef CONFIG_PREEMPT
 		/*
@@ -5026,6 +5033,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			int *balance)
 {
 	int ld_moved, cur_ld_moved, active_balance = 0;
+	int total_run_moved = 0;
 	struct sched_group *group;
 	struct rq *busiest = NULL;
 	unsigned long flags;
@@ -5095,7 +5103,7 @@ more_balance:
 		 * cur_ld_moved - load moved in current iteration
 		 * ld_moved     - cumulative load moved across iterations
 		 */
-		cur_ld_moved = move_tasks(&env);
+		cur_ld_moved = move_tasks(&env, &total_run_moved);
 		ld_moved += cur_ld_moved;
 		double_rq_unlock(env.dst_rq, busiest);
 		local_irq_restore(flags);
@@ -5213,12 +5221,15 @@ more_balance:
 	} else {
 		sd->nr_balance_failed = 0;
 		if (per_cpu(dbs_boost_needed, this_cpu)) {
-			unsigned long _busy_cpu;
-			_busy_cpu = cpu_of(busiest);
+			struct migration_notify_data mnd;
+
 			per_cpu(dbs_boost_needed, this_cpu) = false;
+
+			mnd.src_cpu = cpu_of(busiest);
+			mnd.dest_cpu = this_cpu;
+			mnd.load = total_run_moved;
 			atomic_notifier_call_chain(&migration_notifier_head,
-						   this_cpu,
-						   (void *)_busy_cpu);
+						   0, (void *)&mnd);
 		}
 	}
 	if (likely(!active_balance)) {
@@ -5326,6 +5337,7 @@ static int active_load_balance_cpu_stop(void *data)
 	struct rq *busiest_rq = data;
 	int busiest_cpu = cpu_of(busiest_rq);
 	int target_cpu = busiest_rq->push_cpu;
+	int total_run_moved = 0;
 	struct rq *target_rq = cpu_rq(target_cpu);
 	struct sched_domain *sd;
 
@@ -5370,7 +5382,7 @@ static int active_load_balance_cpu_stop(void *data)
 
 		schedstat_inc(sd, alb_count);
 
-		if (move_one_task(&env))
+		if (move_one_task(&env, &total_run_moved))
 			schedstat_inc(sd, alb_pushed);
 		else
 			schedstat_inc(sd, alb_failed);
@@ -5381,12 +5393,15 @@ out_unlock:
 	busiest_rq->active_balance = 0;
 	raw_spin_unlock_irq(&busiest_rq->lock);
 	if (per_cpu(dbs_boost_needed, target_cpu)) {
-		unsigned long _busy_cpu;
+		struct migration_notify_data mnd;
+
 		per_cpu(dbs_boost_needed, target_cpu) = false;
-		_busy_cpu = cpu_of(busiest_rq);
+
+		mnd.src_cpu = cpu_of(busiest_rq);
+		mnd.dest_cpu = target_cpu;
+		mnd.load = total_run_moved;
 		atomic_notifier_call_chain(&migration_notifier_head,
-					   target_cpu,
-					   (void *)_busy_cpu);
+					   0, (void *)&mnd);
 	}
 	return 0;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a6badc2c9723..93e2dffe6ea0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -12,6 +12,7 @@
 
 extern __read_mostly int scheduler_running;
 
+extern unsigned int sysctl_sched_ravg_window;
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
-- 
GitLab