Commit 16b3d0cf authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Clean up SCHED_DEBUG: move the decades old mess of sysctl, procfs and
   debugfs interfaces to a unified debugfs interface.

 - Signals: Allow caching one sigqueue object per task, to improve
   performance & latencies.

 - Improve newidle_balance() irq-off latencies on systems with a large
   number of CPU cgroups.

 - Improve energy-aware scheduling

 - Improve the PELT metrics for certain workloads

 - Reintroduce select_idle_smt() to improve load-balancing locality -
   but without the previous regressions

 - Add 'scheduler latency debugging': warn after long periods of pending
   need_resched. This is an opt-in feature that requires the enabling of
   the LATENCY_WARN scheduler feature, or the use of the
   resched_latency_warn_ms=xx boot parameter.

 - CPU hotplug fixes for HP-rollback, and for the 'fail' interface. Fix
   remaining balance_push() vs. hotplug holes/races

 - PSI fixes, plus allow /proc/pressure/ files to be written by
   CAP_SYS_RESOURCE tasks as well

 - Fix/improve various load-balancing corner cases vs. capacity margins

 - Fix sched topology on systems with NUMA diameter of 3 or above

 - Fix PF_KTHREAD vs to_kthread() race

 - Minor rseq optimizations

 - Misc cleanups, optimizations, fixes and smaller updates

* tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits)
  cpumask/hotplug: Fix cpu_dying() state tracking
  kthread: Fix PF_KTHREAD vs to_kthread() race
  sched/debug: Fix cgroup_path[] serialization
  sched,psi: Handle potential task count underflow bugs more gracefully
  sched: Warn on long periods of pending need_resched
  sched/fair: Move update_nohz_stats() to the CONFIG_NO_HZ_COMMON block to simplify the code & fix an unused function warning
  sched/debug: Rename the sched_debug parameter to sched_verbose
  sched,fair: Alternative sched_slice()
  sched: Move /proc/sched_debug to debugfs
  sched,debug: Convert sysctl sched_domains to debugfs
  debugfs: Implement debugfs_create_str()
  sched,preempt: Move preempt_dynamic to debug.c
  sched: Move SCHED_DEBUG sysctl to debugfs
  sched: Don't make LATENCYTOP select SCHED_DEBUG
  sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
  sched/numa: Allow runtime enabling/disabling of NUMA balance without SCHED_DEBUG
  sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
  cpumask: Introduce DYING mask
  cpumask: Make cpu_{online,possible,present,active}() inline
  rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
  ...
parents 42dec9a9 2ea46c6f
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -4754,7 +4754,7 @@

	sbni=		[NET] Granch SBNI12 leased line adapter

	sched_debug	[KNL] Enables verbose scheduler debug messages.
	sched_verbose	[KNL] Enables verbose scheduler debug messages.

	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
			Allowed values are enable and disable. This feature
+5 −5
Original line number Diff line number Diff line
@@ -74,8 +74,8 @@ for a given topology level by creating a sched_domain_topology_level array and
calling set_sched_topology() with this array as the parameter.

The sched-domains debugging infrastructure can be enabled by enabling
CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. If you forgot to
tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug
knob. This enables an error checking parse of the sched domains which should
catch most possible errors (described above). It also prints out the domain
structure in a visual format.
CONFIG_SCHED_DEBUG and adding 'sched_debug_verbose' to your cmdline. If you
forgot to tweak your cmdline, you can also flip the
/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
the sched domains which should catch most possible errors (described above). It
also prints out the domain structure in a visual format.
+1 −0
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
#include <linux/usb.h>
#include <linux/wait.h>
#include <linux/sched/task.h>
#include <linux/kcov.h>
#include <uapi/linux/usbip.h>

#undef pr_fmt
+91 −0
Original line number Diff line number Diff line
@@ -864,6 +864,97 @@ struct dentry *debugfs_create_bool(const char *name, umode_t mode,
}
EXPORT_SYMBOL_GPL(debugfs_create_bool);

ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	struct dentry *dentry = F_DENTRY(file);
	char *str, *copy = NULL;
	int copy_len, len;
	ssize_t ret;

	ret = debugfs_file_get(dentry);
	if (unlikely(ret))
		return ret;

	str = *(char **)file->private_data;
	len = strlen(str) + 1;
	copy = kmalloc(len, GFP_KERNEL);
	if (!copy) {
		debugfs_file_put(dentry);
		return -ENOMEM;
	}

	copy_len = strscpy(copy, str, len);
	debugfs_file_put(dentry);
	if (copy_len < 0) {
		kfree(copy);
		return copy_len;
	}

	copy[copy_len] = '\n';

	ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len);
	kfree(copy);

	return ret;
}

static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf,
				      size_t count, loff_t *ppos)
{
	/* This is really only for read-only strings */
	return -EINVAL;
}

static const struct file_operations fops_str = {
	.read =		debugfs_read_file_str,
	.write =	debugfs_write_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

static const struct file_operations fops_str_ro = {
	.read =		debugfs_read_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

static const struct file_operations fops_str_wo = {
	.write =	debugfs_write_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

/**
 * debugfs_create_str - create a debugfs file that is used to read and write a string value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
 *
 * This function will return a pointer to a dentry if it succeeds.  This
 * pointer must be passed to the debugfs_remove() function when the file is
 * to be removed (no automatic cleanup happens if your module is unloaded,
 * you are responsible here.)  If an error occurs, ERR_PTR(-ERROR) will be
 * returned.
 *
 * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
 * be returned.
 */
void debugfs_create_str(const char *name, umode_t mode,
			struct dentry *parent, char **value)
{
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str,
				   &fops_str_ro, &fops_str_wo);
}

static ssize_t read_file_blob(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
+86 −31
Original line number Diff line number Diff line
@@ -91,44 +91,15 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
extern struct cpumask __cpu_dying_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
#define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)

extern atomic_t __num_online_cpus;

#if NR_CPUS > 1
/**
 * num_online_cpus() - Read the number of online CPUs
 *
 * Despite the fact that __num_online_cpus is of type atomic_t, this
 * interface gives only a momentary snapshot and is not protected against
 * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
 * region.
 */
static inline unsigned int num_online_cpus(void)
{
	return atomic_read(&__num_online_cpus);
}
#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
#define num_present_cpus()	cpumask_weight(cpu_present_mask)
#define num_active_cpus()	cpumask_weight(cpu_active_mask)
#define cpu_online(cpu)		cpumask_test_cpu((cpu), cpu_online_mask)
#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)
#else
#define num_online_cpus()	1U
#define num_possible_cpus()	1U
#define num_present_cpus()	1U
#define num_active_cpus()	1U
#define cpu_online(cpu)		((cpu) == 0)
#define cpu_possible(cpu)	((cpu) == 0)
#define cpu_present(cpu)	((cpu) == 0)
#define cpu_active(cpu)		((cpu) == 0)
#endif

extern cpumask_t cpus_booted_once_mask;

static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
@@ -857,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active)
		cpumask_clear_cpu(cpu, &__cpu_active_mask);
}

static inline void
set_cpu_dying(unsigned int cpu, bool dying)
{
	if (dying)
		cpumask_set_cpu(cpu, &__cpu_dying_mask);
	else
		cpumask_clear_cpu(cpu, &__cpu_dying_mask);
}

/**
 * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
@@ -894,6 +873,82 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
	return to_cpumask(p);
}

#if NR_CPUS > 1
/**
 * num_online_cpus() - Read the number of online CPUs
 *
 * Despite the fact that __num_online_cpus is of type atomic_t, this
 * interface gives only a momentary snapshot and is not protected against
 * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
 * region.
 */
static inline unsigned int num_online_cpus(void)
{
	return atomic_read(&__num_online_cpus);
}
#define num_possible_cpus()	cpumask_weight(cpu_possible_mask)
#define num_present_cpus()	cpumask_weight(cpu_present_mask)
#define num_active_cpus()	cpumask_weight(cpu_active_mask)

static inline bool cpu_online(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_online_mask);
}

static inline bool cpu_possible(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_possible_mask);
}

static inline bool cpu_present(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_present_mask);
}

static inline bool cpu_active(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_active_mask);
}

static inline bool cpu_dying(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_dying_mask);
}

#else

#define num_online_cpus()	1U
#define num_possible_cpus()	1U
#define num_present_cpus()	1U
#define num_active_cpus()	1U

static inline bool cpu_online(unsigned int cpu)
{
	return cpu == 0;
}

static inline bool cpu_possible(unsigned int cpu)
{
	return cpu == 0;
}

static inline bool cpu_present(unsigned int cpu)
{
	return cpu == 0;
}

static inline bool cpu_active(unsigned int cpu)
{
	return cpu == 0;
}

static inline bool cpu_dying(unsigned int cpu)
{
	return false;
}

#endif /* NR_CPUS > 1 */

#define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))

#if NR_CPUS <= BITS_PER_LONG
Loading