diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 1bd144ed2..dd178cde7 100755
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -142,6 +142,60 @@ config GENERIC_IRQ_DEBUGFS
 
 	  If you don't know what to do here, say N.
 
+config IRQ_SBALANCE
+	bool "SBalance IRQ balancer"
+	depends on SMP
+	default n
+	help
+	  This is a simple IRQ balancer that polls every X number of
+	  milliseconds and moves IRQs from the most interrupt-heavy CPU to the
+	  least interrupt-heavy CPUs until the heaviest CPU is no longer the
+	  heaviest. IRQs are only moved from one source CPU to any number of
+	  destination CPUs per balance run. Balancing is skipped if the gap
+	  between the most interrupt-heavy CPU and the least interrupt-heavy CPU
+	  is below the configured threshold of interrupts.
+
+	  The heaviest IRQs are targeted for migration in order to reduce the
+	  number of IRQs to migrate. If moving an IRQ would reduce overall
+	  balance, then it won't be migrated.
+
+	  The most interrupt-heavy CPU is calculated by scaling the number of
+	  new interrupts on that CPU to the CPU's current capacity. This way,
+	  interrupt heaviness takes into account factors such as thermal
+	  pressure and time spent processing interrupts rather than just the
+	  sheer number of them. This also makes SBalance aware of CPU asymmetry,
+	  where different CPUs can have different performance capacities and be
+	  proportionally balanced.
+
+if IRQ_SBALANCE
+config IRQ_SBALANCE_POLL_MSEC
+	int "Polling interval in milliseconds"
+	default 3000
+	help
+	  Perform IRQ balancing every X milliseconds.
+
+config IRQ_SBALANCE_THRESH
+	int "Balance threshold in number of interrupts"
+	default 1024
+	help
+	  There needs to be a difference of at least this many new interrupts
+	  between the heaviest and least-heavy CPUs during the last polling
+	  window in order for balancing to occur. This is to avoid balancing
+	  when the system is quiet.
+
+	  This threshold is compared to the _scaled_ interrupt counts per CPU;
+	  i.e., the number of interrupts scaled to the CPU's capacity.
+
+config SBALANCE_EXCLUDE_CPUS
+	string "CPUs to exclude from balancing"
+	help
+	  Comma-separated list of CPUs to exclude from IRQ balancing.
+
+	  For example, to ignore CPU0, CPU1, and CPU2, it is valid to provide
+	  "0,1-2" or "0-2" or "0,1,2".
+
+endif
+
 endmenu
 
 config GENERIC_IRQ_MULTI_HANDLER
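The capacity scaling described in the IRQ_SBALANCE help text above boils down to normalizing each CPU's count of new interrupts by that CPU's current capacity, so a throttled or smaller CPU is treated as heavier at a lower raw count. Below is a minimal userspace sketch of that arithmetic, not part of the patch; the numbers are made up and scaled_intrs() is a hypothetical stand-in that mirrors scale_intrs() in sbalance.c further down.

#include <stdio.h>

/* Mirrors the kernel's SCHED_CAPACITY_SCALE (1 << 10). */
#define SCHED_CAPACITY_SCALE 1024

/* Hypothetical stand-in for scale_intrs(): normalize a count by capacity. */
static unsigned int scaled_intrs(unsigned int intrs, unsigned long capacity)
{
	return intrs * SCHED_CAPACITY_SCALE / capacity;
}

int main(void)
{
	/*
	 * A little core throttled to capacity 512 that took 6000 new IRQs
	 * scales to 12000, while a big core at full capacity 1024 that took
	 * 9000 scales to 9000. The little core is therefore the heaviest CPU,
	 * and since the scaled gap (3000) exceeds the default threshold of
	 * 1024, a balance run would proceed.
	 */
	printf("little: %u\n", scaled_intrs(6000, 512));
	printf("big:    %u\n", scaled_intrs(9000, 1024));
	return 0;
}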
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index b4f53717d..a485d87af 100755
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
 obj-$(CONFIG_SMP) += affinity.o
 obj-$(CONFIG_GENERIC_IRQ_DEBUGFS) += debugfs.o
 obj-$(CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR) += matrix.o
+obj-$(CONFIG_IRQ_SBALANCE) += sbalance.o
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index f1d83a8b4..a241db3f6 100755
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -130,6 +130,15 @@ static inline void unregister_handler_proc(unsigned int irq,
 					   struct irqaction *action) { }
 #endif
 
+#ifdef CONFIG_IRQ_SBALANCE
+extern void sbalance_desc_add(struct irq_desc *desc);
+extern void sbalance_desc_del(struct irq_desc *desc);
+#else
+static inline void sbalance_desc_add(struct irq_desc *desc) { }
+static inline void sbalance_desc_del(struct irq_desc *desc) { }
+#endif
+
+extern bool __irq_can_set_affinity(struct irq_desc *desc);
 extern bool irq_can_set_affinity_usr(unsigned int irq);
 
 extern void irq_set_thread_affinity(struct irq_desc *desc);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 74df02894..f799994d2 100755
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -413,6 +413,7 @@ static struct irq_desc *alloc_desc(int irq, int node, unsigned int flags,
 	desc_set_defaults(irq, desc, node, affinity, owner);
 	irqd_set(&desc->irq_data, flags);
 	kobject_init(&desc->kobj, &irq_kobj_type);
+	sbalance_desc_add(desc);
 
 	return desc;
 
@@ -443,6 +444,7 @@ static void free_desc(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 
+	sbalance_desc_del(desc);
 	irq_remove_debugfs_entry(desc);
 	unregister_irq_proc(irq, desc);
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index df19cc9ae..41e986c3e 100755
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -147,7 +147,7 @@ EXPORT_SYMBOL(synchronize_irq);
 #ifdef CONFIG_SMP
 cpumask_var_t irq_default_affinity;
 
-static bool __irq_can_set_affinity(struct irq_desc *desc)
+bool __irq_can_set_affinity(struct irq_desc *desc)
 {
 	if (!desc || !irqd_can_balance(&desc->irq_data) ||
 	    !desc->irq_data.chip || !desc->irq_data.chip->irq_set_affinity)
diff --git a/kernel/irq/sbalance.c b/kernel/irq/sbalance.c
new file mode 100644
index 000000000..b062c4f6a
--- /dev/null
+++ b/kernel/irq/sbalance.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Sultan Alsawaf .
+ */
+
+/**
+ * DOC: SBalance description
+ *
+ * This is a simple IRQ balancer that polls every X number of milliseconds and
+ * moves IRQs from the most interrupt-heavy CPU to the least interrupt-heavy
+ * CPUs until the heaviest CPU is no longer the heaviest. IRQs are only moved
+ * from one source CPU to any number of destination CPUs per balance run.
+ * Balancing is skipped if the gap between the most interrupt-heavy CPU and the
+ * least interrupt-heavy CPU is below the configured threshold of interrupts.
+ *
+ * The heaviest IRQs are targeted for migration in order to reduce the number of
+ * IRQs to migrate. If moving an IRQ would reduce overall balance, then it won't
+ * be migrated.
+ *
+ * The most interrupt-heavy CPU is calculated by scaling the number of new
+ * interrupts on that CPU to the CPU's current capacity. This way, interrupt
+ * heaviness takes into account factors such as thermal pressure and time spent
+ * processing interrupts rather than just the sheer number of them. This also
+ * makes SBalance aware of CPU asymmetry, where different CPUs can have
+ * different performance capacities and be proportionally balanced.
+ */
+
+#define pr_fmt(fmt) "sbalance: " fmt
+
+#include <linux/freezer.h>
+#include <linux/irq.h>
+#include <linux/list_sort.h>
+#include "../sched/sched.h"
+#include "internals.h"
+
+/* Perform IRQ balancing every POLL_MS milliseconds */
+#define POLL_MS CONFIG_IRQ_SBALANCE_POLL_MSEC
+
+/*
+ * There needs to be a difference of at least this many new interrupts between
+ * the heaviest and least-heavy CPUs during the last polling window in order for
+ * balancing to occur. This is to avoid balancing when the system is quiet.
+ *
+ * This threshold is compared to the _scaled_ interrupt counts per CPU; i.e.,
+ * the number of interrupts scaled to the CPU's capacity.
+ */
+#define IRQ_SCALED_THRESH CONFIG_IRQ_SBALANCE_THRESH
+
+struct bal_irq {
+	struct list_head node;
+	struct list_head move_node;
+	struct rcu_head rcu;
+	struct irq_desc *desc;
+	unsigned int delta_nr;
+	unsigned int old_nr;
+	int prev_cpu;
+};
+
+struct bal_domain {
+	struct list_head movable_irqs;
+	unsigned int intrs;
+	int cpu;
+};
+
+static LIST_HEAD(bal_irq_list);
+static DEFINE_SPINLOCK(bal_irq_lock);
+static DEFINE_PER_CPU(struct bal_domain, balance_data);
+static DEFINE_PER_CPU(unsigned long, cpu_cap);
+static cpumask_t cpu_exclude_mask __read_mostly;
+
+void sbalance_desc_add(struct irq_desc *desc)
+{
+	struct bal_irq *bi;
+
+	bi = kmalloc(sizeof(*bi), GFP_KERNEL);
+	if (WARN_ON(!bi))
+		return;
+
+	*bi = (typeof(*bi)){ .desc = desc };
+	spin_lock(&bal_irq_lock);
+	list_add_tail_rcu(&bi->node, &bal_irq_list);
+	spin_unlock(&bal_irq_lock);
+}
+
+void sbalance_desc_del(struct irq_desc *desc)
+{
+	struct bal_irq *bi;
+
+	spin_lock(&bal_irq_lock);
+	list_for_each_entry(bi, &bal_irq_list, node) {
+		if (bi->desc == desc) {
+			list_del_rcu(&bi->node);
+			kfree_rcu(bi, rcu);
+			break;
+		}
+	}
+	spin_unlock(&bal_irq_lock);
+}
+
+static int bal_irq_move_node_cmp(void *priv, const struct list_head *lhs_p,
+				 const struct list_head *rhs_p)
+{
+	const struct bal_irq *lhs = list_entry(lhs_p, typeof(*lhs), move_node);
+	const struct bal_irq *rhs = list_entry(rhs_p, typeof(*rhs), move_node);
+
+	return rhs->delta_nr - lhs->delta_nr;
+}
+
+/* Returns false if this IRQ should be totally ignored for this balancing run */
+static bool update_irq_data(struct bal_irq *bi, int *cpu)
+{
+	struct irq_desc *desc = bi->desc;
+	unsigned int nr;
+
+	/* Find the CPU which currently has this IRQ affined */
+	raw_spin_lock_irq(&desc->lock);
+	*cpu = cpumask_first(desc->irq_common_data.affinity);
+	raw_spin_unlock_irq(&desc->lock);
+	if (*cpu >= nr_cpu_ids)
+		return false;
+
+	/*
+	 * Calculate the number of new interrupts from this IRQ. It is assumed
+	 * that the IRQ has been running on the same CPU since the last
+	 * balancing run. This might not hold true if the IRQ was moved by
+	 * someone else since the last balancing run, or if the CPU this IRQ was
+	 * previously running on has since gone offline.
+	 */
+	nr = *per_cpu_ptr(desc->kstat_irqs, *cpu);
+	if (nr <= bi->old_nr) {
+		bi->old_nr = nr;
+		return false;
+	}
+
+	/* Calculate the number of new interrupts on this CPU from this IRQ */
+	bi->delta_nr = nr - bi->old_nr;
+	bi->old_nr = nr;
+	return true;
+}
+
+static int move_irq_to_cpu(struct bal_irq *bi, int cpu)
+{
+	struct irq_desc *desc = bi->desc;
+	int prev_cpu, ret;
+
+	/* Set the affinity if it wasn't changed since we looked at it */
+	raw_spin_lock_irq(&desc->lock);
+	prev_cpu = cpumask_first(desc->irq_common_data.affinity);
+	if (prev_cpu == bi->prev_cpu) {
+		ret = irq_set_affinity_locked(&desc->irq_data, cpumask_of(cpu),
+					      false);
+	} else {
+		bi->prev_cpu = prev_cpu;
+		ret = -EINVAL;
+	}
+	raw_spin_unlock_irq(&desc->lock);
+
+	if (!ret) {
+		/* Update the old interrupt count using the new CPU */
+		bi->old_nr = *per_cpu_ptr(desc->kstat_irqs, cpu);
+		pr_debug("Moved IRQ%d (CPU%d -> CPU%d)\n",
+			 irq_desc_get_irq(desc), prev_cpu, cpu);
+	}
+	return ret;
+}
+
+static unsigned int scale_intrs(unsigned int intrs, int cpu)
+{
+	/* Scale the number of interrupts to this CPU's current capacity */
+	return intrs * SCHED_CAPACITY_SCALE / per_cpu(cpu_cap, cpu);
+}
+
+/* Returns true if IRQ balancing should stop */
+static bool find_min_bd(const cpumask_t *mask, unsigned int max_intrs,
+			struct bal_domain **min_bd)
+{
+	unsigned int intrs, min_intrs = UINT_MAX;
+	struct bal_domain *bd;
+	int cpu;
+
+	for_each_cpu(cpu, mask) {
+		bd = per_cpu_ptr(&balance_data, cpu);
+		intrs = scale_intrs(bd->intrs, bd->cpu);
+
+		/* Terminate when the formerly-max CPU isn't the max anymore */
+		if (intrs > max_intrs)
+			return true;
+
+		/* Find the CPU with the lowest relative number of interrupts */
+		if (intrs < min_intrs) {
+			min_intrs = intrs;
+			*min_bd = bd;
+		}
+	}
+
+	/* Don't balance if IRQs are already balanced evenly enough */
+	return max_intrs - min_intrs < IRQ_SCALED_THRESH;
+}
+
+static void balance_irqs(void)
+{
+	static cpumask_t cpus;
+	struct bal_domain *bd, *max_bd, *min_bd;
+	unsigned int intrs, max_intrs;
+	bool moved_irq = false;
+	struct bal_irq *bi;
+	int cpu;
+
+	rcu_read_lock();
+
+	/* Find the available CPUs for balancing, if there are any */
+	cpumask_andnot(&cpus, cpu_active_mask, &cpu_exclude_mask);
+	if (unlikely(cpumask_weight(&cpus) <= 1))
+		goto unlock;
+
+	/*
+	 * Get the current capacity for each CPU. This is adjusted for time
+	 * spent processing IRQs, RT-task time, and thermal pressure. We don't
+	 * exclude time spent processing IRQs when balancing because balancing
+	 * is only done using interrupt counts rather than time spent in
+	 * interrupts. That way, time spent processing each interrupt is
+	 * considered when balancing.
+	 */
+	for_each_cpu(cpu, &cpus)
+		per_cpu(cpu_cap, cpu) = cpu_rq(cpu)->cpu_capacity;
+
+	list_for_each_entry_rcu(bi, &bal_irq_list, node) {
+		if (!update_irq_data(bi, &cpu))
+			continue;
+
+		/* Add the number of new interrupts to this CPU's count */
+		bd = per_cpu_ptr(&balance_data, cpu);
+		bd->intrs += bi->delta_nr;
+
+		/* Consider this IRQ for balancing if it's movable */
+		if (!__irq_can_set_affinity(bi->desc))
+			continue;
+
+		/* Ignore for this balancing run if something else moved it */
+		if (cpu != bi->prev_cpu) {
+			bi->prev_cpu = cpu;
+			continue;
+		}
+
+		list_add_tail(&bi->move_node, &bd->movable_irqs);
+	}
+
+	/* Find the most interrupt-heavy CPU with movable IRQs */
+	while (1) {
+		max_intrs = 0;
+		for_each_cpu(cpu, &cpus) {
+			bd = per_cpu_ptr(&balance_data, cpu);
+			intrs = scale_intrs(bd->intrs, bd->cpu);
+			if (intrs > max_intrs) {
+				max_intrs = intrs;
+				max_bd = bd;
+			}
+		}
+
+		/* No balancing to do if there aren't any movable IRQs */
+		if (unlikely(!max_intrs))
+			goto unlock;
+
+		/* Ensure the heaviest CPU has IRQs which can be moved away */
+		if (!list_empty(&max_bd->movable_irqs))
+			break;
+
+try_next_heaviest:
+		/*
+		 * If the heaviest CPU has no movable IRQs then it can neither
+		 * receive IRQs nor give IRQs. Exclude it from balancing so the
+		 * remaining CPUs can be balanced, if there are any.
+		 */
+		if (cpumask_weight(&cpus) == 2)
+			goto unlock;
+
+		cpumask_clear_cpu(max_bd->cpu, &cpus);
+	}
+
+	/* Find the CPU with the lowest relative interrupt count */
+	if (find_min_bd(&cpus, max_intrs, &min_bd))
+		goto unlock;
+
+	/* Sort movable IRQs in descending order of number of new interrupts */
+	list_sort(NULL, &max_bd->movable_irqs, bal_irq_move_node_cmp);
+
+	/* Push IRQs away from the heaviest CPU to the least-heavy CPUs */
+	list_for_each_entry(bi, &max_bd->movable_irqs, move_node) {
+		/* Skip this IRQ if it would just overload the target CPU */
+		intrs = scale_intrs(min_bd->intrs + bi->delta_nr, min_bd->cpu);
+		if (intrs >= max_intrs)
+			continue;
+
+		/* Try to migrate this IRQ, or skip it if migration fails */
+		if (move_irq_to_cpu(bi, min_bd->cpu))
+			continue;
+
+		/* Keep track of whether or not any IRQs are moved */
+		moved_irq = true;
+
+		/* Update the counts and recalculate the max scaled count */
+		min_bd->intrs += bi->delta_nr;
+		max_bd->intrs -= bi->delta_nr;
+		max_intrs = scale_intrs(max_bd->intrs, max_bd->cpu);
+
+		/* Recheck for the least-heavy CPU since it may have changed */
+		if (find_min_bd(&cpus, max_intrs, &min_bd))
+			break;
+	}
+
+	/*
+	 * If the heaviest CPU has movable IRQs which can't actually be moved,
+	 * then ignore it and try balancing the next heaviest CPU.
+	 */
+	if (!moved_irq)
+		goto try_next_heaviest;
+unlock:
+	rcu_read_unlock();
+
+	/* Reset each balance domain for the next run */
+	for_each_possible_cpu(cpu) {
+		bd = per_cpu_ptr(&balance_data, cpu);
+		INIT_LIST_HEAD(&bd->movable_irqs);
+		bd->intrs = 0;
+	}
+}
+
+static int __noreturn sbalance_thread(void *data)
+{
+	long poll_jiffies = msecs_to_jiffies(POLL_MS);
+	struct bal_domain *bd;
+	int cpu;
+
+	/* Parse the list of CPUs to exclude, if any */
+	if (cpulist_parse(CONFIG_SBALANCE_EXCLUDE_CPUS, &cpu_exclude_mask))
+		cpu_exclude_mask = CPU_MASK_NONE;
+
+	/* Initialize the data used for balancing */
+	for_each_possible_cpu(cpu) {
+		bd = per_cpu_ptr(&balance_data, cpu);
+		INIT_LIST_HEAD(&bd->movable_irqs);
+		bd->cpu = cpu;
+	}
+
+	set_freezable();
+	while (1) {
+		freezable_schedule_timeout_interruptible(poll_jiffies);
+		balance_irqs();
+	}
+}
+
+static int __init sbalance_init(void)
+{
+	BUG_ON(IS_ERR(kthread_run(sbalance_thread, NULL, "sbalanced")));
+	return 0;
+}
+late_initcall(sbalance_init);
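The "if moving an IRQ would reduce overall balance, then it won't be migrated" rule from the description corresponds to the check at the top of the migration loop in balance_irqs(): an IRQ is skipped when the destination CPU's scaled count after the move would reach or exceed the source CPU's current scaled count. Below is a minimal standalone sketch of that acceptance test, not part of the patch; the numbers are made up, and would_overload() and scaled_intrs() are hypothetical helpers named only for this illustration.

#include <stdbool.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024

static unsigned int scaled_intrs(unsigned int intrs, unsigned long capacity)
{
	return intrs * SCHED_CAPACITY_SCALE / capacity;
}

/*
 * Moving an IRQ is only worthwhile if the destination CPU's scaled count,
 * after taking on the IRQ's new interrupts, stays below the source CPU's
 * current scaled count; otherwise the move would just shift the imbalance.
 */
static bool would_overload(unsigned int dst_intrs, unsigned int irq_delta,
			   unsigned long dst_cap, unsigned int src_scaled)
{
	return scaled_intrs(dst_intrs + irq_delta, dst_cap) >= src_scaled;
}

int main(void)
{
	/* Made-up example: the source CPU currently scales to 12000. */
	unsigned int src_scaled = 12000;

	/* 2000 extra IRQs on a full-capacity CPU with 9000: 11000 < 12000. */
	printf("%d\n", would_overload(9000, 2000, 1024, src_scaled)); /* 0: move */

	/* 4000 extra IRQs on the same CPU: 13000 >= 12000, so it is skipped. */
	printf("%d\n", would_overload(9000, 4000, 1024, src_scaled)); /* 1: skip */
	return 0;
}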