diff --git a/lisa/_assets/kmodules/lisa/Makefile b/lisa/_assets/kmodules/lisa/Makefile
index e22bceaa475f337fbfe66689a90012af113728e2..59431eb0e1b5a57910ec602febbfff073443ee4f 100644
--- a/lisa/_assets/kmodules/lisa/Makefile
+++ b/lisa/_assets/kmodules/lisa/Makefile
@@ -34,7 +34,7 @@ ifneq ($(KERNELRELEASE),)
 LISA_KMOD_NAME ?= lisa
 
 obj-m := $(LISA_KMOD_NAME).o
-$(LISA_KMOD_NAME)-y := main.o tp.o wq.o features.o pixel6.o
+$(LISA_KMOD_NAME)-y := main.o tp.o wq.o features.o pixel6.o perf_counters.o
 ldflags-y += -T "$(M)/features.lds"
 
 clean-files := vmlinux.h
diff --git a/lisa/_assets/kmodules/lisa/ftrace_events.h b/lisa/_assets/kmodules/lisa/ftrace_events.h
index e71d6961fa86c3fe5607961aaeb69c393f3cc9f9..8639739c12d4e5d0c85a20fe21b8f80028e80bb4 100644
--- a/lisa/_assets/kmodules/lisa/ftrace_events.h
+++ b/lisa/_assets/kmodules/lisa/ftrace_events.h
@@ -359,6 +359,25 @@ TRACE_EVENT(lisa__pixel6_emeter,
 		  __entry->ts, __entry->device, __entry->chan,
 		  __entry->chan_name, __entry->value)
 );
+TRACE_EVENT(lisa__perf_counter,
+	TP_PROTO(unsigned int cpu, unsigned int counter_id, u64 value),
+	TP_ARGS(cpu, counter_id, value),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	cpu		)
+		__field(	unsigned int,	counter_id	)
+		__field(	u64,		value		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu = cpu;
+		__entry->counter_id = counter_id;
+		__entry->value = value;
+	),
+
+	TP_printk("cpu=%u counter_id=%u value=%llu",
+		  __entry->cpu, __entry->counter_id, __entry->value)
+);
 
 #endif /* _FTRACE_EVENTS_H */
 
 /* This part must be outside protection */
diff --git a/lisa/_assets/kmodules/lisa/perf_counters.c b/lisa/_assets/kmodules/lisa/perf_counters.c
new file mode 100644
index 0000000000000000000000000000000000000000..b5334cbabc7f1f276cce41cd4ed069dbbe08f153
--- /dev/null
+++ b/lisa/_assets/kmodules/lisa/perf_counters.c
@@ -0,0 +1,492 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2023 ARM Ltd.
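+
+/*
+ * Per-CPU perf counter sampling: pinned kernel counters are created for
+ * the events selected via module parameters, and a lisa__perf_counter
+ * ftrace event is emitted for each active counter at every sched_switch.
+ *
+ * Example (illustrative only; raw event ids are PMU/SoC-specific):
+ *
+ *   insmod lisa.ko perf_counter_generic_perf_events=cpu_cycles,inst_retired \
+ *          perf_counter_pmu_raw_counters=0x11,0x13
+ */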
+#include <linux/perf_event.h>
+#if defined(CONFIG_HW_PERF_EVENTS) && defined(CONFIG_ARM_PMU)
+#include <linux/perf/arm_pmu.h>
+#endif
+#include "main.h"
+#include "ftrace_events.h"
+#include "tp.h"
+
+#define MAX_PERF_COUNTERS 6
+
+#define __PERFCTR_PARAM(name, param_name, type, param_type, desc) \
+	static type param_name[MAX_PERF_COUNTERS]; \
+	static unsigned int param_name##_count; \
+	module_param_array_named(name, param_name, param_type, \
+				 &param_name##_count, 0644); \
+	MODULE_PARM_DESC(name, desc);
+
+#define PERFCTR_PARAM(name, type, param_type, desc) \
+	__PERFCTR_PARAM(perf_counter_##name, name##_param, type, param_type, desc)
+
+/* Set of perf counters to enable - comma-separated names of events */
+PERFCTR_PARAM(generic_perf_events, char *, charp,
+	      "Comma-separated list of symbolic names for generic perf events");
+/* Set of perf counters to enable - comma-separated PMU raw counter ids */
+PERFCTR_PARAM(pmu_raw_counters, unsigned int, uint,
+	      "Comma-separated list of raw PMU event counter ids");
+
+/* Initial set of supported counters to be enabled through module params */
+struct perfctr_desc {
+	/* unique name to identify the counter */
+	const char *name;
+	/* counter id (may be generic or raw) */
+	u64 id;
+	enum perf_type_id type;
+	/* enable by default if no counters requested */
+	bool default_on;
+};
+
+#define PERFCTR_DESC(__name, __id, __type, __en) \
+	((struct perfctr_desc) { \
+		.name = __name, .id = __id, .type = __type, .default_on = __en, \
+	})
+
+#define PERFCTR_DESC_COUNT_HW(__name, __id, __en) \
+	PERFCTR_DESC(__name, __id, PERF_TYPE_HARDWARE, __en)
+
+/* Initial set of supported counters to be enabled based on provided event names */
+static const struct perfctr_desc perfctr_generic_lt[] = {
+	PERFCTR_DESC_COUNT_HW("cpu_cycles", PERF_COUNT_HW_CPU_CYCLES, 1),
+	PERFCTR_DESC_COUNT_HW("inst_retired", PERF_COUNT_HW_INSTRUCTIONS, 0),
+	PERFCTR_DESC_COUNT_HW("cache_references", PERF_COUNT_HW_CACHE_REFERENCES, 0),
+	PERFCTR_DESC_COUNT_HW("cache_misses", PERF_COUNT_HW_CACHE_MISSES, 0),
+	PERFCTR_DESC_COUNT_HW("branch_retired", PERF_COUNT_HW_BRANCH_INSTRUCTIONS, 0),
+	PERFCTR_DESC_COUNT_HW("branch_mispred", PERF_COUNT_HW_BRANCH_MISSES, 0),
+	PERFCTR_DESC_COUNT_HW("bus_cycles", PERF_COUNT_HW_BUS_CYCLES, 0),
+	PERFCTR_DESC_COUNT_HW("stall_frontend", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, 0),
+	PERFCTR_DESC_COUNT_HW("stall_backend", PERF_COUNT_HW_STALLED_CYCLES_BACKEND, 0),
+};
+
+struct perfctr_event_entry {
+	struct hlist_node node;
+	struct hlist_node group_link;
+	struct perf_event *event;
+	struct perfctr_event_group *group;
+	struct rcu_head rcu_head;
+};
+
+struct perfctr_event_group {
+	struct list_head node;
+	struct hlist_head entries;
+	u64 raw_id;
+};
+
+struct perfctr_pcpu_data {
+	struct hlist_head events;
+};
+
+struct perfctr_core {
+	struct list_head events;
+	struct perfctr_pcpu_data __percpu *pcpu_data;
+	unsigned int nr_events;
+	unsigned int max_nr_events;
+};
+
+static inline void perfctr_show_supported_generic_events(void)
+{
+	int i;
+
+	pr_info("Possible (subject to actual support) generic perf events: ");
+	for (i = 0; i < ARRAY_SIZE(perfctr_generic_lt); ++i)
+		printk(KERN_CONT "%s, ", perfctr_generic_lt[i].name);
+	printk(KERN_CONT "\n");
+}
+
+static void perfctr_event_release_entry(struct perfctr_event_entry *entry);
+
+static int perfctr_event_activate_single(struct perfctr_core *perf_data,
+					 struct perf_event_attr *attr)
+{
+	struct perfctr_event_entry *entry = NULL;
+	struct perfctr_event_group *group;
+	struct hlist_node *next;
+	cpumask_var_t active_mask;
+	int cpu;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&active_mask, GFP_KERNEL)) {
+		kfree(group);
+		return -ENOMEM;
+	}
+
+	group->raw_id = PERF_COUNT_HW_MAX;
+
+	for_each_online_cpu(cpu) {
+		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+		if (!entry)
+			goto activate_failed;
+
+		/* No overflow handler, at least not at this point */
+		entry->event = perf_event_create_kernel_counter(attr, cpu, NULL,
+								NULL, NULL);
+		if (IS_ERR(entry->event)) {
+			pr_err("Failed to create counter id=%llu on cpu%d\n",
+			       attr->config, cpu);
+			kfree(entry);
+			continue;
+		}
+
+		perf_event_enable(entry->event);
+		/*
+		 * the PMU driver might still fail to assign a slot for a given
+		 * counter (@see armpmu_add) which leaves the event ineffective
+		 */
+		if (entry->event->state != PERF_EVENT_STATE_ACTIVE) {
+			pr_err("Failed to enable counter id=%llu on cpu%d\n",
+			       attr->config, cpu);
+			perf_event_disable(entry->event);
+			perf_event_release_kernel(entry->event);
+			kfree(entry);
+			continue;
+		}
+
+		hlist_add_head_rcu(&entry->node,
+				   &per_cpu_ptr(perf_data->pcpu_data, cpu)->events);
+
+		hlist_add_head(&entry->group_link, &group->entries);
+		entry->group = group;
+		cpumask_set_cpu(cpu, active_mask);
+		/* One-time only */
+		if (group->raw_id != PERF_COUNT_HW_MAX)
+			continue;
+		if (attr->type == PERF_TYPE_RAW || !IS_ENABLED(CONFIG_ARM_PMU)) {
+			group->raw_id = attr->config;
+		} else {
+			struct arm_pmu *arm_pmu;
+			/* arm_pmu only for the time being */
+			arm_pmu = to_arm_pmu(entry->event->pmu);
+			/* There needs to be a better way to do this !! */
+			group->raw_id = arm_pmu->map_event(entry->event);
+		}
+	}
+
+	if (hlist_empty(&group->entries)) {
+		/* No CPU could host the event - do not keep an empty group */
+		kfree(group);
+		free_cpumask_var(active_mask);
+		return -EINVAL;
+	}
+
+	list_add_tail(&group->node, &perf_data->events);
+	++perf_data->nr_events;
+
+	pr_info("%s event counter id=%llu activated on cpus=%*pbl\n",
+		attr->type == PERF_TYPE_RAW ? "PMU raw" : "Generic perf",
"PMU raw" : "Generic perf", + attr->config, cpumask_pr_args(active_mask)); + free_cpumask_var(active_mask); + return 0; + +activate_failed: + + hlist_for_each_entry(entry, &group->entries, group_link) { + hlist_del_rcu(&entry->node); + } + synchronize_rcu(); + hlist_for_each_entry_safe(entry, next, &group->entries, group_link) { + hlist_del(&entry->group_link); + perfctr_event_release_entry(entry); + } + kfree(group); + free_cpumask_var(active_mask); + return -ENOMEM; + +} + +/* Lookup match type */ +enum perfctr_match_type { + PERFCTR_MATCH_NAME, + PERFCTR_MATCH_STATUS +}; + +struct perfctr_match { + union { + char *name; /* generic perf hw event name */ + bool status; /* enable by default */ + }; + enum perfctr_match_type type; +}; + +static int perfctr_event_activate(struct perfctr_core *perf_data, + const struct perfctr_match *match) +{ + int result = -EINVAL; + int i; + + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, + }; + + for (i = 0; i < ARRAY_SIZE(perfctr_generic_lt); ++i) { + switch (match->type) { + case PERFCTR_MATCH_NAME: + if (strcmp(match->name, perfctr_generic_lt[i].name)) + continue; + break; + case PERFCTR_MATCH_STATUS: + if (match->status != perfctr_generic_lt[i].default_on) + continue; + else + break; + default: + unreachable(); + } + attr.config = perfctr_generic_lt[i].id; + attr.type = perfctr_generic_lt[i].type; + + result = perfctr_event_activate_single(perf_data, &attr); + if (!result || match->type == PERFCTR_MATCH_NAME) + break; + } + return result; +} + +static void perfctr_event_release_entry(struct perfctr_event_entry *entry) +{ + perf_event_disable(entry->event); + perf_event_release_kernel(entry->event); + kfree(entry); +} + +static void perfctr_events_release_group(struct perfctr_core *perf_data, + struct perfctr_event_group *group) +{ + struct perfctr_event_entry *entry; + struct hlist_node *next; + + hlist_for_each_entry(entry, &group->entries, group_link) { + hlist_del_rcu(&entry->node); + } + synchronize_rcu(); + hlist_for_each_entry_safe(entry, next, &group->entries, group_link) { + hlist_del(&entry->group_link); + perfctr_event_release_entry(entry); + } + list_del(&group->node); + kfree(group); + --perf_data->nr_events; +} + +static void perfctr_events_release(struct perfctr_core *perf_data) +{ + struct perfctr_event_group *group, *next; + + list_for_each_entry_safe(group, next, &perf_data->events, node) { + perfctr_events_release_group(perf_data, group); + } +} + +static void perfctr_sched_switch_probe(void *feature, bool preempt, + struct task_struct *prev, + struct task_struct *next, + unsigned int prev_state) +{ + if (trace_lisa__perf_counter_enabled()) { + struct perfctr_core *perf_data = ((struct feature*)feature)->data; + struct perfctr_event_entry *entry; + struct hlist_head *entry_list; + int cpu = smp_processor_id(); + u64 value = 0; + + entry_list = &per_cpu_ptr(perf_data->pcpu_data, cpu)->events; + + rcu_read_lock(); + hlist_for_each_entry_rcu(entry, entry_list, node) { + /* + * The approach taken is a *semi*-safe one as: + * - the execution context is one as of the caller + * (__schedule) with preemption and interrupts being + * disabled + * - the events being traced are per-cpu ones only + * - kernel counter so no inheritance (no child events) + * - counter is being read on/for a local cpu + */ + struct perf_event *event = entry->event; + + event->pmu->read(event); + value = local64_read(&event->count); + trace_lisa__perf_counter(cpu, entry->group->raw_id, value); + } + 
+		rcu_read_unlock();
+	}
+}
+
+static int perfctr_register_events(struct perfctr_core *perf_data)
+{
+	struct perfctr_match match;
+	unsigned int count;
+	int result = 0;
+
+	count = generic_perf_events_param_count + pmu_raw_counters_param_count;
+	if (count > perf_data->max_nr_events) {
+		pr_err("Requested more than max %u counters\n",
+		       perf_data->max_nr_events);
+		return -EINVAL;
+	}
+
+	count = generic_perf_events_param_count;
+	if (count) {
+		match.type = PERFCTR_MATCH_NAME;
+		for (; count > 0; --count) {
+			match.name = generic_perf_events_param[count - 1];
+			result = perfctr_event_activate(perf_data, &match);
+			if (result) {
+				pr_err("Failed to activate event counter: %s\n",
+				       match.name);
+				perfctr_show_supported_generic_events();
+				goto done;
+			}
+		}
+	}
+
+	count = pmu_raw_counters_param_count;
+	if (count) {
+		struct perf_event_attr attr = {
+			.size = sizeof(struct perf_event_attr),
+			.type = PERF_TYPE_RAW,
+			.pinned = 1,
+			.disabled = 1,
+		};
+
+		for (; count > 0; --count) {
+			struct perfctr_event_group *group;
+			bool duplicate = false;
+
+			attr.config = pmu_raw_counters_param[count - 1];
+			/* Skip duplicates */
+			list_for_each_entry(group, &perf_data->events, node) {
+				if (group->raw_id == attr.config) {
+					duplicate = true;
+					break;
+				}
+			}
+
+			result = duplicate ? 0 : perfctr_event_activate_single(perf_data, &attr);
+			if (result) {
+				pr_err("Failed to activate event counter: %llu\n",
+				       attr.config);
+				goto done;
+			}
+		}
+	}
+done:
+	/* All or nothing ..... */
+	if (result)
+		perfctr_events_release(perf_data);
+	return result;
+}
+
+static void perfctr_pmu_discover(struct perfctr_core *perf_data)
+{
+	struct perf_event *event;
+	cpumask_var_t active_mask;
+	int cpu;
+
+	/*
+	 * This is absolutely loathsome but there seems to be no other way
+	 * to poke the relevant PMU driver for details, so there it is ....
+	 */
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_HARDWARE,
+		.size = sizeof(struct perf_event_attr),
+		.pinned = 1,
+		.disabled = 1,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+
+	perf_data->max_nr_events = MAX_PERF_COUNTERS;
+
+	if (!IS_ENABLED(CONFIG_ARM_PMU))
+		return;
+
+	if (!zalloc_cpumask_var(&active_mask, GFP_KERNEL))
+		return;
+
+	for_each_possible_cpu(cpu) {
+		if (cpumask_test_cpu(cpu, active_mask))
+			continue;
+
+		event = perf_event_create_kernel_counter(&attr, cpu, NULL,
+							 NULL, NULL);
+		if (IS_ERR(event)) {
+			pr_err("Failed to create an event (cpu%d) during discovery\n",
+			       cpu);
+			break;
+		}
+
+		if (event->pmu) {
+			struct arm_pmu *pmu = to_arm_pmu(event->pmu);
+
+			perf_data->max_nr_events = min_t(unsigned int,
+							 perf_data->max_nr_events,
+							 pmu->num_events);
+
+			cpumask_or(active_mask, active_mask, &pmu->supported_cpus);
+		}
+		perf_event_release_kernel(event);
+
+		if (cpumask_equal(active_mask, cpu_possible_mask))
+			break;
+	}
+	free_cpumask_var(active_mask);
+	pr_info("Max of %u PMU counters available on cpus=%*pbl\n",
+		perf_data->max_nr_events, cpumask_pr_args(cpu_possible_mask));
+}
+
+static int perfctr_disable(struct feature *feature);
+
+static int perfctr_enable(struct feature *feature)
+{
+	struct perfctr_core *perf_data;
+
+	if (!IS_ENABLED(CONFIG_HW_PERF_EVENTS)) {
+		pr_err("Missing support for HW performance event counters\n");
+		return 1;
+	}
+
+	perf_data = kzalloc(sizeof(*perf_data), GFP_KERNEL);
+	if (!perf_data)
+		return 1;
+
+	INIT_LIST_HEAD(&perf_data->events);
+
+	feature->data = perf_data;
+
+	perf_data->pcpu_data = alloc_percpu(struct perfctr_pcpu_data);
+	if (!perf_data->pcpu_data)
+		return 1;
+
+	perfctr_pmu_discover(perf_data);
+
+	if (perfctr_register_events(perf_data))
+		return 1;
+
+	if (!perf_data->nr_events)
+		pr_warn("No counters have been activated\n");
+
+	return 0;
+}
+
+static int perfctr_disable(struct feature *feature)
+{
+	struct perfctr_core *perf_data = feature->data;
+
+	if (!perf_data)
+		return 0;
+
+	if (perf_data->pcpu_data) {
+		perfctr_events_release(perf_data);
+		free_percpu(perf_data->pcpu_data);
+	}
+	kfree(perf_data);
+	feature->data = NULL;
+	return 0;
+}
+
+DEFINE_EXTENDED_TP_EVENT_FEATURE(lisa__perf_counter,
+				 sched_switch, perfctr_sched_switch_probe,
+				 perfctr_enable, perfctr_disable);
diff --git a/lisa/_assets/kmodules/lisa/tp.h b/lisa/_assets/kmodules/lisa/tp.h
index 27e54be876e38803bdda9f269582920c5fa0ff99..c92d058e8b6aac7c41878572759dda5e983ecd56 100644
--- a/lisa/_assets/kmodules/lisa/tp.h
+++ b/lisa/_assets/kmodules/lisa/tp.h
@@ -100,15 +100,23 @@ __attribute__((unused)) static struct tracepoint *__find_tracepoint(const char *
 #define DEFINE_TP_FEATURE(feature_name, tp_name, probe) DEFINE_EXTENDED_TP_FEATURE(feature_name, tp_name, probe, NULL, NULL)
 
 #define __EVENT_FEATURE(event_name) event__##event_name
-
 /**
  * DEFINE_TP_EVENT_FEATURE() - Same as DEFINE_TP_FEATURE() with automatic
  * "event__" prefixing of the feature name.
  */
 #define DEFINE_TP_EVENT_FEATURE(event_name, tp_name, probe) DEFINE_TP_FEATURE(__EVENT_FEATURE(event_name), tp_name, probe)
+
+/**
+ * __DEFINE_EXTENDED_TP_EVENT_FEATURE() - Indirection layer over
+ * DEFINE_EXTENDED_TP_FEATURE() which guarantees that __EVENT_FEATURE()
+ * is fully macro-expanded before the feature name is used.
+ */
+#define __DEFINE_EXTENDED_TP_EVENT_FEATURE(feature_name, ...) \
+	DEFINE_EXTENDED_TP_FEATURE(feature_name, ##__VA_ARGS__)
 /**
  * DEFINE_EXTENDED_TP_EVENT_FEATURE() - Same as DEFINE_EXTENDED_TP_FEATURE()
  * with automatic "event__" prefixing of the feature name.
 */
-#define DEFINE_EXTENDED_TP_EVENT_FEATURE(event_name, tp_name, probe, enable_f, disable_f) DEFINE_EXTENDED_TP_FEATURE(__EVENT_FEATURE(event_name), tp_name, probe, enable_f, disable_f)
+#define DEFINE_EXTENDED_TP_EVENT_FEATURE(event_name, tp_name, probe, enable_f, disable_f) \
+	__DEFINE_EXTENDED_TP_EVENT_FEATURE(__EVENT_FEATURE(event_name), tp_name, probe, enable_f, disable_f)
 #endif
diff --git a/lisa/_assets/kmodules/lisa/wq.c b/lisa/_assets/kmodules/lisa/wq.c
index e9cbb54847eff4c16e24c8606dcd505252f0c5b9..f8510021288c242b02bf58eb4e188e40786674bc 100644
--- a/lisa/_assets/kmodules/lisa/wq.c
+++ b/lisa/_assets/kmodules/lisa/wq.c
@@ -22,7 +22,17 @@ static void worker(struct work_struct* work) {
 	queue_delayed_work(item->__wq, &item->__dwork, delay);
 }
 
-struct work_item *start_work(worker_t f, int delay, void *data) {
+static __always_inline void __start_work(struct work_item *item)
+{
+	if (item->__cpu < 0)
+		/* cpu-unbound work - try to use the local CPU */
+		queue_delayed_work(item->__wq, &item->__dwork, item->__delay);
+	else
+		queue_delayed_work_on(item->__cpu, item->__wq, &item->__dwork,
+				      item->__delay);
+}
+
+struct work_item *start_work_on(worker_t f, int delay, int cpu, void *data) {
 	struct work_item *item;
 	struct workqueue_struct *wq = FEATURE(__worqueue)->data;
 	if (!wq)
@@ -33,15 +43,27 @@ struct work_item *start_work(worker_t f, int delay, void *data) {
 	item->f = f;
 	item->data = data;
+	item->__cpu = cpu;
 	item->__delay = delay;
 	item->__wq = wq;
 	INIT_DELAYED_WORK(&item->__dwork, worker);
-	queue_delayed_work(wq, &item->__dwork, delay);
+	__start_work(item);
 	}
 	return item;
 }
 
+void restart_work(struct work_item *item, int delay)
+{
+	struct workqueue_struct *wq = FEATURE(__worqueue)->data;
+
+	if (!wq || !item)
+		return;
+
+	item->__delay = delay;
+	__start_work(item);
+}
+
 int destroy_work(struct work_item *item) {
 	if (item) {
 		cancel_delayed_work_sync(&item->__dwork);
diff --git a/lisa/_assets/kmodules/lisa/wq.h b/lisa/_assets/kmodules/lisa/wq.h
index 42fc5e0ea0f7aac8b27d99fcc320da7c58d831cf..07269d29ebf062a89b942a24ea36b0a1872a4a7f 100644
--- a/lisa/_assets/kmodules/lisa/wq.h
+++ b/lisa/_assets/kmodules/lisa/wq.h
@@ -36,6 +36,8 @@ struct work_item {
 	worker_t f;
 	void *data;
 
+	/* CPU to queue the work on (-1 for cpu-unbound) */
+	int __cpu;
 	/* Workqueue the item got scheduled on */
 	struct workqueue_struct *__wq;
 	/* Delayed work from kernel workqueue API */
@@ -44,6 +46,20 @@ struct work_item {
 	int __delay;
 };
 
+/**
+ * start_work_on() - Start a worker on a workqueue
+ * @f: User function of the worker.
+ * @delay: An amount of time (in jiffies) to wait before queueing the work
+ * @cpu: CPU to queue the work on (-1 for cpu-unbound)
+ * @data: void * passed to f()
+ *
+ * Context: The __workqueue feature must be enabled using
+ * ENABLE_FEATURE(__workqueue) before starting any work.
+ *
+ * Return struct work_item* to be passed to destroy_work().
+ */
+struct work_item *start_work_on(worker_t f, int delay, int cpu, void *data);
+
 /**
  * start_work() - Start a worker on a workqueue
  * @f: User function of the worker.
@@ -54,7 +70,21 @@ struct work_item {
 *
 * Return struct work_item* to be passed to destroy_work().
 */
-struct work_item *start_work(worker_t f, int delay, void *data);
+static __always_inline
+struct work_item *start_work(worker_t f, int delay, void *data)
+{
+	return start_work_on(f, delay, -1, data);
+}
+
+/**
+ * restart_work() - Re-queue an existing worker
+ * @wi: An existing struct work_item instance to queue
+ * @delay: An amount of time (in jiffies) to wait before queueing the work
+ *
+ * Context: @wi must have been fully initialised, i.e. returned by
+ * start_work() or start_work_on(), before it can be re-queued.
+ */
+void restart_work(struct work_item *wi, int delay);
 
 /**
  * destroy_work() - Stop a work item and deallocate it.
diff --git a/tools/kmodules/lisa/perf_counters.c b/tools/kmodules/lisa/perf_counters.c
new file mode 120000
index 0000000000000000000000000000000000000000..8bc3d0ed1c5d78e3f6f4cd6de746b9397f810415
--- /dev/null
+++ b/tools/kmodules/lisa/perf_counters.c
@@ -0,0 +1 @@
+./../../../lisa/_assets/kmodules/lisa/perf_counters.c
\ No newline at end of file