Commit 24198ad3 authored by Marco Elver, committed by Peter Zijlstra

perf/hw_breakpoint: Remove useless code related to flexible breakpoints

Flexible breakpoints have never been implemented: bp_cpuinfo::flexible
is always 0. Unfortunately, they still occupy 4 bytes in each
bp_cpuinfo and bp_busy_slots, and the max flexible count is still
computed in fetch_bp_busy_slots().

This again causes suboptimal code generation, since we always know that
`!!slots.flexible` will be 0.

Just get rid of the flexible "placeholder" and remove all real code
related to it. Make a note in the comment describing the constraints
algorithm, but don't remove flexible breakpoints from the algorithm
itself, so that if they ever need to be supported in the future it
should be trivial to revive them (along with reverting this change).
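
For illustration, the resulting simplification of the slot check in
__reserve_bp_slot(), excerpted and condensed from the diff below:

  /* Before: slots.flexible is always 0, so !!slots.flexible is dead weight. */
  if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
          return -ENOSPC;

  /* After: only the max pinned count plus this breakpoint's weight matters. */
  max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
  if (max_pinned_slots > hw_breakpoint_slots_cached(type))
          return -ENOSPC;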
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20220829124719.675715-9-elver@google.com
parent 9caf87be
@@ -45,8 +45,6 @@ struct bp_cpuinfo {
 #else
 	unsigned int	*tsk_pinned;
 #endif
-	/* Number of non-pinned cpu/task breakpoints in a cpu */
-	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 };
 
 static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
@@ -67,12 +65,6 @@ static const struct rhashtable_params task_bps_ht_params = {
 
 static bool constraints_initialized __ro_after_init;
 
-/* Gather the number of total pinned and un-pinned bp in a cpuset */
-struct bp_busy_slots {
-	unsigned int pinned;
-	unsigned int flexible;
-};
-
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
@@ -190,14 +182,14 @@ static const struct cpumask *cpumask_of_bp(struct perf_event *bp)
 }
 
 /*
- * Report the number of pinned/un-pinned breakpoints we have in
- * a given cpu (cpu > -1) or in all of them (cpu = -1).
+ * Returns the max pinned breakpoint slots in a given
+ * CPU (cpu > -1) or across all of them (cpu = -1).
  */
-static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
-		    enum bp_type_idx type)
+static int
+max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
 {
 	const struct cpumask *cpumask = cpumask_of_bp(bp);
+	int pinned_slots = 0;
 	int cpu;
 
 	for_each_cpu(cpu, cpumask) {
@@ -210,24 +202,10 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 		else
 			nr += task_bp_pinned(cpu, bp, type);
 
-		if (nr > slots->pinned)
-			slots->pinned = nr;
-
-		nr = info->flexible;
-		if (nr > slots->flexible)
-			slots->flexible = nr;
+		pinned_slots = max(nr, pinned_slots);
 	}
-}
 
-/*
- * For now, continue to consider flexible as pinned, until we can
- * ensure no flexible event can ever be scheduled before a pinned event
- * in a same cpu.
- */
-static void
-fetch_this_slot(struct bp_busy_slots *slots, int weight)
-{
-	slots->pinned += weight;
+	return pinned_slots;
 }
 
 /*
@@ -298,7 +276,12 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 }
 
 /*
- * Constraints to check before allowing this new breakpoint counter:
+ * Constraints to check before allowing this new breakpoint counter.
+ *
+ * Note: Flexible breakpoints are currently unimplemented, but outlined in the
+ * below algorithm for completeness. The implementation treats flexible as
+ * pinned due to no guarantee that we currently always schedule flexible events
+ * before a pinned event in a same CPU.
  *
  * == Non-pinned counter == (Considered as pinned for now)
  *
@@ -340,8 +323,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
  */
 static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 {
-	struct bp_busy_slots slots = {0};
 	enum bp_type_idx type;
+	int max_pinned_slots;
 	int weight;
 	int ret;
 
@@ -357,15 +340,9 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 	type = find_slot_idx(bp_type);
 	weight = hw_breakpoint_weight(bp);
 
-	fetch_bp_busy_slots(&slots, bp, type);
-	/*
-	 * Simulate the addition of this breakpoint to the constraints
-	 * and see the result.
-	 */
-	fetch_this_slot(&slots, weight);
-
-	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
+	/* Check if this new breakpoint can be satisfied across all CPUs. */
+	max_pinned_slots = max_bp_pinned_slots(bp, type) + weight;
+	if (max_pinned_slots > hw_breakpoint_slots_cached(type))
 		return -ENOSPC;
 
 	ret = arch_reserve_bp_slot(bp);
...