mirror-linux/drivers/perf/arm_cspmu/nvidia_cspmu.c

1075 lines
30 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
*/
/* Support for NVIDIA specific attributes. */
#include <linux/io.h>
#include <linux/module.h>
#include <linux/property.h>
#include <linux/topology.h>
#include "arm_cspmu.h"
/*
 * PCIE PMU: the config1 filter carries one bit per root port
 * (see pcie_pmu_format_attrs, "root_port").
 */
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
/* NVLink-C2C PMUs: the config1 filter carries one bit per port. */
#define NV_NVL_C2C_PORT_COUNT 2ULL
#define NV_NVL_C2C_FILTER_ID_MASK GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
/* CNVLink PMU: the config1 filter carries one bit per remote socket. */
#define NV_CNVL_PORT_COUNT 4ULL
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
/*
 * UCF PMU: config1 holds a source selection in bits 2:0 and a destination
 * selection in bits 11:8. The default selects every source and destination.
 */
#define NV_UCF_SRC_COUNT 3ULL
#define NV_UCF_DST_COUNT 4ULL
#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(11, 0)
#define NV_UCF_FILTER_SRC GENMASK_ULL(2, 0)
#define NV_UCF_FILTER_DST GENMASK_ULL(11, 8)
#define NV_UCF_FILTER_DEFAULT (NV_UCF_FILTER_SRC | NV_UCF_FILTER_DST)
/*
 * PCIE v2 PMU, first filter (config1): root port mask in bits 7:0, BDF value
 * in bits 23:8 and BDF enable in bit 24. NV_PCIE_V2_FILTER_BDF_VAL_EN covers
 * the BDF value together with its enable bit. The default selects all ports.
 */
#define NV_PCIE_V2_PORT_COUNT 8ULL
#define NV_PCIE_V2_FILTER_ID_MASK GENMASK_ULL(24, 0)
#define NV_PCIE_V2_FILTER_PORT GENMASK_ULL(NV_PCIE_V2_PORT_COUNT - 1, 0)
#define NV_PCIE_V2_FILTER_BDF_VAL GENMASK_ULL(23, NV_PCIE_V2_PORT_COUNT)
#define NV_PCIE_V2_FILTER_BDF_EN BIT(24)
#define NV_PCIE_V2_FILTER_BDF_VAL_EN GENMASK_ULL(24, NV_PCIE_V2_PORT_COUNT)
#define NV_PCIE_V2_FILTER_DEFAULT NV_PCIE_V2_FILTER_PORT
/*
 * PCIE v2 PMU, second filter (config2): one bit per destination in bits 4:0.
 * The default selects every destination.
 */
#define NV_PCIE_V2_DST_COUNT 5ULL
#define NV_PCIE_V2_FILTER2_ID_MASK GENMASK_ULL(4, 0)
#define NV_PCIE_V2_FILTER2_DST GENMASK_ULL(NV_PCIE_V2_DST_COUNT - 1, 0)
#define NV_PCIE_V2_FILTER2_DEFAULT NV_PCIE_V2_FILTER2_DST
/*
 * PCIE-TGT PMU. The low NV_PCIE_TGT_EV_TYPE_COUNT bits of perf "config" hold
 * the event type; NV_PCIE_TGT_EV_TYPE_CC is the type treated as the cycle
 * counter. The bits above the event type are the user's filter request, which
 * pcie_tgt_pmu_event_filter() shifts down and masks with
 * NV_PCIE_TGT_FILTER2_MASK: port mask in bits 7:0 (..._FILTER2_PORT) and the
 * address-filter enable in bit 8 (..._FILTER2_ADDR_EN).
 *
 * NV_PCIE_TGT_FILTER2_ADDR is a different layout: it is the field used when
 * building the value written to PMEVFILT2R, where bits 15:8 select an address
 * filter slot (see pcie_tgt_pmu_set_ev_filter()).
 */
#define NV_PCIE_TGT_PORT_COUNT 8ULL
#define NV_PCIE_TGT_EV_TYPE_CC 0x4
#define NV_PCIE_TGT_EV_TYPE_COUNT 3ULL
#define NV_PCIE_TGT_EV_TYPE_MASK GENMASK_ULL(NV_PCIE_TGT_EV_TYPE_COUNT - 1, 0)
#define NV_PCIE_TGT_FILTER2_MASK GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT, 0)
#define NV_PCIE_TGT_FILTER2_PORT GENMASK_ULL(NV_PCIE_TGT_PORT_COUNT - 1, 0)
#define NV_PCIE_TGT_FILTER2_ADDR_EN BIT(NV_PCIE_TGT_PORT_COUNT)
#define NV_PCIE_TGT_FILTER2_ADDR GENMASK_ULL(15, NV_PCIE_TGT_PORT_COUNT)
#define NV_PCIE_TGT_FILTER2_DEFAULT NV_PCIE_TGT_FILTER2_PORT
/*
 * PCIE-TGT address filter slots. Slot idx lives at
 * addr_filter_reg + idx * NV_PCIE_TGT_ADDR_STRIDE, and the offsets below are
 * added to that slot address (see pcie_tgt_pmu_config_addr_filter()).
 */
#define NV_PCIE_TGT_ADDR_COUNT 8ULL
#define NV_PCIE_TGT_ADDR_STRIDE 20
#define NV_PCIE_TGT_ADDR_CTRL 0xD38
#define NV_PCIE_TGT_ADDR_BASE_LO 0xD3C
#define NV_PCIE_TGT_ADDR_BASE_HI 0xD40
#define NV_PCIE_TGT_ADDR_MASK_LO 0xD44
#define NV_PCIE_TGT_ADDR_MASK_HI 0xD48
/* Generic (unrecognised) NVIDIA PMU: all 32 filter bits are passed through. */
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
/* PMIIDR fields compared against nv_cspmu_match[].prodid when matching. */
#define NV_PRODID_MASK (PMIIDR_PRODUCTID | PMIIDR_VARIANT | PMIIDR_REVISION)
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define NV_FORMAT_NAME_GENERIC 0
/*
 * Return the NVIDIA-specific context stored in an arm_cspmu's impl.ctx.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "base + i") can be passed, not only a plain identifier.
 */
#define to_nv_cspmu_ctx(cspmu) ((struct nv_cspmu_ctx *)((cspmu)->impl.ctx))
/*
 * Build one event attribute named <_pref><_num><_suff> with event ID _config.
 * The ID expression is parenthesized before it is handed to
 * ARM_CSPMU_EVENT_ATTR so the result does not depend on how that macro
 * uses its argument (e.g. a cast applied to an unparenthesized "x + 1").
 */
#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config)	\
	ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, (_config))

/*
 * Expand to four comma-separated event attributes
 * <_pref>_0_<_suff> .. <_pref>_3_<_suff> with consecutive event IDs
 * _config, _config + 1, _config + 2 and _config + 3.
 */
#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config)			\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, (_config)),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, (_config) + 1),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, (_config) + 2),	\
	NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, (_config) + 3)
/*
 * Per-PMU NVIDIA context, stored in arm_cspmu's impl.ctx. It is filled from
 * the matching nv_cspmu_match template in nv_cspmu_init_ops().
 */
struct nv_cspmu_ctx {
/* PMU name produced by nv_cspmu_format_name(); may be NULL on failure. */
const char *name;
/* NULL-terminated event attribute table returned by get_event_attrs. */
struct attribute **event_attr;
/* NULL-terminated format attribute table returned by get_format_attrs. */
struct attribute **format_attr;
/* Bits of perf config1 accepted as the first filter. */
u32 filter_mask;
/* First filter value used when the masked config1 is zero. */
u32 filter_default_val;
/* Bits of perf config2 accepted as the second filter. */
u32 filter2_mask;
/* Second filter value used when the masked config2 is zero. */
u32 filter2_default_val;
/* Computes the PMEVFILTR value; the register is not written if NULL. */
u32 (*get_filter)(const struct perf_event *event);
/* Computes the PMEVFILT2R value; the register is not written if NULL. */
u32 (*get_filter2)(const struct perf_event *event);
/* PMU-specific private data (set by init_data, e.g. pcie_tgt_init_data). */
void *data;
/* Optional hook called last in nv_cspmu_init_ops(); returns 0 or -errno. */
int (*init_data)(struct arm_cspmu *cspmu);
};
/*
 * Event list for the SCF PMU (used by the "nvidia_scf_pmu_%u" match entry).
 * Each entry pairs an event name with its event ID. Every
 * NV_CSPMU_EVENT_ATTR_4(socket, x, id) line expands to the four events
 * socket_0_x .. socket_3_x with IDs id .. id + 3. NULL-terminated.
 */
static struct attribute *scf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1d),
ARM_CSPMU_EVENT_ATTR(scf_cache_allocate, 0xF0),
ARM_CSPMU_EVENT_ATTR(scf_cache_refill, 0xF1),
ARM_CSPMU_EVENT_ATTR(scf_cache, 0xF2),
ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),
NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),
ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding, 0x1a3),
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access, 0x1a4),
ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),
ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),
ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
/*
 * Event list shared by the PCIE, NVLink-C2C0/C2C1 and CNVLink match entries
 * in nv_cspmu_match. Events come in local ("_loc") / remote ("_rem") pairs
 * with consecutive IDs. NULL-terminated.
 */
static struct attribute *mcf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
ARM_CSPMU_EVENT_ATTR(rd_bytes_rem, 0x1),
ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
ARM_CSPMU_EVENT_ATTR(total_bytes_loc, 0x4),
ARM_CSPMU_EVENT_ATTR(total_bytes_rem, 0x5),
ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
ARM_CSPMU_EVENT_ATTR(rd_req_rem, 0x7),
ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
ARM_CSPMU_EVENT_ATTR(total_req_loc, 0xa),
ARM_CSPMU_EVENT_ATTR(total_req_rem, 0xb),
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem, 0xd),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
/*
 * Event list for the UCF PMU (used by the "nvidia_ucf_pmu_%u" match entry).
 * Note the entries are not sorted by event ID. NULL-terminated.
 */
static struct attribute *ucf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1D),
ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xF0),
ARM_CSPMU_EVENT_ATTR(slc_wb, 0xF3),
ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109),
ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10A),
ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119),
ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183),
ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184),
ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111),
ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112),
ARM_CSPMU_EVENT_ATTR(slc_bytes_rd, 0x113),
ARM_CSPMU_EVENT_ATTR(slc_bytes_wr, 0x114),
ARM_CSPMU_EVENT_ATTR(mem_access_rd, 0x121),
ARM_CSPMU_EVENT_ATTR(mem_access_wr, 0x122),
ARM_CSPMU_EVENT_ATTR(mem_bytes_rd, 0x123),
ARM_CSPMU_EVENT_ATTR(mem_bytes_wr, 0x124),
ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180),
ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181),
ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL
};
/*
 * Event list for the PCIE v2 PMU (used by the "nvidia_pcie_pmu_%u_rc_%u"
 * match entry). NULL-terminated.
 */
static struct attribute *pcie_v2_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
ARM_CSPMU_EVENT_ATTR(rd_cum_outs, 0x4),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL
};
/*
 * Event list for the PCIE-TGT PMU (used by the
 * "nvidia_pcie_tgt_pmu_%u_rc_%u" match entry). Unlike the other tables,
 * "cycles" uses NV_PCIE_TGT_EV_TYPE_CC, the value that
 * pcie_tgt_pmu_is_cycle_counter_event() tests for. NULL-terminated.
 */
static struct attribute *pcie_tgt_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(rd_bytes, 0x0),
ARM_CSPMU_EVENT_ATTR(wr_bytes, 0x1),
ARM_CSPMU_EVENT_ATTR(rd_req, 0x2),
ARM_CSPMU_EVENT_ATTR(wr_req, 0x3),
ARM_CSPMU_EVENT_ATTR(cycles, NV_PCIE_TGT_EV_TYPE_CC),
NULL
};
/*
 * Event list for the fallback "nvidia_uncore_pmu_%u" match entry: only the
 * cycles event is named. NULL-terminated.
 */
static struct attribute *generic_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
/* SCF PMU format attributes: event field only, no filter fields. */
static struct attribute *scf_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
NULL,
};
/*
 * PCIE PMU format attributes: event field plus a 10-bit "root_port" field in
 * config1, matching NV_PCIE_FILTER_ID_MASK.
 */
static struct attribute *pcie_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
NULL,
};
/*
 * NVLink-C2C PMU format attributes: event field plus a 2-bit "port" field in
 * config1, matching NV_NVL_C2C_FILTER_ID_MASK.
 */
static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(port, "config1:0-1"),
NULL,
};
/*
 * CNVLink PMU format attributes: event field plus a 4-bit "rem_socket" field
 * in config1, matching NV_CNVL_FILTER_ID_MASK.
 */
static struct attribute *cnvlink_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
NULL,
};
/*
 * UCF PMU format attributes: event field plus single-bit source selectors in
 * config1 bits 0-2 (NV_UCF_FILTER_SRC) and destination selectors in config1
 * bits 8-11 (NV_UCF_FILTER_DST).
 */
static struct attribute *ucf_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"),
ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"),
ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config1:8"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config1:9"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_other, "config1:10"),
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:11"),
NULL
};
/*
 * PCIE v2 PMU format attributes. config1 carries the source filter
 * (root port mask, BDF value, BDF enable; see the NV_PCIE_V2_FILTER_*
 * masks) and config2 carries one bit per destination
 * (NV_PCIE_V2_FILTER2_DST).
 */
static struct attribute *pcie_v2_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(src_rp_mask, "config1:0-7"),
ARM_CSPMU_FORMAT_ATTR(src_bdf, "config1:8-23"),
ARM_CSPMU_FORMAT_ATTR(src_bdf_en, "config1:24"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_cmem, "config2:0"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_gmem, "config2:1"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_p2p, "config2:2"),
ARM_CSPMU_FORMAT_ATTR(dst_loc_pcie_cxl, "config2:3"),
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config2:4"),
NULL
};
/*
 * PCIE-TGT PMU format attributes. This PMU packs everything except the
 * address range into "config": event type in bits 0-2, root port mask in
 * bits 3-10 and address-filter enable in bit 11 (decoded by
 * pcie_tgt_pmu_event_type() and pcie_tgt_pmu_event_filter()). config1 and
 * config2 hold the full 64-bit address base and mask.
 */
static struct attribute *pcie_tgt_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_ATTR(event, "config:0-2"),
ARM_CSPMU_FORMAT_ATTR(dst_rp_mask, "config:3-10"),
ARM_CSPMU_FORMAT_ATTR(dst_addr_en, "config:11"),
ARM_CSPMU_FORMAT_ATTR(dst_addr_base, "config1:0-63"),
ARM_CSPMU_FORMAT_ATTR(dst_addr_mask, "config2:0-63"),
NULL
};
/*
 * Format attributes for the fallback generic PMU: the event field plus the
 * filter and filter2 fields provided by the common arm_cspmu macros.
 */
static struct attribute *generic_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
ARM_CSPMU_FORMAT_FILTER2_ATTR,
NULL,
};
static struct attribute **
nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
{
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
return ctx->event_attr;
}
static struct attribute **
nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
{
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
return ctx->format_attr;
}
static const char *
nv_cspmu_get_name(const struct arm_cspmu *cspmu)
{
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
return ctx->name;
}
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
/*
 * Read the "instance_id" u32 firmware property of the ACPI device associated
 * with @cspmu into @id.
 *
 * Returns 0 on success, -ENODEV if no ACPI device is found, or the error
 * from fwnode_property_read_u32() (which is also logged).
 */
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
{
	struct acpi_device *acpi_dev = arm_cspmu_acpi_dev_get(cspmu);
	int err;

	if (!acpi_dev)
		return -ENODEV;

	err = fwnode_property_read_u32(acpi_fwnode_handle(acpi_dev),
				       "instance_id", id);
	if (err)
		dev_err(cspmu->dev, "Failed to get instance ID\n");

	/* Pairs with arm_cspmu_acpi_dev_get() above. */
	acpi_dev_put(acpi_dev);

	return err;
}
#else
/* Instance IDs are only looked up when built with ACPI on arm64. */
static int nv_cspmu_get_inst_id(const struct arm_cspmu *cspmu, u32 *id)
{
	return -EINVAL;
}
#endif
/*
 * Default first-filter computation: config1 masked with the PMU's
 * filter_mask, or filter_default_val when no accepted bit is set.
 */
static u32 nv_cspmu_event_filter(const struct perf_event *event)
{
	const struct nv_cspmu_ctx *ctx =
		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
	const u32 requested = event->attr.config1 & ctx->filter_mask;

	return requested ? requested : ctx->filter_default_val;
}
/*
 * Default second-filter computation: config2 masked with the PMU's
 * filter2_mask, or filter2_default_val when no accepted bit is set.
 */
static u32 nv_cspmu_event_filter2(const struct perf_event *event)
{
	const struct nv_cspmu_ctx *ctx =
		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
	const u32 requested = event->attr.config2 & ctx->filter2_mask;

	return requested ? requested : ctx->filter2_default_val;
}
/*
 * Program the per-counter filter registers for @event. Each of PMEVFILTR and
 * PMEVFILT2R is written only if the PMU context provides the corresponding
 * get_filter / get_filter2 callback. Registers are 4 bytes apart per counter.
 */
static void nv_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
				   const struct perf_event *event)
{
	const struct nv_cspmu_ctx *ctx =
		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
	const u32 ctr_off = 4 * event->hw.idx;
	u32 val;

	if (ctx->get_filter) {
		val = ctx->get_filter(event);
		writel(val, cspmu->base0 + PMEVFILTR + ctr_off);
	}

	if (ctx->get_filter2) {
		val = ctx->get_filter2(event);
		writel(val, cspmu->base0 + PMEVFILT2R + ctr_off);
	}
}
static void nv_cspmu_reset_ev_filter(struct arm_cspmu *cspmu,
const struct perf_event *event)
{
const struct nv_cspmu_ctx *ctx =
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
const u32 offset = 4 * event->hw.idx;
if (ctx->get_filter)
writel(0, cspmu->base0 + PMEVFILTR + offset);
if (ctx->get_filter2)
writel(0, cspmu->base0 + PMEVFILT2R + offset);
}
/*
 * Program the cycle counter filter register (PMCCFILTR) with the value
 * computed by nv_cspmu_event_filter() for @event.
 */
static void nv_cspmu_set_cc_filter(struct arm_cspmu *cspmu,
				   const struct perf_event *event)
{
	const u32 cc_filter = nv_cspmu_event_filter(event);

	writel(cc_filter, cspmu->base0 + PMCCFILTR);
}
/*
 * UCF first-filter computation. Starts from nv_cspmu_event_filter() and
 * treats the source and destination fields independently: a field left
 * empty by the user is replaced by "all sources" / "all destinations".
 */
static u32 ucf_pmu_event_filter(const struct perf_event *event)
{
	const u32 requested = nv_cspmu_event_filter(event);
	u32 src_sel = FIELD_GET(NV_UCF_FILTER_SRC, requested);
	u32 dst_sel = FIELD_GET(NV_UCF_FILTER_DST, requested);

	/* No source selected: monitor every source. */
	if (!src_sel)
		src_sel = GENMASK_ULL(NV_UCF_SRC_COUNT - 1, 0);

	/* No destination selected: monitor every destination. */
	if (!dst_sel)
		dst_sel = GENMASK_ULL(NV_UCF_DST_COUNT - 1, 0);

	return FIELD_PREP(NV_UCF_FILTER_SRC, src_sel) |
	       FIELD_PREP(NV_UCF_FILTER_DST, dst_sel);
}
/*
 * Extract the BDF part of a PCIE v2 filter value.
 *
 * Returns the BDF value together with its enable bit when BDF filtering is
 * enabled in @filter, and 0 otherwise (a BDF value without the enable bit is
 * ignored).
 */
static u32 pcie_v2_pmu_bdf_val_en(u32 filter)
{
	if (!FIELD_GET(NV_PCIE_V2_FILTER_BDF_EN, filter))
		return 0;

	return FIELD_GET(NV_PCIE_V2_FILTER_BDF_VAL_EN, filter);
}
/*
 * PCIE v2 first-filter computation, in order of precedence:
 *   1. the event's own masked config1, if non-zero;
 *   2. for a group sibling, the leader's filter if the leader has BDF
 *      filtering enabled;
 *   3. the PMU's default filter value.
 */
static u32 pcie_v2_pmu_event_filter(const struct perf_event *event)
{
	const struct nv_cspmu_ctx *ctx =
		to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
	const u32 own_filter = event->attr.config1 & ctx->filter_mask;
	struct perf_event *leader = event->group_leader;

	if (own_filter)
		return own_filter;

	if (leader != event) {
		const u32 inherited = pcie_v2_pmu_event_filter(leader);

		if (pcie_v2_pmu_bdf_val_en(inherited))
			return inherited;
	}

	return ctx->filter_default_val;
}
/*
 * Validate the filter settings of a new PCIE v2 event.
 *
 * The PCIE-SRC PMU only supports one common BDF filter setting for all of
 * its counters, so the new event must agree with its group leader and with
 * the events that already occupy a counter. Cycle counter events are always
 * accepted.
 *
 * Returns 0 if the event is acceptable, -EINVAL otherwise.
 */
static int pcie_v2_pmu_validate_event(struct arm_cspmu *cspmu,
				      struct perf_event *new_ev)
{
	u32 ev_filter, ev_bdf, grp_bdf, active_bdf;
	struct perf_event *active_leader;
	int first_used;

	if (cspmu->impl.ops.is_cycle_counter_event(new_ev))
		return 0;

	ev_filter = pcie_v2_pmu_event_filter(new_ev);
	ev_bdf = pcie_v2_pmu_bdf_val_en(ev_filter);
	grp_bdf = pcie_v2_pmu_bdf_val_en(
			pcie_v2_pmu_event_filter(new_ev->group_leader));

	if (FIELD_GET(NV_PCIE_V2_FILTER_PORT, ev_filter) != 0 && ev_bdf != 0) {
		dev_err(cspmu->dev,
			"RP and BDF filtering are mutually exclusive\n");
		return -EINVAL;
	}

	if (ev_bdf != grp_bdf) {
		dev_err(cspmu->dev,
			"sibling and leader BDF value should be equal\n");
		return -EINVAL;
	}

	/* Compare against the first counter already in use, if there is one. */
	first_used = find_first_bit(cspmu->hw_events.used_ctrs,
				    cspmu->cycle_counter_logical_idx);
	if (first_used == cspmu->cycle_counter_logical_idx)
		return 0;

	active_leader = cspmu->hw_events.events[first_used]->group_leader;
	active_bdf = pcie_v2_pmu_bdf_val_en(
			pcie_v2_pmu_event_filter(active_leader));
	if (grp_bdf != active_bdf) {
		dev_err(cspmu->dev, "only one BDF value is supported\n");
		return -EINVAL;
	}

	return 0;
}
/*
 * Software state of one PCIE-TGT address filter slot. A slot is free while
 * refcount is zero; base and mask are cleared when the last user goes away
 * (see pcie_tgt_pmu_config_addr_filter()).
 */
struct pcie_tgt_addr_filter {
/* Number of events currently using this slot. */
u32 refcount;
/* Address base programmed into the slot. */
u64 base;
/* Address mask programmed into the slot. */
u64 mask;
};
/* PCIE-TGT private data, stored in nv_cspmu_ctx.data by pcie_tgt_init_data(). */
struct pcie_tgt_data {
/* Software shadow of the address filter slots. */
struct pcie_tgt_addr_filter addr_filter[NV_PCIE_TGT_ADDR_COUNT];
/* Mapped register region that the NV_PCIE_TGT_ADDR_* offsets apply to. */
void __iomem *addr_filter_reg;
};
#if defined(CONFIG_ACPI) && defined(CONFIG_ARM64)
/*
 * Allocate the PCIE-TGT private data and map the address filter registers.
 *
 * The register region is the first memory resource of the ACPI device
 * associated with @cspmu. The private data is published in the NVIDIA
 * context only once the mapping has succeeded.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -ENODEV if there is
 * no ACPI device or no memory resource, or the error reported while
 * collecting or mapping the resource.
 */
static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
{
	struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
	struct device *dev = cspmu->dev;
	struct resource_entry *rentry;
	struct list_head resource_list;
	struct pcie_tgt_data *data;
	struct acpi_device *adev;
	void __iomem *reg;
	int ret;

	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	adev = arm_cspmu_acpi_dev_get(cspmu);
	if (!adev) {
		dev_err(dev, "failed to get associated PCIE-TGT device\n");
		return -ENODEV;
	}

	INIT_LIST_HEAD(&resource_list);
	ret = acpi_dev_get_memory_resources(adev, &resource_list);
	if (ret < 0) {
		dev_err(dev, "failed to get PCIE-TGT device memory resources\n");
		goto out_put_adev;
	}

	rentry = list_first_entry_or_null(&resource_list,
					  struct resource_entry, node);
	if (!rentry) {
		/*
		 * Without a memory resource there is nothing to map; fail
		 * here instead of leaving a NULL register pointer behind.
		 */
		dev_err(dev, "failed to get address filter resource\n");
		ret = -ENODEV;
		goto out_free_list;
	}

	reg = devm_ioremap_resource(dev, rentry->res);
	if (IS_ERR(reg)) {
		dev_err(dev, "failed to get address filter resource\n");
		ret = PTR_ERR(reg);
		goto out_free_list;
	}

	data->addr_filter_reg = reg;
	ctx->data = data;
	ret = 0;

out_free_list:
	acpi_dev_free_resource_list(&resource_list);
out_put_adev:
	acpi_dev_put(adev);
	return ret;
}
#else
/* The PCIE-TGT register region is only discoverable with ACPI on arm64. */
static int pcie_tgt_init_data(struct arm_cspmu *cspmu)
{
	return -ENODEV;
}
#endif
static struct pcie_tgt_data *pcie_tgt_get_data(struct arm_cspmu *cspmu)
{
struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
return ctx->data;
}
/*
 * Find the address filter slot to use for the (@base, @mask) pair.
 *
 * An in-use slot (refcount > 0) that already holds the same base and mask is
 * always preferred, so that events with identical filters share one slot.
 * Free slots are never compared by content: their base/mask are zero and
 * would otherwise be mistaken for a live "base = 0, mask = 0" filter.
 *
 * @is_reset: true when looking up the slot of an event being torn down; in
 *            that case only an in-use matching slot is acceptable. When
 *            false, the first free slot is returned if nothing matches.
 *
 * Returns the slot index, or -ENODEV if no suitable slot exists.
 */
static int pcie_tgt_find_addr_idx(struct arm_cspmu *cspmu, u64 base, u64 mask,
				  bool is_reset)
{
	const struct pcie_tgt_data *data = pcie_tgt_get_data(cspmu);
	int first_free = -ENODEV;
	int i;

	for (i = 0; i < NV_PCIE_TGT_ADDR_COUNT; i++) {
		const struct pcie_tgt_addr_filter *slot = &data->addr_filter[i];

		if (slot->refcount == 0) {
			/* Remember only the first free slot, for allocation. */
			if (!is_reset && first_free < 0)
				first_free = i;
			continue;
		}

		if (slot->base == base && slot->mask == mask)
			return i;
	}

	return first_free;
}
/*
 * Extract the PCIE-TGT filter request from perf "config": the bits above
 * the event type field, limited to NV_PCIE_TGT_FILTER2_MASK.
 */
static u32 pcie_tgt_pmu_event_filter(const struct perf_event *event)
{
	return (event->attr.config >> NV_PCIE_TGT_EV_TYPE_COUNT) &
	       NV_PCIE_TGT_FILTER2_MASK;
}
/* Tell whether @event requests address filtering. */
static bool pcie_tgt_pmu_addr_en(const struct perf_event *event)
{
	const u32 request = pcie_tgt_pmu_event_filter(event);

	return FIELD_GET(NV_PCIE_TGT_FILTER2_ADDR_EN, request) != 0;
}
/* Return the root port mask requested by @event. */
static u32 pcie_tgt_pmu_port_filter(const struct perf_event *event)
{
	const u32 request = pcie_tgt_pmu_event_filter(event);

	return FIELD_GET(NV_PCIE_TGT_FILTER2_PORT, request);
}
/* Return the address filter base requested by @event (perf config1). */
static u64 pcie_tgt_pmu_dst_addr_base(const struct perf_event *event)
{
return event->attr.config1;
}
/* Return the address filter mask requested by @event (perf config2). */
static u64 pcie_tgt_pmu_dst_addr_mask(const struct perf_event *event)
{
return event->attr.config2;
}
/*
 * Validate a new PCIE-TGT event. Events without address filtering are always
 * accepted; otherwise an address filter slot must be obtainable for the
 * requested base/mask pair.
 *
 * Returns 0 if the event is acceptable, -EINVAL if no slot can be found.
 */
static int pcie_tgt_pmu_validate_event(struct arm_cspmu *cspmu,
				       struct perf_event *new_ev)
{
	int slot;

	if (!pcie_tgt_pmu_addr_en(new_ev))
		return 0;

	slot = pcie_tgt_find_addr_idx(cspmu,
				      pcie_tgt_pmu_dst_addr_base(new_ev),
				      pcie_tgt_pmu_dst_addr_mask(new_ev),
				      false);

	return slot < 0 ? -EINVAL : 0;
}
/*
 * Take (@en == true) or drop (@en == false) one reference on address filter
 * slot @idx.
 *
 * The hardware slot is programmed with @base/@mask and enabled when the
 * first reference is taken, and disabled and cleared when the last one is
 * dropped. @base and @mask are ignored when dropping a reference.
 */
static void pcie_tgt_pmu_config_addr_filter(struct arm_cspmu *cspmu,
					    bool en, u64 base, u64 mask, int idx)
{
	struct pcie_tgt_data *data = pcie_tgt_get_data(cspmu);
	struct pcie_tgt_addr_filter *filter = &data->addr_filter[idx];
	void __iomem *filter_reg =
		data->addr_filter_reg + (idx * NV_PCIE_TGT_ADDR_STRIDE);

	if (en) {
		filter->refcount++;
		if (filter->refcount != 1)
			return;

		/* First user: record and program the range, then enable. */
		filter->base = base;
		filter->mask = mask;
		writel(lower_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
		writel(upper_32_bits(base), filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
		writel(lower_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
		writel(upper_32_bits(mask), filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);
		writel(1, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
		return;
	}

	/*
	 * The refcount is unsigned: dropping a reference on a free slot would
	 * wrap it around and make the slot look permanently busy.
	 */
	if (filter->refcount == 0)
		return;

	filter->refcount--;
	if (filter->refcount != 0)
		return;

	/* Last user gone: disable first, then clear the range. */
	writel(0, filter_reg + NV_PCIE_TGT_ADDR_CTRL);
	writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_LO);
	writel(0, filter_reg + NV_PCIE_TGT_ADDR_BASE_HI);
	writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_LO);
	writel(0, filter_reg + NV_PCIE_TGT_ADDR_MASK_HI);
	filter->base = 0;
	filter->mask = 0;
}
/*
 * Program PMEVFILT2R for a PCIE-TGT event.
 *
 * With address filtering enabled, a filter slot is acquired for the event's
 * base/mask and the counter is pointed at that slot; if no slot can be
 * found the register is cleared and an error is logged. The port mask is
 * then merged in; when neither address filtering nor any port is requested,
 * all ports are monitored.
 */
static void pcie_tgt_pmu_set_ev_filter(struct arm_cspmu *cspmu,
				       const struct perf_event *event)
{
	const u32 reg_off = PMEVFILT2R + (4 * event->hw.idx);
	const bool use_addr = pcie_tgt_pmu_addr_en(event);
	u32 reg_val = 0;
	u32 ports;

	if (use_addr) {
		const u64 base = pcie_tgt_pmu_dst_addr_base(event);
		const u64 mask = pcie_tgt_pmu_dst_addr_mask(event);
		const int slot = pcie_tgt_find_addr_idx(cspmu, base, mask,
							false);

		if (slot < 0) {
			dev_err(cspmu->dev,
				"Unable to find a slot for address filtering\n");
			writel(0, cspmu->base0 + reg_off);
			return;
		}

		/* Take a reference on the slot, programming it if needed. */
		pcie_tgt_pmu_config_addr_filter(cspmu, true, base, mask, slot);

		/* Point the counter at the selected address filter slot. */
		reg_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_ADDR, 1U << slot);
	}

	ports = pcie_tgt_pmu_port_filter(event);

	/* Nothing selected at all: monitor every port. */
	if (!use_addr && ports == 0)
		ports = NV_PCIE_TGT_FILTER2_PORT;

	reg_val |= FIELD_PREP(NV_PCIE_TGT_FILTER2_PORT, ports);
	writel(reg_val, cspmu->base0 + reg_off);
}
/*
 * Release the address filter slot held by a PCIE-TGT event. Events that do
 * not use address filtering hold no slot, so nothing is done for them.
 */
static void pcie_tgt_pmu_reset_ev_filter(struct arm_cspmu *cspmu,
					 const struct perf_event *event)
{
	u64 base, mask;
	int slot;

	if (!pcie_tgt_pmu_addr_en(event))
		return;

	base = pcie_tgt_pmu_dst_addr_base(event);
	mask = pcie_tgt_pmu_dst_addr_mask(event);

	slot = pcie_tgt_find_addr_idx(cspmu, base, mask, true);
	if (slot >= 0) {
		pcie_tgt_pmu_config_addr_filter(cspmu, false, base, mask, slot);
		return;
	}

	dev_err(cspmu->dev,
		"Unable to find the address filter slot to reset\n");
}
/*
 * Return the PCIE-TGT event type: the low NV_PCIE_TGT_EV_TYPE_COUNT bits of
 * perf "config" (the remaining bits carry the filter request).
 */
static u32 pcie_tgt_pmu_event_type(const struct perf_event *event)
{
return event->attr.config & NV_PCIE_TGT_EV_TYPE_MASK;
}
/* Tell whether @event's type is the PCIE-TGT cycle counter event. */
static bool pcie_tgt_pmu_is_cycle_counter_event(const struct perf_event *event)
{
	return pcie_tgt_pmu_event_type(event) == NV_PCIE_TGT_EV_TYPE_CC;
}
enum nv_cspmu_name_fmt {
NAME_FMT_GENERIC,
NAME_FMT_SOCKET,
NAME_FMT_SOCKET_INST,
};
/* One entry of the product table used by nv_cspmu_init_ops(). */
struct nv_cspmu_match {
/* Expected PMIIDR bits; 0 marks the final catch-all entry. */
u32 prodid;
/* Which PMIIDR bits take part in the comparison. */
u32 prodid_mask;
/* printf-style PMU name pattern, filled in according to name_fmt. */
const char *name_pattern;
enum nv_cspmu_name_fmt name_fmt;
/* Copied verbatim into the per-PMU nv_cspmu_ctx. */
struct nv_cspmu_ctx template_ctx;
/* Callback overrides; NULL members fall back to the defaults (SET_OP). */
struct arm_cspmu_impl_ops ops;
};
static const struct nv_cspmu_match nv_cspmu_match[] = {
{
.prodid = 0x10300000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_pcie_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = mcf_pmu_event_attrs,
.format_attr = pcie_pmu_format_attrs,
.filter_mask = NV_PCIE_FILTER_ID_MASK,
.filter_default_val = NV_PCIE_FILTER_ID_MASK,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = NULL,
.data = NULL,
.init_data = NULL
},
},
{
.prodid = 0x10400000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = mcf_pmu_event_attrs,
.format_attr = nvlink_c2c_pmu_format_attrs,
.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = NULL,
.data = NULL,
.init_data = NULL
},
},
{
.prodid = 0x10500000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = mcf_pmu_event_attrs,
.format_attr = nvlink_c2c_pmu_format_attrs,
.filter_mask = NV_NVL_C2C_FILTER_ID_MASK,
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = NULL,
.data = NULL,
.init_data = NULL
},
},
{
.prodid = 0x10600000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_cnvlink_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = mcf_pmu_event_attrs,
.format_attr = cnvlink_pmu_format_attrs,
.filter_mask = NV_CNVL_FILTER_ID_MASK,
.filter_default_val = NV_CNVL_FILTER_ID_MASK,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = NULL,
.data = NULL,
.init_data = NULL
},
},
{
.prodid = 0x2CF00000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_scf_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = scf_pmu_event_attrs,
.format_attr = scf_pmu_format_attrs,
.filter_mask = 0x0,
.filter_default_val = 0x0,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = NULL,
.data = NULL,
.init_data = NULL
},
},
{
.prodid = 0x2CF20000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_ucf_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.template_ctx = {
.event_attr = ucf_pmu_event_attrs,
.format_attr = ucf_pmu_format_attrs,
.filter_mask = NV_UCF_FILTER_ID_MASK,
.filter_default_val = NV_UCF_FILTER_DEFAULT,
.filter2_mask = 0x0,
.filter2_default_val = 0x0,
.get_filter = ucf_pmu_event_filter,
},
},
{
.prodid = 0x10301000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_pcie_pmu_%u_rc_%u",
.name_fmt = NAME_FMT_SOCKET_INST,
.template_ctx = {
.event_attr = pcie_v2_pmu_event_attrs,
.format_attr = pcie_v2_pmu_format_attrs,
.filter_mask = NV_PCIE_V2_FILTER_ID_MASK,
.filter_default_val = NV_PCIE_V2_FILTER_DEFAULT,
.filter2_mask = NV_PCIE_V2_FILTER2_ID_MASK,
.filter2_default_val = NV_PCIE_V2_FILTER2_DEFAULT,
.get_filter = pcie_v2_pmu_event_filter,
.get_filter2 = nv_cspmu_event_filter2,
},
.ops = {
.validate_event = pcie_v2_pmu_validate_event,
.reset_ev_filter = nv_cspmu_reset_ev_filter,
}
},
{
.prodid = 0x10700000,
.prodid_mask = NV_PRODID_MASK,
.name_pattern = "nvidia_pcie_tgt_pmu_%u_rc_%u",
.name_fmt = NAME_FMT_SOCKET_INST,
.template_ctx = {
.event_attr = pcie_tgt_pmu_event_attrs,
.format_attr = pcie_tgt_pmu_format_attrs,
.filter_mask = 0x0,
.filter_default_val = 0x0,
.filter2_mask = NV_PCIE_TGT_FILTER2_MASK,
.filter2_default_val = NV_PCIE_TGT_FILTER2_DEFAULT,
.init_data = pcie_tgt_init_data
},
.ops = {
.is_cycle_counter_event = pcie_tgt_pmu_is_cycle_counter_event,
.event_type = pcie_tgt_pmu_event_type,
.validate_event = pcie_tgt_pmu_validate_event,
.set_ev_filter = pcie_tgt_pmu_set_ev_filter,
.reset_ev_filter = pcie_tgt_pmu_reset_ev_filter,
}
},
{
.prodid = 0,
.prodid_mask = 0,
.name_pattern = "nvidia_uncore_pmu_%u",
.name_fmt = NAME_FMT_GENERIC,
.template_ctx = {
.event_attr = generic_pmu_event_attrs,
.format_attr = generic_pmu_format_attrs,
.filter_mask = NV_GENERIC_FILTER_ID_MASK,
.filter_default_val = NV_GENERIC_FILTER_ID_MASK,
.filter2_mask = NV_GENERIC_FILTER_ID_MASK,
.filter2_default_val = NV_GENERIC_FILTER_ID_MASK,
.get_filter = nv_cspmu_event_filter,
.get_filter2 = nv_cspmu_event_filter2,
.data = NULL,
.init_data = NULL
},
},
};
/*
 * Build the PMU name for @cspmu from @match's name pattern.
 *
 * The string is allocated with devm_kasprintf() against the PMU device.
 * Returns NULL if the allocation fails or, for NAME_FMT_SOCKET_INST, if the
 * instance ID cannot be read.
 */
static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
				  const struct nv_cspmu_match *match)
{
	/* Running index handed out to generic PMUs, shared by all instances. */
	static atomic_t pmu_generic_idx = {0};
	struct device *dev = cspmu->dev;

	switch (match->name_fmt) {
	case NAME_FMT_SOCKET: {
		const int first_cpu = cpumask_first(&cspmu->associated_cpus);
		const int node = cpu_to_node(first_cpu);

		return devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
				      node);
	}
	case NAME_FMT_SOCKET_INST: {
		const int first_cpu = cpumask_first(&cspmu->associated_cpus);
		const int node = cpu_to_node(first_cpu);
		u32 inst_id;

		if (nv_cspmu_get_inst_id(cspmu, &inst_id))
			return NULL;

		return devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
				      node, inst_id);
	}
	case NAME_FMT_GENERIC:
		return devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
				      atomic_fetch_inc(&pmu_generic_idx));
	}

	return NULL;
}
/*
 * Install callback @name into @impl: use the override from @match->ops if it
 * is set, otherwise @default_op if that is non-NULL, otherwise leave the
 * existing value of @impl->name untouched.
 */
#define SET_OP(name, impl, match, default_op)			\
	do {							\
		if ((match)->ops.name)				\
			(impl)->name = (match)->ops.name;	\
		else if ((default_op) != NULL)			\
			(impl)->name = (default_op);		\
	} while (0)
/*
 * Implementation init hook for NVIDIA PMUs.
 *
 * Allocates the NVIDIA context, selects the nv_cspmu_match entry for this
 * PMU's PMIIDR (falling back to the final generic entry), copies that
 * entry's template into the context, formats the PMU name, installs the
 * callbacks and finally runs the entry's optional init_data hook.
 *
 * Returns 0 on success, -ENOMEM if the context cannot be allocated, or the
 * error returned by init_data.
 */
static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
{
	struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
	const struct nv_cspmu_match *match = nv_cspmu_match;
	struct nv_cspmu_ctx *ctx;

	ctx = devm_kzalloc(cspmu->dev, sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	/* Walk the table until an entry matches or the fallback is reached. */
	while (match->prodid &&
	       (match->prodid & match->prodid_mask) !=
	       (cspmu->impl.pmiidr & match->prodid_mask))
		match++;

	/* Start from the matched template, then fill in the name. */
	*ctx = match->template_ctx;
	ctx->name = nv_cspmu_format_name(cspmu, match);
	cspmu->impl.ctx = ctx;

	/* Per-product overrides first, NVIDIA defaults otherwise. */
	SET_OP(validate_event, impl_ops, match, NULL);
	SET_OP(event_type, impl_ops, match, NULL);
	SET_OP(is_cycle_counter_event, impl_ops, match, NULL);
	SET_OP(set_cc_filter, impl_ops, match, nv_cspmu_set_cc_filter);
	SET_OP(set_ev_filter, impl_ops, match, nv_cspmu_set_ev_filter);
	SET_OP(reset_ev_filter, impl_ops, match, NULL);
	SET_OP(get_event_attrs, impl_ops, match, nv_cspmu_get_event_attrs);
	SET_OP(get_format_attrs, impl_ops, match, nv_cspmu_get_format_attrs);
	SET_OP(get_name, impl_ops, match, nv_cspmu_get_name);

	return ctx->init_data ? ctx->init_data(cspmu) : 0;
}
/*
 * Match all NVIDIA Coresight PMU devices.
 *
 * Registration record passed to arm_cspmu_impl_register(): it names the
 * NVIDIA implementer ID and nv_cspmu_init_ops() as the init hook.
 */
static const struct arm_cspmu_impl_match nv_cspmu_param = {
.pmiidr_val = ARM_CSPMU_IMPL_ID_NVIDIA,
.module = THIS_MODULE,
.impl_init_ops = nv_cspmu_init_ops
};
/*
 * Module entry point: register the NVIDIA backend with the arm_cspmu core.
 * Returns 0 on success or the registration error, which is also logged.
 */
static int __init nvidia_cspmu_init(void)
{
	const int err = arm_cspmu_impl_register(&nv_cspmu_param);

	if (!err)
		return 0;

	pr_err("nvidia_cspmu backend registration error: %d\n", err);
	return err;
}
/* Module exit point: unregister the NVIDIA backend from the arm_cspmu core. */
static void __exit nvidia_cspmu_exit(void)
{
arm_cspmu_impl_unregister(&nv_cspmu_param);
}
/* Module entry/exit hooks and metadata. */
module_init(nvidia_cspmu_init);
module_exit(nvidia_cspmu_exit);
MODULE_DESCRIPTION("NVIDIA Coresight Architecture Performance Monitor Driver");
MODULE_LICENSE("GPL v2");