net/smc: make wr buffer count configurable
Think of it as SMC_WR_BUF_CNT_SEND := SMC_WR_BUF_CNT used in send context and SMC_WR_BUF_CNT_RECV := 3 * SMC_WR_BUF_CNT used in recv context. Those get replaced with lgr->max_send_wr and lgr->max_recv_wr respectively. Please note that with the default sysctl values qp_attr.cap.max_send_wr == qp_attr.cap.max_recv_wr is maintained, but this can no longer be assumed to be generally true. I see no downside to that, but my confidence level is rather modest. Signed-off-by: Halil Pasic <pasic@linux.ibm.com> Reviewed-by: Sidraya Jayagond <sidraya@linux.ibm.com> Reviewed-by: Dust Li <dust.li@linux.alibaba.com> Tested-by: Mahanta Jambigi <mjambigi@linux.ibm.com> Link: https://patch.msgid.link/20251027224856.2970019-2-pasic@linux.ibm.com Signed-off-by: Paolo Abeni <pabeni@redhat.com> pull/1354/merge
parent
ea7d0d60eb
commit
aef3cdb47b
|
|
@ -71,3 +71,39 @@ smcr_max_conns_per_lgr - INTEGER
|
|||
acceptable value ranges from 16 to 255. Only for SMC-R v2.1 and later.
|
||||
|
||||
Default: 255
|
||||
|
||||
smcr_max_send_wr - INTEGER
|
||||
So-called work request buffers are SMCR link (and RDMA queue pair) level
|
||||
resources necessary for performing RDMA operations. Since up to 255
|
||||
connections can share a link group and thus also a link and the number
|
||||
of the work request buffers is decided when the link is allocated,
|
||||
depending on the workload it can be a bottleneck in a sense that threads
|
||||
have to wait for work request buffers to become available. Before the
|
||||
introduction of this control the maximal number of work request buffers
|
||||
available on the send path used to be hard coded to 16. With this control
|
||||
it becomes configurable. The acceptable range is between 2 and 2048.
|
||||
|
||||
Please be aware that all the buffers need to be allocated as a physically
|
||||
contiguous array in which each element is a single buffer and has the size
|
||||
of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we give up, just
|
||||
as we did before this control was introduced.
|
||||
|
||||
Default: 16
|
||||
|
||||
smcr_max_recv_wr - INTEGER
|
||||
So-called work request buffers are SMCR link (and RDMA queue pair) level
|
||||
resources necessary for performing RDMA operations. Since up to 255
|
||||
connections can share a link group and thus also a link and the number
|
||||
of the work request buffers is decided when the link is allocated,
|
||||
depending on the workload it can be a bottleneck in a sense that threads
|
||||
have to wait for work request buffers to become available. Before the
|
||||
introduction of this control the maximal number of work request buffers
|
||||
available on the receive path used to be hard coded to 48. With this control
|
||||
it becomes configurable. The acceptable range is between 2 and 2048.
|
||||
|
||||
Please be aware that all the buffers need to be allocated as a physically
|
||||
contiguous array in which each element is a single buffer and has the size
|
||||
of SMC_WR_BUF_SIZE (48) bytes. If the allocation fails, we give up, just
|
||||
as we did before this control was introduced.
|
||||
|
||||
Default: 48
|
||||
|
|
|
|||
|
|
@ -24,5 +24,7 @@ struct netns_smc {
|
|||
int sysctl_rmem;
|
||||
int sysctl_max_links_per_lgr;
|
||||
int sysctl_max_conns_per_lgr;
|
||||
unsigned int sysctl_smcr_max_send_wr;
|
||||
unsigned int sysctl_smcr_max_recv_wr;
|
||||
};
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@
|
|||
* distributions may modify it to a value between
|
||||
* 16-255 as needed.
|
||||
*/
|
||||
#define SMCR_MAX_SEND_WR_DEF 16 /* Default number of work requests per send queue */
|
||||
#define SMCR_MAX_RECV_WR_DEF 48 /* Default number of work requests per recv queue */
|
||||
|
||||
struct smc_lgr_list { /* list of link group definition */
|
||||
struct list_head list;
|
||||
|
|
@ -366,6 +368,10 @@ struct smc_link_group {
|
|||
/* max conn can be assigned to lgr */
|
||||
u8 max_links;
|
||||
/* max links can be added in lgr */
|
||||
u16 max_send_wr;
|
||||
/* number of WR buffers on send */
|
||||
u16 max_recv_wr;
|
||||
/* number of WR buffers on recv */
|
||||
};
|
||||
struct { /* SMC-D */
|
||||
struct smcd_gid peer_gid;
|
||||
|
|
|
|||
|
|
@ -669,11 +669,6 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
|
|||
.recv_cq = lnk->smcibdev->roce_cq_recv,
|
||||
.srq = NULL,
|
||||
.cap = {
|
||||
/* include unsolicited rdma_writes as well,
|
||||
* there are max. 2 RDMA_WRITE per 1 WR_SEND
|
||||
*/
|
||||
.max_send_wr = SMC_WR_BUF_CNT * 3,
|
||||
.max_recv_wr = SMC_WR_BUF_CNT * 3,
|
||||
.max_send_sge = SMC_IB_MAX_SEND_SGE,
|
||||
.max_recv_sge = lnk->wr_rx_sge_cnt,
|
||||
.max_inline_data = 0,
|
||||
|
|
@ -683,6 +678,11 @@ int smc_ib_create_queue_pair(struct smc_link *lnk)
|
|||
};
|
||||
int rc;
|
||||
|
||||
/* include unsolicited rdma_writes as well,
|
||||
* there are max. 2 RDMA_WRITE per 1 WR_SEND
|
||||
*/
|
||||
qp_attr.cap.max_send_wr = 3 * lnk->lgr->max_send_wr;
|
||||
qp_attr.cap.max_recv_wr = lnk->lgr->max_recv_wr;
|
||||
lnk->roce_qp = ib_create_qp(lnk->roce_pd, &qp_attr);
|
||||
rc = PTR_ERR_OR_ZERO(lnk->roce_qp);
|
||||
if (IS_ERR(lnk->roce_qp))
|
||||
|
|
|
|||
|
|
@ -2157,6 +2157,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
|
|||
init_waitqueue_head(&lgr->llc_msg_waiter);
|
||||
init_rwsem(&lgr->llc_conf_mutex);
|
||||
lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
|
||||
lgr->max_send_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_send_wr));
|
||||
lgr->max_recv_wr = (u16)(READ_ONCE(net->smc.sysctl_smcr_max_recv_wr));
|
||||
}
|
||||
|
||||
/* called after lgr was removed from lgr_list */
|
||||
|
|
|
|||
|
|
@ -29,6 +29,8 @@ static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN;
|
|||
static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
|
||||
static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
|
||||
static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
|
||||
static unsigned int smcr_max_wr_min = 2;
|
||||
static unsigned int smcr_max_wr_max = 2048;
|
||||
|
||||
static struct ctl_table smc_table[] = {
|
||||
{
|
||||
|
|
@ -99,6 +101,24 @@ static struct ctl_table smc_table[] = {
|
|||
.extra1 = SYSCTL_ZERO,
|
||||
.extra2 = SYSCTL_ONE,
|
||||
},
|
||||
{
|
||||
.procname = "smcr_max_send_wr",
|
||||
.data = &init_net.smc.sysctl_smcr_max_send_wr,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &smcr_max_wr_min,
|
||||
.extra2 = &smcr_max_wr_max,
|
||||
},
|
||||
{
|
||||
.procname = "smcr_max_recv_wr",
|
||||
.data = &init_net.smc.sysctl_smcr_max_recv_wr,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &smcr_max_wr_min,
|
||||
.extra2 = &smcr_max_wr_max,
|
||||
},
|
||||
};
|
||||
|
||||
int __net_init smc_sysctl_net_init(struct net *net)
|
||||
|
|
@ -130,6 +150,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
|
|||
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
|
||||
net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
|
||||
net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
|
||||
net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF;
|
||||
net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF;
|
||||
/* disable handshake limitation by default */
|
||||
net->smc.limit_smc_hs = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ static inline int smc_sysctl_net_init(struct net *net)
|
|||
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
|
||||
net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
|
||||
net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
|
||||
net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF;
|
||||
net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -547,9 +547,9 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk)
|
|||
IB_QP_DEST_QPN,
|
||||
&init_attr);
|
||||
|
||||
lnk->wr_tx_cnt = min_t(size_t, SMC_WR_BUF_CNT,
|
||||
lnk->wr_tx_cnt = min_t(size_t, lnk->lgr->max_send_wr,
|
||||
lnk->qp_attr.cap.max_send_wr);
|
||||
lnk->wr_rx_cnt = min_t(size_t, SMC_WR_BUF_CNT * 3,
|
||||
lnk->wr_rx_cnt = min_t(size_t, lnk->lgr->max_recv_wr,
|
||||
lnk->qp_attr.cap.max_recv_wr);
|
||||
}
|
||||
|
||||
|
|
@ -741,50 +741,51 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr)
|
|||
int smc_wr_alloc_link_mem(struct smc_link *link)
|
||||
{
|
||||
/* allocate link related memory */
|
||||
link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL);
|
||||
link->wr_tx_bufs = kcalloc(link->lgr->max_send_wr,
|
||||
SMC_WR_BUF_SIZE, GFP_KERNEL);
|
||||
if (!link->wr_tx_bufs)
|
||||
goto no_mem;
|
||||
link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen,
|
||||
link->wr_rx_bufs = kcalloc(link->lgr->max_recv_wr, link->wr_rx_buflen,
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_rx_bufs)
|
||||
goto no_mem_wr_tx_bufs;
|
||||
link->wr_tx_ibs = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_ibs[0]),
|
||||
GFP_KERNEL);
|
||||
link->wr_tx_ibs = kcalloc(link->lgr->max_send_wr,
|
||||
sizeof(link->wr_tx_ibs[0]), GFP_KERNEL);
|
||||
if (!link->wr_tx_ibs)
|
||||
goto no_mem_wr_rx_bufs;
|
||||
link->wr_rx_ibs = kcalloc(SMC_WR_BUF_CNT * 3,
|
||||
link->wr_rx_ibs = kcalloc(link->lgr->max_recv_wr,
|
||||
sizeof(link->wr_rx_ibs[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_rx_ibs)
|
||||
goto no_mem_wr_tx_ibs;
|
||||
link->wr_tx_rdmas = kcalloc(SMC_WR_BUF_CNT,
|
||||
link->wr_tx_rdmas = kcalloc(link->lgr->max_send_wr,
|
||||
sizeof(link->wr_tx_rdmas[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_tx_rdmas)
|
||||
goto no_mem_wr_rx_ibs;
|
||||
link->wr_tx_rdma_sges = kcalloc(SMC_WR_BUF_CNT,
|
||||
link->wr_tx_rdma_sges = kcalloc(link->lgr->max_send_wr,
|
||||
sizeof(link->wr_tx_rdma_sges[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_tx_rdma_sges)
|
||||
goto no_mem_wr_tx_rdmas;
|
||||
link->wr_tx_sges = kcalloc(SMC_WR_BUF_CNT, sizeof(link->wr_tx_sges[0]),
|
||||
link->wr_tx_sges = kcalloc(link->lgr->max_send_wr, sizeof(link->wr_tx_sges[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_tx_sges)
|
||||
goto no_mem_wr_tx_rdma_sges;
|
||||
link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3,
|
||||
link->wr_rx_sges = kcalloc(link->lgr->max_recv_wr,
|
||||
sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt,
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_rx_sges)
|
||||
goto no_mem_wr_tx_sges;
|
||||
link->wr_tx_mask = bitmap_zalloc(SMC_WR_BUF_CNT, GFP_KERNEL);
|
||||
link->wr_tx_mask = bitmap_zalloc(link->lgr->max_send_wr, GFP_KERNEL);
|
||||
if (!link->wr_tx_mask)
|
||||
goto no_mem_wr_rx_sges;
|
||||
link->wr_tx_pends = kcalloc(SMC_WR_BUF_CNT,
|
||||
link->wr_tx_pends = kcalloc(link->lgr->max_send_wr,
|
||||
sizeof(link->wr_tx_pends[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_tx_pends)
|
||||
goto no_mem_wr_tx_mask;
|
||||
link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
|
||||
link->wr_tx_compl = kcalloc(link->lgr->max_send_wr,
|
||||
sizeof(link->wr_tx_compl[0]),
|
||||
GFP_KERNEL);
|
||||
if (!link->wr_tx_compl)
|
||||
|
|
@ -905,7 +906,7 @@ int smc_wr_create_link(struct smc_link *lnk)
|
|||
goto dma_unmap;
|
||||
}
|
||||
smc_wr_init_sge(lnk);
|
||||
bitmap_zero(lnk->wr_tx_mask, SMC_WR_BUF_CNT);
|
||||
bitmap_zero(lnk->wr_tx_mask, lnk->lgr->max_send_wr);
|
||||
init_waitqueue_head(&lnk->wr_tx_wait);
|
||||
rc = percpu_ref_init(&lnk->wr_tx_refs, smcr_wr_tx_refs_free, 0, GFP_KERNEL);
|
||||
if (rc)
|
||||
|
|
|
|||
|
|
@ -19,8 +19,6 @@
|
|||
#include "smc.h"
|
||||
#include "smc_core.h"
|
||||
|
||||
#define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */
|
||||
|
||||
#define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ)
|
||||
|
||||
#define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */
|
||||
|
|
|
|||
Loading…
Reference in New Issue