amd-xgbe: add TX descriptor cleanup for link-down
Add intelligent TX descriptor cleanup mechanism to reclaim abandoned
descriptors when the physical link goes down.
When the link goes down while TX packets are in-flight, the hardware
stops processing descriptors with the OWN bit still set. The current
driver waits indefinitely for these descriptors to complete, which
never happens. This causes:
- TX ring exhaustion (no descriptors available for new packets)
- Memory leaks (skbs never freed)
- DMA mapping leaks (mappings never unmapped)
- Network stack backpressure buildup
Add force-cleanup mechanism in xgbe_tx_poll() that detects link-down
state and reclaims abandoned descriptors. The helper functions and DMA
optimizations support efficient TX shutdown:
- xgbe_wait_for_dma_tx_complete(): Wait for DMA completion with
link-down optimization
- Restructure xgbe_disable_tx() for proper shutdown sequence
Implementation:
1. Check link state at the start of tx_poll
2. If link is down, set force_cleanup flag
3. For descriptors that hardware hasn't completed (!tx_complete):
- If force_cleanup: treat as completed and reclaim resources
- If link up: break and wait for hardware (normal behavior)
The cleanup process:
- Frees skbs that will never be transmitted
- Unmaps DMA mappings
- Resets descriptors for reuse
- Does NOT count as successful transmission (correct statistics)
Benefits:
- Prevents TX ring starvation
- Eliminates memory and DMA mapping leaks
- Enables fast link recovery when link comes back up
- Critical for link aggregation failover scenarios
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Link: https://patch.msgid.link/20260319163251.1808611-4-Raju.Rangoju@amd.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
master
parent
0898849ad9
commit
b7fb367784
|
|
@ -330,6 +330,10 @@
|
|||
#define MAC_ISR_SMI_WIDTH 1
|
||||
#define MAC_ISR_TSIS_INDEX 12
|
||||
#define MAC_ISR_TSIS_WIDTH 1
|
||||
#define MAC_ISR_LS_INDEX 24
|
||||
#define MAC_ISR_LS_WIDTH 2
|
||||
#define MAC_ISR_LSI_INDEX 0
|
||||
#define MAC_ISR_LSI_WIDTH 1
|
||||
#define MAC_MACA1HR_AE_INDEX 31
|
||||
#define MAC_MACA1HR_AE_WIDTH 1
|
||||
#define MAC_MDIOIER_SNGLCOMPIE_INDEX 12
|
||||
|
|
|
|||
|
|
@ -3276,28 +3276,83 @@ static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
|
|||
XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* xgbe_wait_for_dma_tx_complete - Wait for DMA to complete pending TX
|
||||
* @pdata: driver private data
|
||||
*
|
||||
* Wait for the DMA TX channels to complete all pending descriptors.
|
||||
* This ensures no frames are in-flight before we disable the transmitter.
|
||||
* If link is down, return immediately as TX will never complete.
|
||||
*
|
||||
* Return: 0 on success, -ETIMEDOUT on timeout
|
||||
*/
|
||||
static int xgbe_wait_for_dma_tx_complete(struct xgbe_prv_data *pdata)
|
||||
{
|
||||
struct xgbe_channel *channel;
|
||||
struct xgbe_ring *ring;
|
||||
unsigned long timeout;
|
||||
unsigned int i;
|
||||
bool complete;
|
||||
|
||||
/* If link is down, TX will never complete - skip waiting */
|
||||
if (!pdata->phy.link)
|
||||
return 0;
|
||||
|
||||
timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ);
|
||||
|
||||
do {
|
||||
complete = true;
|
||||
|
||||
for (i = 0; i < pdata->channel_count; i++) {
|
||||
channel = pdata->channel[i];
|
||||
ring = channel->tx_ring;
|
||||
if (!ring)
|
||||
continue;
|
||||
|
||||
/* Check if DMA has processed all descriptors */
|
||||
if (ring->dirty != ring->cur) {
|
||||
complete = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (complete)
|
||||
return 0;
|
||||
|
||||
usleep_range(100, 200);
|
||||
} while (time_before(jiffies, timeout));
|
||||
|
||||
netif_warn(pdata, drv, pdata->netdev,
|
||||
"timeout waiting for DMA TX to complete\n");
|
||||
return -ETIMEDOUT;
|
||||
}
|
||||
|
||||
static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* Prepare for Tx DMA channel stop */
|
||||
for (i = 0; i < pdata->tx_q_count; i++)
|
||||
xgbe_prepare_tx_stop(pdata, i);
|
||||
/* Step 1: Wait for DMA to complete pending descriptors */
|
||||
xgbe_wait_for_dma_tx_complete(pdata);
|
||||
|
||||
/* Disable MAC Tx */
|
||||
XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
|
||||
|
||||
/* Disable each Tx queue */
|
||||
for (i = 0; i < pdata->tx_q_count; i++)
|
||||
XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
|
||||
|
||||
/* Disable each Tx DMA channel */
|
||||
/* Step 2: Disable each Tx DMA channel to stop
|
||||
* processing new descriptors
|
||||
*/
|
||||
for (i = 0; i < pdata->channel_count; i++) {
|
||||
if (!pdata->channel[i]->tx_ring)
|
||||
break;
|
||||
|
||||
XGMAC_DMA_IOWRITE_BITS(pdata->channel[i], DMA_CH_TCR, ST, 0);
|
||||
}
|
||||
|
||||
/* Step 3: Wait for MTL TX queues to drain */
|
||||
for (i = 0; i < pdata->tx_q_count; i++)
|
||||
xgbe_prepare_tx_stop(pdata, i);
|
||||
|
||||
/* Step 4: Disable MTL TX queues */
|
||||
for (i = 0; i < pdata->tx_q_count; i++)
|
||||
XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TXQEN, 0);
|
||||
|
||||
/* Step 5: Disable MAC TX last */
|
||||
XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
|
||||
}
|
||||
|
||||
static void xgbe_prepare_rx_stop(struct xgbe_prv_data *pdata,
|
||||
|
|
|
|||
|
|
@ -2169,6 +2169,7 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
|
|||
struct net_device *netdev = pdata->netdev;
|
||||
struct netdev_queue *txq;
|
||||
int processed = 0;
|
||||
int force_cleanup;
|
||||
unsigned int tx_packets = 0, tx_bytes = 0;
|
||||
unsigned int cur;
|
||||
|
||||
|
|
@ -2185,13 +2186,41 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
|
|||
|
||||
txq = netdev_get_tx_queue(netdev, channel->queue_index);
|
||||
|
||||
/* Smart descriptor cleanup during link-down conditions.
|
||||
*
|
||||
* When link is down, hardware stops processing TX descriptors (OWN bit
|
||||
* remains set). Enable intelligent cleanup to reclaim these abandoned
|
||||
* descriptors and maintain TX queue health.
|
||||
*
|
||||
* This cleanup mechanism enables:
|
||||
* - Continuous TX queue availability for new packets when link recovers
|
||||
* - Clean resource management (skbs, DMA mappings, descriptors)
|
||||
* - Fast failover in link aggregation scenarios
|
||||
*/
|
||||
force_cleanup = !pdata->phy.link;
|
||||
|
||||
while ((processed < XGBE_TX_DESC_MAX_PROC) &&
|
||||
(ring->dirty != cur)) {
|
||||
rdata = XGBE_GET_DESC_DATA(ring, ring->dirty);
|
||||
rdesc = rdata->rdesc;
|
||||
|
||||
if (!hw_if->tx_complete(rdesc))
|
||||
break;
|
||||
if (!hw_if->tx_complete(rdesc)) {
|
||||
if (!force_cleanup)
|
||||
break;
|
||||
/* Link-down descriptor cleanup: reclaim abandoned
|
||||
* resources.
|
||||
*
|
||||
* Hardware has stopped processing this descriptor, so
|
||||
* perform intelligent cleanup to free skbs and reclaim
|
||||
* descriptors for future use when link recovers.
|
||||
*
|
||||
* These are not counted as successful transmissions
|
||||
* since packets never reached the wire.
|
||||
*/
|
||||
netif_dbg(pdata, tx_err, netdev,
|
||||
"force-freeing stuck TX desc %u (link down)\n",
|
||||
ring->dirty);
|
||||
}
|
||||
|
||||
/* Make sure descriptor fields are read after reading the OWN
|
||||
* bit */
|
||||
|
|
@ -2200,9 +2229,13 @@ static int xgbe_tx_poll(struct xgbe_channel *channel)
|
|||
if (netif_msg_tx_done(pdata))
|
||||
xgbe_dump_tx_desc(pdata, ring, ring->dirty, 1, 0);
|
||||
|
||||
if (hw_if->is_last_desc(rdesc)) {
|
||||
tx_packets += rdata->tx.packets;
|
||||
tx_bytes += rdata->tx.bytes;
|
||||
/* Only count packets actually transmitted (not force-cleaned)
|
||||
*/
|
||||
if (!force_cleanup || hw_if->is_last_desc(rdesc)) {
|
||||
if (hw_if->is_last_desc(rdesc)) {
|
||||
tx_packets += rdata->tx.packets;
|
||||
tx_bytes += rdata->tx.bytes;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free the SKB and reset the descriptor for re-use */
|
||||
|
|
|
|||
Loading…
Reference in New Issue