From ab2e3f79587150a6a1499417875b4aed7915fead Mon Sep 17 00:00:00 2001 From: Stephen Hurd Date: Sat, 16 Sep 2017 02:41:38 +0000 Subject: [PATCH] Revert r323516 (iflib rollup) This was really too big of a commit even if everything worked, but there are multiple new issues introduced in the one huge commit, so it's not worth keeping this until it's fixed. I'll work on splitting this up into logical chunks and introduce them one at a time over the next week or two. Approved by: sbruno (mentor) Sponsored by: Limelight Networks --- sys/dev/bnxt/if_bnxt.c | 3 +- sys/dev/e1000/e1000_80003es2lan.c | 87 ++- sys/dev/e1000/e1000_82571.c | 157 ++++- sys/dev/e1000/e1000_82575.c | 92 ++- sys/dev/e1000/e1000_hw.h | 3 + sys/dev/e1000/e1000_i210.c | 150 +++- sys/dev/e1000/e1000_i210.h | 2 + sys/dev/e1000/e1000_ich8lan.c | 20 +- sys/dev/e1000/e1000_mac.c | 253 ++----- sys/dev/e1000/e1000_mac.h | 7 +- sys/dev/e1000/e1000_osdep.h | 78 +-- sys/dev/e1000/em_txrx.c | 32 +- sys/dev/e1000/if_em.c | 186 ++--- sys/dev/e1000/if_em.h | 3 +- sys/kern/subr_gtaskqueue.c | 334 ++------- sys/net/iflib.c | 1069 +++++++++-------------------- sys/net/iflib.h | 12 +- sys/net/mp_ring.c | 22 +- sys/sys/gtaskqueue.h | 41 +- 19 files changed, 1017 insertions(+), 1534 deletions(-) diff --git a/sys/dev/bnxt/if_bnxt.c b/sys/dev/bnxt/if_bnxt.c index aaf18b04a120..94c65007ab0f 100644 --- a/sys/dev/bnxt/if_bnxt.c +++ b/sys/dev/bnxt/if_bnxt.c @@ -1640,8 +1640,7 @@ bnxt_msix_intr_assign(if_ctx_t ctx, int msix) } for (i=0; i<softc->scctx->isc_ntxqsets; i++) - /* TODO: Benchmark and see if tying to the RX irqs helps */ - iflib_softirq_alloc_generic(ctx, -1, IFLIB_INTR_TX, NULL, i, + iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX, NULL, i, "tx_cp"); return rc; diff --git a/sys/dev/e1000/e1000_80003es2lan.c b/sys/dev/e1000/e1000_80003es2lan.c index 7377d8e9d867..e7c42d5386eb 100644 --- a/sys/dev/e1000/e1000_80003es2lan.c +++ b/sys/dev/e1000/e1000_80003es2lan.c @@ -59,6 +59,7 @@ static s32 
e1000_reset_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_init_hw_80003es2lan(struct e1000_hw *hw); static s32 e1000_setup_copper_link_80003es2lan(struct e1000_hw *hw); static void e1000_clear_hw_cntrs_80003es2lan(struct e1000_hw *hw); +static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_cfg_kmrn_10_100_80003es2lan(struct e1000_hw *hw, u16 duplex); static s32 e1000_cfg_kmrn_1000_80003es2lan(struct e1000_hw *hw); static s32 e1000_cfg_on_link_up_80003es2lan(struct e1000_hw *hw); @@ -67,6 +68,7 @@ static s32 e1000_read_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, static s32 e1000_write_kmrn_reg_80003es2lan(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_initialize_hw_bits_80003es2lan(struct e1000_hw *hw); +static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask); static s32 e1000_read_mac_addr_80003es2lan(struct e1000_hw *hw); static void e1000_power_down_phy_copper_80003es2lan(struct e1000_hw *hw); @@ -297,7 +299,7 @@ static s32 e1000_acquire_phy_80003es2lan(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_phy_80003es2lan"); mask = hw->bus.func ? E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - return e1000_acquire_swfw_sync(hw, mask); + return e1000_acquire_swfw_sync_80003es2lan(hw, mask); } /** @@ -313,7 +315,7 @@ static void e1000_release_phy_80003es2lan(struct e1000_hw *hw) DEBUGFUNC("e1000_release_phy_80003es2lan"); mask = hw->bus.func ? 
E1000_SWFW_PHY1_SM : E1000_SWFW_PHY0_SM; - e1000_release_swfw_sync(hw, mask); + e1000_release_swfw_sync_80003es2lan(hw, mask); } /** @@ -331,7 +333,7 @@ static s32 e1000_acquire_mac_csr_80003es2lan(struct e1000_hw *hw) mask = E1000_SWFW_CSR_SM; - return e1000_acquire_swfw_sync(hw, mask); + return e1000_acquire_swfw_sync_80003es2lan(hw, mask); } /** @@ -348,7 +350,7 @@ static void e1000_release_mac_csr_80003es2lan(struct e1000_hw *hw) mask = E1000_SWFW_CSR_SM; - e1000_release_swfw_sync(hw, mask); + e1000_release_swfw_sync_80003es2lan(hw, mask); } /** @@ -363,14 +365,14 @@ static s32 e1000_acquire_nvm_80003es2lan(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_nvm_80003es2lan"); - ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); if (ret_val) return ret_val; ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -386,7 +388,78 @@ static void e1000_release_nvm_80003es2lan(struct e1000_hw *hw) DEBUGFUNC("e1000_release_nvm_80003es2lan"); e1000_release_nvm_generic(hw); - e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync_80003es2lan(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_80003es2lan - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. 
+ **/ +static s32 e1000_acquire_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 i = 0; + s32 timeout = 50; + + DEBUGFUNC("e1000_acquire_swfw_sync_80003es2lan"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_generic(hw)) + return -E1000_ERR_SWFW_SYNC; + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + return -E1000_ERR_SWFW_SYNC; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_release_swfw_sync_80003es2lan - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. 
+ **/ +static void e1000_release_swfw_sync_80003es2lan(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_80003es2lan"); + + while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); } /** diff --git a/sys/dev/e1000/e1000_82571.c b/sys/dev/e1000/e1000_82571.c index 38f7e0f90955..5ff17f098a21 100644 --- a/sys/dev/e1000/e1000_82571.c +++ b/sys/dev/e1000/e1000_82571.c @@ -70,8 +70,11 @@ static s32 e1000_check_for_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_setup_fiber_serdes_link_82571(struct e1000_hw *hw); static s32 e1000_valid_led_default_82571(struct e1000_hw *hw, u16 *data); static void e1000_clear_hw_cntrs_82571(struct e1000_hw *hw); +static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw); static s32 e1000_fix_nvm_checksum_82571(struct e1000_hw *hw); static s32 e1000_get_phy_id_82571(struct e1000_hw *hw); +static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw); +static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw); static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw); static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw); static s32 e1000_set_d0_lplu_state_82574(struct e1000_hw *hw, @@ -122,8 +125,8 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) phy->ops.get_cable_length = e1000_get_cable_length_igp_2; phy->ops.read_reg = e1000_read_phy_reg_igp; phy->ops.write_reg = e1000_write_phy_reg_igp; - phy->ops.acquire = e1000_get_hw_semaphore; - phy->ops.release = e1000_put_hw_semaphore; + phy->ops.acquire = e1000_get_hw_semaphore_82571; + phy->ops.release = e1000_put_hw_semaphore_82571; break; case e1000_82573: phy->type = e1000_phy_m88; @@ -135,11 +138,12 @@ static s32 e1000_init_phy_params_82571(struct e1000_hw *hw) phy->ops.get_cable_length = e1000_get_cable_length_m88; 
phy->ops.read_reg = e1000_read_phy_reg_m88; phy->ops.write_reg = e1000_write_phy_reg_m88; - phy->ops.acquire = e1000_get_hw_semaphore; - phy->ops.release = e1000_put_hw_semaphore; + phy->ops.acquire = e1000_get_hw_semaphore_82571; + phy->ops.release = e1000_put_hw_semaphore_82571; break; case e1000_82574: case e1000_82583: + E1000_MUTEX_INIT(&hw->dev_spec._82571.swflag_mutex); phy->type = e1000_phy_bm; phy->ops.get_cfg_done = e1000_get_cfg_done_generic; @@ -502,21 +506,99 @@ static s32 e1000_get_phy_id_82571(struct e1000_hw *hw) } /** - * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore + * e1000_get_hw_semaphore_82571 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +static s32 e1000_get_hw_semaphore_82571(struct e1000_hw *hw) +{ + u32 swsm; + s32 sw_timeout = hw->nvm.word_size + 1; + s32 fw_timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_82571"); + + /* If we have timedout 3 times on trying to acquire + * the inter-port SMBI semaphore, there is old code + * operating on the other port, and it is not + * releasing SMBI. Modify the number of times that + * we try for the semaphore to interwork with this + * older code. + */ + if (hw->dev_spec._82571.smb_counter > 2) + sw_timeout = 1; + + /* Get the SW semaphore */ + while (i < sw_timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == sw_timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + hw->dev_spec._82571.smb_counter++; + } + /* Get the FW semaphore. 
*/ + for (i = 0; i < fw_timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == fw_timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_82571(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore_82571 - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +static void e1000_put_hw_semaphore_82571(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore_generic"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + +/** + * e1000_get_hw_semaphore_82573 - Acquire hardware semaphore * @hw: pointer to the HW structure * * Acquire the HW semaphore during reset. 
* **/ -static s32 -e1000_get_hw_semaphore_82574(struct e1000_hw *hw) +static s32 e1000_get_hw_semaphore_82573(struct e1000_hw *hw) { u32 extcnf_ctrl; s32 i = 0; - /* XXX assert that mutex is held */ + DEBUGFUNC("e1000_get_hw_semaphore_82573"); - ASSERT_CTX_LOCK_HELD(hw); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); do { extcnf_ctrl |= E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; @@ -532,7 +614,7 @@ e1000_get_hw_semaphore_82574(struct e1000_hw *hw) if (i == MDIO_OWNERSHIP_TIMEOUT) { /* Release semaphores */ - e1000_put_hw_semaphore_82574(hw); + e1000_put_hw_semaphore_82573(hw); DEBUGOUT("Driver can't access the PHY\n"); return -E1000_ERR_PHY; } @@ -541,24 +623,58 @@ e1000_get_hw_semaphore_82574(struct e1000_hw *hw) } /** - * e1000_put_hw_semaphore_82574 - Release hardware semaphore + * e1000_put_hw_semaphore_82573 - Release hardware semaphore * @hw: pointer to the HW structure * * Release hardware semaphore used during reset. * **/ -static void -e1000_put_hw_semaphore_82574(struct e1000_hw *hw) +static void e1000_put_hw_semaphore_82573(struct e1000_hw *hw) { u32 extcnf_ctrl; - DEBUGFUNC("e1000_put_hw_semaphore_82574"); + DEBUGFUNC("e1000_put_hw_semaphore_82573"); extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); extcnf_ctrl &= ~E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP; E1000_WRITE_REG(hw, E1000_EXTCNF_CTRL, extcnf_ctrl); } +/** + * e1000_get_hw_semaphore_82574 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM. 
+ * + **/ +static s32 e1000_get_hw_semaphore_82574(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_get_hw_semaphore_82574"); + + E1000_MUTEX_LOCK(&hw->dev_spec._82571.swflag_mutex); + ret_val = e1000_get_hw_semaphore_82573(hw); + if (ret_val) + E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); + return ret_val; +} + +/** + * e1000_put_hw_semaphore_82574 - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + * + **/ +static void e1000_put_hw_semaphore_82574(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_put_hw_semaphore_82574"); + + e1000_put_hw_semaphore_82573(hw); + E1000_MUTEX_UNLOCK(&hw->dev_spec._82571.swflag_mutex); +} + /** * e1000_set_d0_lplu_state_82574 - Set Low Power Linkup D0 state * @hw: pointer to the HW structure @@ -630,7 +746,7 @@ static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_nvm_82571"); - ret_val = e1000_get_hw_semaphore(hw); + ret_val = e1000_get_hw_semaphore_82571(hw); if (ret_val) return ret_val; @@ -643,7 +759,7 @@ static s32 e1000_acquire_nvm_82571(struct e1000_hw *hw) } if (ret_val) - e1000_put_hw_semaphore(hw); + e1000_put_hw_semaphore_82571(hw); return ret_val; } @@ -659,7 +775,7 @@ static void e1000_release_nvm_82571(struct e1000_hw *hw) DEBUGFUNC("e1000_release_nvm_82571"); e1000_release_nvm_generic(hw); - e1000_put_hw_semaphore(hw); + e1000_put_hw_semaphore_82571(hw); } /** @@ -976,6 +1092,8 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) */ switch (hw->mac.type) { case e1000_82573: + ret_val = e1000_get_hw_semaphore_82573(hw); + break; case e1000_82574: case e1000_82583: ret_val = e1000_get_hw_semaphore_82574(hw); @@ -992,6 +1110,10 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) /* Must release MDIO ownership and mutex after MAC reset. 
*/ switch (hw->mac.type) { case e1000_82573: + /* Release mutex only if the hw semaphore is acquired */ + if (!ret_val) + e1000_put_hw_semaphore_82573(hw); + break; case e1000_82574: case e1000_82583: /* Release mutex only if the hw semaphore is acquired */ @@ -999,7 +1121,6 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) e1000_put_hw_semaphore_82574(hw); break; default: - panic("unknown mac type %x\n", hw->mac.type); break; } diff --git a/sys/dev/e1000/e1000_82575.c b/sys/dev/e1000/e1000_82575.c index 064731a7d551..5d68e8b9718c 100644 --- a/sys/dev/e1000/e1000_82575.c +++ b/sys/dev/e1000/e1000_82575.c @@ -79,9 +79,11 @@ static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, u16 data); static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, u16 *speed, u16 *duplex); static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); static bool e1000_sgmii_active_82575(struct e1000_hw *hw); static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); @@ -509,8 +511,12 @@ static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) /* link info */ mac->ops.get_link_up_info = e1000_get_link_up_info_82575; /* acquire SW_FW sync */ - mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync; - mac->ops.release_swfw_sync = e1000_release_swfw_sync; + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; + if (mac->type >= e1000_i210) { + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; + } /* set lan id for port to determine which phy lock to use */ 
hw->mac.ops.set_lan_id(hw); @@ -982,7 +988,7 @@ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_nvm_82575"); - ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); if (ret_val) goto out; @@ -1013,7 +1019,7 @@ static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) ret_val = e1000_acquire_nvm_generic(hw); if (ret_val) - e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); out: return ret_val; @@ -1032,7 +1038,83 @@ static void e1000_release_nvm_82575(struct e1000_hw *hw) e1000_release_nvm_generic(hw); - e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. 
+ **/ +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; + + DEBUGFUNC("e1000_acquire_swfw_sync_82575"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_generic(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_82575 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. 
+ **/ +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_82575"); + + while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); } /** diff --git a/sys/dev/e1000/e1000_hw.h b/sys/dev/e1000/e1000_hw.h index c90066dc9435..e1464a7b655a 100644 --- a/sys/dev/e1000/e1000_hw.h +++ b/sys/dev/e1000/e1000_hw.h @@ -934,6 +934,7 @@ struct e1000_dev_spec_82543 { struct e1000_dev_spec_82571 { bool laa_is_present; u32 smb_counter; + E1000_MUTEX swflag_mutex; }; struct e1000_dev_spec_80003es2lan { @@ -957,6 +958,8 @@ enum e1000_ulp_state { struct e1000_dev_spec_ich8lan { bool kmrn_lock_loss_workaround_enabled; struct e1000_shadow_ram shadow_ram[E1000_SHADOW_RAM_WORDS]; + E1000_MUTEX nvm_mutex; + E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; bool disable_k1_off; bool eee_disable; diff --git a/sys/dev/e1000/e1000_i210.c b/sys/dev/e1000/e1000_i210.c index f03fbac1f13d..cd8d7c7e1f56 100644 --- a/sys/dev/e1000/e1000_i210.c +++ b/sys/dev/e1000/e1000_i210.c @@ -37,6 +37,7 @@ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); static void e1000_release_nvm_i210(struct e1000_hw *hw); +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); @@ -57,7 +58,7 @@ static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_nvm_i210"); - ret_val = e1000_acquire_swfw_sync(hw, E1000_SWFW_EEP_SM); + ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); return ret_val; } @@ -73,7 +74,152 @@ static void e1000_release_nvm_i210(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_i210"); - e1000_release_swfw_sync(hw, E1000_SWFW_EEP_SM); + 
e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + + DEBUGFUNC("e1000_acquire_swfw_sync_i210"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_i210(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_i210 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. 
+ **/ +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_i210"); + + while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); +} + +/** + * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_i210"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = FALSE; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } + + /* If we do not have the semaphore here, we have to give up. */ + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + } + + /* Get the FW semaphore. 
*/ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; } /** diff --git a/sys/dev/e1000/e1000_i210.h b/sys/dev/e1000/e1000_i210.h index 960e2c5a730f..f940915b0619 100644 --- a/sys/dev/e1000/e1000_i210.h +++ b/sys/dev/e1000/e1000_i210.h @@ -43,6 +43,8 @@ s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data); s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, diff --git a/sys/dev/e1000/e1000_ich8lan.c b/sys/dev/e1000/e1000_ich8lan.c index ebb5aad94494..9be9ac799d94 100644 --- a/sys/dev/e1000/e1000_ich8lan.c +++ b/sys/dev/e1000/e1000_ich8lan.c @@ -694,6 +694,9 @@ static s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) dev_spec->shadow_ram[i].value = 0xFFFF; } + E1000_MUTEX_INIT(&dev_spec->nvm_mutex); + E1000_MUTEX_INIT(&dev_spec->swflag_mutex); + /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; @@ -1844,7 +1847,7 @@ static s32 e1000_acquire_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_acquire_nvm_ich8lan"); - ASSERT_CTX_LOCK_HELD(hw); + E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.nvm_mutex); return E1000_SUCCESS; } @@ -1859,7 +1862,9 @@ static void e1000_release_nvm_ich8lan(struct e1000_hw *hw) { DEBUGFUNC("e1000_release_nvm_ich8lan"); - 
ASSERT_CTX_LOCK_HELD(hw); + E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.nvm_mutex); + + return; } /** @@ -1876,7 +1881,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) DEBUGFUNC("e1000_acquire_swflag_ich8lan"); - ASSERT_CTX_LOCK_HELD(hw); + E1000_MUTEX_LOCK(&hw->dev_spec.ich8lan.swflag_mutex); while (timeout) { extcnf_ctrl = E1000_READ_REG(hw, E1000_EXTCNF_CTRL); @@ -1917,6 +1922,9 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) } out: + if (ret_val) + E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); + return ret_val; } @@ -1941,6 +1949,10 @@ static void e1000_release_swflag_ich8lan(struct e1000_hw *hw) } else { DEBUGOUT("Semaphore unexpectedly released by sw/fw/hw\n"); } + + E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); + + return; } /** @@ -5010,6 +5022,8 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) E1000_WRITE_REG(hw, E1000_FEXTNVM3, reg); } + if (!ret_val) + E1000_MUTEX_UNLOCK(&hw->dev_spec.ich8lan.swflag_mutex); if (ctrl & E1000_CTRL_PHY_RST) { ret_val = hw->phy.ops.get_cfg_done(hw); diff --git a/sys/dev/e1000/e1000_mac.c b/sys/dev/e1000/e1000_mac.c index 2140ba985e2f..1c863073f082 100644 --- a/sys/dev/e1000/e1000_mac.c +++ b/sys/dev/e1000/e1000_mac.c @@ -1706,6 +1706,76 @@ s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSED return E1000_SUCCESS; } +/** + * e1000_get_hw_semaphore_generic - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_generic"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* 
Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore_generic - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore_generic"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + /** * e1000_get_auto_rd_done_generic - Check for auto read completion * @hw: pointer to the HW structure @@ -2181,186 +2251,3 @@ s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, return E1000_SUCCESS; } - -/** - * e1000_get_hw_semaphore - Acquire hardware semaphore - * @hw: pointer to the HW structure - * - * Acquire the HW semaphore to access the PHY or NVM - **/ -s32 e1000_get_hw_semaphore(struct e1000_hw *hw) -{ - u32 swsm; - s32 timeout = hw->nvm.word_size + 1; - s32 i = 0; - - DEBUGFUNC("e1000_get_hw_semaphore"); -#ifdef notyet - /* _82571 */ - /* If we have timedout 3 times on trying to acquire - * the inter-port SMBI semaphore, there is old code - * operating on the other port, and it is not - * releasing SMBI. Modify the number of times that - * we try for the semaphore to interwork with this - * older code. 
- */ - if (hw->dev_spec._82571.smb_counter > 2) - sw_timeout = 1; - -#endif - /* Get the SW semaphore */ - while (i < timeout) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - i++; - } - - if (i == timeout) { -#ifdef notyet - /* - * XXX This sounds more like a driver bug whereby we either - * recursed accidentally or missed clearing it previously - */ - /* In rare circumstances, the SW semaphore may already be held - * unintentionally. Clear the semaphore once before giving up. - */ - if (hw->dev_spec._82575.clear_semaphore_once) { - hw->dev_spec._82575.clear_semaphore_once = FALSE; - e1000_put_hw_semaphore_generic(hw); - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - if (!(swsm & E1000_SWSM_SMBI)) - break; - - usec_delay(50); - } - } -#endif - - DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); - return -E1000_ERR_NVM; - } - - /* Get the FW semaphore. */ - for (i = 0; i < timeout; i++) { - swsm = E1000_READ_REG(hw, E1000_SWSM); - E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); - - /* Semaphore acquired if bit latched */ - if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) - break; - - usec_delay(50); - } - - if (i == timeout) { - /* Release semaphores */ - e1000_put_hw_semaphore(hw); - DEBUGOUT("Driver can't access the NVM\n"); - return -E1000_ERR_NVM; - } - - return E1000_SUCCESS; -} - -/** - * e1000_put_hw_semaphore - Release hardware semaphore - * @hw: pointer to the HW structure - * - * Release hardware semaphore used to access the PHY or NVM - **/ -void e1000_put_hw_semaphore(struct e1000_hw *hw) -{ - u32 swsm; - - DEBUGFUNC("e1000_put_hw_semaphore"); - - swsm = E1000_READ_REG(hw, E1000_SWSM); - - swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); - - E1000_WRITE_REG(hw, E1000_SWSM, swsm); -} - - -/** - * e1000_acquire_swfw_sync - Acquire SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * 
Acquire the SW/FW semaphore to access the PHY or NVM. The mask - * will also specify which port we're acquiring the lock for. - **/ -s32 -e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - u32 swmask = mask; - u32 fwmask = mask << 16; - s32 ret_val = E1000_SUCCESS; - s32 i = 0, timeout = 200; - - DEBUGFUNC("e1000_acquire_swfw_sync"); - ASSERT_NO_LOCKS(); - while (i < timeout) { - if (e1000_get_hw_semaphore(hw)) { - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - if (!(swfw_sync & (fwmask | swmask))) - break; - - /* - * Firmware currently using resource (fwmask) - * or other software thread using resource (swmask) - */ - e1000_put_hw_semaphore(hw); - msec_delay_irq(5); - i++; - } - - if (i == timeout) { - DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); - ret_val = -E1000_ERR_SWFW_SYNC; - goto out; - } - - swfw_sync |= swmask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore(hw); - -out: - return ret_val; -} - -/** - * e1000_release_swfw_sync - Release SW/FW semaphore - * @hw: pointer to the HW structure - * @mask: specifies which semaphore to acquire - * - * Release the SW/FW semaphore used to access the PHY or NVM. The mask - * will also specify which port we're releasing the lock for. 
- **/ -void -e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask) -{ - u32 swfw_sync; - - DEBUGFUNC("e1000_release_swfw_sync"); - - while (e1000_get_hw_semaphore(hw) != E1000_SUCCESS) - ; /* Empty */ - - swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); - swfw_sync &= ~mask; - E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); - - e1000_put_hw_semaphore(hw); -} - diff --git a/sys/dev/e1000/e1000_mac.h b/sys/dev/e1000/e1000_mac.h index 2953bd52364f..ef9789bbb537 100644 --- a/sys/dev/e1000/e1000_mac.h +++ b/sys/dev/e1000/e1000_mac.h @@ -60,6 +60,7 @@ s32 e1000_get_bus_info_pci_generic(struct e1000_hw *hw); s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); void e1000_set_lan_id_single_port(struct e1000_hw *hw); void e1000_set_lan_id_multi_port_pci(struct e1000_hw *hw); +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, u16 *duplex); s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, @@ -84,15 +85,11 @@ void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); void e1000_clear_vfta_generic(struct e1000_hw *hw); void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); void e1000_pcix_mmrbc_workaround_generic(struct e1000_hw *hw); +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); void e1000_reset_adaptive_generic(struct e1000_hw *hw); void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); void e1000_update_adaptive_generic(struct e1000_hw *hw); void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); -s32 e1000_get_hw_semaphore(struct e1000_hw *hw); -void e1000_put_hw_semaphore(struct e1000_hw *hw); -s32 e1000_acquire_swfw_sync(struct e1000_hw *hw, u16 mask); -void e1000_release_swfw_sync(struct e1000_hw *hw, u16 mask); - #endif diff --git a/sys/dev/e1000/e1000_osdep.h b/sys/dev/e1000/e1000_osdep.h index 840bbfcfcdce..c7c23e582ca9 100644 
--- a/sys/dev/e1000/e1000_osdep.h +++ b/sys/dev/e1000/e1000_osdep.h @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -48,14 +47,6 @@ #include #include #include - -#include -#include -#include -#include - - - #include #include #include @@ -67,40 +58,11 @@ #define ASSERT(x) if(!(x)) panic("EM: x") -#define us_scale(x) max(1, (x/(1000000/hz))) -static inline int -ms_scale(int x) { - if (hz == 1000) { - return (x); - } else if (hz > 1000) { - return (x*(hz/1000)); - } else { - return (max(1, x/(1000/hz))); - } -} -static inline void -safe_pause_us(int x) { - if (cold) { - DELAY(x); - } else { - pause("e1000_delay", max(1, x/(1000000/hz))); - } -} - -static inline void -safe_pause_ms(int x) { - if (cold) { - DELAY(x*1000); - } else { - pause("e1000_delay", ms_scale(x)); - } -} - -#define usec_delay(x) safe_pause_us(x) +#define usec_delay(x) DELAY(x) #define usec_delay_irq(x) usec_delay(x) -#define msec_delay(x) safe_pause_ms(x) -#define msec_delay_irq(x) msec_delay(x) +#define msec_delay(x) DELAY(1000*(x)) +#define msec_delay_irq(x) DELAY(1000*(x)) /* Enable/disable debugging statements in shared code */ #define DBG 0 @@ -119,6 +81,16 @@ safe_pause_ms(int x) { #define CMD_MEM_WRT_INVALIDATE 0x0010 /* BIT_4 */ #define PCI_COMMAND_REGISTER PCIR_COMMAND +/* Mutex used in the shared code */ +#define E1000_MUTEX struct mtx +#define E1000_MUTEX_INIT(mutex) mtx_init((mutex), #mutex, \ + MTX_NETWORK_LOCK, \ + MTX_DEF | MTX_DUPOK) +#define E1000_MUTEX_DESTROY(mutex) mtx_destroy(mutex) +#define E1000_MUTEX_LOCK(mutex) mtx_lock(mutex) +#define E1000_MUTEX_TRYLOCK(mutex) mtx_trylock(mutex) +#define E1000_MUTEX_UNLOCK(mutex) mtx_unlock(mutex) + typedef uint64_t u64; typedef uint32_t u32; typedef uint16_t u16; @@ -144,12 +116,6 @@ typedef int8_t s8; #endif #endif /*__FreeBSD_version < 800000 */ -#ifdef INVARIANTS -#define ASSERT_CTX_LOCK_HELD(hw) (sx_assert(iflib_ctx_lock_get(((struct e1000_osdep *)hw->back)->ctx), SX_XLOCKED)) -#else -#define 
ASSERT_CTX_LOCK_HELD(hw) -#endif - #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) @@ -169,7 +135,6 @@ struct e1000_osdep bus_space_tag_t flash_bus_space_tag; bus_space_handle_t flash_bus_space_handle; device_t dev; - if_ctx_t ctx; }; #define E1000_REGISTER(hw, reg) (((hw)->mac.type >= e1000_82543) \ @@ -251,22 +216,5 @@ struct e1000_osdep bus_space_write_2(((struct e1000_osdep *)(hw)->back)->flash_bus_space_tag, \ ((struct e1000_osdep *)(hw)->back)->flash_bus_space_handle, reg, value) - -#if defined(INVARIANTS) -#include - -#define ASSERT_NO_LOCKS() \ - do { \ - int unknown_locks = curthread->td_locks - mtx_owned(&Giant); \ - if (unknown_locks > 0) { \ - WITNESS_WARN(WARN_GIANTOK|WARN_SLEEPOK|WARN_PANIC, NULL, "unexpected non-sleepable lock"); \ - } \ - MPASS(curthread->td_rw_rlocks == 0); \ - MPASS(curthread->td_lk_slocks == 0); \ - } while (0) -#else -#define ASSERT_NO_LOCKS() -#endif - #endif /* _FREEBSD_OS_H_ */ diff --git a/sys/dev/e1000/em_txrx.c b/sys/dev/e1000/em_txrx.c index 6e3ddedc65d0..22e983b370a0 100644 --- a/sys/dev/e1000/em_txrx.c +++ b/sys/dev/e1000/em_txrx.c @@ -66,7 +66,6 @@ static void em_receive_checksum(uint32_t status, if_rxd_info_t ri); static int em_determine_rsstype(u32 pkt_info); extern int em_intr(void *arg); - struct if_txrx em_txrx = { em_isc_txd_encap, em_isc_txd_flush, @@ -75,7 +74,7 @@ struct if_txrx em_txrx = { em_isc_rxd_pkt_get, em_isc_rxd_refill, em_isc_rxd_flush, - em_intr, + em_intr }; struct if_txrx lem_txrx = { @@ -86,7 +85,7 @@ struct if_txrx lem_txrx = { lem_isc_rxd_pkt_get, lem_isc_rxd_refill, em_isc_rxd_flush, - em_intr, + em_intr }; extern if_shared_ctx_t em_sctx; @@ -524,8 +523,8 @@ em_isc_rxd_refill(void *arg, if_rxd_update_t iru) for (i = 0, next_pidx = pidx; i < count; i++) { rxd = &rxr->rx_base[next_pidx]; rxd->read.buffer_addr = htole64(paddrs[i]); - /* Zero out rx desc status */ - rxd->wb.upper.status_error &= htole32(~0xFF); + /* DD bits must be cleared */ + 
rxd->wb.upper.status_error = 0; if (++next_pidx == scctx->isc_nrxd[0]) next_pidx = 0; @@ -552,9 +551,14 @@ lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) struct e1000_rx_desc *rxd; u32 staterr = 0; int cnt, i; - budget = min(budget, scctx->isc_nrxd[0]); - for (cnt = 0, i = idx; cnt <= budget;) { + if (budget == 1) { + rxd = (struct e1000_rx_desc *)&rxr->rx_base[idx]; + staterr = rxd->status; + return (staterr & E1000_RXD_STAT_DD); + } + + for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = (struct e1000_rx_desc *)&rxr->rx_base[i]; staterr = rxd->status; @@ -567,7 +571,6 @@ lem_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) if (staterr & E1000_RXD_STAT_EOP) cnt++; } - MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -581,9 +584,14 @@ em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) union e1000_rx_desc_extended *rxd; u32 staterr = 0; int cnt, i; - budget = min(budget, scctx->isc_nrxd[0]); - for (cnt = 0, i = idx; cnt <= budget;) { + if (budget == 1) { + rxd = &rxr->rx_base[idx]; + staterr = le32toh(rxd->wb.upper.status_error); + return (staterr & E1000_RXD_STAT_DD); + } + + for (cnt = 0, i = idx; cnt < scctx->isc_nrxd[0] && cnt <= budget;) { rxd = &rxr->rx_base[i]; staterr = le32toh(rxd->wb.upper.status_error); @@ -598,7 +606,6 @@ em_isc_rxd_available(void *arg, uint16_t rxqid, qidx_t idx, qidx_t budget) cnt++; } - MPASS(cnt <= scctx->isc_nrxd[0]); return (cnt); } @@ -687,8 +694,7 @@ em_isc_rxd_pkt_get(void *arg, if_rxd_info_t ri) pkt_info = le32toh(rxd->wb.lower.mrq); /* Error Checking then decrement count */ - KASSERT(staterr & E1000_RXD_STAT_DD, - ("cidx=%d i=%d iri_len=%d", cidx, i, ri->iri_len)); + MPASS ((staterr & E1000_RXD_STAT_DD) != 0); len = le16toh(rxd->wb.upper.length); ri->iri_len += len; diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 516c27d25be4..e29891cd6de1 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ 
-483,7 +483,7 @@ static struct if_shared_ctx em_sctx_init = { .isc_vendor_info = em_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, + .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -511,7 +511,7 @@ static struct if_shared_ctx igb_sctx_init = { .isc_vendor_info = igb_vendor_info_array, .isc_driver_version = em_driver_version, .isc_driver = &em_if_driver, - .isc_flags = IFLIB_TSO_INIT_IP | IFLIB_NEED_ZERO_CSUM, + .isc_flags = IFLIB_NEED_SCRATCH | IFLIB_TSO_INIT_IP, .isc_nrxd_min = {EM_MIN_RXD}, .isc_ntxd_min = {EM_MIN_TXD}, @@ -723,7 +723,7 @@ em_if_attach_pre(if_ctx_t ctx) return (ENXIO); } - adapter->ctx = adapter->osdep.ctx = ctx; + adapter->ctx = ctx; adapter->dev = adapter->osdep.dev = dev; scctx = adapter->shared = iflib_get_softc_ctx(ctx); adapter->media = iflib_get_media(ctx); @@ -1405,9 +1405,7 @@ em_msix_link(void *arg) { struct adapter *adapter = arg; u32 reg_icr; - int is_igb; - is_igb = (adapter->hw.mac.type >= igb_mac_min); ++adapter->link_irq; MPASS(adapter->hw.back != NULL); reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); @@ -1415,29 +1413,26 @@ em_msix_link(void *arg) if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; - if (is_igb) { - if (reg_icr & E1000_ICR_LSC) - em_handle_link(adapter->ctx); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC); - E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); + if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + em_handle_link(adapter->ctx); } else { - if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { - em_handle_link(adapter->ctx); - } E1000_WRITE_REG(&adapter->hw, E1000_IMS, - EM_MSIX_LINK | E1000_IMS_LSC); - - /* - * Because we must read the ICR for this interrupt - * it may clear other causes using autoclear, for - * this reason we simply create a soft interrupt - * for all these vectors. 
- */ - if (reg_icr) { - E1000_WRITE_REG(&adapter->hw, - E1000_ICS, adapter->ims); - } + EM_MSIX_LINK | E1000_IMS_LSC); + if (adapter->hw.mac.type >= igb_mac_min) + E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask); } + + /* + * Because we must read the ICR for this interrupt + * it may clear other causes using autoclear, for + * this reason we simply create a soft interrupt + * for all these vectors. + */ + if (reg_icr && adapter->hw.mac.type < igb_mac_min) { + E1000_WRITE_REG(&adapter->hw, + E1000_ICS, adapter->ims); + } + return (FILTER_HANDLED); } @@ -1675,6 +1670,13 @@ em_if_timer(if_ctx_t ctx, uint16_t qid) return; iflib_admin_intr_deferred(ctx); + /* Reset LAA into RAR[0] on 82571 */ + if ((adapter->hw.mac.type == e1000_82571) && + e1000_get_laa_state_82571(&adapter->hw)) + e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); + + if (adapter->hw.mac.type < em_mac_min) + lem_smartspeed(adapter); /* Mask to use in the irq trigger */ if (adapter->intr_type == IFLIB_INTR_MSIX) { @@ -1785,14 +1787,6 @@ em_if_update_admin_status(if_ctx_t ctx) } em_update_stats_counters(adapter); - /* Reset LAA into RAR[0] on 82571 */ - if ((adapter->hw.mac.type == e1000_82571) && - e1000_get_laa_state_82571(&adapter->hw)) - e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0); - - if (adapter->hw.mac.type < em_mac_min) - lem_smartspeed(adapter); - E1000_WRITE_REG(&adapter->hw, E1000_IMS, EM_MSIX_LINK | E1000_IMS_LSC); } @@ -1908,87 +1902,6 @@ em_allocate_pci_resources(if_ctx_t ctx) return (0); } -static int -igb_intr_assign(if_ctx_t ctx, int msix) -{ - struct adapter *adapter = iflib_get_softc(ctx); - struct em_rx_queue *rx_que = adapter->rx_queues; - struct em_tx_queue *tx_que = adapter->tx_queues; - int error, rid, i, vector = 0, rx_vectors; - char buf[16]; - - /* First set up ring resources */ - for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { - rid = vector + 1; - snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, 
&rx_que->que_irq, rid, IFLIB_INTR_RXTX, - em_msix_que, rx_que, rx_que->me, buf); - if (error) { - device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d\n", i, error); - adapter->rx_num_queues = i; - goto fail; - } - - rx_que->msix = vector; - - /* - * Set the bit to enable interrupt - * in E1000_IMS -- bits 20 and 21 - * are for RX0 and RX1, note this has - * NOTHING to do with the MSIX vector - */ - if (adapter->hw.mac.type == e1000_82574) { - rx_que->eims = 1 << (20 + i); - adapter->ims |= rx_que->eims; - adapter->ivars |= (8 | rx_que->msix) << (i * 4); - } else if (adapter->hw.mac.type == e1000_82575) - rx_que->eims = E1000_EICR_TX_QUEUE0 << vector; - else - rx_que->eims = 1 << vector; - } - rx_vectors = vector; - - vector = 0; - for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { - snprintf(buf, sizeof(buf), "txq%d", i); - tx_que = &adapter->tx_queues[i]; - tx_que->msix = adapter->rx_queues[i % adapter->rx_num_queues].msix; - rid = rman_get_start(adapter->rx_queues[i % adapter->rx_num_queues].que_irq.ii_res); - iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); - - if (adapter->hw.mac.type == e1000_82574) { - tx_que->eims = 1 << (22 + i); - adapter->ims |= tx_que->eims; - adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); - } else if (adapter->hw.mac.type == e1000_82575) { - tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); - } else { - tx_que->eims = 1 << (i % adapter->tx_num_queues); - } - } - - /* Link interrupt */ - rid = rx_vectors + 1; - error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); - - if (error) { - device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); - goto fail; - } - adapter->linkvec = rx_vectors; - if (adapter->hw.mac.type < igb_mac_min) { - adapter->ivars |= (8 | rx_vectors) << 16; - adapter->ivars |= 0x80000000; - } - return (0); -fail: - iflib_irq_free(ctx, &adapter->irq); - rx_que = 
adapter->rx_queues; - for (int i = 0; i < adapter->rx_num_queues; i++, rx_que++) - iflib_irq_free(ctx, &rx_que->que_irq); - return (error); -} - /********************************************************************* * * Setup the MSIX Interrupt handlers @@ -2000,18 +1913,14 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) struct adapter *adapter = iflib_get_softc(ctx); struct em_rx_queue *rx_que = adapter->rx_queues; struct em_tx_queue *tx_que = adapter->tx_queues; - int error, rid, i, vector = 0; + int error, rid, i, vector = 0, rx_vectors; char buf[16]; - if (adapter->hw.mac.type >= igb_mac_min) { - return igb_intr_assign(ctx, msix); - } - /* First set up ring resources */ for (i = 0; i < adapter->rx_num_queues; i++, rx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "rxq%d", i); - error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RX, em_msix_que, rx_que, rx_que->me, buf); + error = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, rid, IFLIB_INTR_RXTX, em_msix_que, rx_que, rx_que->me, buf); if (error) { device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); adapter->rx_num_queues = i + 1; @@ -2035,19 +1944,16 @@ em_if_msix_intr_assign(if_ctx_t ctx, int msix) else rx_que->eims = 1 << vector; } + rx_vectors = vector; + vector = 0; for (i = 0; i < adapter->tx_num_queues; i++, tx_que++, vector++) { rid = vector + 1; snprintf(buf, sizeof(buf), "txq%d", i); tx_que = &adapter->tx_queues[i]; + iflib_softirq_alloc_generic(ctx, rid, IFLIB_INTR_TX, tx_que, tx_que->me, buf); - error = iflib_irq_alloc_generic(ctx, &tx_que->que_irq, rid, IFLIB_INTR_TX, em_msix_que, tx_que, tx_que->me, buf); - if (error) { - device_printf(iflib_get_dev(ctx), "Failed to allocate que int %d err: %d", i, error); - adapter->tx_num_queues = i + 1; - goto fail; - } - tx_que->msix = vector; + tx_que->msix = (vector % adapter->tx_num_queues); /* * Set the bit to enable interrupt @@ -2060,24 +1966,23 @@ em_if_msix_intr_assign(if_ctx_t 
ctx, int msix) adapter->ims |= tx_que->eims; adapter->ivars |= (8 | tx_que->msix) << (8 + (i * 4)); } else if (adapter->hw.mac.type == e1000_82575) { - tx_que->eims = E1000_EICR_TX_QUEUE0 << vector; + tx_que->eims = E1000_EICR_TX_QUEUE0 << (i % adapter->tx_num_queues); } else { - tx_que->eims = 1 << vector; + tx_que->eims = 1 << (i % adapter->tx_num_queues); } } /* Link interrupt */ - rid = vector + 1; + rid = rx_vectors + 1; error = iflib_irq_alloc_generic(ctx, &adapter->irq, rid, IFLIB_INTR_ADMIN, em_msix_link, adapter, 0, "aq"); if (error) { device_printf(iflib_get_dev(ctx), "Failed to register admin handler"); goto fail; } - - adapter->linkvec = vector; + adapter->linkvec = rx_vectors; if (adapter->hw.mac.type < igb_mac_min) { - adapter->ivars |= (8 | vector) << 16; + adapter->ivars |= (8 | rx_vectors) << 16; adapter->ivars |= 0x80000000; } return (0); @@ -2234,24 +2139,15 @@ static void em_free_pci_resources(if_ctx_t ctx) { struct adapter *adapter = iflib_get_softc(ctx); - struct em_rx_queue *rxque = adapter->rx_queues; - struct em_tx_queue *txque = adapter->tx_queues; + struct em_rx_queue *que = adapter->rx_queues; device_t dev = iflib_get_dev(ctx); - int is_igb; - is_igb = (adapter->hw.mac.type >= igb_mac_min); /* Release all msix queue resources */ if (adapter->intr_type == IFLIB_INTR_MSIX) iflib_irq_free(ctx, &adapter->irq); - for (int i = 0; i < adapter->rx_num_queues; i++, rxque++) { - iflib_irq_free(ctx, &rxque->que_irq); - } - - if (!is_igb) { - for (int i = 0; i < adapter->tx_num_queues; i++, txque++) { - iflib_irq_free(ctx, &txque->que_irq); - } + for (int i = 0; i < adapter->rx_num_queues; i++, que++) { + iflib_irq_free(ctx, &que->que_irq); } /* First release all the interrupt resources */ diff --git a/sys/dev/e1000/if_em.h b/sys/dev/e1000/if_em.h index 19154c5a3c8d..6e19449ac8b4 100644 --- a/sys/dev/e1000/if_em.h +++ b/sys/dev/e1000/if_em.h @@ -434,7 +434,6 @@ struct em_tx_queue { u32 eims; /* This queue's EIMS bit */ u32 me; struct tx_ring txr; - 
struct if_irq que_irq; }; struct em_rx_queue { @@ -444,7 +443,7 @@ struct em_rx_queue { u32 eims; struct rx_ring rxr; u64 irqs; - struct if_irq que_irq; + struct if_irq que_irq; }; /* Our adapter structure */ diff --git a/sys/kern/subr_gtaskqueue.c b/sys/kern/subr_gtaskqueue.c index 716aef6b1053..6a39a3aac14a 100644 --- a/sys/kern/subr_gtaskqueue.c +++ b/sys/kern/subr_gtaskqueue.c @@ -48,26 +48,17 @@ __FBSDID("$FreeBSD$"); #include #include -static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); +static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues"); static void gtaskqueue_thread_enqueue(void *); static void gtaskqueue_thread_loop(void *arg); -static int _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); -TASKQGROUP_DEFINE(softirq, mp_ncpus, 1, false, PI_SOFT); + +TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); struct gtaskqueue_busy { struct gtask *tb_running; TAILQ_ENTRY(gtaskqueue_busy) tb_link; }; -struct gt_intr_thread { - int git_flags; /* (j) IT_* flags. */ - int git_need; /* Needs service. */ -}; - -/* Interrupt thread flags kept in it_flags */ -#define IT_DEAD 0x000001 /* Thread is waiting to exit. */ -#define IT_WAIT 0x000002 /* Thread is waiting for completion. 
*/ - static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1; struct gtaskqueue { @@ -78,7 +69,6 @@ struct gtaskqueue { TAILQ_HEAD(, gtaskqueue_busy) tq_active; struct mtx tq_mutex; struct thread **tq_threads; - struct gt_intr_thread *tq_gt_intrs; int tq_tcount; int tq_spin; int tq_flags; @@ -90,7 +80,6 @@ struct gtaskqueue { #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_UNLOCKED_ENQUEUE (1 << 2) -#define TQ_FLAGS_INTR (1 << 3) #define DT_CALLOUT_ARMED (1 << 0) @@ -191,32 +180,6 @@ gtaskqueue_free(struct gtaskqueue *queue) free(queue, M_GTASKQUEUE); } -static void -schedule_ithread(struct gtaskqueue *queue) -{ - struct proc *p; - struct thread *td; - struct gt_intr_thread *git; - - MPASS(queue->tq_tcount == 1); - td = queue->tq_threads[0]; - git = &queue->tq_gt_intrs[0]; - p = td->td_proc; - - atomic_store_rel_int(&git->git_need, 1); - thread_lock(td); - if (TD_AWAITING_INTR(td)) { - CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, - td->td_name); - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } else { - CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", - __func__, p->p_pid, td->td_name, git->git_need, td->td_state); - } - thread_unlock(td); -} - int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) { @@ -234,13 +197,8 @@ grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); gtask->ta_flags |= TASK_ENQUEUED; TQ_UNLOCK(queue); - if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) { - if (queue->tq_flags & TQ_FLAGS_INTR) { - schedule_ithread(queue); - } else { - queue->tq_enqueue(queue->tq_context); - } - } + if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0) + queue->tq_enqueue(queue->tq_context); return (0); } @@ -445,7 +403,7 @@ gtaskqueue_drain_all(struct gtaskqueue *queue) static int _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - cpuset_t *mask, bool intr, const char *name, va_list ap) + 
cpuset_t *mask, const char *name, va_list ap) { char ktname[MAXCOMLEN + 1]; struct thread *td; @@ -464,12 +422,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, printf("%s: no memory for %s threads\n", __func__, ktname); return (ENOMEM); } - tq->tq_gt_intrs = malloc(sizeof(struct gt_intr_thread) * count, M_GTASKQUEUE, - M_NOWAIT | M_ZERO); - if (tq->tq_gt_intrs == NULL) { - printf("%s: no memory for %s intr info\n", __func__, ktname); - return (ENOMEM); - } for (i = 0; i < count; i++) { if (count == 1) @@ -487,9 +439,6 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, } else tq->tq_tcount++; } - if (intr) - tq->tq_flags |= TQ_FLAGS_INTR; - for (i = 0; i < count; i++) { if (tq->tq_threads[i] == NULL) continue; @@ -509,14 +458,7 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, } thread_lock(td); sched_prio(td, pri); - if (intr) { - /* we need to schedule the thread from the interrupt handler for this to work */ - TD_SET_IWAIT(td); - sched_class(td, PRI_ITHD); - td->td_pflags |= TDP_ITHREAD; - } else { - sched_add(td, SRQ_BORING); - } + sched_add(td, SRQ_BORING); thread_unlock(td); } @@ -525,13 +467,13 @@ _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, static int gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri, - bool intr, const char *name, ...) + const char *name, ...) 
{ va_list ap; int error; va_start(ap, name); - error = _gtaskqueue_start_threads(tqp, count, pri, NULL, intr, name, ap); + error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap); va_end(ap); return (error); } @@ -549,58 +491,16 @@ gtaskqueue_run_callback(struct gtaskqueue *tq, } static void -intr_thread_loop(struct gtaskqueue *tq) +gtaskqueue_thread_loop(void *arg) { - struct gt_intr_thread *git; - struct thread *td; - - git = &tq->tq_gt_intrs[0]; - td = tq->tq_threads[0]; - MPASS(tq->tq_tcount == 1); + struct gtaskqueue **tqp, *tq; + tqp = arg; + tq = *tqp; + gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); + TQ_LOCK(tq); while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { - THREAD_NO_SLEEPING(); - while (atomic_cmpset_acq_int(&git->git_need, 1, 0) != 0) { - gtaskqueue_run_locked(tq); - } - THREAD_SLEEPING_OK(); - - /* - * Because taskqueue_run() can drop tq_mutex, we need to - * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the - * meantime, which means we missed a wakeup. - */ - if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0) - break; - - TQ_UNLOCK(tq); - WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); - mtx_assert(&Giant, MA_NOTOWNED); - thread_lock(td); - if (atomic_load_acq_int(&git->git_need) == 0 && - (git->git_flags & (IT_DEAD | IT_WAIT)) == 0) { - TD_SET_IWAIT(td); - mi_switch(SW_VOL | SWT_IWAIT, NULL); - } -#if 0 - /* XXX is this something we want? */ - if (git->git_flags & IT_WAIT) { - wake = 1; - git->git_flags &= ~IT_WAIT; - } -#endif - thread_unlock(td); - TQ_LOCK(tq); - } - THREAD_NO_SLEEPING(); - gtaskqueue_run_locked(tq); - THREAD_SLEEPING_OK(); -} - -static void -timeshare_thread_loop(struct gtaskqueue *tq) -{ - while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) { + /* XXX ? 
*/ gtaskqueue_run_locked(tq); /* * Because taskqueue_run() can drop tq_mutex, we need to @@ -612,23 +512,6 @@ timeshare_thread_loop(struct gtaskqueue *tq) TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } gtaskqueue_run_locked(tq); -} - -static void -gtaskqueue_thread_loop(void *arg) -{ - struct gtaskqueue **tqp, *tq; - - tqp = arg; - tq = *tqp; - gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT); - TQ_LOCK(tq); - if (curthread->td_pflags & TDP_ITHREAD) { - intr_thread_loop(tq); - } else { - timeshare_thread_loop(tq); - } - /* * This thread is on its way out, so just drop the lock temporarily * in order to call the shutdown callback. This allows the callback @@ -675,17 +558,11 @@ struct taskqgroup_cpu { struct taskqgroup { struct taskqgroup_cpu tqg_queue[MAXCPU]; struct mtx tqg_lock; - void (*adjust_func)(void*); char * tqg_name; int tqg_adjusting; int tqg_stride; int tqg_cnt; - int tqg_pri; - int tqg_flags; - bool tqg_intr; }; -#define TQG_NEED_ADJUST 0x1 -#define TQG_ADJUSTED 0x2 struct taskq_bind_task { struct gtask bt_task; @@ -693,16 +570,16 @@ struct taskq_bind_task { }; static void -taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu, bool intr, int pri) +taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu) { struct taskqgroup_cpu *qcpu; qcpu = &qgroup->tqg_queue[idx]; LIST_INIT(&qcpu->tgc_tasks); - qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK | M_ZERO, + qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK, taskqueue_thread_enqueue, &qcpu->tgc_taskq); - gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, pri, - intr, "%s_%d", qgroup->tqg_name, idx); + gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT, + "%s_%d", qgroup->tqg_name, idx); qcpu->tgc_cpu = cpu; } @@ -786,20 +663,12 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int irq, char *name) { cpuset_t mask; - int qid, error; + int qid; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = -1; - 
- mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_flags |= TQG_NEED_ADJUST; - mtx_unlock(&qgroup->tqg_lock); - - if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) - qgroup->adjust_func(NULL); - mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, uniq); qgroup->tqg_queue[qid].tgc_cnt++; @@ -810,9 +679,7 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask, CPU_ZERO(&mask); CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask); mtx_unlock(&qgroup->tqg_lock); - error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); - if (error) - printf("taskqgroup_attach: setaffinity failed: %d\n", error); + intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); } else mtx_unlock(&qgroup->tqg_lock); } @@ -821,7 +688,7 @@ static void taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int qid, cpu, error; + int qid, cpu; mtx_lock(&qgroup->tqg_lock); qid = taskqgroup_find(qgroup, gtask->gt_uniq); @@ -831,10 +698,9 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) CPU_ZERO(&mask); CPU_SET(cpu, &mask); - error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_INTRHANDLER, &mask); + intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask); + mtx_lock(&qgroup->tqg_lock); - if (error) - printf("taskqgroup_attach_deferred: setaffinity failed: %d\n", error); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -845,79 +711,27 @@ taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask) mtx_unlock(&qgroup->tqg_lock); } -static int -taskqgroup_adjust_deferred(struct taskqgroup *qgroup, int cpu) -{ - int i, error = 0, cpu_max = -1; - - mtx_lock(&qgroup->tqg_lock); - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) - cpu_max = qgroup->tqg_queue[i].tgc_cpu; - if (cpu_max >= cpu) { - mtx_unlock(&qgroup->tqg_lock); - return (0); - } - MPASS(cpu <= mp_maxid); - error = _taskqgroup_adjust(qgroup, cpu + 1, qgroup->tqg_stride, - qgroup->tqg_intr, qgroup->tqg_pri); - if 
(error) { - printf("%s: _taskqgroup_adjust(%p, %d, %d, %d, %d) => %d\n\n", - __func__, qgroup, cpu + 1, qgroup->tqg_stride, qgroup->tqg_intr, - qgroup->tqg_pri, error); - goto out; - } - for (i = 0; i < qgroup->tqg_cnt; i++) - if (qgroup->tqg_queue[i].tgc_cpu > cpu_max) - cpu_max = qgroup->tqg_queue[i].tgc_cpu; - MPASS(cpu_max >= cpu); -out: - mtx_unlock(&qgroup->tqg_lock); - return (error); -} - int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, void *uniq, int cpu, int irq, char *name) { cpuset_t mask; - int i, error, qid; + int i, qid; qid = -1; gtask->gt_uniq = uniq; gtask->gt_name = name; gtask->gt_irq = irq; gtask->gt_cpu = cpu; - MPASS(cpu >= 0); - - mtx_lock(&qgroup->tqg_lock); - qgroup->tqg_flags |= TQG_NEED_ADJUST; - mtx_unlock(&qgroup->tqg_lock); - - if (tqg_smp_started && !(qgroup->tqg_flags & TQG_ADJUSTED)) { - uintptr_t cpuid = cpu + 1; - qgroup->adjust_func((void *)cpuid); - } - if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) - return (error); - mtx_lock(&qgroup->tqg_lock); if (tqg_smp_started) { - for (i = 0; i < qgroup->tqg_cnt; i++) { + for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; break; } -#ifdef INVARIANTS - else - printf("qgroup->tqg_queue[%d].tgc_cpu=0x%x tgc_cnt=0x%x\n", - i, qgroup->tqg_queue[i].tgc_cpu, qgroup->tqg_queue[i].tgc_cnt); - -#endif - } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); - printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } } else @@ -930,11 +744,8 @@ taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask, CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1 && tqg_smp_started) { - error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); - if (error) - printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); - } + if (irq != -1 && tqg_smp_started) + intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); return (0); } @@ -942,18 +753,13 @@ static int taskqgroup_attach_cpu_deferred(struct 
taskqgroup *qgroup, struct grouptask *gtask) { cpuset_t mask; - int i, qid, irq, cpu, error; + int i, qid, irq, cpu; qid = -1; irq = gtask->gt_irq; cpu = gtask->gt_cpu; MPASS(tqg_smp_started); - - if ((error = taskqgroup_adjust_deferred(qgroup, cpu))) - return (error); mtx_lock(&qgroup->tqg_lock); - /* adjust as needed */ - MPASS(cpu <= mp_maxid); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_cpu == cpu) { qid = i; @@ -961,7 +767,6 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas } if (qid == -1) { mtx_unlock(&qgroup->tqg_lock); - printf("%s: qid not found for cpu=%d\n", __func__, cpu); return (EINVAL); } qgroup->tqg_queue[qid].tgc_cnt++; @@ -973,11 +778,8 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtas CPU_ZERO(&mask); CPU_SET(cpu, &mask); - if (irq != -1) { - error = intr_setaffinity(irq, CPU_WHICH_INTRHANDLER, &mask); - if (error) - printf("taskqgroup_attach_cpu: setaffinity failed: %d\n", error); - } + if (irq != -1) + intr_setaffinity(irq, CPU_WHICH_IRQ, &mask); return (0); } @@ -1016,25 +818,8 @@ taskqgroup_binder(void *ctx) printf("taskqgroup_binder: setaffinity failed: %d\n", error); free(gtask, M_DEVBUF); - } -static void -taskqgroup_ithread_binder(void *ctx) -{ - struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx; - cpuset_t mask; - int error; - CPU_ZERO(&mask); - CPU_SET(gtask->bt_cpuid, &mask); - error = cpuset_setthread(curthread->td_tid, &mask); - - if (error) - printf("taskqgroup_binder: setaffinity failed: %d\n", - error); - free(gtask, M_DEVBUF); - -} static void taskqgroup_bind(struct taskqgroup *qgroup) { @@ -1050,10 +835,7 @@ taskqgroup_bind(struct taskqgroup *qgroup) for (i = 0; i < qgroup->tqg_cnt; i++) { gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK); - if (qgroup->tqg_intr) - GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_ithread_binder, gtask); - else - GTASK_INIT(>ask->bt_task, 0, 0, taskqgroup_binder, gtask); + GTASK_INIT(>ask->bt_task, 
0, 0, taskqgroup_binder, gtask); gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu; grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq, >ask->bt_task); @@ -1061,7 +843,7 @@ taskqgroup_bind(struct taskqgroup *qgroup) } static int -_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) { LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); struct grouptask *gtask; @@ -1076,22 +858,14 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, return (EINVAL); } if (qgroup->tqg_adjusting) { - printf("%s: failed: adjusting\n", __func__); + printf("taskqgroup_adjust failed: adjusting\n"); return (EBUSY); } - /* No work to be done */ - if (qgroup->tqg_cnt == cnt) - return (0); qgroup->tqg_adjusting = 1; old_cnt = qgroup->tqg_cnt; old_cpu = 0; - if (old_cnt < cnt) { - int old_max_idx = max(0, old_cnt-1); - old_cpu = qgroup->tqg_queue[old_max_idx].tgc_cpu; - if (old_cnt > 0) - for (k = 0; k < stride; k++) - old_cpu = CPU_NEXT(old_cpu); - } + if (old_cnt < cnt) + old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu; mtx_unlock(&qgroup->tqg_lock); /* * Set up queue for tasks added before boot. @@ -1107,7 +881,7 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, */ cpu = old_cpu; for (i = old_cnt; i < cnt; i++) { - taskqgroup_cpu_create(qgroup, i, cpu, ithread, pri); + taskqgroup_cpu_create(qgroup, i, cpu); for (k = 0; k < stride; k++) cpu = CPU_NEXT(cpu); @@ -1115,8 +889,6 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, mtx_lock(&qgroup->tqg_lock); qgroup->tqg_cnt = cnt; qgroup->tqg_stride = stride; - qgroup->tqg_intr = ithread; - qgroup->tqg_pri = pri; /* * Adjust drivers to use new taskqs. 
@@ -1162,34 +934,12 @@ _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, } int -taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) +taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) { int error; mtx_lock(&qgroup->tqg_lock); - error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); - mtx_unlock(&qgroup->tqg_lock); - - return (error); -} - -void -taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)) -{ - qgroup-> adjust_func = adjust_func; -} - -int -taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri) -{ - int error = 0; - - mtx_lock(&qgroup->tqg_lock); - if ((qgroup->tqg_flags & (TQG_ADJUSTED|TQG_NEED_ADJUST)) == TQG_NEED_ADJUST) { - qgroup->tqg_flags |= TQG_ADJUSTED; - error = _taskqgroup_adjust(qgroup, cnt, stride, ithread, pri); - MPASS(error == 0); - } + error = _taskqgroup_adjust(qgroup, cnt, stride); mtx_unlock(&qgroup->tqg_lock); return (error); @@ -1204,9 +954,7 @@ taskqgroup_create(char *name) mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF); qgroup->tqg_name = name; LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks); - MPASS(qgroup->tqg_queue[0].tgc_cnt == 0); - MPASS(qgroup->tqg_queue[0].tgc_cpu == 0); - MPASS(qgroup->tqg_queue[0].tgc_taskq == 0); + return (qgroup); } diff --git a/sys/net/iflib.c b/sys/net/iflib.c index 4c077d5c0a03..41772d250f4a 100644 --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include + #include #include #include @@ -156,7 +157,7 @@ struct iflib_ctx { if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; - struct sx ifc_sx; + struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; @@ -184,8 +185,6 @@ struct iflib_ctx { uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; - uint16_t ifc_cpuid_highest; - uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t 
ifc_sysctl_nrxds[8]; @@ -204,66 +203,8 @@ struct iflib_ctx { eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; - LIST_ENTRY(iflib_ctx) ifc_next; }; -static LIST_HEAD(ctx_head, iflib_ctx) ctx_list; -static struct mtx ctx_list_lock; - -TASKQGROUP_DEFINE(if_io, mp_ncpus, 1, true, PI_NET); -TASKQGROUP_DEFINE(if_config, 1, 1, false, PI_SOFT); - -static void -iflib_ctx_apply(void (*fn)(if_ctx_t ctx, void *arg), void *arg) -{ - if_ctx_t ctx; - - mtx_lock(&ctx_list_lock); - LIST_FOREACH(ctx, &ctx_list, ifc_next) { - (fn)(ctx, arg); - } - mtx_unlock(&ctx_list_lock); -} - -static void -_iflib_cpuid_highest(if_ctx_t ctx, void *arg) { - int *cpuid = arg; - - if (*cpuid < ctx->ifc_cpuid_highest) - *cpuid = ctx->ifc_cpuid_highest; -} - -static int -iflib_cpuid_highest(void) -{ - int cpuid = 0; - - iflib_ctx_apply(_iflib_cpuid_highest, &cpuid); - return (cpuid); -} - -static void -iflib_ctx_insert(if_ctx_t ctx) -{ - mtx_lock(&ctx_list_lock); - LIST_INSERT_HEAD(&ctx_list, ctx, ifc_next); - mtx_unlock(&ctx_list_lock); -} - -static void -iflib_ctx_remove(if_ctx_t ctx) -{ - int max_cpuid_prev, max_cpuid_new; - - max_cpuid_prev = iflib_cpuid_highest(); - mtx_lock(&ctx_list_lock); - LIST_REMOVE(ctx, ifc_next); - mtx_unlock(&ctx_list_lock); - max_cpuid_new = max(1, iflib_cpuid_highest()); - if (max_cpuid_new < max_cpuid_prev) { - taskqgroup_adjust(qgroup_if_io, max_cpuid_new, 1, true, PI_NET); - } -} void * iflib_get_softc(if_ctx_t ctx) @@ -322,11 +263,9 @@ iflib_get_sctx(if_ctx_t ctx) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) -#define RX_SW_DESC_INUSE (1 << 1) -#define RX_NETMAP_INUSE (1 << 2) - -#define TX_SW_DESC_MAP_CREATED (1 << 0) -#define TX_SW_DESC_MAPPED (1 << 1) +#define TX_SW_DESC_MAP_CREATED (1 << 1) +#define RX_SW_DESC_INUSE (1 << 3) +#define TX_SW_DESC_MAPPED (1 << 4) #define M_TOOBIG M_PROTO1 @@ -418,7 +357,6 @@ struct iflib_txq { uint8_t ift_qstatus; uint8_t 
ift_closed; uint8_t ift_update_freq; - uint8_t ift_stall_count; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; @@ -510,11 +448,9 @@ struct iflib_rxq { struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; - struct if_rxd_info ifr_ri; - struct if_rxd_update ifr_iru; + /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); - #ifdef IFLIB_DIAGNOSTICS uint64_t ifr_cpu_exec_count[256]; #endif @@ -529,11 +465,11 @@ typedef struct if_rxsd { /* multiple of word size */ #ifdef __LP64__ -#define PKT_INFO_SIZE 7 +#define PKT_INFO_SIZE 6 #define RXD_INFO_SIZE 5 #define PKT_TYPE uint64_t #else -#define PKT_INFO_SIZE 12 +#define PKT_INFO_SIZE 11 #define RXD_INFO_SIZE 8 #define PKT_TYPE uint32_t #endif @@ -559,10 +495,9 @@ pkt_info_zero(if_pkt_info_t pi) pi_pad = (if_pkt_info_pad_t)pi; pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; - pi_pad->pkt_val[6] = 0; #ifndef __LP64__ - pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; pi_pad->pkt_val[9] = 0; - pi_pad->pkt_val[10] = 0; pi_pad->pkt_val[11] = 0; + pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; + pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; #endif } @@ -590,24 +525,14 @@ rxd_info_zero(if_rxd_info_t ri) #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 -static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, - "iflib driver parameters"); - -static int iflib_timer_int; -SYSCTL_INT(_net_iflib, OID_AUTO, timer_int, CTLFLAG_RW, &iflib_timer_int, - 0, "interval at which to run per-queue timers (in ticks)"); - -static int force_busdma = 0; -SYSCTL_INT(_net_iflib, OID_AUTO, force_busdma, CTLFLAG_RDTUN, &force_busdma, - 1, "force busdma"); - #define CTX_ACTIVE(ctx) 
((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) -#define CTX_LOCK_INIT(_sc, _name) sx_init(&(_sc)->ifc_sx, _name) +#define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) + +#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) +#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) +#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) -#define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_sx) -#define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_sx) -#define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_sx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) @@ -628,6 +553,9 @@ MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); +TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); +TASKQGROUP_DEFINE(if_config_tqg, 1, 1); + #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 @@ -636,6 +564,9 @@ MODULE_DEPEND(iflib, ether, 1, 1, 1); #endif /* !INVARIANTS */ #endif +static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, + "iflib driver parameters"); + /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ @@ -758,14 +689,7 @@ iflib_debug_reset(void) static void iflib_debug_reset(void) {} #endif -typedef void async_gtask_fn_t(if_ctx_t ctx, void *arg); -struct async_task_arg { - async_gtask_fn_t *ata_fn; - if_ctx_t ata_ctx; - void *ata_arg; - struct grouptask *ata_gtask; -}; #define IFLIB_DEBUG 0 @@ -787,12 +711,6 @@ static void iflib_ifmp_purge(iflib_txq_t txq); static void _iflib_pre_assert(if_softc_ctx_t scctx); static void iflib_stop(if_ctx_t ctx); static void iflib_if_init_locked(if_ctx_t ctx); -static int async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data); -static int iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg); -static void iflib_admin_reset_deferred(if_ctx_t ctx); - - - #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * 
iflib_fixup_rx(struct mbuf *m); #endif @@ -866,94 +784,6 @@ iflib_netmap_register(struct netmap_adapter *na, int onoff) return (status); } -static void -iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) -{ - iflib_fl_t fl; - - fl = &rxq->ifr_fl[flid]; - iru->iru_paddrs = fl->ifl_bus_addrs; - iru->iru_vaddrs = &fl->ifl_vm_addrs[0]; - iru->iru_idxs = fl->ifl_rxd_idxs; - iru->iru_qsidx = rxq->ifr_id; - iru->iru_buf_size = fl->ifl_buf_size; - iru->iru_flidx = fl->ifl_id; -} - -static int -netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init) -{ - struct netmap_adapter *na = kring->na; - u_int const lim = kring->nkr_num_slots - 1; - u_int head = kring->rhead; - struct netmap_ring *ring = kring->ring; - bus_dmamap_t *map; - if_rxd_update_t iru; - if_ctx_t ctx = rxq->ifr_ctx; - iflib_fl_t fl = &rxq->ifr_fl[0]; - uint32_t refill_pidx, nic_i; - - iru = &rxq->ifr_iru; - iru_init(iru, rxq, 0 /* flid */); - map = fl->ifl_sds.ifsd_map; - refill_pidx = netmap_idx_k2n(kring, nm_i); - if (init && (nm_i == head)) - head = nm_prev(head, lim); - for (int tmp_pidx = 0; nm_i != head; tmp_pidx++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]); - uint32_t nic_i_dma = refill_pidx; - nic_i = netmap_idx_k2n(kring, nm_i); - - MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH); - - if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ - return netmap_ring_reinit(kring); - - fl->ifl_vm_addrs[tmp_pidx] = addr; - if (__predict_false(init) && map) { - netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); - } else if (map && (slot->flags & NS_BUF_CHANGED)) { - /* buffer has changed, reload map */ - netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr); - } - slot->flags &= ~NS_BUF_CHANGED; - - nm_i = nm_next(nm_i, lim); - fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim); - if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1) - continue; - - iru->iru_pidx = refill_pidx; - 
iru->iru_count = tmp_pidx+1; - ctx->isc_rxd_refill(ctx->ifc_softc, iru); - - tmp_pidx = 0; - refill_pidx = nic_i; - if (map == NULL) - continue; - - for (int n = 0; n < iru->iru_count; n++) { - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma], - BUS_DMASYNC_PREREAD); - /* XXX - change this to not use the netmap func*/ - nic_i_dma = nm_next(nic_i_dma, lim); - } - } - kring->nr_hwcur = head; - - if (map) - bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, - BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); - /* - * IMPORTANT: we must leave one free slot in the ring, - * so move nic_i back by one unit - */ - nic_i = nm_prev(nic_i, lim); - ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); - return (0); -} - /* * Reconcile kernel and user view of the transmit ring. * @@ -1111,20 +941,18 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ - uint32_t nic_i; /* index into the NIC ring */ + uint32_t nic_i, nic_i_start; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; - struct if_rxd_info *ri; - struct if_rxd_update *iru; + struct if_rxd_info ri; + struct if_rxd_update iru; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; - ri = &rxq->ifr_ri; - iru = &rxq->ifr_iru; if (head > lim) return netmap_ring_reinit(kring); @@ -1160,14 +988,14 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { - rxd_info_zero(ri); - ri->iri_frags = rxq->ifr_frags; - ri->iri_qsidx = kring->ring_id; - ri->iri_ifp = ctx->ifc_ifp; - ri->iri_cidx = nic_i; + 
rxd_info_zero(&ri); + ri.iri_frags = rxq->ifr_frags; + ri.iri_qsidx = kring->ring_id; + ri.iri_ifp = ctx->ifc_ifp; + ri.iri_cidx = nic_i; - error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, ri); - ring->slot[nm_i].len = error ? 0 : ri->iri_len - crclen; + error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); + ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen; ring->slot[nm_i].flags = slot_flags; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, @@ -1200,7 +1028,63 @@ iflib_netmap_rxsync(struct netmap_kring *kring, int flags) if (nm_i == head) return (0); - return (netmap_fl_refill(rxq, kring, nm_i, false)); + iru.iru_paddrs = fl->ifl_bus_addrs; + iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru.iru_idxs = fl->ifl_rxd_idxs; + iru.iru_qsidx = rxq->ifr_id; + iru.iru_buf_size = fl->ifl_buf_size; + iru.iru_flidx = fl->ifl_id; + nic_i_start = nic_i = netmap_idx_k2n(kring, nm_i); + for (i = 0; nm_i != head; i++) { + struct netmap_slot *slot = &ring->slot[nm_i]; + void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); + + if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ + goto ring_reset; + + fl->ifl_vm_addrs[i] = addr; + if (fl->ifl_sds.ifsd_map && (slot->flags & NS_BUF_CHANGED)) { + /* buffer has changed, reload map */ + netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], addr); + } + slot->flags &= ~NS_BUF_CHANGED; + + nm_i = nm_next(nm_i, lim); + fl->ifl_rxd_idxs[i] = nic_i = nm_next(nic_i, lim); + if (nm_i != head && i < IFLIB_MAX_RX_REFRESH) + continue; + + iru.iru_pidx = nic_i_start; + iru.iru_count = i; + i = 0; + ctx->isc_rxd_refill(ctx->ifc_softc, &iru); + if (fl->ifl_sds.ifsd_map == NULL) { + nic_i_start = nic_i; + continue; + } + nic_i = nic_i_start; + for (n = 0; n < iru.iru_count; n++) { + bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], + BUS_DMASYNC_PREREAD); + nic_i = nm_next(nic_i, lim); + } + nic_i_start = nic_i; + } + kring->nr_hwcur = head; + + if (fl->ifl_sds.ifsd_map) + 
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + /* + * IMPORTANT: we must leave one free slot in the ring, + * so move nic_i back by one unit + */ + nic_i = nm_prev(nic_i, lim); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); + return 0; + +ring_reset: + return netmap_ring_reinit(kring); } static void @@ -1209,12 +1093,13 @@ iflib_netmap_intr(struct netmap_adapter *na, int onoff) struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; - /* XXX - do we need synchronization here?*/ + CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } + CTX_UNLOCK(ctx); } @@ -1271,15 +1156,55 @@ static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); - struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; struct netmap_slot *slot; - uint32_t nm_i; + struct if_rxd_update iru; + iflib_fl_t fl; + bus_dmamap_t *map; + int nrxd; + uint32_t i, j, pidx_start; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; - nm_i = netmap_idx_n2k(kring, 0); - netmap_fl_refill(rxq, kring, nm_i, true); + fl = &rxq->ifr_fl[0]; + map = fl->ifl_sds.ifsd_map; + nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; + iru.iru_paddrs = fl->ifl_bus_addrs; + iru.iru_vaddrs = &fl->ifl_vm_addrs[0]; + iru.iru_idxs = fl->ifl_rxd_idxs; + iru.iru_qsidx = rxq->ifr_id; + iru.iru_buf_size = rxq->ifr_fl[0].ifl_buf_size; + iru.iru_flidx = 0; + + for (pidx_start = i = j = 0; i < nrxd; i++, j++) { + int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); + void *addr; + + fl->ifl_rxd_idxs[j] = i; + addr = fl->ifl_vm_addrs[j] = PNMB(na, slot + sj, &fl->ifl_bus_addrs[j]); + if (map) { + netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, *map, addr); + map++; + } + + if (j < IFLIB_MAX_RX_REFRESH && i < nrxd - 1) + continue; + + iru.iru_pidx = pidx_start; + pidx_start = i; + iru.iru_count = j; + j = 0; + MPASS(pidx_start + j <= nrxd); + /* 
Update descriptors and the cached value */ + ctx->isc_rxd_refill(ctx->ifc_softc, &iru); + } + /* preserve queue */ + if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { + struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; + int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); + } else + ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) @@ -1301,17 +1226,8 @@ prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } -static __inline void -prefetch2(void *x) -{ - __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); -#if (CACHE_LINE_SIZE < 128) - __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); -#endif -} #else #define prefetch(x) -#define prefetch2(x) #endif static void @@ -1427,25 +1343,6 @@ iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) iflib_dma_free(*dmaiter); } -static void -txq_validate(iflib_txq_t txq) { -#ifdef INVARIANTS - uint32_t cidx = txq->ift_cidx; - struct mbuf **ifsd_m = txq->ift_sds.ifsd_m; - if (txq->ift_pidx > cidx) { - int i; - for (i = txq->ift_pidx; i < txq->ift_size; i++) - MPASS(ifsd_m[i] == NULL); - for (i = 0; i < cidx; i++) - MPASS(ifsd_m[i] == NULL); - } else if (txq->ift_pidx < cidx) { - int i; - for (i = txq->ift_pidx; i < cidx; i++) - MPASS(ifsd_m[i] == NULL); - } -#endif -} - #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else @@ -1474,7 +1371,6 @@ iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; - if (!iflib_started) return (FILTER_HANDLED); @@ -1486,35 +1382,6 @@ iflib_fast_intr(void *arg) return (FILTER_HANDLED); } -static int -iflib_fast_intr_rx(void *arg) -{ - iflib_filter_info_t info = arg; - struct grouptask *gtask = info->ifi_task; - iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; - if_ctx_t ctx; - int cidx; - - if 
(!iflib_started) - return (FILTER_HANDLED); - - DBG_COUNTER_INC(fast_intrs); - if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) - return (FILTER_HANDLED); - - ctx = rxq->ifr_ctx; - if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) - cidx = rxq->ifr_cq_cidx; - else - cidx = rxq->ifr_fl[0].ifl_cidx; - if (iflib_rxd_avail(ctx, rxq, cidx, 1)) - GROUPTASK_ENQUEUE(gtask); - else - IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); - return (FILTER_HANDLED); -} - - static int iflib_fast_intr_rxtx(void *arg) { @@ -1531,10 +1398,11 @@ iflib_fast_intr_rxtx(void *arg) if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); - ctx = rxq->ifr_ctx; for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; + ctx = rxq->ifr_ctx; + if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) { IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; @@ -2106,33 +1974,20 @@ iflib_fl_bufs_free(iflib_fl_t fl) if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; - } else if (*sd_flags & RX_NETMAP_INUSE) { - if (fl->ifl_sds.ifsd_map != NULL) { - bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; - bus_dmamap_unload(fl->ifl_desc_tag, sd_map); - bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); - } - *sd_flags = 0; - MPASS(*sd_cl == NULL); - MPASS(*sd_m == NULL); } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } - #if MEMORY_LOGGING - if (*sd_m != NULL) - fl->ifl_m_dequeued++; - if (*sd_cl != NULL) - fl->ifl_cl_dequeued++; + fl->ifl_m_dequeued++; + fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { - KASSERT(fl->ifl_sds.ifsd_flags[i] == 0, ("fl->ifl_sds.ifsd_flags[%d]=0x%x, expected 0", - i, fl->ifl_sds.ifsd_flags[i])); + MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } @@ -2156,7 +2011,7 @@ iflib_fl_setup(iflib_fl_t fl) if_ctx_t ctx = 
rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; - bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size-1); + bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size); /* ** Free current RX buffer structs and their mbufs */ @@ -2235,19 +2090,6 @@ iflib_rx_sds_free(iflib_rxq_t rxq) } } -/* CONFIG context only */ -static void -iflib_handle_hang(if_ctx_t ctx, void *arg __unused) -{ - - CTX_LOCK(ctx); - if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); - IFDI_WATCHDOG_RESET(ctx); - ctx->ifc_watchdog_events++; - iflib_if_init_locked(ctx); - CTX_UNLOCK(ctx); -} - /* * MI independent logic * @@ -2255,49 +2097,46 @@ iflib_handle_hang(if_ctx_t ctx, void *arg __unused) static void iflib_timer(void *arg) { - iflib_txq_t txq_i, txq = arg; + iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; + if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - /* handle any laggards */ - if (txq->ift_db_pending) - GROUPTASK_ENQUEUE(&txq->ift_task); - IFDI_TIMER(ctx, txq->ift_id); - - if (ifmp_ring_is_stalled(txq->ift_br) && - txq->ift_cleaned_prev == txq->ift_cleaned) - txq->ift_stall_count++; - txq->ift_cleaned_prev = txq->ift_cleaned; - if (txq->ift_stall_count > 2) { - txq->ift_qstatus = IFLIB_QUEUE_HUNG; - device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", - txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); - } - if (txq->ift_id != 0) { - if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); - return; - } /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ - txq_i = ctx->ifc_txqs; - for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq_i++) { - if (txq_i->ift_qstatus == IFLIB_QUEUE_HUNG) { - iflib_config_async_gtask_dispatch(ctx, iflib_handle_hang, "hang handler", txq); - /* init will reset the callout */ - return; - } - } + IFDI_TIMER(ctx, txq->ift_id); + if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && + ((txq->ift_cleaned_prev == txq->ift_cleaned) || + (sctx->isc_pause_frames == 0))) + goto hung; + if (ifmp_ring_is_stalled(txq->ift_br)) + txq->ift_qstatus = IFLIB_QUEUE_HUNG; + txq->ift_cleaned_prev = txq->ift_cleaned; + /* handle any laggards */ + if (txq->ift_db_pending) + GROUPTASK_ENQUEUE(&txq->ift_task); + sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); + return; +hung: + CTX_LOCK(ctx); + if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); + device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", + txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); + + IFDI_WATCHDOG_RESET(ctx); + ctx->ifc_watchdog_events++; + + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); } static void @@ -2309,10 +2148,8 @@ iflib_init_locked(if_ctx_t ctx) iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; - int i, j, tx_ip_csum_flags, tx_ip6_csum_flags, running, reset; + int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; - running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); - reset = !!(ctx->ifc_flags & IFC_DO_RESET); if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); @@ -2336,20 +2173,19 @@ iflib_init_locked(if_ctx_t ctx) CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } + for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { + MPASS(rxq->ifr_id == i); + iflib_netmap_rxq_init(ctx, rxq); + } #ifdef INVARIANTS i 
= if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); - if (!running && reset) - return; for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ - if (if_getcapenable(ifp) & IFCAP_NETMAP) { - MPASS(rxq->ifr_id == i); - iflib_netmap_rxq_init(ctx, rxq); + if (if_getcapenable(ifp) & IFCAP_NETMAP) continue; - } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); @@ -2362,11 +2198,10 @@ iflib_init_locked(if_ctx_t ctx) IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, iflib_timer_int, iflib_timer, - txq, txq->ift_timer.c_cpu); + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, + txq->ift_timer.c_cpu); } -/* CONFIG context only */ static int iflib_media_change(if_t ifp) { @@ -2380,19 +2215,17 @@ iflib_media_change(if_t ifp) return (err); } -/* CONFIG context only */ static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); - iflib_admin_intr_deferred(ctx); CTX_LOCK(ctx); + IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } -/* CONFIG context only */ static void iflib_stop(if_ctx_t ctx) { @@ -2407,7 +2240,9 @@ iflib_stop(if_ctx_t ctx) if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); + DELAY(1000); IFDI_STOP(ctx); + DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. 
*/ @@ -2420,13 +2255,11 @@ iflib_stop(if_ctx_t ctx) for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } - /* XXX please rewrite to simply bzero this range */ - txq->ift_processed = txq->ift_cleaned = txq->ift_cleaned_prev = 0; - txq->ift_stall_count = txq->ift_cidx_processed = 0; - txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = 0; + txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; + txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; - txq->ift_no_desc_avail = txq->ift_pullups = 0; + txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); @@ -2569,9 +2402,6 @@ assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; -#if MEMORY_LOGGING - sd->ifsd_fl->ifl_cl_dequeued++; -#endif /* Can these two be made one ? 
*/ m_init(m, M_NOWAIT, MT_DATA, flags); @@ -2641,12 +2471,20 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ - struct mbuf *m, *mh, *mt, *mf; + struct mbuf *m, *mh, *mt; ifp = ctx->ifc_ifp; +#ifdef DEV_NETMAP + if (ifp->if_capenable & IFCAP_NETMAP) { + u_int work = 0; + if (netmap_rx_irq(ifp, rxq->ifr_id, &work)) + return (FALSE); + } +#endif + mh = mt = NULL; MPASS(budget > 0); - rx_pkts = rx_bytes = 0; + rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else @@ -2709,14 +2547,11 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) - __iflib_fl_refill_lt(ctx, fl, 2*budget + 8); + __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); - mt = mf = NULL; while (mh != NULL) { m = mh; - if (mf == NULL) - mf = m; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT @@ -2726,25 +2561,15 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) - if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) { - if (mf == m) - mf = NULL; + if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; - } #endif - if (mt != NULL) - mt->m_nextpkt = m; - mt = m; - } - if (mf != NULL) { - ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); + ifp->if_input(ifp, m); } - if (rx_pkts) { - if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); - } + if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work @@ -2752,9 +2577,14 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif - return (avail || iflib_rxd_avail(ctx, rxq, 
*cidxp, 1)); + if (avail) + return true; + return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: - iflib_admin_reset_deferred(ctx); + CTX_LOCK(ctx); + ctx->ifc_flags |= IFC_DO_RESET; + iflib_admin_intr_deferred(ctx); + CTX_UNLOCK(ctx); return (false); } @@ -2841,19 +2671,20 @@ print_pkt(if_pkt_info_t pi) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { - if_ctx_t ctx = txq->ift_ctx; -#ifdef INET - if_shared_ctx_t sctx = ctx->ifc_sctx; -#endif - if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; + if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; - int err; - if (scctx->isc_txrx->ift_txd_errata && - (err = scctx->isc_txrx->ift_txd_errata(ctx->ifc_softc, mp))) - return (err); n = m = *mp; + if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && + M_WRITABLE(m) == 0) { + if ((m = m_dup(m, M_NOWAIT)) == NULL) { + return (ENOMEM); + } else { + m_freem(*mp); + n = *mp = m; + } + } /* * Determine where frame payload starts. @@ -2874,10 +2705,6 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ehdrlen = ETHER_HDR_LEN; } - if (if_getmtu(txq->ift_ctx->ifc_ifp) >= pi->ipi_len) { - pi->ipi_csum_flags &= ~(CSUM_IP_TSO|CSUM_IP6_TSO); - } - switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: @@ -2922,21 +2749,21 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; - if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) + if (pi->ipi_csum_flags & CSUM_IP) ip->ip_sum = 0; - if (IS_TSO4(pi)) { - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(th == NULL)) { - txq->ift_pullups++; - if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) - return (ENOMEM); - th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; - pi->ipi_tcp_seq = th->th_seq; + if (pi->ipi_ipproto == 
IPPROTO_TCP) { + if (__predict_false(th == NULL)) { + txq->ift_pullups++; + if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) + return (ENOMEM); + th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + pi->ipi_tcp_seq = th->th_seq; + } + if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, @@ -2967,15 +2794,15 @@ iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; - if (IS_TSO6(pi)) { - if (pi->ipi_ipproto == IPPROTO_TCP) { - if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { - if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) - return (ENOMEM); - } - pi->ipi_tcp_hflags = th->th_flags; - pi->ipi_tcp_hlen = th->th_off << 2; + if (pi->ipi_ipproto == IPPROTO_TCP) { + if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { + if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) + return (ENOMEM); } + pi->ipi_tcp_hflags = th->th_flags; + pi->ipi_tcp_hlen = th->th_off << 2; + } + if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); @@ -3084,9 +2911,9 @@ iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; - MPASS(ifsd_m[pidx] == NULL); - if (force_busdma || map != NULL) { + if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; + err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) @@ -3239,8 +3066,7 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); 
prefetch(&txq->ift_sds.ifsd_flags[next]); } - } - if (txq->ift_sds.ifsd_map != NULL) + } else if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { @@ -3253,19 +3079,18 @@ iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) m_head = *m_headp; pkt_info_zero(&pi); - pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); - pi.ipi_pidx = pidx; - pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; + pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; + pi.ipi_pidx = pidx; + pi.ipi_qsidx = txq->ift_id; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; - pi.ipi_hdr_data = mtod(m_head, caddr_t); } retry: @@ -3442,7 +3267,6 @@ iflib_tx_desc_free(iflib_txq_t txq, int n) gen = 0; } } - txq_validate(txq); txq->ift_cidx = cidx; txq->ift_gen = gen; } @@ -3492,10 +3316,10 @@ _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { - prefetch2(&items[next]); - prefetch2(items[(cidx + offset + 1) & (size-1)]); - prefetch2(items[(cidx + offset + 2) & (size-1)]); - prefetch2(items[(cidx + offset + 3) & (size-1)]); + prefetch(&items[next]); + prefetch(items[(cidx + offset + 1) & (size-1)]); + prefetch(items[(cidx + offset + 2) & (size-1)]); + prefetch(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } @@ -3676,7 +3500,7 @@ _task_fn_tx(void *context) #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; - if (if_getcapenable(ifp) & IFCAP_NETMAP) { + if ((ifp->if_capenable & IFCAP_NETMAP)) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, 
txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); @@ -3684,7 +3508,8 @@ _task_fn_tx(void *context) } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); - ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); + else + ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3700,7 +3525,6 @@ _task_fn_rx(void *context) if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; - uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; @@ -3708,19 +3532,7 @@ _task_fn_rx(void *context) DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; - more = true; -#ifdef DEV_NETMAP - if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { - u_int work = 0; - if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { - more = false; - } - } -#endif - budget = ctx->ifc_sysctl_rx_budget; - if (budget == 0) - budget = 16; /* XXX */ - if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { + if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { @@ -3735,44 +3547,43 @@ _task_fn_rx(void *context) GROUPTASK_ENQUEUE(&rxq->ifr_task); } -/* CONFIG context only */ static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; - int i, running; + int i; + + if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { + if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { + return; + } + } CTX_LOCK(ctx); - running = !!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } - if (running) { - for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) - callout_reset_on(&txq->ift_timer, iflib_timer_int, 
iflib_timer, - txq, txq->ift_timer.c_cpu); - IFDI_LINK_INTR_ENABLE(ctx); - } - if (ctx->ifc_flags & IFC_DO_RESET) { - iflib_if_init_locked(ctx); - ctx->ifc_flags &= ~IFC_DO_RESET; - } IFDI_UPDATE_ADMIN_STATUS(ctx); + for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) + callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); + IFDI_LINK_INTR_ENABLE(ctx); + if (ctx->ifc_flags & IFC_DO_RESET) { + ctx->ifc_flags &= ~IFC_DO_RESET; + iflib_if_init_locked(ctx); + } CTX_UNLOCK(ctx); - if (LINK_ACTIVE(ctx) == 0 || !running) + if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } -/* CONFIG context only */ static void _task_fn_iov(void *context) { @@ -3887,20 +3698,21 @@ iflib_if_transmit(if_t ifp, struct mbuf *m) DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); - GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { + GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); + } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { + GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } -/* CONFIG context only */ static void iflib_if_qflush(if_t ifp) { @@ -3984,12 +3796,29 @@ iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: - err = async_if_ioctl(ctx, command, data); + CTX_LOCK(ctx); + if (if_getflags(ifp) & IFF_UP) { + if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); + } + } else + reinit = 1; + } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { + iflib_stop(ctx); + } + ctx->ifc_if_flags = if_getflags(ifp); + CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & 
IFF_DRV_RUNNING) { - err = async_if_ioctl(ctx, command, data); + CTX_LOCK(ctx); + IFDI_INTR_DISABLE(ctx); + IFDI_MULTI_SET(ctx); + IFDI_INTR_ENABLE(ctx); + CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: @@ -4083,7 +3912,6 @@ iflib_if_get_counter(if_t ifp, ift_counter cnt) * **********************************************************************/ -/* CONFIG context only */ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { @@ -4103,7 +3931,6 @@ iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) CTX_UNLOCK(ctx); } -/* CONFIG context only */ static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { @@ -4123,7 +3950,6 @@ iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) CTX_UNLOCK(ctx); } -/* CONFIG context only */ static void iflib_led_func(void *arg, int onoff) { @@ -4268,10 +4094,8 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } - CTX_LOCK(ctx); - err = IFDI_ATTACH_PRE(ctx); - CTX_UNLOCK(ctx); - if (err) { + + if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } @@ -4299,8 +4123,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif - if (force_busdma) - ctx->ifc_flags |= IFC_DMAR; msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; @@ -4313,7 +4135,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? 
*/ device_printf(dev, "# rx descriptors must be a power of 2\n"); - err = EINVAL; goto fail; } @@ -4352,7 +4173,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ - taskqgroup_attach(qgroup_if_config, &ctx->ifc_admin_task, ctx, -1, "admin"); + taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* ** Now setup MSI or MSI/X, should ** return us the number of supported @@ -4411,10 +4232,7 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); - CTX_LOCK(ctx); - err = IFDI_ATTACH_POST(ctx); - CTX_UNLOCK(ctx); - if (err) { + if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } @@ -4426,7 +4244,6 @@ iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ct if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); - iflib_ctx_insert(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: @@ -4437,9 +4254,7 @@ fail_intr_free: fail_queues: /* XXX free queues */ fail: - CTX_LOCK(ctx); IFDI_DETACH(ctx); - CTX_UNLOCK(ctx); return (err); } @@ -4487,10 +4302,12 @@ iflib_device_deregister(if_ctx_t ctx) iflib_netmap_detach(ifp); ether_ifdetach(ifp); + /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ + CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) @@ -4504,16 +4321,13 @@ iflib_device_deregister(if_ctx_t ctx) free(fl->ifl_rx_bitmap, M_IFLIB); } - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, 
&ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); - CTX_LOCK(ctx); IFDI_DETACH(ctx); - CTX_UNLOCK(ctx); - CTX_LOCK_DESTROY(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); @@ -4534,7 +4348,6 @@ iflib_device_deregister(if_ctx_t ctx) iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); - iflib_ctx_remove(ctx); free(ctx, M_IFLIB); return (0); } @@ -4630,14 +4443,13 @@ iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) * **********************************************************************/ +/* + * - Start a fast taskqueue thread for each core + * - Start a taskqueue for control operations + */ static int iflib_module_init(void) { - - iflib_timer_int = hz / 2; - TUNABLE_INT_FETCH("net.iflib.timer_int", &iflib_timer_int); - LIST_INIT(&ctx_list); - mtx_init(&ctx_list_lock, "ctx list", NULL, MTX_DEF); return (0); } @@ -5081,124 +4893,25 @@ iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } -#ifdef SMP static int -find_nth(if_ctx_t ctx, int qid) +find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) { - cpuset_t cpus; int i, cpuid, eqid, count; - CPU_COPY(&ctx->ifc_cpus, &cpus); + CPU_COPY(&ctx->ifc_cpus, cpus); count = CPU_COUNT(&ctx->ifc_cpus); eqid = qid % count; /* clear up to the qid'th bit */ for (i = 0; i < eqid; i++) { - cpuid = CPU_FFS(&cpus); + cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); - CPU_CLR(cpuid-1, &cpus); + CPU_CLR(cpuid-1, cpus); } - cpuid = CPU_FFS(&cpus); + cpuid = CPU_FFS(cpus); MPASS(cpuid != 0); return (cpuid-1); } -static int -find_child_with_core(int cpu, struct cpu_group *grp) -{ - int i; - - if (grp->cg_children == 0) - return -1; - - MPASS(grp->cg_child); - for (i = 0; i < grp->cg_children; i++) { - if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) - return i; - } - - return -1; 
-} - -/* - * Find the nth thread on the specified core - */ -static int -find_thread(int cpu, int thread_num) -{ - struct cpu_group *grp; - int i; - cpuset_t cs; - - grp = smp_topo(); - if (grp == NULL) - return cpu; - i = 0; - while ((i = find_child_with_core(cpu, grp)) != -1) { - /* If the child only has one cpu, don't descend */ - if (grp->cg_child[i].cg_count <= 1) - break; - grp = &grp->cg_child[i]; - } - - /* If they don't share at least an L2 cache, use the same CPU */ - if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) - return cpu; - - /* Now pick one */ - CPU_COPY(&grp->cg_mask, &cs); - for (i = thread_num % grp->cg_count; i > 0; i--) { - MPASS(CPU_FFS(&cs)); - CPU_CLR(CPU_FFS(&cs) - 1, &cs); - } - MPASS(CPU_FFS(&cs)); - return CPU_FFS(&cs) - 1; -} - -static int -get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid) -{ - switch (type) { - case IFLIB_INTR_TX: - /* TX queues get threads on the same core as the corresponding RX queue */ - /* XXX handle multiple RX threads per core and more than two threads per core */ - return qid / CPU_COUNT(&ctx->ifc_cpus) + 1; - case IFLIB_INTR_RX: - case IFLIB_INTR_RXTX: - /* RX queues get the first thread on their core */ - return qid / CPU_COUNT(&ctx->ifc_cpus); - default: - return -1; - } -} -#else -#define get_thread_num(ctx, type, qid) 0 -#define find_thread(cpuid, tid) 0 -#define find_nth(ctx, gid) 0 -#endif - -/* Just to avoid copy/paste */ -static inline int -iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid, - struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name) -{ - int cpuid; - int err, tid; - - cpuid = find_nth(ctx, qid); - tid = get_thread_num(ctx, type, qid); - MPASS(tid >= 0); - cpuid = find_thread(cpuid, tid); - err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name); - if (err) { - device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err); - return (err); - } - if (cpuid > ctx->ifc_cpuid_highest) - 
ctx->ifc_cpuid_highest = cpuid; - MPASS(gtask->gt_taskqueue != NULL); - return 0; -} - int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, @@ -5207,8 +4920,9 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; + cpuset_t cpus; gtask_fn_t *fn; - int tqrid, err; + int tqrid, err, cpuid; driver_filter_t *intr_fast; void *q; @@ -5221,7 +4935,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); @@ -5230,16 +4944,16 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; - intr_fast = iflib_fast_intr_rx; + intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); @@ -5249,7 +4963,7 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; @@ -5271,9 +4985,8 @@ iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, return (0); if (tqrid != -1) { - err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); - if (err) - return (err); + cpuid = find_nth(ctx, &cpus, qid); + taskqgroup_attach_cpu(tqg, gtask, 
q, cpuid, irq->ii_rid, name); } else { taskqgroup_attach(tqg, gtask, q, tqrid, name); } @@ -5288,25 +5001,24 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void struct taskqgroup *tqg; gtask_fn_t *fn; void *q; - int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; fn = _task_fn_rx; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; - tqg = qgroup_if_config; + tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_iov; break; @@ -5314,14 +5026,7 @@ iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); - if (rid != -1) { - err = iflib_irq_set_affinity(ctx, rid, type, qid, gtask, tqg, q, name); - if (err) - taskqgroup_attach(tqg, gtask, q, rid, name); - } - else { - taskqgroup_attach(tqg, gtask, q, rid, name); - } + taskqgroup_attach(tqg, gtask, q, rid, name); } void @@ -5351,7 +5056,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; - tqg = qgroup_if_io; + tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; @@ -5368,7 +5073,7 @@ iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int * taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); - taskqgroup_attach(qgroup_if_io, &txq->ift_task, txq, tqrid, "tx"); + taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, tqrid, "tx"); return (0); } @@ -5401,28 +5106,12 @@ iflib_admin_intr_deferred(if_ctx_t ctx) struct grouptask *gtask; gtask = &ctx->ifc_admin_task; - MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); + 
MPASS(gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } -/* CONFIG context only */ -static void -iflib_handle_reset(if_ctx_t ctx, void *arg) -{ - CTX_LOCK(ctx); - ctx->ifc_flags |= IFC_DO_RESET; - iflib_admin_intr_deferred(ctx); - CTX_UNLOCK(ctx); -} - -static void -iflib_admin_reset_deferred(if_ctx_t ctx) -{ - iflib_config_async_gtask_dispatch(ctx, iflib_handle_reset, "reset handler", NULL); -} - void iflib_iov_intr_deferred(if_ctx_t ctx) { @@ -5434,7 +5123,7 @@ void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { - taskqgroup_attach_cpu(qgroup_if_io, gt, uniq, cpu, -1, name); + taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void @@ -5443,104 +5132,14 @@ iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, { GROUPTASK_INIT(gtask, 0, fn, ctx); - taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); + taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } -static void -iflib_multi_set(if_ctx_t ctx, void *arg) -{ - CTX_LOCK(ctx); - IFDI_INTR_DISABLE(ctx); - IFDI_MULTI_SET(ctx); - IFDI_INTR_ENABLE(ctx); - CTX_UNLOCK(ctx); -} - -static void -iflib_flags_set(if_ctx_t ctx, void *arg) -{ - int reinit, err; - if_t ifp = ctx->ifc_ifp; - - err = reinit = 0; - CTX_LOCK(ctx); - if (if_getflags(ifp) & IFF_UP) { - if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); - } - } else - reinit = 1; - } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { - iflib_stop(ctx); - } - ctx->ifc_if_flags = if_getflags(ifp); - if (reinit) - iflib_if_init_locked(ctx); - CTX_UNLOCK(ctx); - if (err) - log(LOG_WARNING, "IFDI_PROMISC_SET returned %d\n", err); -} - -static void -async_gtask(void *ctx) -{ - struct async_task_arg *at_arg = ctx; - if_ctx_t if_ctx = at_arg->ata_ctx; - void *arg = at_arg->ata_arg; - - at_arg->ata_fn(if_ctx, arg); - 
taskqgroup_detach(qgroup_if_config, at_arg->ata_gtask); - free(at_arg->ata_gtask, M_IFLIB); -} - -static int -iflib_config_async_gtask_dispatch(if_ctx_t ctx, async_gtask_fn_t *fn, char *name, void *arg) -{ - struct grouptask *gtask; - struct async_task_arg *at_arg; - - if ((gtask = malloc(sizeof(struct grouptask) + sizeof(struct async_task_arg), M_IFLIB, M_NOWAIT|M_ZERO)) == NULL) - return (ENOMEM); - - at_arg = (struct async_task_arg *)(gtask + 1); - at_arg->ata_fn = fn; - at_arg->ata_ctx = ctx; - at_arg->ata_arg = arg; - at_arg->ata_gtask = gtask; - - GROUPTASK_INIT(gtask, 0, async_gtask, at_arg); - taskqgroup_attach(qgroup_if_config, gtask, gtask, -1, name); - GROUPTASK_ENQUEUE(gtask); - return (0); -} - -static int -async_if_ioctl(if_ctx_t ctx, u_long command, caddr_t data) -{ - int rc; - - switch (command) { - case SIOCADDMULTI: - case SIOCDELMULTI: - rc = iflib_config_async_gtask_dispatch(ctx, iflib_multi_set, "async_if_multi", NULL); - break; - case SIOCSIFFLAGS: - rc = iflib_config_async_gtask_dispatch(ctx, iflib_flags_set, "async_if_flags", NULL); - break; - default: - panic("unknown command %lx", command); - } - return (rc); -} - - void iflib_config_gtask_deinit(struct grouptask *gtask) { - taskqgroup_detach(qgroup_if_config, gtask); + taskqgroup_detach(qgroup_if_config_tqg, gtask); } void @@ -5607,11 +5206,11 @@ iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, info, 0, iflib_sysctl_int_delay, "I", description); } -struct sx * +struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { - return (&ctx->ifc_sx); + return (&ctx->ifc_mtx); } static int @@ -5731,22 +5330,13 @@ iflib_msix_init(if_ctx_t ctx) rx_queues = min(rx_queues, tx_queues); } - device_printf(dev, "trying %d rx queues %d tx queues \n", rx_queues, tx_queues); + device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); - vectors = tx_queues + rx_queues + admincnt; + vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using 
MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; - - if (vectors < tx_queues + rx_queues + admincnt) { - vectors -= admincnt; - if (vectors % 2 != 0) - vectors -= 1; - if (rx_queues > vectors / 2) - rx_queues = vectors / 2; - tx_queues = vectors - rx_queues; - } scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; @@ -5881,12 +5471,9 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", + SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); - SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", - CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, - "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", @@ -5897,10 +5484,6 @@ iflib_add_device_sysctl_pre(if_ctx_t ctx) CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); - - SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "watchdog_events", - CTLFLAG_RD, &ctx->ifc_watchdog_events, 0, - "Watchdog events seen since load"); } static void diff --git a/sys/net/iflib.h b/sys/net/iflib.h index 8c7ebb4594aa..6ac75dbb0afb 100644 --- a/sys/net/iflib.h +++ b/sys/net/iflib.h @@ -119,7 +119,6 @@ typedef struct if_pkt_info { qidx_t ipi_pidx; /* start pidx for encap */ qidx_t ipi_new_pidx; /* next available pidx post-encap */ /* offload handling */ - caddr_t ipi_hdr_data; /* raw header */ uint8_t ipi_ehdrlen; /* ether header length */ uint8_t ipi_ip_hlen; /* ip header length */ uint8_t ipi_tcp_hlen; /* tcp header length */ @@ -184,7 +183,6 @@ typedef struct if_txrx { void (*ift_rxd_refill) (void * , if_rxd_update_t iru); void 
(*ift_rxd_flush) (void *, uint16_t qsidx, uint8_t flidx, qidx_t pidx); int (*ift_legacy_intr) (void *); - int (*ift_txd_errata) (void *, struct mbuf **mp); } *if_txrx_t; typedef struct if_softc_ctx { @@ -296,9 +294,9 @@ typedef enum { */ #define IFLIB_HAS_TXCQ 0x08 /* - * + * Interface does checksum in place */ -#define IFLIB_UNUSED___0 0x10 +#define IFLIB_NEED_SCRATCH 0x10 /* * Interface doesn't expect in_pseudo for th_sum */ @@ -307,10 +305,6 @@ typedef enum { * Interface doesn't align IP header */ #define IFLIB_DO_RX_FIXUP 0x40 -/* - * Driver needs csum zeroed for offloading - */ -#define IFLIB_NEED_ZERO_CSUM 0x80 @@ -387,7 +381,7 @@ int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, i void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count); -struct sx *iflib_ctx_lock_get(if_ctx_t); +struct mtx *iflib_ctx_lock_get(if_ctx_t); struct mtx *iflib_qset_lock_get(if_ctx_t, uint16_t); void iflib_led_create(if_ctx_t ctx); diff --git a/sys/net/mp_ring.c b/sys/net/mp_ring.c index e2e94e9087c8..3ff272c719ab 100644 --- a/sys/net/mp_ring.c +++ b/sys/net/mp_ring.c @@ -226,15 +226,11 @@ drain_ring_lockless(struct ifmp_ring *r, union ring_state os, uint16_t prev, int if (cidx != pidx && pending < 64 && total < budget) continue; critical_enter(); - os.state = ns.state = r->state; - ns.cidx = cidx; - ns.flags = state_to_flags(ns, total >= budget); - while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0) { - cpu_spinwait(); + do { os.state = ns.state = r->state; ns.cidx = cidx; ns.flags = state_to_flags(ns, total >= budget); - } + } while (atomic_cmpset_acq_64(&r->state, os.state, ns.state) == 0); critical_exit(); if (ns.flags == ABDICATED) @@ -458,12 +454,18 @@ ifmp_ring_enqueue(struct ifmp_ring *r, void **items, int n, int budget) do { os.state = ns.state = r->state; ns.pidx_tail = pidx_stop; - if (os.flags == IDLE) - ns.flags = ABDICATED; + ns.flags = BUSY; } while (atomic_cmpset_rel_64(&r->state, os.state, ns.state) == 0); 
critical_exit(); counter_u64_add(r->enqueues, n); + /* + * Turn into a consumer if some other thread isn't active as a consumer + * already. + */ + if (os.flags != BUSY) + drain_ring_lockless(r, ns, os.flags, budget); + return (0); } #endif @@ -474,9 +476,7 @@ ifmp_ring_check_drainage(struct ifmp_ring *r, int budget) union ring_state os, ns; os.state = r->state; - if ((os.flags != STALLED && os.flags != ABDICATED) || // Only continue in STALLED and ABDICATED - os.pidx_head != os.pidx_tail || // Require work to be available - (os.flags != ABDICATED && r->can_drain(r) == 0)) // Can either drain, or everyone left + if (os.flags != STALLED || os.pidx_head != os.pidx_tail || r->can_drain(r) == 0) return; MPASS(os.cidx != os.pidx_tail); /* implied by STALLED */ diff --git a/sys/sys/gtaskqueue.h b/sys/sys/gtaskqueue.h index be36a4756af8..e85196372323 100644 --- a/sys/sys/gtaskqueue.h +++ b/sys/sys/gtaskqueue.h @@ -58,9 +58,7 @@ int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask); struct taskqgroup *taskqgroup_create(char *name); void taskqgroup_destroy(struct taskqgroup *qgroup); -int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); -int taskqgroup_adjust_once(struct taskqgroup *qgroup, int cnt, int stride, bool ithread, int pri); -void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*)); +int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 @@ -82,40 +80,27 @@ void taskqgroup_set_adjust(struct taskqgroup *qgroup, void (*adjust_func)(void*) #define TASKQGROUP_DECLARE(name) \ extern struct taskqgroup *qgroup_##name - -#define TASKQGROUP_DEFINE(name, cnt, stride, intr, pri) \ +#define TASKQGROUP_DEFINE(name, cnt, stride) \ \ struct taskqgroup *qgroup_##name; \ \ static void \ -taskqgroup_adjust_##name(void *arg) \ -{ \ - int max = 
(intr) ? 1 : (cnt); \ - if (arg != NULL) { \ - uintptr_t maxcpu = (uintptr_t) arg; \ - max = maxcpu; \ - } \ - \ - taskqgroup_adjust_once(qgroup_##name, max, (stride), (intr), (pri)); \ -} \ - \ -SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ - taskqgroup_adjust_##name, NULL); \ - \ -static void \ taskqgroup_define_##name(void *arg) \ { \ qgroup_##name = taskqgroup_create(#name); \ - taskqgroup_set_adjust(qgroup_##name, taskqgroup_adjust_##name); \ } \ + \ SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \ - taskqgroup_define_##name, NULL) - - - - - - + taskqgroup_define_##name, NULL); \ + \ +static void \ +taskqgroup_adjust_##name(void *arg) \ +{ \ + taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \ +} \ + \ +SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \ + taskqgroup_adjust_##name, NULL) TASKQGROUP_DECLARE(net); TASKQGROUP_DECLARE(softirq);