From 65ef2d5c41d93c2fc28bfa6ac2b60232d1d0c677 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Dec 2019 14:15:55 +0000 Subject: [PATCH 1/4] net: sfp: use a definition for the fault recovery attempts Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index bfe268028154..b1c564b79e3e 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -172,6 +172,14 @@ static const enum gpiod_flags gpio_flags[] = { #define T_RESET_US 10 #define T_FAULT_RECOVER msecs_to_jiffies(1000) +/* N_FAULT_INIT is the number of recovery attempts at module initialisation + * time. If the TX_FAULT signal is not deasserted after this number of + * attempts at clearing it, we decide that the module is faulty. + * N_FAULT is the same but after the module has initialised. + */ +#define N_FAULT_INIT 5 +#define N_FAULT 5 + /* SFP module presence detection is poor: the three MOD DEF signals are * the same length on the PCB, which means it's possible for MOD DEF 0 to * connect before the I2C bus on MOD DEF 1/2. @@ -1885,7 +1893,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_module_tx_enable(sfp); /* Initialise the fault clearance retries */ - sfp->sm_retries = 5; + sfp->sm_retries = N_FAULT_INIT; /* We need to check the TX_FAULT state, which is not defined * while TX_DISABLE is asserted. The earliest we want to do @@ -1925,7 +1933,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) * or t_start_up, so assume there is a fault. */ sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, - sfp->sm_retries == 5); + sfp->sm_retries == N_FAULT_INIT); } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT * clear. Probe for the PHY and check the LOS state. @@ -1938,7 +1946,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_sm_link_check_los(sfp); /* Reset the fault retry count */ - sfp->sm_retries = 5; + sfp->sm_retries = N_FAULT; } break; From 281e4eab1abeb179610de0884a4bd65bf2d4e61e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Dec 2019 14:16:00 +0000 Subject: [PATCH 2/4] net: sfp: rename sm_retries Rename sm_retries as sm_fault_retries, as this is what this member is tracking. Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index b1c564b79e3e..a67f089f2106 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -234,7 +234,7 @@ struct sfp { unsigned char sm_mod_tries; unsigned char sm_dev_state; unsigned short sm_state; - unsigned int sm_retries; + unsigned char sm_fault_retries; struct sfp_eeprom_id id; unsigned int module_power_mW; @@ -1490,7 +1490,7 @@ static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event) static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) { - if (sfp->sm_retries && !--sfp->sm_retries) { + if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) { dev_err(sfp->dev, "module persistently indicates fault, disabling\n"); sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0); @@ -1893,7 +1893,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_module_tx_enable(sfp); /* Initialise the fault clearance retries */ - sfp->sm_retries = N_FAULT_INIT; + sfp->sm_fault_retries = N_FAULT_INIT; /* We need to check the TX_FAULT state, which is not defined * while TX_DISABLE is asserted. The earliest we want to do @@ -1933,7 +1933,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) * or t_start_up, so assume there is a fault. */ sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, - sfp->sm_retries == N_FAULT_INIT); + sfp->sm_fault_retries == N_FAULT_INIT); } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT * clear. Probe for the PHY and check the LOS state. @@ -1946,7 +1946,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_sm_link_check_los(sfp); /* Reset the fault retry count */ - sfp->sm_retries = N_FAULT; + sfp->sm_fault_retries = N_FAULT; } break; From 256e43cb8c696e0754514686d92f4e27b4985ad5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Dec 2019 14:16:05 +0000 Subject: [PATCH 3/4] net: sfp: error handling for phy probe Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index a67f089f2106..76fa95e54542 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -1410,7 +1410,7 @@ static void sfp_sm_phy_detach(struct sfp *sfp) sfp->mod_phy = NULL; } -static void sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) +static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) { struct phy_device *phy; int err; @@ -1418,18 +1418,18 @@ static void sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45); if (phy == ERR_PTR(-ENODEV)) { dev_info(sfp->dev, "no PHY detected\n"); - return; + return 0; } if (IS_ERR(phy)) { dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy)); - return; + return PTR_ERR(phy); } err = phy_device_register(phy); if (err) { phy_device_free(phy); dev_err(sfp->dev, "phy_device_register failed: %d\n", err); - return; + return err; } err = sfp_add_phy(sfp->sfp_bus, phy); @@ -1437,10 +1437,12 @@ static void sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) phy_device_remove(phy); phy_device_free(phy); dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err); - return; + return err; } sfp->mod_phy = phy; + + return 0; } static void sfp_sm_link_up(struct sfp *sfp) @@ -1513,21 +1515,24 @@ static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn) * Clause 45 copper SFP+ modules (10G) appear to switch their interface * mode according to the negotiated line speed. */ -static void sfp_sm_probe_for_phy(struct sfp *sfp) +static int sfp_sm_probe_for_phy(struct sfp *sfp) { + int err = 0; + switch (sfp->id.base.extended_cc) { case SFF8024_ECC_10GBASE_T_SFI: case SFF8024_ECC_10GBASE_T_SR: case SFF8024_ECC_5GBASE_T: case SFF8024_ECC_2_5GBASE_T: - sfp_sm_probe_phy(sfp, true); + err = sfp_sm_probe_phy(sfp, true); break; default: if (sfp->id.base.e1000_base_t) - sfp_sm_probe_phy(sfp, false); + err = sfp_sm_probe_phy(sfp, false); break; } + return err; } static int sfp_module_parse_power(struct sfp *sfp) @@ -1938,7 +1943,10 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT * clear. Probe for the PHY and check the LOS state. */ - sfp_sm_probe_for_phy(sfp); + if (sfp_sm_probe_for_phy(sfp)) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; + } if (sfp_module_start(sfp->sfp_bus)) { sfp_sm_next(sfp, SFP_S_FAIL, 0); break; From 1cb89a14c80a87abe1f5bd47b1e7b3171b968f8c Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 9 Dec 2019 14:16:11 +0000 Subject: [PATCH 4/4] net: sfp: re-attempt probing for phy Some 1000BASE-T PHY modules take a while for the PHY to wake up. Retry the probe a number of times before deciding that the module has no PHY. Tested with: Sourcephotonics SPGBTXCNFC - PHY takes less than 50ms to respond. Champion One 1000SFPT - PHY takes about 200ms to respond. Mikrotik S-RJ01 - no PHY Signed-off-by: Russell King Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/sfp.c | 63 ++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 76fa95e54542..e54aef921038 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -62,6 +62,7 @@ enum { SFP_S_FAIL, SFP_S_WAIT, SFP_S_INIT, + SFP_S_INIT_PHY, SFP_S_INIT_TX_FAULT, SFP_S_WAIT_LOS, SFP_S_LINK_UP, @@ -126,6 +127,7 @@ static const char * const sm_state_strings[] = { [SFP_S_FAIL] = "fail", [SFP_S_WAIT] = "wait", [SFP_S_INIT] = "init", + [SFP_S_INIT_PHY] = "init_phy", [SFP_S_INIT_TX_FAULT] = "init_tx_fault", [SFP_S_WAIT_LOS] = "wait_los", [SFP_S_LINK_UP] = "link_up", @@ -180,6 +182,12 @@ static const enum gpiod_flags gpio_flags[] = { #define N_FAULT_INIT 5 #define N_FAULT 5 +/* T_PHY_RETRY is the time interval between attempts to probe the PHY. + * R_PHY_RETRY is the number of attempts. + */ +#define T_PHY_RETRY msecs_to_jiffies(50) +#define R_PHY_RETRY 12 + /* SFP module presence detection is poor: the three MOD DEF signals are * the same length on the PCB, which means it's possible for MOD DEF 0 to * connect before the I2C bus on MOD DEF 1/2. @@ -235,6 +243,7 @@ struct sfp { unsigned char sm_dev_state; unsigned short sm_state; unsigned char sm_fault_retries; + unsigned char sm_phy_retries; struct sfp_eeprom_id id; unsigned int module_power_mW; @@ -1416,10 +1425,8 @@ static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45) int err; phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45); - if (phy == ERR_PTR(-ENODEV)) { - dev_info(sfp->dev, "no PHY detected\n"); - return 0; - } + if (phy == ERR_PTR(-ENODEV)) + return PTR_ERR(phy); if (IS_ERR(phy)) { dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy)); return PTR_ERR(phy); @@ -1867,6 +1874,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) static void sfp_sm_main(struct sfp *sfp, unsigned int event) { unsigned long timeout; + int ret; /* Some events are global */ if (sfp->sm_state != SFP_S_DOWN && @@ -1940,24 +1948,41 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event) sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT, sfp->sm_fault_retries == N_FAULT_INIT); } else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) { - init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT - * clear. Probe for the PHY and check the LOS state. - */ - if (sfp_sm_probe_for_phy(sfp)) { - sfp_sm_next(sfp, SFP_S_FAIL, 0); - break; - } - if (sfp_module_start(sfp->sfp_bus)) { - sfp_sm_next(sfp, SFP_S_FAIL, 0); - break; - } - sfp_sm_link_check_los(sfp); - - /* Reset the fault retry count */ - sfp->sm_fault_retries = N_FAULT; + init_done: + sfp->sm_phy_retries = R_PHY_RETRY; + goto phy_probe; } break; + case SFP_S_INIT_PHY: + if (event != SFP_E_TIMEOUT) + break; + phy_probe: + /* TX_FAULT deasserted or we timed out with TX_FAULT + * clear. Probe for the PHY and check the LOS state. + */ + ret = sfp_sm_probe_for_phy(sfp); + if (ret == -ENODEV) { + if (--sfp->sm_phy_retries) { + sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY); + break; + } else { + dev_info(sfp->dev, "no PHY detected\n"); + } + } else if (ret) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; + } + if (sfp_module_start(sfp->sfp_bus)) { + sfp_sm_next(sfp, SFP_S_FAIL, 0); + break; + } + sfp_sm_link_check_los(sfp); + + /* Reset the fault retry count */ + sfp->sm_fault_retries = N_FAULT; + break; + case SFP_S_INIT_TX_FAULT: if (event == SFP_E_TIMEOUT) { sfp_module_tx_fault_reset(sfp);