Source-Changes-HG archive

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]

[src/trunk]: src/sys/dev/pci/ixgbe Print ECC, PHY and temp error log using wi...



details:   https://anonhg.NetBSD.org/src/rev/85e1fd33628e
branches:  trunk
changeset: 363410:85e1fd33628e
user:      msaitoh <msaitoh%NetBSD.org@localhost>
date:      Thu Mar 10 04:14:34 2022 +0000

description:
Print ECC, PHY and temperature error logs using ratecheck().

 - The ratecheck() is there just in case. Each of these interrupts might
   occur only once, but I don't know whether that's true or not. The fan
   failure interrupt seems to occur only once.
 - All of the intervals are 60s.

diffstat:

 sys/dev/pci/ixgbe/ixgbe.c |  73 +++++++++++++++++++++++++---------------------
 sys/dev/pci/ixgbe/ixgbe.h |   3 +-
 2 files changed, 41 insertions(+), 35 deletions(-)

diffs (151 lines):

diff -r 7ff5d402fbfe -r 85e1fd33628e sys/dev/pci/ixgbe/ixgbe.c
--- a/sys/dev/pci/ixgbe/ixgbe.c Thu Mar 10 04:00:32 2022 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.c Thu Mar 10 04:14:34 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.c,v 1.311 2022/03/10 04:00:32 msaitoh Exp $ */
+/* $NetBSD: ixgbe.c,v 1.312 2022/03/10 04:14:34 msaitoh Exp $ */
 
 /******************************************************************************
 
@@ -64,7 +64,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.311 2022/03/10 04:00:32 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ixgbe.c,v 1.312 2022/03/10 04:14:34 msaitoh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -424,6 +424,9 @@
 #endif
 #define IXGBE_WORKQUEUE_PRI PRI_SOFTNET
 
+/* Interval between reports of errors */
+static const struct timeval ixgbe_errlog_intrvl = { 60, 0 };   /* 60s */
+
 /************************************************************************
  * ixgbe_initialize_rss_mapping
  ************************************************************************/
@@ -3230,10 +3233,10 @@
 #endif
 
                if (eicr & IXGBE_EICR_ECC) {
-                       device_printf(adapter->dev,
-                           "CRITICAL: ECC ERROR!! Please Reboot!!\n");
-                       /* Disable interrupt to prevent log spam */
-                       *eims_disable |= IXGBE_EICR_ECC;
+                       if (ratecheck(&adapter->lasterr_time,
+                           &ixgbe_errlog_intrvl))
+                               device_printf(adapter->dev,
+                                   "CRITICAL: ECC ERROR!! Please Reboot!!\n");
                }
 
                /* Check for over temp condition */
@@ -3242,32 +3245,32 @@
                        case ixgbe_mac_X550EM_a:
                                if (!(eicr & IXGBE_EICR_GPI_SDP0_X550EM_a))
                                        break;
-                               /* Disable interrupt to prevent log spam */
-                               *eims_disable |= IXGBE_EICR_GPI_SDP0_X550EM_a;
-
                                retval = hw->phy.ops.check_overtemp(hw);
                                if (retval != IXGBE_ERR_OVERTEMP)
                                        break;
-                               device_printf(adapter->dev,
-                                   "CRITICAL: OVER TEMP!! "
-                                   "PHY IS SHUT DOWN!!\n");
-                               device_printf(adapter->dev,
-                                   "System shutdown required!\n");
+                               if (ratecheck(&adapter->lasterr_time,
+                                   &ixgbe_errlog_intrvl)) {
+                                       device_printf(adapter->dev,
+                                           "CRITICAL: OVER TEMP!! "
+                                           "PHY IS SHUT DOWN!!\n");
+                                       device_printf(adapter->dev,
+                                           "System shutdown required!\n");
+                               }
                                break;
                        default:
                                if (!(eicr & IXGBE_EICR_TS))
                                        break;
-                               /* Disable interrupt to prevent log spam */
-                               *eims_disable |= IXGBE_EIMS_TS;
-
                                retval = hw->phy.ops.check_overtemp(hw);
                                if (retval != IXGBE_ERR_OVERTEMP)
                                        break;
-                               device_printf(adapter->dev,
-                                   "CRITICAL: OVER TEMP!! "
-                                   "PHY IS SHUT DOWN!!\n");
-                               device_printf(adapter->dev,
-                                   "System shutdown required!\n");
+                               if (ratecheck(&adapter->lasterr_time,
+                                   &ixgbe_errlog_intrvl)) {
+                                       device_printf(adapter->dev,
+                                           "CRITICAL: OVER TEMP!! "
+                                           "PHY IS SHUT DOWN!!\n");
+                                       device_printf(adapter->dev,
+                                           "System shutdown required!\n");
+                               }
                                break;
                        }
                }
@@ -3281,13 +3284,8 @@
        }
 
        /* Check for fan failure */
-       if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) {
-               retval = ixgbe_check_fan_failure(adapter, eicr, true);
-               if (retval == IXGBE_ERR_FAN_FAILURE) {
-                       /* Disable interrupt to prevent log spam */
-                       *eims_disable |= IXGBE_EIMS_GPI_SDP1_BY_MAC(hw);
-               }
-       }
+       if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL)
+               ixgbe_check_fan_failure(adapter, eicr, true);
 
        /* External PHY interrupt */
        if ((hw->phy.type == ixgbe_phy_x550em_ext_t) &&
@@ -6609,13 +6607,20 @@
        mask = (in_interrupt) ? IXGBE_EICR_GPI_SDP1_BY_MAC(&adapter->hw) :
            IXGBE_ESDP_SDP1;
 
-       if (reg & mask) {
+       if ((reg & mask) == 0)
+               return IXGBE_SUCCESS;
+
+       /*
+        * Use ratecheck() just in case the interrupt occurs frequently.
+        * When the EXPX9501AT's fan stopped, the interrupt occurred only
+        * once, a red LED on the board turned on, and the link never came
+        * up until power off.
+        */
+       if (ratecheck(&adapter->lasterr_time, &ixgbe_errlog_intrvl))
                device_printf(adapter->dev,
                    "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n");
-               return IXGBE_ERR_FAN_FAILURE;
-       }
-
-       return IXGBE_SUCCESS;
+
+       return IXGBE_ERR_FAN_FAILURE;
 } /* ixgbe_check_fan_failure */
 
 /************************************************************************
diff -r 7ff5d402fbfe -r 85e1fd33628e sys/dev/pci/ixgbe/ixgbe.h
--- a/sys/dev/pci/ixgbe/ixgbe.h Thu Mar 10 04:00:32 2022 +0000
+++ b/sys/dev/pci/ixgbe/ixgbe.h Thu Mar 10 04:14:34 2022 +0000
@@ -1,4 +1,4 @@
-/* $NetBSD: ixgbe.h,v 1.84 2021/12/10 11:33:11 msaitoh Exp $ */
+/* $NetBSD: ixgbe.h,v 1.85 2022/03/10 04:14:34 msaitoh Exp $ */
 
 /******************************************************************************
   SPDX-License-Identifier: BSD-3-Clause
@@ -645,6 +645,7 @@
 
        struct sysctllog        *sysctllog;
        const struct sysctlnode *sysctltop;
+       struct timeval          lasterr_time;
 };
 
 /* Precision Time Sync (IEEE 1588) defines */



Home | Main Index | Thread Index | Old Index