2006-04-02 17:51:53 +08:00
|
|
|
/*
|
|
|
|
* libata-eh.c - libata error handling
|
|
|
|
*
|
|
|
|
* Maintained by: Jeff Garzik <jgarzik@pobox.com>
|
|
|
|
* Please ALWAYS copy linux-ide@vger.kernel.org
|
|
|
|
* on emails.
|
|
|
|
*
|
|
|
|
* Copyright 2006 Tejun Heo <htejun@gmail.com>
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License as
|
|
|
|
* published by the Free Software Foundation; either version 2, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; see the file COPYING. If not, write to
|
|
|
|
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
|
|
|
|
* USA.
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* libata documentation is available via 'make {ps|pdf}docs',
|
|
|
|
* as Documentation/DocBook/libata.*
|
|
|
|
*
|
|
|
|
* Hardware documentation available from http://www.t13.org/ and
|
|
|
|
* http://www.sata-io.org/
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
2007-10-12 05:12:35 +08:00
|
|
|
#include <linux/pci.h>
|
2006-04-02 17:51:53 +08:00
|
|
|
#include <scsi/scsi.h>
|
|
|
|
#include <scsi/scsi_host.h>
|
|
|
|
#include <scsi/scsi_eh.h>
|
|
|
|
#include <scsi/scsi_device.h>
|
|
|
|
#include <scsi/scsi_cmnd.h>
|
2006-08-10 19:31:37 +08:00
|
|
|
#include "../scsi/scsi_transport_api.h"
|
2006-04-02 17:51:53 +08:00
|
|
|
|
|
|
|
#include <linux/libata.h>
|
|
|
|
|
|
|
|
#include "libata.h"
|
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeouts or HSM violations for known
supported commands, the transfer mode is slowed down. If DMA is active,
it is first slowed by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowed to the 40c limit (UDMA33). If PIO is
active, it's slowed by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change the transfer type:
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
/*
 * Constants private to this file.  The three groups below are
 * independent of each other; the first two are single-bit flags, the
 * last one is a run of consecutive integers starting at zero.
 */
enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= 1 << 0,
	ATA_EH_SPDN_SPEED_DOWN		= 1 << 1,
	ATA_EH_SPDN_FALLBACK_TO_PIO	= 1 << 2,
	ATA_EH_SPDN_KEEP_ERRORS		= 1 << 3,

	/* error flags */
	ATA_EFLAG_IS_IO			= 1 << 0,
	ATA_EFLAG_DUBIOUS_XFER		= 1 << 1,

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS,		/* 1 */
	ATA_ECAT_TOUT_HSM,		/* 2 */
	ATA_ECAT_UNK_DEV,		/* 3 */
	ATA_ECAT_DUBIOUS_NONE,		/* 4 */
	ATA_ECAT_DUBIOUS_ATA_BUS,	/* 5 */
	ATA_ECAT_DUBIOUS_TOUT_HSM,	/* 6 */
	ATA_ECAT_DUBIOUS_UNK_DEV,	/* 7 */
	ATA_ECAT_NR,			/* 8 */
};
|
|
|
|
|
2007-02-02 15:50:52 +08:00
|
|
|
/* Waiting in ->prereset can never be reliable. It's sometimes nice
|
|
|
|
* to wait there but it can't be depended upon; otherwise, we wouldn't
|
|
|
|
* be resetting. Just give it enough time for most drives to spin up.
|
|
|
|
*/
|
|
|
|
enum {
|
|
|
|
ATA_EH_PRERESET_TIMEOUT = 10 * HZ,
|
2007-07-16 13:29:41 +08:00
|
|
|
ATA_EH_FASTDRAIN_INTERVAL = 3 * HZ,
|
2007-02-02 15:50:52 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* The following table determines how we sequence resets. Each entry
|
|
|
|
* represents timeout for that try. The first try can be soft or
|
|
|
|
* hardreset. All others are hardreset if available. In most cases
|
|
|
|
* the first reset w/ 10sec timeout should succeed. Following entries
|
|
|
|
* are mostly for error handling, hotplug and retarded devices.
|
|
|
|
*/
|
|
|
|
static const unsigned long ata_eh_reset_timeouts[] = {
|
|
|
|
10 * HZ, /* most drives spin up by 10sec */
|
|
|
|
10 * HZ, /* > 99% working drives spin up before 20sec */
|
|
|
|
35 * HZ, /* give > 30 secs of idleness for retarded devices */
|
|
|
|
5 * HZ, /* and sweet one last chance */
|
|
|
|
/* > 1 min has elapsed, give up */
|
|
|
|
};
|
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
/* defined further down in this file (not shown here) */
static void __ata_port_freeze(struct ata_port *ap);

/*
 * Port suspend/resume processing invoked from ata_scsi_error().  When
 * CONFIG_PM is not set both hooks are empty stubs so that the caller
 * needs no conditional compilation.
 */
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap) { }
static void ata_eh_handle_port_resume(struct ata_port *ap) { }
#endif /* CONFIG_PM */
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2007-07-16 13:29:39 +08:00
|
|
|
static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
|
|
|
|
va_list args)
|
|
|
|
{
|
|
|
|
ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
|
|
|
|
ATA_EH_DESC_LEN - ehi->desc_len,
|
|
|
|
fmt, args);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format the variable arguments according to @fmt and append
 *	the result to @ehi->desc.  No separator is inserted in front
 *	of the new text.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list ap_list;

	va_start(ap_list, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, ap_list);
	va_end(ap_list);
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_ehi_push_desc - push error description with separator
|
|
|
|
* @ehi: target EHI
|
|
|
|
* @fmt: printf format string
|
|
|
|
*
|
|
|
|
* Format string according to @fmt and append it to @ehi->desc.
|
|
|
|
* If @ehi->desc is not empty, ", " is added in-between.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host lock)
|
|
|
|
*/
|
|
|
|
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (ehi->desc_len)
|
|
|
|
__ata_ehi_push_desc(ehi, ", ");
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
__ata_ehi_pushv_desc(ehi, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_ehi_clear_desc - clean error description
|
|
|
|
* @ehi: target EHI
|
|
|
|
*
|
|
|
|
* Clear @ehi->desc.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host lock)
|
|
|
|
*/
|
|
|
|
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
|
|
|
|
{
|
|
|
|
ehi->desc[0] = '\0';
|
|
|
|
ehi->desc_len = 0;
|
|
|
|
}
|
|
|
|
|
2007-08-18 12:14:55 +08:00
|
|
|
/**
|
|
|
|
* ata_port_desc - append port description
|
|
|
|
* @ap: target ATA port
|
|
|
|
* @fmt: printf format string
|
|
|
|
*
|
|
|
|
* Format string according to @fmt and append it to port
|
|
|
|
* description. If port description is not empty, " " is added
|
|
|
|
* in-between. This function is to be used while initializing
|
|
|
|
* ata_host. The description is printed on host registration.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
|
|
|
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));
|
|
|
|
|
|
|
|
if (ap->link.eh_info.desc_len)
|
|
|
|
__ata_ehi_push_desc(&ap->link.eh_info, " ");
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	With a negative @offset, the appended text consists of
 *	@name, a type letter ("m" for a memory resource, "i" for an
 *	I/O resource, nothing otherwise), the BAR length and the BAR
 *	start address.  With a zero or positive @offset, only @name
 *	and the start address plus @offset are appended.  The text
 *	goes to the port description through ata_port_desc().
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	unsigned long res_flags = pci_resource_flags(pdev, bar);
	unsigned long long start, len;
	const char *type;

	if (res_flags & IORESOURCE_MEM)
		type = "m";
	else if (res_flags & IORESOURCE_IO)
		type = "i";
	else
		type = "";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0) {
		/* describe the whole BAR */
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
		return;
	}

	/* describe one location inside the BAR */
	ata_port_desc(ap, "%s 0x%llx", name,
		      start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */
|
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
|
2006-05-15 19:58:19 +08:00
|
|
|
unsigned int err_mask)
|
|
|
|
{
|
|
|
|
struct ata_ering_entry *ent;
|
|
|
|
|
|
|
|
WARN_ON(!err_mask);
|
|
|
|
|
|
|
|
ering->cursor++;
|
|
|
|
ering->cursor %= ATA_ERING_SIZE;
|
|
|
|
|
|
|
|
ent = &ering->ring[ering->cursor];
|
2007-11-27 18:28:56 +08:00
|
|
|
ent->eflags = eflags;
|
2006-05-15 19:58:19 +08:00
|
|
|
ent->err_mask = err_mask;
|
|
|
|
ent->timestamp = get_jiffies_64();
|
|
|
|
}
|
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
|
|
|
|
{
|
|
|
|
struct ata_ering_entry *ent = &ering->ring[ering->cursor];
|
|
|
|
|
|
|
|
if (ent->err_mask)
|
|
|
|
return ent;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeouts or HSM violations for known
supported commands, the transfer mode is slowed down. If DMA is active,
it is first slowed by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowed to the 40c limit (UDMA33). If PIO is
active, it's slowed by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change the transfer type:
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
static void ata_ering_clear(struct ata_ering *ering)
|
2006-05-15 19:58:19 +08:00
|
|
|
{
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
memset(ering, 0, sizeof(*ering));
|
2006-05-15 19:58:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int ata_ering_map(struct ata_ering *ering,
|
|
|
|
int (*map_fn)(struct ata_ering_entry *, void *),
|
|
|
|
void *arg)
|
|
|
|
{
|
|
|
|
int idx, rc = 0;
|
|
|
|
struct ata_ering_entry *ent;
|
|
|
|
|
|
|
|
idx = ering->cursor;
|
|
|
|
do {
|
|
|
|
ent = &ering->ring[idx];
|
|
|
|
if (!ent->err_mask)
|
|
|
|
break;
|
|
|
|
rc = map_fn(ent, arg);
|
|
|
|
if (rc)
|
|
|
|
break;
|
|
|
|
idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
|
|
|
|
} while (idx != ering->cursor);
|
|
|
|
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2006-06-24 19:30:18 +08:00
|
|
|
static unsigned int ata_eh_dev_action(struct ata_device *dev)
|
|
|
|
{
|
2007-08-06 17:36:22 +08:00
|
|
|
struct ata_eh_context *ehc = &dev->link->eh_context;
|
2006-06-24 19:30:18 +08:00
|
|
|
|
|
|
|
return ehc->i.action | ehc->i.dev_action[dev->devno];
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
|
2006-06-24 19:30:18 +08:00
|
|
|
struct ata_eh_info *ehi, unsigned int action)
|
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *tdev;
|
2006-06-24 19:30:18 +08:00
|
|
|
|
|
|
|
if (!dev) {
|
|
|
|
ehi->action &= ~action;
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(tdev, link)
|
|
|
|
ehi->dev_action[tdev->devno] &= ~action;
|
2006-06-24 19:30:18 +08:00
|
|
|
} else {
|
|
|
|
/* doesn't make sense for port-wide EH actions */
|
|
|
|
WARN_ON(!(action & ATA_EH_PERDEV_MASK));
|
|
|
|
|
|
|
|
/* break ehi->action into ehi->dev_action */
|
|
|
|
if (ehi->action & action) {
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(tdev, link)
|
|
|
|
ehi->dev_action[tdev->devno] |=
|
|
|
|
ehi->action & action;
|
2006-06-24 19:30:18 +08:00
|
|
|
ehi->action &= ~action;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* turn off the specified per-dev action */
|
|
|
|
ehi->dev_action[dev->devno] &= ~action;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-04-02 17:51:53 +08:00
|
|
|
/**
|
|
|
|
* ata_scsi_timed_out - SCSI layer time out callback
|
|
|
|
* @cmd: timed out SCSI command
|
|
|
|
*
|
|
|
|
* Handles SCSI layer timeout. We race with normal completion of
|
|
|
|
* the qc for @cmd. If the qc is already gone, we lose and let
|
|
|
|
* the scsi command finish (EH_HANDLED). Otherwise, the qc has
|
|
|
|
* timed out and EH should be invoked. Prevent ata_qc_complete()
|
|
|
|
* from finishing it by setting EH_SCHEDULED and return
|
|
|
|
* EH_NOT_HANDLED.
|
|
|
|
*
|
2006-05-15 19:58:12 +08:00
|
|
|
* TODO: kill this function once old EH is gone.
|
|
|
|
*
|
2006-04-02 17:51:53 +08:00
|
|
|
* LOCKING:
|
|
|
|
* Called from timer context
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* EH_HANDLED or EH_NOT_HANDLED
|
|
|
|
*/
|
|
|
|
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
|
|
|
|
{
|
|
|
|
struct Scsi_Host *host = cmd->device->host;
|
2006-04-12 01:12:34 +08:00
|
|
|
struct ata_port *ap = ata_shost_to_port(host);
|
2006-04-02 17:51:53 +08:00
|
|
|
unsigned long flags;
|
|
|
|
struct ata_queued_cmd *qc;
|
2006-05-15 19:58:12 +08:00
|
|
|
enum scsi_eh_timer_return ret;
|
2006-04-02 17:51:53 +08:00
|
|
|
|
|
|
|
DPRINTK("ENTER\n");
|
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
if (ap->ops->error_handler) {
|
|
|
|
ret = EH_NOT_HANDLED;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = EH_HANDLED;
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2007-08-06 17:36:22 +08:00
|
|
|
qc = ata_qc_from_tag(ap, ap->link.active_tag);
|
2006-04-02 17:51:53 +08:00
|
|
|
if (qc) {
|
|
|
|
WARN_ON(qc->scsicmd != cmd);
|
|
|
|
qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
|
|
|
|
qc->err_mask |= AC_ERR_TIMEOUT;
|
|
|
|
ret = EH_NOT_HANDLED;
|
|
|
|
}
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-04-02 17:51:53 +08:00
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
out:
|
2006-04-02 17:51:53 +08:00
|
|
|
DPRINTK("EXIT, ret=%d\n", ret);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_scsi_error - SCSI layer error handler callback
|
|
|
|
* @host: SCSI host on which error occurred
|
|
|
|
*
|
|
|
|
* Handles SCSI-layer-thrown error events.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Inherited from SCSI layer (none, can sleep)
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Zero.
|
|
|
|
*/
|
2006-04-12 01:04:39 +08:00
|
|
|
void ata_scsi_error(struct Scsi_Host *host)
|
2006-04-02 17:51:53 +08:00
|
|
|
{
|
2006-04-12 01:12:34 +08:00
|
|
|
struct ata_port *ap = ata_shost_to_port(host);
|
2007-08-18 12:28:49 +08:00
|
|
|
int i;
|
2006-05-15 19:58:12 +08:00
|
|
|
unsigned long flags;
|
2006-04-02 17:51:53 +08:00
|
|
|
|
|
|
|
DPRINTK("ENTER\n");
|
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
/* synchronize with port task */
|
2006-04-02 17:51:53 +08:00
|
|
|
ata_port_flush_task(ap);
|
|
|
|
|
2006-08-24 15:19:22 +08:00
|
|
|
/* synchronize with host lock and sort out timeouts */
|
2006-05-15 19:58:12 +08:00
|
|
|
|
|
|
|
/* For new EH, all qcs are finished in one of three ways -
|
|
|
|
* normal completion, error completion, and SCSI timeout.
|
|
|
|
* Both cmpletions can race against SCSI timeout. When normal
|
|
|
|
* completion wins, the qc never reaches EH. When error
|
|
|
|
* completion wins, the qc has ATA_QCFLAG_FAILED set.
|
|
|
|
*
|
|
|
|
* When SCSI timeout wins, things are a bit more complex.
|
|
|
|
* Normal or error completion can occur after the timeout but
|
|
|
|
* before this point. In such cases, both types of
|
|
|
|
* completions are honored. A scmd is determined to have
|
|
|
|
* timed out iff its associated qc is active and not failed.
|
|
|
|
*/
|
|
|
|
if (ap->ops->error_handler) {
|
|
|
|
struct scsi_cmnd *scmd, *tmp;
|
|
|
|
int nr_timedout = 0;
|
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
|
|
|
list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
|
|
|
|
struct ata_queued_cmd *qc;
|
|
|
|
|
|
|
|
for (i = 0; i < ATA_MAX_QUEUE; i++) {
|
|
|
|
qc = __ata_qc_from_tag(ap, i);
|
|
|
|
if (qc->flags & ATA_QCFLAG_ACTIVE &&
|
|
|
|
qc->scsicmd == scmd)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i < ATA_MAX_QUEUE) {
|
|
|
|
/* the scmd has an associated qc */
|
|
|
|
if (!(qc->flags & ATA_QCFLAG_FAILED)) {
|
|
|
|
/* which hasn't failed yet, timeout */
|
|
|
|
qc->err_mask |= AC_ERR_TIMEOUT;
|
|
|
|
qc->flags |= ATA_QCFLAG_FAILED;
|
|
|
|
nr_timedout++;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* Normal completion occurred after
|
|
|
|
* SCSI timeout but before this point.
|
|
|
|
* Successfully complete it.
|
|
|
|
*/
|
|
|
|
scmd->retries = scmd->allowed;
|
|
|
|
scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we have timed out qcs. They belong to EH from
|
|
|
|
* this point but the state of the controller is
|
|
|
|
* unknown. Freeze the port to make sure the IRQ
|
|
|
|
* handler doesn't diddle with those qcs. This must
|
|
|
|
* be done atomically w.r.t. setting QCFLAG_FAILED.
|
|
|
|
*/
|
|
|
|
if (nr_timedout)
|
|
|
|
__ata_port_freeze(ap);
|
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2007-08-18 12:28:49 +08:00
|
|
|
|
|
|
|
/* initialize eh_tries */
|
|
|
|
ap->eh_tries = ATA_EH_MAX_TRIES;
|
2006-05-15 19:58:12 +08:00
|
|
|
} else
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_wait(ap->lock);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
|
|
|
repeat:
|
|
|
|
/* invoke error handler */
|
|
|
|
if (ap->ops->error_handler) {
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_link *link;
|
|
|
|
|
2007-07-16 13:29:41 +08:00
|
|
|
/* kill fast drain timer */
|
|
|
|
del_timer_sync(&ap->fastdrain_timer);
|
|
|
|
|
2006-07-03 15:07:27 +08:00
|
|
|
/* process port resume request */
|
|
|
|
ata_eh_handle_port_resume(ap);
|
|
|
|
|
2006-05-15 19:58:21 +08:00
|
|
|
/* fetch & clear EH info */
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:21 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
__ata_port_for_each_link(link, ap) {
|
2007-11-27 18:28:58 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
|
|
|
struct ata_device *dev;
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
memset(&link->eh_context, 0, sizeof(link->eh_context));
|
|
|
|
link->eh_context.i = link->eh_info;
|
|
|
|
memset(&link->eh_info, 0, sizeof(link->eh_info));
|
2007-11-27 18:28:58 +08:00
|
|
|
|
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
int devno = dev->devno;
|
|
|
|
|
|
|
|
ehc->saved_xfer_mode[devno] = dev->xfer_mode;
|
|
|
|
if (ata_ncq_enabled(dev))
|
|
|
|
ehc->saved_ncq_enabled |= 1 << devno;
|
|
|
|
}
|
2007-08-06 17:36:23 +08:00
|
|
|
}
|
2006-05-15 19:58:21 +08:00
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
|
|
|
|
ap->pflags &= ~ATA_PFLAG_EH_PENDING;
|
2007-09-23 12:14:12 +08:00
|
|
|
ap->excl_link = NULL; /* don't maintain exclusion over EH */
|
2006-05-15 19:58:21 +08:00
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-07-03 15:07:27 +08:00
|
|
|
/* invoke EH, skip if unloading or suspended */
|
|
|
|
if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
|
2006-05-31 17:28:13 +08:00
|
|
|
ap->ops->error_handler(ap);
|
|
|
|
else
|
|
|
|
ata_eh_finish(ap);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-07-03 15:07:27 +08:00
|
|
|
/* process port suspend request */
|
|
|
|
ata_eh_handle_port_suspend(ap);
|
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
/* Exception might have happend after ->error_handler
|
|
|
|
* recovered the port but before this point. Repeat
|
|
|
|
* EH in such case.
|
|
|
|
*/
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_EH_PENDING) {
|
2007-08-18 12:28:49 +08:00
|
|
|
if (--ap->eh_tries) {
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
goto repeat;
|
|
|
|
}
|
|
|
|
ata_port_printk(ap, KERN_ERR, "EH pending after %d "
|
2007-08-18 12:28:49 +08:00
|
|
|
"tries, giving up\n", ATA_EH_MAX_TRIES);
|
2007-06-25 20:47:11 +08:00
|
|
|
ap->pflags &= ~ATA_PFLAG_EH_PENDING;
|
2006-05-15 19:58:12 +08:00
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:21 +08:00
|
|
|
/* this run is complete, make sure EH info is clear */
|
2007-08-06 17:36:23 +08:00
|
|
|
__ata_port_for_each_link(link, ap)
|
|
|
|
memset(&link->eh_info, 0, sizeof(link->eh_info));
|
2006-05-15 19:58:21 +08:00
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
/* Clear host_eh_scheduled while holding ap->lock such
|
2006-05-15 19:58:12 +08:00
|
|
|
* that if exception occurs after this point but
|
|
|
|
* before EH completion, SCSI midlayer will
|
|
|
|
* re-initiate EH.
|
|
|
|
*/
|
|
|
|
host->host_eh_scheduled = 0;
|
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
} else {
|
2007-08-06 17:36:22 +08:00
|
|
|
WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
|
2006-05-15 19:58:12 +08:00
|
|
|
ap->ops->eng_timeout(ap);
|
|
|
|
}
|
2006-04-02 17:51:53 +08:00
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
/* finish or retry handled scmd's and clean up */
|
2006-04-02 17:51:53 +08:00
|
|
|
WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
|
|
|
|
|
|
|
|
scsi_eh_flush_done_q(&ap->eh_done_q);
|
|
|
|
|
2006-05-15 19:58:12 +08:00
|
|
|
/* clean up */
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-07-03 15:07:26 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_LOADING)
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags &= ~ATA_PFLAG_LOADING;
|
2006-07-03 15:07:26 +08:00
|
|
|
else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
|
2006-11-22 22:54:01 +08:00
|
|
|
queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);
|
2006-07-03 15:07:26 +08:00
|
|
|
|
|
|
|
if (ap->pflags & ATA_PFLAG_RECOVERED)
|
|
|
|
ata_port_printk(ap, KERN_INFO, "EH complete\n");
|
2006-05-31 17:28:05 +08:00
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-05-31 17:27:27 +08:00
|
|
|
/* tell wait_eh that we're done */
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
|
2006-05-31 17:27:27 +08:00
|
|
|
wake_up_all(&ap->eh_wait_q);
|
|
|
|
|
2006-07-03 02:02:15 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:12 +08:00
|
|
|
|
2006-04-02 17:51:53 +08:00
|
|
|
DPRINTK("EXIT\n");
|
|
|
|
}
|
|
|
|
|
2006-05-31 17:27:27 +08:00
|
|
|
/**
|
|
|
|
* ata_port_wait_eh - Wait for the currently pending EH to complete
|
|
|
|
* @ap: Port to wait EH for
|
|
|
|
*
|
|
|
|
* Wait until the currently pending EH is complete.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
|
|
|
void ata_port_wait_eh(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
DEFINE_WAIT(wait);
|
|
|
|
|
|
|
|
retry:
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-31 17:27:27 +08:00
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
|
2006-05-31 17:27:27 +08:00
|
|
|
prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-31 17:27:27 +08:00
|
|
|
schedule();
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-31 17:27:27 +08:00
|
|
|
}
|
2006-06-11 10:01:38 +08:00
|
|
|
finish_wait(&ap->eh_wait_q, &wait);
|
2006-05-31 17:27:27 +08:00
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-31 17:27:27 +08:00
|
|
|
|
|
|
|
/* make sure SCSI EH is complete */
|
2006-08-24 15:19:22 +08:00
|
|
|
if (scsi_host_in_recovery(ap->scsi_host)) {
|
2006-05-31 17:27:27 +08:00
|
|
|
msleep(10);
|
|
|
|
goto retry;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-07-16 13:29:41 +08:00
|
|
|
static int ata_eh_nr_in_flight(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned int tag;
|
|
|
|
int nr = 0;
|
|
|
|
|
|
|
|
/* count only non-internal commands */
|
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++)
|
|
|
|
if (ata_qc_from_tag(ap, tag))
|
|
|
|
nr++;
|
|
|
|
|
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ata_eh_fastdrain_timerfn(unsigned long arg)
|
|
|
|
{
|
|
|
|
struct ata_port *ap = (void *)arg;
|
|
|
|
unsigned long flags;
|
|
|
|
int cnt;
|
|
|
|
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
|
|
|
|
cnt = ata_eh_nr_in_flight(ap);
|
|
|
|
|
|
|
|
/* are we done? */
|
|
|
|
if (!cnt)
|
|
|
|
goto out_unlock;
|
|
|
|
|
|
|
|
if (cnt == ap->fastdrain_cnt) {
|
|
|
|
unsigned int tag;
|
|
|
|
|
|
|
|
/* No progress during the last interval, tag all
|
|
|
|
* in-flight qcs as timed out and freeze the port.
|
|
|
|
*/
|
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
|
|
|
|
if (qc)
|
|
|
|
qc->err_mask |= AC_ERR_TIMEOUT;
|
|
|
|
}
|
|
|
|
|
|
|
|
ata_port_freeze(ap);
|
|
|
|
} else {
|
|
|
|
/* some qcs have finished, give it another chance */
|
|
|
|
ap->fastdrain_cnt = cnt;
|
|
|
|
ap->fastdrain_timer.expires =
|
|
|
|
jiffies + ATA_EH_FASTDRAIN_INTERVAL;
|
|
|
|
add_timer(&ap->fastdrain_timer);
|
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
|
|
|
|
* @ap: target ATA port
|
|
|
|
* @fastdrain: activate fast drain
|
|
|
|
*
|
|
|
|
* Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
|
|
|
|
* is non-zero and EH wasn't pending before. Fast drain ensures
|
|
|
|
* that EH kicks in in timely manner.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host lock)
|
|
|
|
*/
|
|
|
|
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
|
|
|
|
{
|
|
|
|
int cnt;
|
|
|
|
|
|
|
|
/* already scheduled? */
|
|
|
|
if (ap->pflags & ATA_PFLAG_EH_PENDING)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ap->pflags |= ATA_PFLAG_EH_PENDING;
|
|
|
|
|
|
|
|
if (!fastdrain)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* do we have in-flight qcs? */
|
|
|
|
cnt = ata_eh_nr_in_flight(ap);
|
|
|
|
if (!cnt)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* activate fast drain */
|
|
|
|
ap->fastdrain_cnt = cnt;
|
|
|
|
ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL;
|
|
|
|
add_timer(&ap->fastdrain_timer);
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:05 +08:00
|
|
|
/**
|
|
|
|
* ata_qc_schedule_eh - schedule qc for error handling
|
|
|
|
* @qc: command to schedule error handling for
|
|
|
|
*
|
|
|
|
* Schedule error handling for @qc. EH will kick in as soon as
|
|
|
|
* other commands are drained.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
2006-08-24 15:19:22 +08:00
|
|
|
* spin_lock_irqsave(host lock)
|
2006-05-15 19:58:05 +08:00
|
|
|
*/
|
|
|
|
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
|
|
|
|
{
|
|
|
|
struct ata_port *ap = qc->ap;
|
|
|
|
|
|
|
|
WARN_ON(!ap->ops->error_handler);
|
|
|
|
|
|
|
|
qc->flags |= ATA_QCFLAG_FAILED;
|
2007-07-16 13:29:41 +08:00
|
|
|
ata_eh_set_pending(ap, 1);
|
2006-05-15 19:58:05 +08:00
|
|
|
|
|
|
|
/* The following will fail if timeout has already expired.
|
|
|
|
* ata_scsi_error() takes care of such scmds on EH entry.
|
|
|
|
* Note that ATA_QCFLAG_FAILED is unconditionally set after
|
|
|
|
* this function completes.
|
|
|
|
*/
|
|
|
|
scsi_req_abort_cmd(qc->scsicmd);
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:07 +08:00
|
|
|
/**
|
|
|
|
* ata_port_schedule_eh - schedule error handling without a qc
|
|
|
|
* @ap: ATA port to schedule EH for
|
|
|
|
*
|
|
|
|
* Schedule error handling for @ap. EH will kick in as soon as
|
|
|
|
* all commands are drained.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
2006-08-24 15:19:22 +08:00
|
|
|
* spin_lock_irqsave(host lock)
|
2006-05-15 19:58:07 +08:00
|
|
|
*/
|
|
|
|
void ata_port_schedule_eh(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
WARN_ON(!ap->ops->error_handler);
|
|
|
|
|
2007-05-01 17:50:15 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_INITIALIZING)
|
|
|
|
return;
|
|
|
|
|
2007-07-16 13:29:41 +08:00
|
|
|
ata_eh_set_pending(ap, 1);
|
2006-08-24 15:19:22 +08:00
|
|
|
scsi_schedule_eh(ap->scsi_host);
|
2006-05-15 19:58:07 +08:00
|
|
|
|
|
|
|
DPRINTK("port EH scheduled\n");
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
|
2006-05-15 19:58:07 +08:00
|
|
|
{
|
|
|
|
int tag, nr_aborted = 0;
|
|
|
|
|
|
|
|
WARN_ON(!ap->ops->error_handler);
|
|
|
|
|
2007-07-16 13:29:41 +08:00
|
|
|
/* we're gonna abort all commands, no need for fast drain */
|
|
|
|
ata_eh_set_pending(ap, 0);
|
|
|
|
|
2006-05-15 19:58:07 +08:00
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
if (qc && (!link || qc->dev->link == link)) {
|
2006-05-15 19:58:07 +08:00
|
|
|
qc->flags |= ATA_QCFLAG_FAILED;
|
|
|
|
ata_qc_complete(qc);
|
|
|
|
nr_aborted++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nr_aborted)
|
|
|
|
ata_port_schedule_eh(ap);
|
|
|
|
|
|
|
|
return nr_aborted;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
/**
|
|
|
|
* ata_link_abort - abort all qc's on the link
|
|
|
|
* @link: ATA link to abort qc's for
|
|
|
|
*
|
|
|
|
* Abort all active qc's active on @link and schedule EH.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host lock)
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Number of aborted qc's.
|
|
|
|
*/
|
|
|
|
int ata_link_abort(struct ata_link *link)
|
|
|
|
{
|
|
|
|
return ata_do_link_abort(link->ap, link);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_port_abort - abort all qc's on the port
|
|
|
|
* @ap: ATA port to abort qc's for
|
|
|
|
*
|
|
|
|
* Abort all active qc's of @ap and schedule EH.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host_set lock)
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Number of aborted qc's.
|
|
|
|
*/
|
|
|
|
int ata_port_abort(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
return ata_do_link_abort(ap, NULL);
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:09 +08:00
|
|
|
/**
|
|
|
|
* __ata_port_freeze - freeze port
|
|
|
|
* @ap: ATA port to freeze
|
|
|
|
*
|
|
|
|
* This function is called when HSM violation or some other
|
|
|
|
* condition disrupts normal operation of the port. Frozen port
|
|
|
|
* is not allowed to perform any operation until the port is
|
|
|
|
* thawed, which usually follows a successful reset.
|
|
|
|
*
|
|
|
|
* ap->ops->freeze() callback can be used for freezing the port
|
|
|
|
* hardware-wise (e.g. mask interrupt and stop DMA engine). If a
|
|
|
|
* port cannot be frozen hardware-wise, the interrupt handler
|
|
|
|
* must ack and clear interrupts unconditionally while the port
|
|
|
|
* is frozen.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
2006-08-24 15:19:22 +08:00
|
|
|
* spin_lock_irqsave(host lock)
|
2006-05-15 19:58:09 +08:00
|
|
|
*/
|
|
|
|
static void __ata_port_freeze(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
WARN_ON(!ap->ops->error_handler);
|
|
|
|
|
|
|
|
if (ap->ops->freeze)
|
|
|
|
ap->ops->freeze(ap);
|
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags |= ATA_PFLAG_FROZEN;
|
2006-05-15 19:58:09 +08:00
|
|
|
|
2007-02-21 00:06:51 +08:00
|
|
|
DPRINTK("ata%u port frozen\n", ap->print_id);
|
2006-05-15 19:58:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_port_freeze - abort & freeze port
|
|
|
|
* @ap: ATA port to freeze
|
|
|
|
*
|
|
|
|
* Abort and freeze @ap.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
2006-08-24 15:19:22 +08:00
|
|
|
* spin_lock_irqsave(host lock)
|
2006-05-15 19:58:09 +08:00
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Number of aborted commands.
|
|
|
|
*/
|
|
|
|
int ata_port_freeze(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
int nr_aborted;
|
|
|
|
|
|
|
|
WARN_ON(!ap->ops->error_handler);
|
|
|
|
|
|
|
|
nr_aborted = ata_port_abort(ap);
|
|
|
|
__ata_port_freeze(ap);
|
|
|
|
|
|
|
|
return nr_aborted;
|
|
|
|
}
|
|
|
|
|
2007-09-23 12:14:13 +08:00
|
|
|
/**
|
|
|
|
* sata_async_notification - SATA async notification handler
|
|
|
|
* @ap: ATA port where async notification is received
|
|
|
|
*
|
|
|
|
* Handler to be called when async notification via SDB FIS is
|
|
|
|
* received. This function schedules EH if necessary.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* spin_lock_irqsave(host lock)
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 1 if EH is scheduled, 0 otherwise.
|
|
|
|
*/
|
|
|
|
int sata_async_notification(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
u32 sntf;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
if (!(ap->flags & ATA_FLAG_AN))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
|
|
|
|
if (rc == 0)
|
|
|
|
sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);
|
|
|
|
|
2008-04-07 21:47:22 +08:00
|
|
|
if (!sata_pmp_attached(ap) || rc) {
|
2007-09-23 12:14:13 +08:00
|
|
|
/* PMP is not attached or SNTF is not available */
|
2008-04-07 21:47:22 +08:00
|
|
|
if (!sata_pmp_attached(ap)) {
|
2007-09-23 12:14:13 +08:00
|
|
|
/* PMP is not attached. Check whether ATAPI
|
|
|
|
* AN is configured. If so, notify media
|
|
|
|
* change.
|
|
|
|
*/
|
|
|
|
struct ata_device *dev = ap->link.device;
|
|
|
|
|
|
|
|
if ((dev->class == ATA_DEV_ATAPI) &&
|
|
|
|
(dev->flags & ATA_DFLAG_AN))
|
|
|
|
ata_scsi_media_change_notify(dev);
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
/* PMP is attached but SNTF is not available.
|
|
|
|
* ATAPI async media change notification is
|
|
|
|
* not used. The PMP must be reporting PHY
|
|
|
|
* status change, schedule EH.
|
|
|
|
*/
|
|
|
|
ata_port_schedule_eh(ap);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* PMP is attached and SNTF is available */
|
|
|
|
struct ata_link *link;
|
|
|
|
|
|
|
|
/* check and notify ATAPI AN */
|
|
|
|
ata_port_for_each_link(link, ap) {
|
|
|
|
if (!(sntf & (1 << link->pmp)))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if ((link->device->class == ATA_DEV_ATAPI) &&
|
|
|
|
(link->device->flags & ATA_DFLAG_AN))
|
|
|
|
ata_scsi_media_change_notify(link->device);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If PMP is reporting that PHY status of some
|
|
|
|
* downstream ports has changed, schedule EH.
|
|
|
|
*/
|
|
|
|
if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
|
|
|
|
ata_port_schedule_eh(ap);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:09 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_freeze_port - EH helper to freeze port
|
|
|
|
* @ap: ATA port to freeze
|
|
|
|
*
|
|
|
|
* Freeze @ap.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
|
|
|
void ata_eh_freeze_port(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (!ap->ops->error_handler)
|
|
|
|
return;
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:09 +08:00
|
|
|
__ata_port_freeze(ap);
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_port_thaw_port - EH helper to thaw port
|
|
|
|
* @ap: ATA port to thaw
|
|
|
|
*
|
|
|
|
* Thaw frozen port @ap.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
|
|
|
void ata_eh_thaw_port(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
if (!ap->ops->error_handler)
|
|
|
|
return;
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-15 19:58:09 +08:00
|
|
|
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags &= ~ATA_PFLAG_FROZEN;
|
2006-05-15 19:58:09 +08:00
|
|
|
|
|
|
|
if (ap->ops->thaw)
|
|
|
|
ap->ops->thaw(ap);
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:09 +08:00
|
|
|
|
2007-02-21 00:06:51 +08:00
|
|
|
DPRINTK("ata%u port thawed\n", ap->print_id);
|
2006-05-15 19:58:09 +08:00
|
|
|
}
|
|
|
|
|
2006-04-02 17:51:53 +08:00
|
|
|
/* Do-nothing ->scsidone callback; __ata_eh_qc_complete() installs it on the qc. */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* intentionally empty */
}
|
|
|
|
|
|
|
|
static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
|
|
|
|
{
|
|
|
|
struct ata_port *ap = qc->ap;
|
|
|
|
struct scsi_cmnd *scmd = qc->scsicmd;
|
|
|
|
unsigned long flags;
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-04-02 17:51:53 +08:00
|
|
|
qc->scsidone = ata_eh_scsidone;
|
|
|
|
__ata_qc_complete(qc);
|
|
|
|
WARN_ON(ata_tag_valid(qc->tag));
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-04-02 17:51:53 +08:00
|
|
|
|
|
|
|
scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_qc_complete - Complete an active ATA command from EH
|
|
|
|
* @qc: Command to complete
|
|
|
|
*
|
|
|
|
* Indicate to the mid and upper layers that an ATA command has
|
|
|
|
* completed. To be used from EH.
|
|
|
|
*/
|
|
|
|
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
|
|
|
|
{
|
|
|
|
struct scsi_cmnd *scmd = qc->scsicmd;
|
|
|
|
scmd->retries = scmd->allowed;
|
|
|
|
__ata_eh_qc_complete(qc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
|
|
|
|
* @qc: Command to retry
|
|
|
|
*
|
|
|
|
* Indicate to the mid and upper layers that an ATA command
|
|
|
|
* should be retried. To be used from EH.
|
|
|
|
*
|
|
|
|
* SCSI midlayer limits the number of retries to scmd->allowed.
|
|
|
|
* scmd->retries is decremented for commands which get retried
|
|
|
|
* due to unrelated failures (qc->err_mask is zero).
|
|
|
|
*/
|
|
|
|
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
|
|
|
|
{
|
|
|
|
struct scsi_cmnd *scmd = qc->scsicmd;
|
|
|
|
if (!qc->err_mask && scmd->retries)
|
|
|
|
scmd->retries--;
|
|
|
|
__ata_eh_qc_complete(qc);
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2006-05-31 17:28:01 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_detach_dev - detach ATA device
|
|
|
|
* @dev: ATA device to detach
|
|
|
|
*
|
|
|
|
* Detach @dev.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_detach_dev(struct ata_device *dev)
|
2006-05-31 17:28:01 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_link *link = dev->link;
|
|
|
|
struct ata_port *ap = link->ap;
|
2006-05-31 17:28:01 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
ata_dev_disable(dev);
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-05-31 17:28:01 +08:00
|
|
|
|
|
|
|
dev->flags &= ~ATA_DFLAG_DETACH;
|
|
|
|
|
|
|
|
if (ata_scsi_offline_dev(dev)) {
|
|
|
|
dev->flags |= ATA_DFLAG_DETACHED;
|
2006-06-29 00:29:30 +08:00
|
|
|
ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
|
2006-05-31 17:28:01 +08:00
|
|
|
}
|
|
|
|
|
2006-06-24 19:30:19 +08:00
|
|
|
/* clear per-dev EH actions */
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
|
|
|
|
ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
|
2006-06-24 19:30:19 +08:00
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-31 17:28:01 +08:00
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_about_to_do - about to perform eh_action
|
2007-08-06 17:36:23 +08:00
|
|
|
* @link: target ATA link
|
2006-06-19 17:27:23 +08:00
|
|
|
* @dev: target ATA dev for per-dev action (can be NULL)
|
2006-05-15 19:58:22 +08:00
|
|
|
* @action: action about to be performed
|
|
|
|
*
|
|
|
|
* Called just before performing EH actions to clear related bits
|
2007-08-06 17:36:23 +08:00
|
|
|
* in @link->eh_info such that eh actions are not unnecessarily
|
|
|
|
* repeated.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
|
|
|
|
unsigned int action)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
|
|
|
struct ata_eh_info *ehi = &link->eh_info;
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
unsigned long flags;
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
2006-07-03 15:07:26 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_clear_action(link, dev, ehi, action);
|
2006-07-03 15:07:26 +08:00
|
|
|
|
2006-07-10 22:18:46 +08:00
|
|
|
if (!(ehc->i.flags & ATA_EHI_QUIET))
|
2006-07-03 15:07:26 +08:00
|
|
|
ap->pflags |= ATA_PFLAG_RECOVERED;
|
|
|
|
|
2006-06-23 11:46:10 +08:00
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2006-06-19 17:27:23 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_done - EH action complete
|
2007-08-06 17:36:23 +08:00
|
|
|
* @ap: target ATA port
|
2006-06-19 17:27:23 +08:00
|
|
|
* @dev: target ATA dev for per-dev action (can be NULL)
|
|
|
|
* @action: action just completed
|
|
|
|
*
|
|
|
|
* Called right after performing EH actions to clear related bits
|
2007-08-06 17:36:23 +08:00
|
|
|
* in @link->eh_context.
|
2006-06-19 17:27:23 +08:00
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
|
|
|
|
unsigned int action)
|
2006-06-19 17:27:23 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2007-08-06 17:36:22 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_clear_action(link, dev, &ehc->i, action);
|
2006-06-19 17:27:23 +08:00
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/**
|
|
|
|
* ata_err_string - convert err_mask to descriptive string
|
|
|
|
* @err_mask: error mask to convert to string
|
|
|
|
*
|
|
|
|
* Convert @err_mask to descriptive string. Errors are
|
|
|
|
* prioritized according to severity and only the most severe
|
|
|
|
* error is reported.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Descriptive string for @err_mask
|
|
|
|
*/
|
2007-10-19 18:42:56 +08:00
|
|
|
static const char *ata_err_string(unsigned int err_mask)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
|
|
|
if (err_mask & AC_ERR_HOST_BUS)
|
|
|
|
return "host bus error";
|
|
|
|
if (err_mask & AC_ERR_ATA_BUS)
|
|
|
|
return "ATA bus error";
|
|
|
|
if (err_mask & AC_ERR_TIMEOUT)
|
|
|
|
return "timeout";
|
|
|
|
if (err_mask & AC_ERR_HSM)
|
|
|
|
return "HSM violation";
|
|
|
|
if (err_mask & AC_ERR_SYSTEM)
|
|
|
|
return "internal error";
|
|
|
|
if (err_mask & AC_ERR_MEDIA)
|
|
|
|
return "media error";
|
|
|
|
if (err_mask & AC_ERR_INVALID)
|
|
|
|
return "invalid argument";
|
|
|
|
if (err_mask & AC_ERR_DEV)
|
|
|
|
return "device error";
|
|
|
|
return "unknown error";
|
|
|
|
}
|
|
|
|
|
2006-05-15 20:03:46 +08:00
|
|
|
/**
|
|
|
|
* ata_read_log_page - read a specific log page
|
|
|
|
* @dev: target device
|
|
|
|
* @page: page to read
|
|
|
|
* @buf: buffer to store read page
|
|
|
|
* @sectors: number of sectors to read
|
|
|
|
*
|
|
|
|
* Read log page using READ_LOG_EXT command.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 0 on success, AC_ERR_* mask otherwise.
|
|
|
|
*/
|
|
|
|
static unsigned int ata_read_log_page(struct ata_device *dev,
|
|
|
|
u8 page, void *buf, unsigned int sectors)
|
|
|
|
{
|
|
|
|
struct ata_taskfile tf;
|
|
|
|
unsigned int err_mask;
|
|
|
|
|
|
|
|
DPRINTK("read log page - page %d\n", page);
|
|
|
|
|
|
|
|
ata_tf_init(dev, &tf);
|
|
|
|
tf.command = ATA_CMD_READ_LOG_EXT;
|
|
|
|
tf.lbal = page;
|
|
|
|
tf.nsect = sectors;
|
|
|
|
tf.hob_nsect = sectors >> 8;
|
|
|
|
tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
|
|
|
|
tf.protocol = ATA_PROT_PIO;
|
|
|
|
|
|
|
|
err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
|
2007-10-09 14:05:44 +08:00
|
|
|
buf, sectors * ATA_SECT_SIZE, 0);
|
2006-05-15 20:03:46 +08:00
|
|
|
|
|
|
|
DPRINTK("EXIT, err_mask=%x\n", err_mask);
|
|
|
|
return err_mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_read_log_10h - Read log page 10h for NCQ error details
|
|
|
|
* @dev: Device to read log page 10h from
|
|
|
|
* @tag: Resulting tag of the failed command
|
|
|
|
* @tf: Resulting taskfile registers of the failed command
|
|
|
|
*
|
|
|
|
* Read log page 10h to obtain NCQ error details and clear error
|
|
|
|
* condition.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 0 on success, -errno otherwise.
|
|
|
|
*/
|
|
|
|
static int ata_eh_read_log_10h(struct ata_device *dev,
|
|
|
|
int *tag, struct ata_taskfile *tf)
|
|
|
|
{
|
2007-08-06 17:36:22 +08:00
|
|
|
u8 *buf = dev->link->ap->sector_buf;
|
2006-05-15 20:03:46 +08:00
|
|
|
unsigned int err_mask;
|
|
|
|
u8 csum;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
|
|
|
|
if (err_mask)
|
|
|
|
return -EIO;
|
|
|
|
|
|
|
|
csum = 0;
|
|
|
|
for (i = 0; i < ATA_SECT_SIZE; i++)
|
|
|
|
csum += buf[i];
|
|
|
|
if (csum)
|
|
|
|
ata_dev_printk(dev, KERN_WARNING,
|
|
|
|
"invalid checksum 0x%x on log page 10h\n", csum);
|
|
|
|
|
|
|
|
if (buf[0] & 0x80)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
*tag = buf[0] & 0x1f;
|
|
|
|
|
|
|
|
tf->command = buf[2];
|
|
|
|
tf->feature = buf[3];
|
|
|
|
tf->lbal = buf[4];
|
|
|
|
tf->lbam = buf[5];
|
|
|
|
tf->lbah = buf[6];
|
|
|
|
tf->device = buf[7];
|
|
|
|
tf->hob_lbal = buf[8];
|
|
|
|
tf->hob_lbam = buf[9];
|
|
|
|
tf->hob_lbah = buf[10];
|
|
|
|
tf->nsect = buf[12];
|
|
|
|
tf->hob_nsect = buf[13];
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/**
|
|
|
|
* atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
|
|
|
|
* @dev: device to perform REQUEST_SENSE to
|
|
|
|
* @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
|
|
|
|
*
|
|
|
|
* Perform ATAPI REQUEST_SENSE after the device reported CHECK
|
|
|
|
* SENSE. This function is EH helper.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 0 on success, AC_ERR_* mask on failure
|
|
|
|
*/
|
2007-04-02 11:30:46 +08:00
|
|
|
static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-04-02 11:30:46 +08:00
|
|
|
struct ata_device *dev = qc->dev;
|
|
|
|
unsigned char *sense_buf = qc->scsicmd->sense_buffer;
|
2007-08-06 17:36:22 +08:00
|
|
|
struct ata_port *ap = dev->link->ap;
|
2006-05-15 19:58:22 +08:00
|
|
|
struct ata_taskfile tf;
|
|
|
|
u8 cdb[ATAPI_CDB_LEN];
|
|
|
|
|
|
|
|
DPRINTK("ATAPI request sense\n");
|
|
|
|
|
|
|
|
/* FIXME: is this needed? */
|
|
|
|
memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
|
|
|
|
|
2007-04-02 11:30:46 +08:00
|
|
|
/* initialize sense_buf with the error register,
|
|
|
|
* for the case where they are -not- overwritten
|
|
|
|
*/
|
2006-05-15 19:58:22 +08:00
|
|
|
sense_buf[0] = 0x70;
|
2007-04-02 11:30:46 +08:00
|
|
|
sense_buf[2] = qc->result_tf.feature >> 4;
|
|
|
|
|
2007-05-22 08:14:23 +08:00
|
|
|
/* some devices time out if garbage left in tf */
|
2007-04-02 11:30:46 +08:00
|
|
|
ata_tf_init(dev, &tf);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
memset(cdb, 0, ATAPI_CDB_LEN);
|
|
|
|
cdb[0] = REQUEST_SENSE;
|
|
|
|
cdb[4] = SCSI_SENSE_BUFFERSIZE;
|
|
|
|
|
|
|
|
tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
|
|
|
|
tf.command = ATA_CMD_PACKET;
|
|
|
|
|
|
|
|
/* is it pointless to prefer PIO for "safety reasons"? */
|
|
|
|
if (ap->flags & ATA_FLAG_PIO_DMA) {
|
2007-12-19 05:34:43 +08:00
|
|
|
tf.protocol = ATAPI_PROT_DMA;
|
2006-05-15 19:58:22 +08:00
|
|
|
tf.feature |= ATAPI_PKT_DMA;
|
|
|
|
} else {
|
2007-12-19 05:34:43 +08:00
|
|
|
tf.protocol = ATAPI_PROT_PIO;
|
2007-12-12 11:12:46 +08:00
|
|
|
tf.lbam = SCSI_SENSE_BUFFERSIZE;
|
|
|
|
tf.lbah = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
|
2007-10-09 14:05:44 +08:00
|
|
|
sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_analyze_serror - analyze SError for a failed port
|
2007-08-06 17:36:23 +08:00
|
|
|
* @link: ATA link to analyze SError for
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* Analyze SError if available and further determine cause of
|
|
|
|
* failure.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-08-06 17:36:23 +08:00
|
|
|
static void ata_eh_analyze_serror(struct ata_link *link)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
u32 serror = ehc->i.serror;
|
|
|
|
unsigned int err_mask = 0, action = 0;
|
2007-09-23 12:14:13 +08:00
|
|
|
u32 hotplug_mask;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-05-19 00:15:09 +08:00
|
|
|
if (serror & (SERR_PERSISTENT | SERR_DATA)) {
|
2006-05-15 19:58:22 +08:00
|
|
|
err_mask |= AC_ERR_ATA_BUS;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
if (serror & SERR_PROTOCOL) {
|
|
|
|
err_mask |= AC_ERR_HSM;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
if (serror & SERR_INTERNAL) {
|
|
|
|
err_mask |= AC_ERR_SYSTEM;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
2007-09-23 12:14:13 +08:00
|
|
|
|
|
|
|
/* Determine whether a hotplug event has occurred. Both
|
|
|
|
* SError.N/X are considered hotplug events for enabled or
|
|
|
|
* host links. For disabled PMP links, only N bit is
|
|
|
|
* considered as X bit is left at 1 for link plugging.
|
|
|
|
*/
|
|
|
|
hotplug_mask = 0;
|
|
|
|
|
|
|
|
if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
|
|
|
|
hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
|
|
|
|
else
|
|
|
|
hotplug_mask = SERR_PHYRDY_CHG;
|
|
|
|
|
|
|
|
if (serror & hotplug_mask)
|
2006-05-31 17:28:03 +08:00
|
|
|
ata_ehi_hotplugged(&ehc->i);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
ehc->i.err_mask |= err_mask;
|
|
|
|
ehc->i.action |= action;
|
|
|
|
}
|
|
|
|
|
2006-05-15 20:03:46 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_analyze_ncq_error - analyze NCQ error
|
2007-08-06 17:36:23 +08:00
|
|
|
* @link: ATA link to analyze NCQ error for
|
2006-05-15 20:03:46 +08:00
|
|
|
*
|
|
|
|
* Read log page 10h, determine the offending qc and acquire
|
|
|
|
* error status TF. For NCQ device errors, all LLDDs have to do
|
|
|
|
* is setting AC_ERR_DEV in ehi->err_mask. This function takes
|
|
|
|
* care of the rest.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
2008-05-02 14:14:53 +08:00
|
|
|
void ata_eh_analyze_ncq_error(struct ata_link *link)
|
2006-05-15 20:03:46 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
|
|
|
struct ata_device *dev = link->device;
|
2006-05-15 20:03:46 +08:00
|
|
|
struct ata_queued_cmd *qc;
|
|
|
|
struct ata_taskfile tf;
|
|
|
|
int tag, rc;
|
|
|
|
|
|
|
|
/* if frozen, we can't do much */
|
2006-06-29 00:29:30 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_FROZEN)
|
2006-05-15 20:03:46 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* is it NCQ device error? */
|
2007-08-06 17:36:23 +08:00
|
|
|
if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
|
2006-05-15 20:03:46 +08:00
|
|
|
return;
|
|
|
|
|
|
|
|
/* has LLDD analyzed already? */
|
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
qc = __ata_qc_from_tag(ap, tag);
|
|
|
|
|
|
|
|
if (!(qc->flags & ATA_QCFLAG_FAILED))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (qc->err_mask)
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* okay, this error is ours */
|
|
|
|
rc = ata_eh_read_log_10h(dev, &tag, &tf);
|
|
|
|
if (rc) {
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_printk(link, KERN_ERR, "failed to read log page 10h "
|
2006-05-15 20:03:46 +08:00
|
|
|
"(errno=%d)\n", rc);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
if (!(link->sactive & (1 << tag))) {
|
|
|
|
ata_link_printk(link, KERN_ERR, "log page 10h reported "
|
2006-05-15 20:03:46 +08:00
|
|
|
"inactive tag %d\n", tag);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we've got the perpetrator, condemn it */
|
|
|
|
qc = __ata_qc_from_tag(ap, tag);
|
|
|
|
memcpy(&qc->result_tf, &tf, sizeof(tf));
|
2008-04-24 10:36:25 +08:00
|
|
|
qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
|
2007-07-16 13:29:40 +08:00
|
|
|
qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
|
2006-05-15 20:03:46 +08:00
|
|
|
ehc->i.err_mask &= ~AC_ERR_DEV;
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_analyze_tf - analyze taskfile of a failed qc
|
|
|
|
* @qc: qc to analyze
|
|
|
|
* @tf: Taskfile registers to analyze
|
|
|
|
*
|
|
|
|
* Analyze taskfile of @qc and further determine cause of
|
|
|
|
* failure. This function also requests ATAPI sense data if
|
|
|
|
* avaliable.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* Determined recovery action
|
|
|
|
*/
|
|
|
|
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
|
|
|
|
const struct ata_taskfile *tf)
|
|
|
|
{
|
|
|
|
unsigned int tmp, action = 0;
|
|
|
|
u8 stat = tf->command, err = tf->feature;
|
|
|
|
|
|
|
|
if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
|
|
|
|
qc->err_mask |= AC_ERR_HSM;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
return ATA_EH_RESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-03-20 14:24:11 +08:00
|
|
|
if (stat & (ATA_ERR | ATA_DF))
|
|
|
|
qc->err_mask |= AC_ERR_DEV;
|
|
|
|
else
|
2006-05-15 19:58:22 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
switch (qc->dev->class) {
|
|
|
|
case ATA_DEV_ATA:
|
|
|
|
if (err & ATA_ICRC)
|
|
|
|
qc->err_mask |= AC_ERR_ATA_BUS;
|
|
|
|
if (err & ATA_UNC)
|
|
|
|
qc->err_mask |= AC_ERR_MEDIA;
|
|
|
|
if (err & ATA_IDNF)
|
|
|
|
qc->err_mask |= AC_ERR_INVALID;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case ATA_DEV_ATAPI:
|
2006-11-21 09:40:51 +08:00
|
|
|
if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
|
2007-04-02 11:30:46 +08:00
|
|
|
tmp = atapi_eh_request_sense(qc);
|
2006-11-21 09:40:51 +08:00
|
|
|
if (!tmp) {
|
|
|
|
/* ATA_QCFLAG_SENSE_VALID is used to
|
|
|
|
* tell atapi_qc_complete() that sense
|
|
|
|
* data is already valid.
|
|
|
|
*
|
|
|
|
* TODO: interpret sense data and set
|
|
|
|
* appropriate err_mask.
|
|
|
|
*/
|
|
|
|
qc->flags |= ATA_QCFLAG_SENSE_VALID;
|
|
|
|
} else
|
|
|
|
qc->err_mask |= tmp;
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
return action;
|
|
|
|
}
|
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
|
|
|
|
int *xfer_ok)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-11-27 18:28:59 +08:00
|
|
|
int base = 0;
|
|
|
|
|
|
|
|
if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
|
|
|
|
*xfer_ok = 1;
|
|
|
|
|
|
|
|
if (!*xfer_ok)
|
2008-01-03 00:21:14 +08:00
|
|
|
base = ATA_ECAT_DUBIOUS_NONE;
|
2007-11-27 18:28:59 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
if (err_mask & AC_ERR_ATA_BUS)
|
2007-11-27 18:28:59 +08:00
|
|
|
return base + ATA_ECAT_ATA_BUS;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
if (err_mask & AC_ERR_TIMEOUT)
|
2007-11-27 18:28:59 +08:00
|
|
|
return base + ATA_ECAT_TOUT_HSM;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
if (eflags & ATA_EFLAG_IS_IO) {
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
if (err_mask & AC_ERR_HSM)
|
2007-11-27 18:28:59 +08:00
|
|
|
return base + ATA_ECAT_TOUT_HSM;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
if ((err_mask &
|
|
|
|
(AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
|
2007-11-27 18:28:59 +08:00
|
|
|
return base + ATA_ECAT_UNK_DEV;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
struct speed_down_verdict_arg {
|
2006-05-15 19:58:22 +08:00
|
|
|
u64 since;
|
2007-11-27 18:28:59 +08:00
|
|
|
int xfer_ok;
|
2007-11-27 18:28:56 +08:00
|
|
|
int nr_errors[ATA_ECAT_NR];
|
2006-05-15 19:58:22 +08:00
|
|
|
};
|
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
struct speed_down_verdict_arg *arg = void_arg;
|
2007-11-27 18:28:59 +08:00
|
|
|
int cat;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
if (ent->timestamp < arg->since)
|
|
|
|
return -1;
|
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
|
|
|
|
&arg->xfer_ok);
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
arg->nr_errors[cat]++;
|
2007-11-27 18:28:59 +08:00
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
* ata_eh_speed_down_verdict - Determine speed down verdict
|
2006-05-15 19:58:22 +08:00
|
|
|
* @dev: Device of interest
|
|
|
|
*
|
|
|
|
* This function examines error ring of @dev and determines
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
* whether NCQ needs to be turned off, transfer speed should be
|
|
|
|
* stepped down, or falling back to PIO is necessary.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
2007-11-27 18:28:56 +08:00
|
|
|
* ECAT_ATA_BUS : ATA_BUS error for any command
|
|
|
|
*
|
|
|
|
* ECAT_TOUT_HSM : TIMEOUT for any command or HSM violation for
|
|
|
|
* IO commands
|
|
|
|
*
|
|
|
|
* ECAT_UNK_DEV : Unknown DEV error for IO commands
|
|
|
|
*
|
2007-11-27 18:28:59 +08:00
|
|
|
* ECAT_DUBIOUS_* : Identical to above three but occurred while
|
|
|
|
* data transfer hasn't been verified.
|
|
|
|
*
|
2007-11-27 18:28:56 +08:00
|
|
|
* Verdicts are
|
|
|
|
*
|
|
|
|
* NCQ_OFF : Turn off NCQ.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
2007-11-27 18:28:56 +08:00
|
|
|
* SPEED_DOWN : Speed down transfer speed but don't fall back
|
|
|
|
* to PIO.
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
*
|
2007-11-27 18:28:56 +08:00
|
|
|
* FALLBACK_TO_PIO : Fall back to PIO.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
2007-11-27 18:28:56 +08:00
|
|
|
* Even if multiple verdicts are returned, only one action is
|
2007-11-27 18:28:59 +08:00
|
|
|
* taken per error. An action triggered by non-DUBIOUS errors
|
|
|
|
* clears ering, while one triggered by DUBIOUS_* errors doesn't.
|
|
|
|
* This is to expedite speed down decisions right after device is
|
|
|
|
* initially configured.
|
|
|
|
*
|
|
|
|
* The followings are speed down rules. #1 and #2 deal with
|
|
|
|
* DUBIOUS errors.
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
*
|
2007-11-27 18:28:59 +08:00
|
|
|
* 1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
|
|
|
|
* occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
|
|
|
|
*
|
|
|
|
* 2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
|
|
|
|
* occurred during last 5 mins, NCQ_OFF.
|
|
|
|
*
|
|
|
|
* 3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
|
2007-11-27 18:28:56 +08:00
|
|
|
* occurred during last 5 mins, FALLBACK_TO_PIO.
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
*
|
2007-11-27 18:28:59 +08:00
|
|
|
* 4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
|
2007-11-27 18:28:56 +08:00
|
|
|
* during last 10 mins, NCQ_OFF.
|
|
|
|
*
|
2007-11-27 18:28:59 +08:00
|
|
|
* 5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
|
2007-11-27 18:28:56 +08:00
|
|
|
* UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
*
|
2006-05-15 19:58:22 +08:00
|
|
|
* LOCKING:
|
|
|
|
* Inherited from caller.
|
|
|
|
*
|
|
|
|
* RETURNS:
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
* OR of ATA_EH_SPDN_* flags.
|
2006-05-15 19:58:22 +08:00
|
|
|
*/
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
|
|
|
|
u64 j64 = get_jiffies_64();
|
|
|
|
struct speed_down_verdict_arg arg;
|
|
|
|
unsigned int verdict = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
/* scan past 5 mins of error history */
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
memset(&arg, 0, sizeof(arg));
|
2007-11-27 18:28:56 +08:00
|
|
|
arg.since = j64 - min(j64, j5mins);
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
|
|
|
|
arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
|
|
|
|
verdict |= ATA_EH_SPDN_SPEED_DOWN |
|
|
|
|
ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;
|
|
|
|
|
|
|
|
if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
|
|
|
|
arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
|
|
|
|
verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;
|
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
|
|
|
|
arg.nr_errors[ATA_ECAT_TOUT_HSM] +
|
2007-11-27 18:28:57 +08:00
|
|
|
arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
|
2007-11-27 18:28:56 +08:00
|
|
|
verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
/* scan past 10 mins of error history */
|
2006-05-15 19:58:22 +08:00
|
|
|
memset(&arg, 0, sizeof(arg));
|
2007-11-27 18:28:56 +08:00
|
|
|
arg.since = j64 - min(j64, j10mins);
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-11-27 18:28:56 +08:00
|
|
|
if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
|
|
|
|
arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
|
|
|
|
verdict |= ATA_EH_SPDN_NCQ_OFF;
|
|
|
|
|
|
|
|
if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
|
|
|
|
arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
|
2007-11-27 18:28:57 +08:00
|
|
|
arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
|
2007-11-27 18:28:56 +08:00
|
|
|
verdict |= ATA_EH_SPDN_SPEED_DOWN;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
return verdict;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_speed_down - record error and speed down if necessary
|
|
|
|
* @dev: Failed device
|
2007-11-27 18:28:56 +08:00
|
|
|
* @eflags: mask of ATA_EFLAG_* flags
|
2006-05-15 19:58:22 +08:00
|
|
|
* @err_mask: err_mask of the error
|
|
|
|
*
|
|
|
|
* Record error and examine error history to determine whether
|
|
|
|
* adjusting transmission speed is necessary. It also sets
|
|
|
|
* transmission limits appropriately if such adjustment is
|
|
|
|
* necessary.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
* Determined recovery action.
|
2006-05-15 19:58:22 +08:00
|
|
|
*/
|
2007-11-27 18:28:56 +08:00
|
|
|
static unsigned int ata_eh_speed_down(struct ata_device *dev,
|
|
|
|
unsigned int eflags, unsigned int err_mask)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-11-27 18:28:56 +08:00
|
|
|
struct ata_link *link = dev->link;
|
2007-11-27 18:28:59 +08:00
|
|
|
int xfer_ok = 0;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
unsigned int verdict;
|
|
|
|
unsigned int action = 0;
|
|
|
|
|
|
|
|
/* don't bother if Cat-0 error */
|
2007-11-27 18:28:59 +08:00
|
|
|
if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
|
2006-05-15 19:58:22 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* record error and determine whether speed down is necessary */
|
2007-11-27 18:28:56 +08:00
|
|
|
ata_ering_record(&dev->ering, eflags, err_mask);
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
verdict = ata_eh_speed_down_verdict(dev);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
/* turn off NCQ? */
|
|
|
|
if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
|
|
|
|
(dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
|
|
|
|
ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
|
|
|
|
dev->flags |= ATA_DFLAG_NCQ_OFF;
|
|
|
|
ata_dev_printk(dev, KERN_WARNING,
|
|
|
|
"NCQ disabled due to excessive errors\n");
|
|
|
|
goto done;
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
/* speed down? */
|
|
|
|
if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
|
|
|
|
/* speed down SATA link speed if possible */
|
2007-11-27 18:28:56 +08:00
|
|
|
if (sata_down_spd_limit(link) == 0) {
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
goto done;
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
/* lower transfer mode */
|
|
|
|
if (dev->spdn_cnt < 2) {
|
|
|
|
static const int dma_dnxfer_sel[] =
|
|
|
|
{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
|
|
|
|
static const int pio_dnxfer_sel[] =
|
|
|
|
{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
|
|
|
|
int sel;
|
|
|
|
|
|
|
|
if (dev->xfer_shift != ATA_SHIFT_PIO)
|
|
|
|
sel = dma_dnxfer_sel[dev->spdn_cnt];
|
|
|
|
else
|
|
|
|
sel = pio_dnxfer_sel[dev->spdn_cnt];
|
|
|
|
|
|
|
|
dev->spdn_cnt++;
|
|
|
|
|
|
|
|
if (ata_down_xfermask_limit(dev, sel) == 0) {
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Fall back to PIO? Slowing down to PIO is meaningless for
|
2007-11-27 18:28:57 +08:00
|
|
|
* SATA ATA devices. Consider it only for PATA and SATAPI.
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
*/
|
|
|
|
if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
|
2007-11-27 18:28:57 +08:00
|
|
|
(link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
(dev->xfer_shift != ATA_SHIFT_PIO)) {
|
|
|
|
if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
|
|
|
|
dev->spdn_cnt = 0;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
action |= ATA_EH_RESET;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
return 0;
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
done:
|
|
|
|
/* device has been slowed down, blow error history */
|
2007-11-27 18:28:59 +08:00
|
|
|
if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
|
|
|
|
ata_ering_clear(&dev->ering);
|
libata: put some intelligence into EH speed down sequence
The current EH speed down code is more of a proof that the EH
framework is capable of adjusting transfer speed in response to error.
This patch puts some intelligence into EH speed down sequence. The
rules are..
* If there have been more than three timeout, HSM violation or
unclassified DEV errors for known supported commands during last 10
mins, NCQ is turned off.
* If there have been more than three timeout or HSM violation for known
supported command, transfer mode is slowed down. If DMA is active,
it is first slowered by one grade (e.g. UDMA133->100). If that
doesn't help, it's slowered to 40c limit (UDMA33). If PIO is
active, it's slowered by one grade first. If that doesn't help,
PIO0 is forced. Note that this rule does not change transfer mode.
DMA is never degraded into PIO by this rule.
* If there have been more than ten ATA bus, timeout, HSM violation or
unclassified device errors for known supported commands && speeding
down DMA mode didn't help, the device is forced into PIO mode. Note
that this rule is considered only for PATA devices and is pretty
difficult to trigger.
One error can only trigger one rule at a time. After a rule is
triggered, error history is cleared such that the next speed down
happens only after some number of errors are accumulated. This makes
sense because now speed down is done in bigger stride.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:22:31 +08:00
|
|
|
return action;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2007-08-06 17:36:24 +08:00
|
|
|
* ata_eh_link_autopsy - analyze error and determine recovery action
|
|
|
|
* @link: host link to perform autopsy on
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
2007-08-06 17:36:23 +08:00
|
|
|
* Analyze why @link failed and determine which recovery actions
|
|
|
|
* are needed. This function also sets more detailed AC_ERR_*
|
|
|
|
* values and fills sense data for ATAPI CHECK SENSE.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
2007-08-06 17:36:24 +08:00
|
|
|
static void ata_eh_link_autopsy(struct ata_link *link)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2007-10-31 09:17:05 +08:00
|
|
|
struct ata_device *dev;
|
2007-11-27 18:28:56 +08:00
|
|
|
unsigned int all_err_mask = 0, eflags = 0;
|
|
|
|
int tag;
|
2006-05-15 19:58:22 +08:00
|
|
|
u32 serror;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
DPRINTK("ENTER\n");
|
|
|
|
|
2006-07-03 15:07:26 +08:00
|
|
|
if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
|
|
|
|
return;
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* obtain and analyze SError */
|
2007-08-06 17:36:23 +08:00
|
|
|
rc = sata_scr_read(link, SCR_ERROR, &serror);
|
2006-05-15 19:58:22 +08:00
|
|
|
if (rc == 0) {
|
|
|
|
ehc->i.serror |= serror;
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_analyze_serror(link);
|
2007-07-16 13:29:41 +08:00
|
|
|
} else if (rc != -EOPNOTSUPP) {
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
/* SError read failed, force reset and probing */
|
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.probe_mask |= ATA_ALL_DEVICES;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action |= ATA_EH_RESET;
|
2007-07-16 13:29:41 +08:00
|
|
|
ehc->i.err_mask |= AC_ERR_OTHER;
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2006-05-15 20:03:46 +08:00
|
|
|
/* analyze NCQ failure */
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_analyze_ncq_error(link);
|
2006-05-15 20:03:46 +08:00
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* any real error trumps AC_ERR_OTHER */
|
|
|
|
if (ehc->i.err_mask & ~AC_ERR_OTHER)
|
|
|
|
ehc->i.err_mask &= ~AC_ERR_OTHER;
|
|
|
|
|
|
|
|
all_err_mask |= ehc->i.err_mask;
|
|
|
|
|
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link)
|
2006-05-15 19:58:22 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/* inherit upper level err_mask */
|
|
|
|
qc->err_mask |= ehc->i.err_mask;
|
|
|
|
|
|
|
|
/* analyze TF */
|
2006-07-08 19:17:26 +08:00
|
|
|
ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
/* DEV errors are probably spurious in case of ATA_BUS error */
|
|
|
|
if (qc->err_mask & AC_ERR_ATA_BUS)
|
|
|
|
qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
|
|
|
|
AC_ERR_INVALID);
|
|
|
|
|
|
|
|
/* any real error trumps unknown error */
|
|
|
|
if (qc->err_mask & ~AC_ERR_OTHER)
|
|
|
|
qc->err_mask &= ~AC_ERR_OTHER;
|
|
|
|
|
|
|
|
/* SENSE_VALID trumps dev/unknown error and revalidation */
|
2007-10-26 15:12:41 +08:00
|
|
|
if (qc->flags & ATA_QCFLAG_SENSE_VALID)
|
2006-05-15 19:58:22 +08:00
|
|
|
qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
|
|
|
|
|
2008-03-27 18:14:24 +08:00
|
|
|
/* determine whether the command is worth retrying */
|
|
|
|
if (!(qc->err_mask & AC_ERR_INVALID) &&
|
|
|
|
((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
|
|
|
|
qc->flags |= ATA_QCFLAG_RETRY;
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* accumulate error info */
|
2006-07-08 19:17:26 +08:00
|
|
|
ehc->i.dev = qc->dev;
|
2006-05-15 19:58:22 +08:00
|
|
|
all_err_mask |= qc->err_mask;
|
|
|
|
if (qc->flags & ATA_QCFLAG_IO)
|
2007-11-27 18:28:56 +08:00
|
|
|
eflags |= ATA_EFLAG_IS_IO;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2006-05-16 11:58:24 +08:00
|
|
|
/* enforce default EH actions */
|
2006-06-29 00:29:30 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_FROZEN ||
|
2006-05-16 11:58:24 +08:00
|
|
|
all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action |= ATA_EH_RESET;
|
2007-11-27 18:28:56 +08:00
|
|
|
else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
|
|
|
|
(!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
|
2006-07-08 19:17:26 +08:00
|
|
|
ehc->i.action |= ATA_EH_REVALIDATE;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-10-31 09:17:05 +08:00
|
|
|
/* If we have offending qcs and the associated failed device,
|
|
|
|
* perform per-dev EH action only on the offending device.
|
|
|
|
*/
|
2006-07-08 19:17:26 +08:00
|
|
|
if (ehc->i.dev) {
|
|
|
|
ehc->i.dev_action[ehc->i.dev->devno] |=
|
|
|
|
ehc->i.action & ATA_EH_PERDEV_MASK;
|
|
|
|
ehc->i.action &= ~ATA_EH_PERDEV_MASK;
|
2006-06-19 17:27:23 +08:00
|
|
|
}
|
|
|
|
|
2008-01-10 12:41:23 +08:00
|
|
|
/* propagate timeout to host link */
|
|
|
|
if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
|
|
|
|
ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;
|
|
|
|
|
|
|
|
/* record error and consider speeding down */
|
2007-10-31 09:17:05 +08:00
|
|
|
dev = ehc->i.dev;
|
2008-01-10 12:41:23 +08:00
|
|
|
if (!dev && ((ata_link_max_devices(link) == 1 &&
|
|
|
|
ata_dev_enabled(link->device))))
|
|
|
|
dev = link->device;
|
2007-10-31 09:17:05 +08:00
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
if (dev) {
|
|
|
|
if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
|
|
|
|
eflags |= ATA_EFLAG_DUBIOUS_XFER;
|
2007-11-27 18:28:56 +08:00
|
|
|
ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
|
2007-11-27 18:28:59 +08:00
|
|
|
}
|
2007-10-31 09:17:05 +08:00
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
DPRINTK("EXIT\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2007-08-06 17:36:24 +08:00
|
|
|
* ata_eh_autopsy - analyze error and determine recovery action
|
|
|
|
* @ap: host port to perform autopsy on
|
|
|
|
*
|
|
|
|
* Analyze all links of @ap and determine why they failed and
|
|
|
|
* which recovery actions are needed.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_autopsy(struct ata_port *ap)
|
2007-08-06 17:36:24 +08:00
|
|
|
{
|
|
|
|
struct ata_link *link;
|
|
|
|
|
2008-01-10 12:41:23 +08:00
|
|
|
ata_port_for_each_link(link, ap)
|
2007-08-06 17:36:24 +08:00
|
|
|
ata_eh_link_autopsy(link);
|
2008-01-10 12:41:23 +08:00
|
|
|
|
|
|
|
/* Autopsy of fanout ports can affect host link autopsy.
|
|
|
|
* Perform host link autopsy last.
|
|
|
|
*/
|
2008-04-07 21:47:22 +08:00
|
|
|
if (sata_pmp_attached(ap))
|
2008-01-10 12:41:23 +08:00
|
|
|
ata_eh_link_autopsy(&ap->link);
|
2007-08-06 17:36:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_link_report - report error handling to user
|
2007-08-06 17:36:23 +08:00
|
|
|
* @link: ATA link EH is going on
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* Report EH to user.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-08-06 17:36:24 +08:00
|
|
|
static void ata_eh_link_report(struct ata_link *link)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
const char *frozen, *desc;
|
2007-08-18 12:28:49 +08:00
|
|
|
char tries_buf[6];
|
2006-05-15 19:58:22 +08:00
|
|
|
int tag, nr_failed = 0;
|
|
|
|
|
2007-10-09 13:57:56 +08:00
|
|
|
/* A quiet EH session produces no report at all. */
if (ehc->i.flags & ATA_EHI_QUIET)
|
|
|
|
return;
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* desc stays NULL unless the EH info carries a non-empty description. */
desc = NULL;
|
|
|
|
if (ehc->i.desc[0] != '\0')
|
|
|
|
desc = ehc->i.desc;
|
|
|
|
|
|
|
|
/* First pass: count the failed commands that belong to this link. */
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
|
|
|
|
|
2007-10-26 15:19:26 +08:00
|
|
|
/* Skip commands that did not fail, that belong to another link, or
 * that are flagged quiet and failed with exactly AC_ERR_DEV.
 */
if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link ||
|
|
|
|
((qc->flags & ATA_QCFLAG_QUIET) &&
|
|
|
|
qc->err_mask == AC_ERR_DEV))
|
2006-05-15 19:58:22 +08:00
|
|
|
continue;
|
|
|
|
/* Also skip commands that have valid sense data and no error mask. */
if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
nr_failed++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* No counted command and no link-level error mask: nothing to print. */
if (!nr_failed && !ehc->i.err_mask)
|
|
|
|
return;
|
|
|
|
|
|
|
|
frozen = "";
|
2006-06-29 00:29:30 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_FROZEN)
|
2006-05-15 19:58:22 +08:00
|
|
|
frozen = " frozen";
|
|
|
|
|
2007-08-18 12:28:49 +08:00
|
|
|
/* tries_buf is left empty unless eh_tries is below ATA_EH_MAX_TRIES, in
 * which case it receives the " t<eh_tries>" suffix for the summary line.
 */
memset(tries_buf, 0, sizeof(tries_buf));
|
|
|
|
if (ap->eh_tries < ATA_EH_MAX_TRIES)
|
|
|
|
snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d",
|
|
|
|
ap->eh_tries);
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* Print the exception summary against the device when the EH info names
 * one, otherwise against the link; both branches print the same fields.
 */
if (ehc->i.dev) {
|
2006-05-15 20:03:46 +08:00
|
|
|
ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
|
2007-08-18 12:28:49 +08:00
|
|
|
"SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
|
|
|
|
ehc->i.err_mask, link->sactive, ehc->i.serror,
|
|
|
|
ehc->i.action, frozen, tries_buf);
|
2006-05-15 19:58:22 +08:00
|
|
|
if (desc)
|
2007-07-16 13:29:39 +08:00
|
|
|
ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc);
|
2006-05-15 19:58:22 +08:00
|
|
|
} else {
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_printk(link, KERN_ERR, "exception Emask 0x%x "
|
2007-08-18 12:28:49 +08:00
|
|
|
"SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
|
|
|
|
ehc->i.err_mask, link->sactive, ehc->i.serror,
|
|
|
|
ehc->i.action, frozen, tries_buf);
|
2006-05-15 19:58:22 +08:00
|
|
|
if (desc)
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_printk(link, KERN_ERR, "%s\n", desc);
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-10-02 23:22:02 +08:00
|
|
|
/* Decode every set SError bit into a short mnemonic on one line. */
if (ehc->i.serror)
|
|
|
|
ata_port_printk(ap, KERN_ERR,
|
|
|
|
"SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
|
|
|
|
ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
|
|
|
|
ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
|
|
|
|
ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
|
|
|
|
ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
|
|
|
|
ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
|
|
|
|
ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
|
|
|
|
ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
|
|
|
|
ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
|
|
|
|
ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
|
|
|
|
ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
|
|
|
|
ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
|
|
|
|
ehc->i.serror & SERR_CRC ? "BadCRC " : "",
|
|
|
|
ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
|
|
|
|
ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
|
|
|
|
ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
|
|
|
|
ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
|
2007-10-19 18:42:56 +08:00
|
|
|
ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");
|
2007-10-02 23:22:02 +08:00
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* Second pass: dump the command and result taskfiles of every failed
 * command on this link that has a non-zero err_mask.
 */
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
|
2006-11-14 21:36:12 +08:00
|
|
|
struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
|
2007-11-28 22:16:09 +08:00
|
|
|
const u8 *cdb = qc->cdb;
|
|
|
|
char data_buf[20] = "";
|
|
|
|
char cdb_buf[70] = "";
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
if (!(qc->flags & ATA_QCFLAG_FAILED) ||
|
|
|
|
qc->dev->link != link || !qc->err_mask)
|
2006-05-15 19:58:22 +08:00
|
|
|
continue;
|
|
|
|
|
2007-11-28 22:16:09 +08:00
|
|
|
/* For commands with a data direction, describe the protocol, the byte
 * count and the direction; data_buf stays empty otherwise.
 */
if (qc->dma_dir != DMA_NONE) {
|
|
|
|
static const char *dma_str[] = {
|
|
|
|
[DMA_BIDIRECTIONAL] = "bidi",
|
|
|
|
[DMA_TO_DEVICE] = "out",
|
|
|
|
[DMA_FROM_DEVICE] = "in",
|
|
|
|
};
|
|
|
|
static const char *prot_str[] = {
|
|
|
|
[ATA_PROT_PIO] = "pio",
|
|
|
|
[ATA_PROT_DMA] = "dma",
|
|
|
|
[ATA_PROT_NCQ] = "ncq",
|
2007-12-19 05:34:43 +08:00
|
|
|
[ATAPI_PROT_PIO] = "pio",
|
|
|
|
[ATAPI_PROT_DMA] = "dma",
|
2007-11-28 22:16:09 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
snprintf(data_buf, sizeof(data_buf), " %s %u %s",
|
|
|
|
prot_str[qc->tf.protocol], qc->nbytes,
|
|
|
|
dma_str[qc->dma_dir]);
|
|
|
|
}
|
|
|
|
|
2007-12-02 07:05:39 +08:00
|
|
|
/* ATAPI commands additionally get 16 bytes of their CDB formatted. */
if (ata_is_atapi(qc->tf.protocol))
|
2007-11-28 22:16:09 +08:00
|
|
|
snprintf(cdb_buf, sizeof(cdb_buf),
|
|
|
|
"cdb %02x %02x %02x %02x %02x %02x %02x %02x "
|
|
|
|
"%02x %02x %02x %02x %02x %02x %02x %02x\n ",
|
|
|
|
cdb[0], cdb[1], cdb[2], cdb[3],
|
|
|
|
cdb[4], cdb[5], cdb[6], cdb[7],
|
|
|
|
cdb[8], cdb[9], cdb[10], cdb[11],
|
|
|
|
cdb[12], cdb[13], cdb[14], cdb[15]);
|
|
|
|
|
2006-11-14 21:36:12 +08:00
|
|
|
ata_dev_printk(qc->dev, KERN_ERR,
|
|
|
|
"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
|
2007-11-28 22:16:09 +08:00
|
|
|
"tag %d%s\n %s"
|
2006-11-14 21:36:12 +08:00
|
|
|
"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
|
2007-07-16 13:29:40 +08:00
|
|
|
"Emask 0x%x (%s)%s\n",
|
2006-11-14 21:36:12 +08:00
|
|
|
cmd->command, cmd->feature, cmd->nsect,
|
|
|
|
cmd->lbal, cmd->lbam, cmd->lbah,
|
|
|
|
cmd->hob_feature, cmd->hob_nsect,
|
|
|
|
cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
|
2007-11-28 22:16:09 +08:00
|
|
|
cmd->device, qc->tag, data_buf, cdb_buf,
|
2006-11-14 21:36:12 +08:00
|
|
|
res->command, res->feature, res->nsect,
|
|
|
|
res->lbal, res->lbam, res->lbah,
|
|
|
|
res->hob_feature, res->hob_nsect,
|
|
|
|
res->hob_lbal, res->hob_lbam, res->hob_lbah,
|
2007-07-16 13:29:40 +08:00
|
|
|
res->device, qc->err_mask, ata_err_string(qc->err_mask),
|
|
|
|
qc->err_mask & AC_ERR_NCQ ? " <F>" : "");
|
2007-10-02 23:22:02 +08:00
|
|
|
|
|
|
|
|
/* Decode the status bits held in res->command; when ATA_BUSY is set it
 * is reported alone and the remaining bits are not printed.
 */
if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
|
2007-10-19 18:42:56 +08:00
|
|
|
ATA_ERR)) {
|
2007-10-02 23:22:02 +08:00
|
|
|
if (res->command & ATA_BUSY)
|
|
|
|
ata_dev_printk(qc->dev, KERN_ERR,
|
2007-10-19 18:42:56 +08:00
|
|
|
"status: { Busy }\n");
|
2007-10-02 23:22:02 +08:00
|
|
|
else
|
|
|
|
ata_dev_printk(qc->dev, KERN_ERR,
|
|
|
|
"status: { %s%s%s%s}\n",
|
|
|
|
res->command & ATA_DRDY ? "DRDY " : "",
|
|
|
|
res->command & ATA_DF ? "DF " : "",
|
|
|
|
res->command & ATA_DRQ ? "DRQ " : "",
|
2007-10-19 18:42:56 +08:00
|
|
|
res->command & ATA_ERR ? "ERR " : "");
|
2007-10-02 23:22:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Decode the error bits held in res->feature, except for commands
 * issued as ATA_CMD_PACKET.
 */
if (cmd->command != ATA_CMD_PACKET &&
|
|
|
|
(res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF |
|
|
|
|
ATA_ABORTED)))
|
|
|
|
ata_dev_printk(qc->dev, KERN_ERR,
|
|
|
|
"error: { %s%s%s%s}\n",
|
|
|
|
res->feature & ATA_ICRC ? "ICRC " : "",
|
|
|
|
res->feature & ATA_UNC ? "UNC " : "",
|
|
|
|
res->feature & ATA_IDNF ? "IDNF " : "",
|
2007-10-19 18:42:56 +08:00
|
|
|
res->feature & ATA_ABORTED ? "ABRT " : "");
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_report - report error handling to user
|
|
|
|
* @ap: ATA port to report EH about
|
|
|
|
*
|
|
|
|
* Report EH to user.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_report(struct ata_port *ap)
|
2007-08-06 17:36:24 +08:00
|
|
|
{
|
|
|
|
struct ata_link *link;
|
|
|
|
|
|
|
|
/* Report each link in turn. This uses __ata_port_for_each_link, not the
 * ata_port_for_each_link iterator used by ata_eh_autopsy().
 * NOTE(review): presumably the double-underscore variant also visits the
 * host link when a PMP is attached - confirm against its definition.
 */
__ata_port_for_each_link(link, ap)
|
|
|
|
ata_eh_link_report(link);
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
/*
 * ata_do_reset - invoke a reset method on a link with a clean class array
 * @link: link to reset
 * @reset: reset method to call
 * @classes: per-device class array, indexed by dev->devno
 * @deadline: deadline passed through unchanged to @reset
 *
 * Sets the class of every device on @link to ATA_DEV_UNKNOWN, then calls
 * @reset with @link, @classes and @deadline.
 *
 * RETURNS:
 * Whatever @reset returns.
 */
static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset,
|
libata: add deadline support to prereset and reset methods
Add @deadline to prereset and reset methods and make them honor it.
ata_wait_ready() which directly takes @deadline is implemented to be
used as the wait function. This patch is in preparation for EH timing
improvements.
* ata_wait_ready() never does busy sleep. It's only used from EH and
no wait in EH is that urgent. This function also prints 'be
patient' message automatically after 5 secs of waiting if more than
3 secs is remaining till deadline.
* ata_bus_post_reset() now fails with error code if any of its wait
fails. This is important because earlier reset tries will have
shorter timeout than the spec requires. If a device fails to
respond before the short timeout, reset should be retried with
longer timeout rather than silently ignoring the device.
There are three behavior differences.
1. Timeout is applied to both devices at once, not separately. This
is more consistent with what the spec says.
2. When a device passes devchk but fails to become ready before
deadline. Previouly, post_reset would just succeed and let
device classification remove the device. New code fails the
reset thus causing reset retry. After a few times, EH will give
up disabling the port.
3. When slave device passes devchk but fails to become accessible
(TF-wise) after reset. Original code disables dev1 after 30s
timeout and continues as if the device doesn't exist, while the
patched code fails reset. When this happens, new code fails
reset on whole port rather than proceeding with only the primary
device.
If the failing device is suffering transient problems, new code
retries reset which is a better behavior. If the failing device is
actually broken, the net effect is identical to it, but not to the
other device sharing the channel. In the previous code, reset would
have succeeded after 30s thus detecting the working one. In the new
code, reset fails and whole port gets disabled. IMO, it's a
pathological case anyway (broken device sharing bus with working
one) and doesn't really matter.
* ata_bus_softreset() is changed to return error code from
ata_bus_post_reset(). It used to return 0 unconditionally.
* Spin up waiting is to be removed and not converted to honor
deadline.
* To be on the safe side, deadline is set to 40s for the time being.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-02-02 15:50:52 +08:00
|
|
|
unsigned int *classes, unsigned long deadline)
|
2006-05-31 17:28:24 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *dev;
|
2006-05-31 17:28:24 +08:00
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link)
|
2007-08-06 17:36:23 +08:00
|
|
|
classes[dev->devno] = ATA_DEV_UNKNOWN;
|
2006-05-31 17:28:24 +08:00
|
|
|
|
|
2008-05-19 00:15:08 +08:00
|
|
|
return reset(link, classes, deadline);
|
2006-05-31 17:28:24 +08:00
|
|
|
}
|
|
|
|
|
2007-09-23 12:14:12 +08:00
|
|
|
/*
 * ata_eh_followup_srst_needed - decide whether a follow-up softreset is needed
 * @link: link that was just reset
 * @rc: return code of the reset that was just performed
 * @classify: non-zero if the caller wants devices classified
 * @classes: per-device class array (not read by this function)
 *
 * Decision order:
 *  - links flagged ATA_LFLAG_NO_SRST, or offline links, never need one;
 *  - rc == -EAGAIN needs one when @classify is set, and is otherwise
 *    treated like success;
 *  - any other non-zero @rc needs none;
 *  - a successful reset needs one only on the host link of a port for
 *    which sata_pmp_supported() is true.
 *
 * RETURNS:
 * 1 if a follow-up softreset should be issued, 0 otherwise.
 */
static int ata_eh_followup_srst_needed(struct ata_link *link,
|
|
|
|
int rc, int classify,
|
2006-05-31 17:27:50 +08:00
|
|
|
const unsigned int *classes)
|
|
|
|
{
|
2008-04-08 00:46:56 +08:00
|
|
|
if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link))
|
2007-09-23 12:14:12 +08:00
|
|
|
return 0;
|
2008-04-07 21:47:20 +08:00
|
|
|
if (rc == -EAGAIN) {
|
|
|
|
if (classify)
|
|
|
|
return 1;
|
|
|
|
rc = 0;
|
|
|
|
}
|
2006-05-31 17:27:50 +08:00
|
|
|
if (rc != 0)
|
|
|
|
return 0;
|
2008-04-07 21:47:22 +08:00
|
|
|
if (sata_pmp_supported(link->ap) && ata_is_host_link(link))
|
2007-09-23 12:19:53 +08:00
|
|
|
return 1;
|
2006-05-31 17:27:50 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-09-23 12:14:12 +08:00
|
|
|
int ata_eh_reset(struct ata_link *link, int classify,
|
|
|
|
ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
|
|
|
|
ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-10-31 09:17:03 +08:00
|
|
|
const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts);
|
2007-10-09 14:06:10 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-31 17:27:50 +08:00
|
|
|
unsigned int *classes = ehc->classes;
|
2007-10-31 09:17:03 +08:00
|
|
|
unsigned int lflags = link->flags;
|
2006-07-03 15:07:26 +08:00
|
|
|
int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
|
2007-02-02 15:50:52 +08:00
|
|
|
int try = 0;
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *dev;
|
2007-10-31 09:17:03 +08:00
|
|
|
unsigned long deadline, now;
|
2006-05-15 19:58:22 +08:00
|
|
|
ata_reset_fn_t reset;
|
2007-10-09 14:06:10 +08:00
|
|
|
unsigned long flags;
|
2007-10-31 09:17:03 +08:00
|
|
|
u32 sstatus;
|
2008-05-19 00:15:08 +08:00
|
|
|
int nr_known, rc;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
/*
|
|
|
|
* Prepare to reset
|
|
|
|
*/
|
2007-10-09 14:06:10 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
ap->pflags |= ATA_PFLAG_RESETTING;
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
|
2006-07-10 22:18:46 +08:00
|
|
|
|
2007-10-29 15:41:09 +08:00
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
/* If we issue an SRST then an ATA drive (not ATAPI)
|
|
|
|
* may change configuration and be in PIO0 timing. If
|
|
|
|
* we do a hard reset (or are coming from power on)
|
|
|
|
* this is true for ATA or ATAPI. Until we've set a
|
|
|
|
* suitable controller mode we should not touch the
|
|
|
|
* bus as we may be talking too fast.
|
|
|
|
*/
|
|
|
|
dev->pio_mode = XFER_PIO_0;
|
|
|
|
|
|
|
|
/* If the controller has a pio mode setup function
|
|
|
|
* then use it to set the chipset to rights. Don't
|
|
|
|
* touch the DMA setup as that will be dealt with when
|
|
|
|
* configuring devices.
|
|
|
|
*/
|
|
|
|
if (ap->ops->set_piomode)
|
|
|
|
ap->ops->set_piomode(ap, dev);
|
|
|
|
}
|
|
|
|
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
/* prefer hardreset */
|
2008-05-19 00:15:06 +08:00
|
|
|
reset = NULL;
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action &= ~ATA_EH_RESET;
|
|
|
|
if (hardreset) {
|
|
|
|
reset = hardreset;
|
|
|
|
ehc->i.action = ATA_EH_HARDRESET;
|
2008-01-30 17:18:26 +08:00
|
|
|
} else if (softreset) {
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
reset = softreset;
|
|
|
|
ehc->i.action = ATA_EH_SOFTRESET;
|
|
|
|
}
|
2006-05-31 17:27:48 +08:00
|
|
|
|
|
|
|
if (prereset) {
|
2007-08-06 17:36:23 +08:00
|
|
|
rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT);
|
2006-05-31 17:27:48 +08:00
|
|
|
if (rc) {
|
2006-09-27 00:53:38 +08:00
|
|
|
if (rc == -ENOENT) {
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_printk(link, KERN_DEBUG,
|
2007-03-12 16:24:08 +08:00
|
|
|
"port disabled. ignoring.\n");
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action &= ~ATA_EH_RESET;
|
2007-03-12 16:24:08 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link)
|
2007-08-06 17:36:23 +08:00
|
|
|
classes[dev->devno] = ATA_DEV_NONE;
|
2007-03-12 16:24:08 +08:00
|
|
|
|
|
|
|
rc = 0;
|
2006-09-27 00:53:38 +08:00
|
|
|
} else
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_printk(link, KERN_ERR,
|
2006-05-31 17:27:48 +08:00
|
|
|
"prereset failed (errno=%d)\n", rc);
|
2007-07-16 13:29:41 +08:00
|
|
|
goto out;
|
2006-05-31 17:27:48 +08:00
|
|
|
}
|
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
/* prereset() might have cleared ATA_EH_RESET. If so,
|
|
|
|
* bang classes and return.
|
|
|
|
*/
|
|
|
|
if (reset && !(ehc->i.action & ATA_EH_RESET)) {
|
|
|
|
ata_link_for_each_dev(dev, link)
|
|
|
|
classes[dev->devno] = ATA_DEV_NONE;
|
|
|
|
rc = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
2006-05-31 17:27:48 +08:00
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
retry:
|
2008-05-19 00:15:06 +08:00
|
|
|
/*
|
|
|
|
* Perform reset
|
|
|
|
*/
|
2008-05-19 00:15:07 +08:00
|
|
|
if (ata_is_host_link(link))
|
|
|
|
ata_eh_freeze_port(ap);
|
|
|
|
|
2007-02-02 15:50:52 +08:00
|
|
|
deadline = jiffies + ata_eh_reset_timeouts[try++];
|
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
if (reset) {
|
|
|
|
if (verbose)
|
|
|
|
ata_link_printk(link, KERN_INFO, "%s resetting link\n",
|
|
|
|
reset == softreset ? "soft" : "hard");
|
|
|
|
|
|
|
|
/* mark that this EH session started with reset */
|
|
|
|
if (reset == hardreset)
|
|
|
|
ehc->i.flags |= ATA_EHI_DID_HARDRESET;
|
|
|
|
else
|
|
|
|
ehc->i.flags |= ATA_EHI_DID_SOFTRESET;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
rc = ata_do_reset(link, reset, classes, deadline);
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
if (reset == hardreset &&
|
|
|
|
ata_eh_followup_srst_needed(link, rc, classify, classes)) {
|
|
|
|
/* okay, let's do follow-up softreset */
|
|
|
|
reset = softreset;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
if (!reset) {
|
|
|
|
ata_link_printk(link, KERN_ERR,
|
|
|
|
"follow-up softreset required "
|
|
|
|
"but no softreset avaliable\n");
|
|
|
|
rc = -EINVAL;
|
|
|
|
goto fail;
|
|
|
|
}
|
2006-05-31 17:27:50 +08:00
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
ata_eh_about_to_do(link, NULL, ATA_EH_RESET);
|
|
|
|
rc = ata_do_reset(link, reset, classes, deadline);
|
2006-05-31 17:27:50 +08:00
|
|
|
}
|
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
/* -EAGAIN can happen if we skipped followup SRST */
|
|
|
|
if (rc && rc != -EAGAIN)
|
|
|
|
goto fail;
|
|
|
|
} else {
|
|
|
|
if (verbose)
|
|
|
|
ata_link_printk(link, KERN_INFO, "no reset method "
|
|
|
|
"available, skipping reset\n");
|
|
|
|
if (!(lflags & ATA_LFLAG_ASSUME_CLASS))
|
|
|
|
lflags |= ATA_LFLAG_ASSUME_ATA;
|
2006-05-31 17:27:50 +08:00
|
|
|
}
|
|
|
|
|
2008-05-19 00:15:06 +08:00
|
|
|
/*
|
|
|
|
* Post-reset processing
|
|
|
|
*/
|
2007-10-31 09:17:03 +08:00
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
/* After the reset, the device state is PIO 0 and the
|
|
|
|
* controller state is undefined. Reset also wakes up
|
|
|
|
* drives from sleeping mode.
|
|
|
|
*/
|
|
|
|
dev->pio_mode = XFER_PIO_0;
|
|
|
|
dev->flags &= ~ATA_DFLAG_SLEEPING;
|
2007-02-02 15:50:52 +08:00
|
|
|
|
2007-10-31 09:17:03 +08:00
|
|
|
if (ata_link_offline(link))
|
|
|
|
continue;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2008-01-08 19:26:12 +08:00
|
|
|
/* apply class override */
|
2007-10-31 09:17:03 +08:00
|
|
|
if (lflags & ATA_LFLAG_ASSUME_ATA)
|
|
|
|
classes[dev->devno] = ATA_DEV_ATA;
|
|
|
|
else if (lflags & ATA_LFLAG_ASSUME_SEMB)
|
|
|
|
classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-10-31 09:17:03 +08:00
|
|
|
/* record current link speed */
|
|
|
|
if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0)
|
|
|
|
link->sata_spd = (sstatus >> 4) & 0xf;
|
2007-07-16 13:29:40 +08:00
|
|
|
|
2008-05-19 00:15:07 +08:00
|
|
|
/* thaw the port */
|
|
|
|
if (ata_is_host_link(link))
|
|
|
|
ata_eh_thaw_port(ap);
|
|
|
|
|
2008-05-19 00:15:08 +08:00
|
|
|
/* postreset() should clear hardware SError. Although SError
|
|
|
|
* is cleared during link resume, clearing SError here is
|
|
|
|
* necessary as some PHYs raise hotplug events after SRST.
|
|
|
|
* This introduces race condition where hotplug occurs between
|
|
|
|
* reset and here. This race is mediated by cross checking
|
|
|
|
* link onlineness and classification result later.
|
|
|
|
*/
|
2007-10-31 09:17:03 +08:00
|
|
|
if (postreset)
|
|
|
|
postreset(link, classes);
|
2006-05-31 17:27:23 +08:00
|
|
|
|
2008-05-19 00:15:08 +08:00
|
|
|
/* clear cached SError */
|
|
|
|
spin_lock_irqsave(link->ap->lock, flags);
|
|
|
|
link->eh_info.serror = 0;
|
|
|
|
spin_unlock_irqrestore(link->ap->lock, flags);
|
|
|
|
|
|
|
|
/* Make sure onlineness and classification result correspond.
|
|
|
|
* Hotplug could have happened during reset and some
|
|
|
|
* controllers fail to wait while a drive is spinning up after
|
|
|
|
* being hotplugged causing misdetection. By cross checking
|
|
|
|
* link onlineness and classification result, those conditions
|
|
|
|
* can be reliably detected and retried.
|
|
|
|
*/
|
|
|
|
nr_known = 0;
|
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
/* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */
|
|
|
|
if (classes[dev->devno] == ATA_DEV_UNKNOWN)
|
|
|
|
classes[dev->devno] = ATA_DEV_NONE;
|
|
|
|
else
|
|
|
|
nr_known++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (classify && !nr_known && ata_link_online(link)) {
|
|
|
|
if (try < max_tries) {
|
|
|
|
ata_link_printk(link, KERN_WARNING, "link online but "
|
|
|
|
"device misclassified, retrying\n");
|
|
|
|
rc = -EAGAIN;
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
ata_link_printk(link, KERN_WARNING,
|
|
|
|
"link online but device misclassified, "
|
|
|
|
"device detection might fail\n");
|
|
|
|
}
|
|
|
|
|
2007-10-31 09:17:03 +08:00
|
|
|
/* reset successful, schedule revalidation */
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, depending on how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it wherever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ata_eh_done(link, NULL, ATA_EH_RESET);
|
2007-10-31 09:17:03 +08:00
|
|
|
ehc->i.action |= ATA_EH_REVALIDATE;
|
2007-09-23 12:14:12 +08:00
|
|
|
|
2007-10-31 09:17:03 +08:00
|
|
|
rc = 0;
|
2007-07-16 13:29:41 +08:00
|
|
|
out:
|
|
|
|
/* clear hotplug flag */
|
|
|
|
ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
|
2007-10-09 14:06:10 +08:00
|
|
|
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
ap->pflags &= ~ATA_PFLAG_RESETTING;
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
return rc;
|
2007-10-31 09:17:03 +08:00
|
|
|
|
|
|
|
fail:
|
2008-04-07 21:47:20 +08:00
|
|
|
/* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
|
|
|
|
if (!ata_is_host_link(link) &&
|
|
|
|
sata_scr_read(link, SCR_STATUS, &sstatus))
|
|
|
|
rc = -ERESTART;
|
|
|
|
|
2007-10-31 09:17:03 +08:00
|
|
|
if (rc == -ERESTART || try >= max_tries)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
now = jiffies;
|
|
|
|
if (time_before(now, deadline)) {
|
|
|
|
unsigned long delta = deadline - now;
|
|
|
|
|
|
|
|
ata_link_printk(link, KERN_WARNING, "reset failed "
|
|
|
|
"(errno=%d), retrying in %u secs\n",
|
|
|
|
rc, (jiffies_to_msecs(delta) + 999) / 1000);
|
|
|
|
|
|
|
|
while (delta)
|
|
|
|
delta = schedule_timeout_uninterruptible(delta);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (rc == -EPIPE || try == max_tries - 1)
|
|
|
|
sata_down_spd_limit(link);
|
|
|
|
if (hardreset)
|
|
|
|
reset = hardreset;
|
|
|
|
goto retry;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static int ata_eh_revalidate_and_attach(struct ata_link *link,
|
2006-05-31 17:28:03 +08:00
|
|
|
struct ata_device **r_failed_dev)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
struct ata_device *dev;
|
2007-03-22 21:24:19 +08:00
|
|
|
unsigned int new_mask = 0;
|
2006-05-31 17:28:03 +08:00
|
|
|
unsigned long flags;
|
2007-08-06 17:36:23 +08:00
|
|
|
int rc = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
DPRINTK("ENTER\n");
|
|
|
|
|
2007-03-22 21:24:19 +08:00
|
|
|
/* For PATA drive side cable detection to work, IDENTIFY must
|
|
|
|
* be done backwards such that PDIAG- is released by the slave
|
|
|
|
* device before the master device is identified.
|
|
|
|
*/
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev_reverse(dev, link) {
|
2007-08-06 17:36:23 +08:00
|
|
|
unsigned int action = ata_eh_dev_action(dev);
|
|
|
|
unsigned int readid_flags = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2006-11-10 17:08:10 +08:00
|
|
|
if (ehc->i.flags & ATA_EHI_DID_RESET)
|
|
|
|
readid_flags |= ATA_READID_POSTRESET;
|
|
|
|
|
2007-05-05 03:27:47 +08:00
|
|
|
if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
|
2007-09-23 12:19:54 +08:00
|
|
|
WARN_ON(dev->class == ATA_DEV_PMP);
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
if (ata_link_offline(link)) {
|
2006-05-15 19:58:22 +08:00
|
|
|
rc = -EIO;
|
2007-03-22 21:24:19 +08:00
|
|
|
goto err;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
|
2007-09-23 12:14:12 +08:00
|
|
|
rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
|
|
|
|
readid_flags);
|
2006-05-15 19:58:22 +08:00
|
|
|
if (rc)
|
2007-03-22 21:24:19 +08:00
|
|
|
goto err;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_eh_done(link, dev, ATA_EH_REVALIDATE);
|
2006-06-19 17:27:23 +08:00
|
|
|
|
2006-11-01 17:39:27 +08:00
|
|
|
/* Configuration may have changed, reconfigure
|
|
|
|
* transfer mode.
|
|
|
|
*/
|
|
|
|
ehc->i.flags |= ATA_EHI_SETMODE;
|
|
|
|
|
2006-06-12 12:01:34 +08:00
|
|
|
/* schedule the scsi_rescan_device() here */
|
|
|
|
queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
|
2006-05-31 17:28:03 +08:00
|
|
|
} else if (dev->class == ATA_DEV_UNKNOWN &&
|
|
|
|
ehc->tries[dev->devno] &&
|
|
|
|
ata_class_enabled(ehc->classes[dev->devno])) {
|
|
|
|
dev->class = ehc->classes[dev->devno];
|
|
|
|
|
2007-09-23 12:19:54 +08:00
|
|
|
if (dev->class == ATA_DEV_PMP)
|
|
|
|
rc = sata_pmp_attach(dev);
|
|
|
|
else
|
|
|
|
rc = ata_dev_read_id(dev, &dev->class,
|
|
|
|
readid_flags, dev->id);
|
2007-03-22 21:24:19 +08:00
|
|
|
switch (rc) {
|
|
|
|
case 0:
|
2007-08-06 17:36:23 +08:00
|
|
|
new_mask |= 1 << dev->devno;
|
2007-03-22 21:24:19 +08:00
|
|
|
break;
|
|
|
|
case -ENOENT:
|
2006-11-10 17:08:10 +08:00
|
|
|
/* IDENTIFY was issued to non-existent
|
|
|
|
* device. No need to reset. Just
|
|
|
|
* thaw and kill the device.
|
|
|
|
*/
|
|
|
|
ata_eh_thaw_port(ap);
|
2006-05-31 17:28:03 +08:00
|
|
|
dev->class = ATA_DEV_UNKNOWN;
|
|
|
|
break;
|
2007-03-22 21:24:19 +08:00
|
|
|
default:
|
|
|
|
dev->class = ATA_DEV_UNKNOWN;
|
|
|
|
goto err;
|
2006-05-31 17:28:03 +08:00
|
|
|
}
|
2007-03-22 21:24:19 +08:00
|
|
|
}
|
|
|
|
}
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2007-04-23 01:05:53 +08:00
|
|
|
/* PDIAG- should have been released, ask cable type if post-reset */
|
2008-02-13 08:15:09 +08:00
|
|
|
if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) {
|
|
|
|
if (ap->ops->cable_detect)
|
|
|
|
ap->cbl = ap->ops->cable_detect(ap);
|
|
|
|
ata_force_cbl(ap);
|
|
|
|
}
|
2007-04-23 01:05:53 +08:00
|
|
|
|
2007-03-22 21:24:19 +08:00
|
|
|
/* Configure new devices forward such that user doesn't see
|
|
|
|
* device detection messages backwards.
|
|
|
|
*/
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link) {
|
2007-09-23 12:19:54 +08:00
|
|
|
if (!(new_mask & (1 << dev->devno)) ||
|
|
|
|
dev->class == ATA_DEV_PMP)
|
2007-03-22 21:24:19 +08:00
|
|
|
continue;
|
|
|
|
|
|
|
|
ehc->i.flags |= ATA_EHI_PRINTINFO;
|
|
|
|
rc = ata_dev_configure(dev);
|
|
|
|
ehc->i.flags &= ~ATA_EHI_PRINTINFO;
|
|
|
|
if (rc)
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
|
|
|
/* new device discovered, configure xfermode */
|
|
|
|
ehc->i.flags |= ATA_EHI_SETMODE;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-03-22 21:24:19 +08:00
|
|
|
return 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-03-22 21:24:19 +08:00
|
|
|
err:
|
|
|
|
*r_failed_dev = dev;
|
|
|
|
DPRINTK("EXIT rc=%d\n", rc);
|
2006-05-15 19:58:22 +08:00
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
2007-11-27 18:28:55 +08:00
|
|
|
/**
|
|
|
|
* ata_set_mode - Program timings and issue SET FEATURES - XFER
|
|
|
|
* @link: link on which timings will be programmed
|
|
|
|
* @r_failed_dev: out paramter for failed device
|
|
|
|
*
|
|
|
|
* Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If
|
|
|
|
* ata_set_mode() fails, pointer to the failing device is
|
|
|
|
* returned in @r_failed_dev.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* PCI/etc. bus probe sem.
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 0 on success, negative errno otherwise
|
|
|
|
*/
|
|
|
|
int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev)
|
|
|
|
{
|
|
|
|
struct ata_port *ap = link->ap;
|
2007-11-27 18:28:58 +08:00
|
|
|
struct ata_device *dev;
|
|
|
|
int rc;
|
2007-11-27 18:28:55 +08:00
|
|
|
|
2007-11-27 18:28:59 +08:00
|
|
|
/* if data transfer is verified, clear DUBIOUS_XFER on ering top */
|
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) {
|
|
|
|
struct ata_ering_entry *ent;
|
|
|
|
|
|
|
|
ent = ata_ering_top(&dev->ering);
|
|
|
|
if (ent)
|
|
|
|
ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-11-27 18:28:55 +08:00
|
|
|
/* has private set_mode? */
|
|
|
|
if (ap->ops->set_mode)
|
2007-11-27 18:28:58 +08:00
|
|
|
rc = ap->ops->set_mode(link, r_failed_dev);
|
|
|
|
else
|
|
|
|
rc = ata_do_set_mode(link, r_failed_dev);
|
|
|
|
|
|
|
|
/* if transfer mode has changed, set DUBIOUS_XFER on device */
|
|
|
|
ata_link_for_each_dev(dev, link) {
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
|
|
|
u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
|
|
|
|
u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
|
|
|
|
|
|
|
|
if (dev->xfer_mode != saved_xfer_mode ||
|
|
|
|
ata_ncq_enabled(dev) != saved_ncq)
|
|
|
|
dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
|
|
|
|
}
|
|
|
|
|
|
|
|
return rc;
|
2007-11-27 18:28:55 +08:00
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static int ata_link_nr_enabled(struct ata_link *link)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *dev;
|
|
|
|
int cnt = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link)
|
2007-08-06 17:36:23 +08:00
|
|
|
if (ata_dev_enabled(dev))
|
2006-05-15 19:58:22 +08:00
|
|
|
cnt++;
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static int ata_link_nr_vacant(struct ata_link *link)
|
2006-05-31 17:28:03 +08:00
|
|
|
{
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *dev;
|
|
|
|
int cnt = 0;
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link)
|
2007-08-06 17:36:23 +08:00
|
|
|
if (dev->class == ATA_DEV_UNKNOWN)
|
2006-05-31 17:28:03 +08:00
|
|
|
cnt++;
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:23 +08:00
|
|
|
static int ata_eh_skip_recovery(struct ata_link *link)
|
2006-05-31 17:28:03 +08:00
|
|
|
{
|
2008-01-23 23:05:14 +08:00
|
|
|
struct ata_port *ap = link->ap;
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2007-08-06 17:36:23 +08:00
|
|
|
struct ata_device *dev;
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2007-09-23 12:14:13 +08:00
|
|
|
/* skip disabled links */
|
|
|
|
if (link->flags & ATA_LFLAG_DISABLED)
|
|
|
|
return 1;
|
|
|
|
|
2008-01-23 23:05:14 +08:00
|
|
|
/* thaw frozen port and recover failed devices */
|
|
|
|
if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* reset at least once if reset is requested */
|
|
|
|
if ((ehc->i.action & ATA_EH_RESET) &&
|
|
|
|
!(ehc->i.flags & ATA_EHI_DID_RESET))
|
2006-05-31 17:28:03 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* skip if class codes for all vacant slots are ATA_DEV_NONE */
|
2007-08-06 17:36:23 +08:00
|
|
|
ata_link_for_each_dev(dev, link) {
|
2006-05-31 17:28:03 +08:00
|
|
|
if (dev->class == ATA_DEV_UNKNOWN &&
|
|
|
|
ehc->classes[dev->devno] != ATA_DEV_NONE)
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2007-11-27 18:28:54 +08:00
|
|
|
static int ata_eh_schedule_probe(struct ata_device *dev)
|
|
|
|
{
|
|
|
|
struct ata_eh_context *ehc = &dev->link->eh_context;
|
|
|
|
|
|
|
|
if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
|
|
|
|
(ehc->did_probe_mask & (1 << dev->devno)))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ata_eh_detach_dev(dev);
|
|
|
|
ata_dev_init(dev);
|
|
|
|
ehc->did_probe_mask |= (1 << dev->devno);
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action |= ATA_EH_RESET;
|
2007-11-27 18:28:58 +08:00
|
|
|
ehc->saved_xfer_mode[dev->devno] = 0;
|
|
|
|
ehc->saved_ncq_enabled &= ~(1 << dev->devno);
|
2007-11-27 18:28:54 +08:00
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
|
2007-07-01 18:05:58 +08:00
|
|
|
{
|
2007-08-06 17:36:22 +08:00
|
|
|
struct ata_eh_context *ehc = &dev->link->eh_context;
|
2007-07-01 18:05:58 +08:00
|
|
|
|
|
|
|
ehc->tries[dev->devno]--;
|
|
|
|
|
|
|
|
switch (err) {
|
|
|
|
case -ENODEV:
|
|
|
|
/* device missing or wrong IDENTIFY data, schedule probing */
|
|
|
|
ehc->i.probe_mask |= (1 << dev->devno);
|
|
|
|
case -EINVAL:
|
|
|
|
/* give it just one more chance */
|
|
|
|
ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
|
|
|
|
case -EIO:
|
2007-10-29 15:45:05 +08:00
|
|
|
if (ehc->tries[dev->devno] == 1 && dev->pio_mode > XFER_PIO_0) {
|
2007-07-01 18:05:58 +08:00
|
|
|
/* This is the last chance, better to slow
|
|
|
|
* down than lose it.
|
|
|
|
*/
|
2007-08-06 17:36:23 +08:00
|
|
|
sata_down_spd_limit(dev->link);
|
2007-07-01 18:05:58 +08:00
|
|
|
ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
|
|
|
|
/* disable device if it has used up all its chances */
|
|
|
|
ata_dev_disable(dev);
|
|
|
|
|
|
|
|
/* detach if offline */
|
2007-08-06 17:36:23 +08:00
|
|
|
if (ata_link_offline(dev->link))
|
2007-07-01 18:05:58 +08:00
|
|
|
ata_eh_detach_dev(dev);
|
|
|
|
|
2007-11-27 18:28:54 +08:00
|
|
|
/* schedule probe if necessary */
|
|
|
|
if (ata_eh_schedule_probe(dev))
|
2007-07-01 18:05:58 +08:00
|
|
|
ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
|
2007-08-06 17:36:24 +08:00
|
|
|
|
|
|
|
return 1;
|
2007-07-01 18:05:58 +08:00
|
|
|
} else {
|
libata: prefer hardreset
When both soft and hard resets are available, libata preferred
softreset till now. The logic behind it was to be softer to devices;
however, this doesn't really help much. Rationales for the change:
* BIOS may freeze lock certain things during boot and softreset can't
unlock those. This by itself is okay but during operation PHY event
or other error conditions can trigger hardreset and the device may
end up with different configuration.
For example, after a hardreset, previously unlockable HPA can be
unlocked resulting in different device size and thus revalidation
failure. Similar condition can occur during or after resume.
* Certain ATAPI devices require hardreset to recover after certain
error conditions. On PATA, this is done by issuing the DEVICE RESET
command. On SATA, COMRESET has equivalent effect. The problem is
that DEVICE RESET needs its own execution protocol.
For SFF controllers with bare TF access, it can be easily
implemented but more advanced controllers (e.g. ahci and sata_sil24)
require specialized implementations. Simply using hardreset solves
the problem nicely.
* COMRESET initialization sequence is the norm in SATA land and many
SATA devices don't work properly if only SRST is used. For example,
some PMPs behave this way and libata works around by always issuing
hardreset if the host supports PMP.
Like the above example, libata has developed a number of mechanisms
aiming to promote softreset to hardreset if softreset is not going
to work. This approach is time consuming and error prone.
Also, note that, dependingon how you read the specs, it could be
argued that PMP fan-out ports require COMRESET to start operation.
In fact, all the PMPs on the market except one don't work properly
if COMRESET is not issued to fan-out ports after PMP reset.
* COMRESET is an integral part of SATA connection and any working
device should be able to handle COMRESET properly. After all, it's
the way to signal hardreset during reboot. This is the most used
and recommended (at least by the ahci spec) method of resetting
devices.
So, this patch makes libata prefer hardreset over softreset by making
the following changes.
* Rename ATA_EH_RESET_MASK to ATA_EH_RESET and use it whereever
ATA_EH_{SOFT|HARD}RESET used to be used. ATA_EH_{SOFT|HARD}RESET is
now only used to tell prereset whether soft or hard reset will be
issued.
* Strip out now unneeded promote-to-hardreset logics from
ata_eh_reset(), ata_std_prereset(), sata_pmp_std_prereset() and
other places.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-01-23 23:05:14 +08:00
|
|
|
ehc->i.action |= ATA_EH_RESET;
|
2007-08-06 17:36:24 +08:00
|
|
|
return 0;
|
2007-07-01 18:05:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_recover - recover host port after error
|
|
|
|
* @ap: host port to recover
|
2006-05-31 17:27:48 +08:00
|
|
|
* @prereset: prereset method (can be NULL)
|
2006-05-15 19:58:22 +08:00
|
|
|
* @softreset: softreset method (can be NULL)
|
|
|
|
* @hardreset: hardreset method (can be NULL)
|
|
|
|
* @postreset: postreset method (can be NULL)
|
2007-08-06 17:36:24 +08:00
|
|
|
* @r_failed_link: out parameter for failed link
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* This is the alpha and omega, eum and yang, heart and soul of
|
|
|
|
* libata exception handling. On entry, actions required to
|
2007-08-06 17:36:24 +08:00
|
|
|
* recover each link and hotplug requests are recorded in the
|
|
|
|
* link's eh_context. This function executes all the operations
|
|
|
|
* with appropriate retrials and fallbacks to resurrect failed
|
2006-05-31 17:28:03 +08:00
|
|
|
* devices, detach goners and greet newcomers.
|
2006-05-15 19:58:22 +08:00
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*
|
|
|
|
* RETURNS:
|
|
|
|
* 0 on success, -errno on failure.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
|
|
|
|
ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
|
|
|
|
ata_postreset_fn_t postreset,
|
|
|
|
struct ata_link **r_failed_link)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:24 +08:00
|
|
|
struct ata_link *link;
|
2006-05-15 19:58:22 +08:00
|
|
|
struct ata_device *dev;
|
2007-08-06 17:36:24 +08:00
|
|
|
int nr_failed_devs, nr_disabled_devs;
|
2008-05-19 00:15:07 +08:00
|
|
|
int rc;
|
2007-09-23 12:14:13 +08:00
|
|
|
unsigned long flags;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
|
|
|
DPRINTK("ENTER\n");
|
|
|
|
|
|
|
|
/* prep for recovery */
|
2007-08-06 17:36:24 +08:00
|
|
|
ata_port_for_each_link(link, ap) {
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2007-09-23 12:14:13 +08:00
|
|
|
/* re-enable link? */
|
|
|
|
if (ehc->i.action & ATA_EH_ENABLE_LINK) {
|
|
|
|
ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
link->flags &= ~ATA_LFLAG_DISABLED;
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
ata_link_for_each_dev(dev, link) {
|
2007-09-23 12:14:12 +08:00
|
|
|
if (link->flags & ATA_LFLAG_NO_RETRY)
|
|
|
|
ehc->tries[dev->devno] = 1;
|
|
|
|
else
|
|
|
|
ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* collect port action mask recorded in dev actions */
|
|
|
|
ehc->i.action |= ehc->i.dev_action[dev->devno] &
|
|
|
|
~ATA_EH_PERDEV_MASK;
|
|
|
|
ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;
|
|
|
|
|
|
|
|
/* process hotplug request */
|
|
|
|
if (dev->flags & ATA_DFLAG_DETACH)
|
|
|
|
ata_eh_detach_dev(dev);
|
|
|
|
|
2007-11-27 18:28:54 +08:00
|
|
|
/* schedule probe if necessary */
|
|
|
|
if (!ata_dev_enabled(dev))
|
|
|
|
ata_eh_schedule_probe(dev);
|
2006-05-31 17:28:03 +08:00
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
retry:
|
|
|
|
rc = 0;
|
2007-08-06 17:36:24 +08:00
|
|
|
nr_failed_devs = 0;
|
|
|
|
nr_disabled_devs = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2006-06-12 13:11:43 +08:00
|
|
|
/* if UNLOADING, finish immediately */
|
2006-06-29 00:29:30 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_UNLOADING)
|
2006-06-12 13:11:43 +08:00
|
|
|
goto out;
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* prep for EH */
|
|
|
|
ata_port_for_each_link(link, ap) {
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* skip EH if possible. */
|
|
|
|
if (ata_eh_skip_recovery(link))
|
|
|
|
ehc->i.action = 0;
|
|
|
|
|
|
|
|
ata_link_for_each_dev(dev, link)
|
|
|
|
ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
|
|
|
|
}
|
2006-05-31 17:28:03 +08:00
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
/* reset */
|
2008-05-19 00:15:07 +08:00
|
|
|
ata_port_for_each_link(link, ap) {
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2007-08-06 17:36:24 +08:00
|
|
|
|
2008-05-19 00:15:07 +08:00
|
|
|
if (!(ehc->i.action & ATA_EH_RESET))
|
|
|
|
continue;
|
2007-08-06 17:36:24 +08:00
|
|
|
|
2008-05-19 00:15:07 +08:00
|
|
|
rc = ata_eh_reset(link, ata_link_nr_vacant(link),
|
|
|
|
prereset, softreset, hardreset, postreset);
|
|
|
|
if (rc) {
|
|
|
|
ata_link_printk(link, KERN_ERR,
|
|
|
|
"reset failed, giving up\n");
|
|
|
|
goto out;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* the rest */
|
|
|
|
ata_port_for_each_link(link, ap) {
|
|
|
|
struct ata_eh_context *ehc = &link->eh_context;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* revalidate existing devices and attach new ones */
|
|
|
|
rc = ata_eh_revalidate_and_attach(link, &dev);
|
2007-02-02 15:22:30 +08:00
|
|
|
if (rc)
|
2006-05-15 19:58:22 +08:00
|
|
|
goto dev_fail;
|
|
|
|
|
2007-09-23 12:19:54 +08:00
|
|
|
/* if PMP got attached, return, pmp EH will take care of it */
|
|
|
|
if (link->device->class == ATA_DEV_PMP) {
|
|
|
|
ehc->i.action = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* configure transfer mode if necessary */
|
|
|
|
if (ehc->i.flags & ATA_EHI_SETMODE) {
|
|
|
|
rc = ata_set_mode(link, &dev);
|
|
|
|
if (rc)
|
|
|
|
goto dev_fail;
|
|
|
|
ehc->i.flags &= ~ATA_EHI_SETMODE;
|
|
|
|
}
|
|
|
|
|
2008-03-27 17:37:14 +08:00
|
|
|
if (ehc->i.action & ATA_EH_LPM)
|
2007-10-25 12:58:59 +08:00
|
|
|
ata_link_for_each_dev(dev, link)
|
|
|
|
ata_dev_enable_pm(dev, ap->pm_policy);
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
/* this link is okay now */
|
|
|
|
ehc->i.flags = 0;
|
|
|
|
continue;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-10-19 18:42:56 +08:00
|
|
|
dev_fail:
|
2007-08-06 17:36:24 +08:00
|
|
|
nr_failed_devs++;
|
|
|
|
if (ata_eh_handle_dev_fail(dev, rc))
|
|
|
|
nr_disabled_devs++;
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-10-09 14:06:48 +08:00
|
|
|
if (ap->pflags & ATA_PFLAG_FROZEN) {
|
|
|
|
/* PMP reset requires working host port.
|
|
|
|
* Can't retry if it's frozen.
|
|
|
|
*/
|
2008-04-07 21:47:22 +08:00
|
|
|
if (sata_pmp_attached(ap))
|
2007-10-09 14:06:48 +08:00
|
|
|
goto out;
|
2007-08-06 17:36:24 +08:00
|
|
|
break;
|
2007-10-09 14:06:48 +08:00
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
if (nr_failed_devs) {
|
|
|
|
if (nr_failed_devs != nr_disabled_devs) {
|
|
|
|
ata_port_printk(ap, KERN_WARNING, "failed to recover "
|
|
|
|
"some devices, retrying in 5 secs\n");
|
|
|
|
ssleep(5);
|
|
|
|
} else {
|
|
|
|
/* no device left to recover, repeat fast */
|
|
|
|
msleep(500);
|
|
|
|
}
|
2006-05-15 19:58:22 +08:00
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
goto retry;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
2007-08-06 17:36:24 +08:00
|
|
|
out:
|
|
|
|
if (rc && r_failed_link)
|
|
|
|
*r_failed_link = link;
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
DPRINTK("EXIT, rc=%d\n", rc);
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_finish - finish up EH
|
|
|
|
* @ap: host port to finish EH for
|
|
|
|
*
|
|
|
|
* Recovery is complete. Clean up EH states and retry or finish
|
|
|
|
* failed qcs.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* None.
|
|
|
|
*/
|
2007-09-23 12:14:12 +08:00
|
|
|
void ata_eh_finish(struct ata_port *ap)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
|
|
|
int tag;
|
|
|
|
|
|
|
|
/* retry or finish qcs */
|
|
|
|
for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
|
|
|
|
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
|
|
|
|
|
|
|
|
if (!(qc->flags & ATA_QCFLAG_FAILED))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (qc->err_mask) {
|
|
|
|
/* FIXME: Once EH migration is complete,
|
|
|
|
* generate sense data in this function,
|
|
|
|
* considering both err_mask and tf.
|
|
|
|
*/
|
2008-03-27 18:14:24 +08:00
|
|
|
if (qc->flags & ATA_QCFLAG_RETRY)
|
2006-05-15 19:58:22 +08:00
|
|
|
ata_eh_qc_retry(qc);
|
2008-03-27 18:14:24 +08:00
|
|
|
else
|
|
|
|
ata_eh_qc_complete(qc);
|
2006-05-15 19:58:22 +08:00
|
|
|
} else {
|
|
|
|
if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
|
|
|
|
ata_eh_qc_complete(qc);
|
|
|
|
} else {
|
|
|
|
/* feed zero TF to sense generation */
|
|
|
|
memset(&qc->result_tf, 0, sizeof(qc->result_tf));
|
|
|
|
ata_eh_qc_retry(qc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-09-23 12:14:12 +08:00
|
|
|
|
|
|
|
/* make sure nr_active_links is zero after EH */
|
|
|
|
WARN_ON(ap->nr_active_links);
|
|
|
|
ap->nr_active_links = 0;
|
2006-05-15 19:58:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_do_eh - do standard error handling
|
|
|
|
* @ap: host port to handle error for
|
libata: make reset related methods proper port operations
Currently reset methods are not specified directly in the
ata_port_operations table. If a LLD wants to use custom reset
methods, it should construct and use a error_handler which uses those
reset methods. It's done this way for two reasons.
First, the ops table already contained too many methods and adding
four more of them would noticeably increase the amount of necessary
boilerplate code all over low level drivers.
Second, as ->error_handler uses those reset methods, it can get
confusing. ie. By overriding ->error_handler, those reset ops can be
made useless making layering a bit hazy.
Now that ops table uses inheritance, the first problem doesn't exist
anymore. The second isn't completely solved but is relieved by
providing default values - most drivers can just override what it has
implemented and don't have to concern itself about higher level
callbacks. In fact, there currently is no driver which actually
modifies error handling behavior. Drivers which override
->error_handler just wraps the standard error handler only to prepare
the controller for EH. I don't think making ops layering strict has
any noticeable benefit.
This patch makes ->prereset, ->softreset, ->hardreset, ->postreset and
their PMP counterparts propoer ops. Default ops are provided in the
base ops tables and drivers are converted to override individual reset
methods instead of creating custom error_handler.
* ata_std_error_handler() doesn't use sata_std_hardreset() if SCRs
aren't accessible. sata_promise doesn't need to use separate
error_handlers for PATA and SATA anymore.
* softreset is broken for sata_inic162x and sata_sx4. As libata now
always prefers hardreset, this doesn't really matter but the ops are
forced to NULL using ATA_OP_NULL for documentation purpose.
* pata_hpt374 needs to use different prereset for the first and second
PCI functions. This used to be done by branching from
hpt374_error_handler(). The proper way to do this is to use
separate ops and port_info tables for each function. Converted.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-03-25 11:22:50 +08:00
|
|
|
*
|
2006-05-31 17:27:48 +08:00
|
|
|
* @prereset: prereset method (can be NULL)
|
2006-05-15 19:58:22 +08:00
|
|
|
* @softreset: softreset method (can be NULL)
|
|
|
|
* @hardreset: hardreset method (can be NULL)
|
|
|
|
* @postreset: postreset method (can be NULL)
|
|
|
|
*
|
|
|
|
* Perform standard error handling sequence.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
2006-05-31 17:27:48 +08:00
|
|
|
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
|
|
|
|
ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
|
|
|
|
ata_postreset_fn_t postreset)
|
2006-05-15 19:58:22 +08:00
|
|
|
{
|
2007-08-06 17:36:24 +08:00
|
|
|
struct ata_device *dev;
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
ata_eh_autopsy(ap);
|
|
|
|
ata_eh_report(ap);
|
|
|
|
|
|
|
|
rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
|
|
|
|
NULL);
|
|
|
|
if (rc) {
|
|
|
|
ata_link_for_each_dev(dev, &ap->link)
|
|
|
|
ata_dev_disable(dev);
|
|
|
|
}
|
|
|
|
|
2006-05-15 19:58:22 +08:00
|
|
|
ata_eh_finish(ap);
|
|
|
|
}
|
2006-07-03 15:07:27 +08:00
|
|
|
|
libata: make reset related methods proper port operations
Currently reset methods are not specified directly in the
ata_port_operations table. If a LLD wants to use custom reset
methods, it should construct and use an error_handler which uses those
reset methods. It's done this way for two reasons.
First, the ops table already contained too many methods and adding
four more of them would noticeably increase the amount of necessary
boilerplate code all over low level drivers.
Second, as ->error_handler uses those reset methods, it can get
confusing. ie. By overriding ->error_handler, those reset ops can be
made useless making layering a bit hazy.
Now that ops table uses inheritance, the first problem doesn't exist
anymore. The second isn't completely solved but is relieved by
providing default values - most drivers can just override what it has
implemented and don't have to concern itself about higher level
callbacks. In fact, there currently is no driver which actually
modifies error handling behavior. Drivers which override
->error_handler just wrap the standard error handler only to prepare
the controller for EH. I don't think making ops layering strict has
any noticeable benefit.
This patch makes ->prereset, ->softreset, ->hardreset, ->postreset and
their PMP counterparts proper ops. Default ops are provided in the
base ops tables and drivers are converted to override individual reset
methods instead of creating custom error_handler.
* ata_std_error_handler() doesn't use sata_std_hardreset() if SCRs
aren't accessible. sata_promise doesn't need to use separate
error_handlers for PATA and SATA anymore.
* softreset is broken for sata_inic162x and sata_sx4. As libata now
always prefers hardreset, this doesn't really matter but the ops are
forced to NULL using ATA_OP_NULL for documentation purpose.
* pata_hpt374 needs to use different prereset for the first and second
PCI functions. This used to be done by branching from
hpt374_error_handler(). The proper way to do this is to use
separate ops and port_info tables for each function. Converted.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-03-25 11:22:50 +08:00
|
|
|
/**
|
|
|
|
* ata_std_error_handler - standard error handler
|
|
|
|
* @ap: host port to handle error for
|
|
|
|
*
|
|
|
|
* Standard error handler
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
|
|
|
void ata_std_error_handler(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
struct ata_port_operations *ops = ap->ops;
|
|
|
|
ata_reset_fn_t hardreset = ops->hardreset;
|
|
|
|
|
2008-04-07 21:47:19 +08:00
|
|
|
/* ignore built-in hardreset if SCR access is not available */
|
|
|
|
if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link))
|
libata: make reset related methods proper port operations
Currently reset methods are not specified directly in the
ata_port_operations table. If a LLD wants to use custom reset
methods, it should construct and use a error_handler which uses those
reset methods. It's done this way for two reasons.
First, the ops table already contained too many methods and adding
four more of them would noticeably increase the amount of necessary
boilerplate code all over low level drivers.
Second, as ->error_handler uses those reset methods, it can get
confusing. ie. By overriding ->error_handler, those reset ops can be
made useless making layering a bit hazy.
Now that ops table uses inheritance, the first problem doesn't exist
anymore. The second isn't completely solved but is relieved by
providing default values - most drivers can just override what it has
implemented and don't have to concern itself about higher level
callbacks. In fact, there currently is no driver which actually
modifies error handling behavior. Drivers which override
->error_handler just wraps the standard error handler only to prepare
the controller for EH. I don't think making ops layering strict has
any noticeable benefit.
This patch makes ->prereset, ->softreset, ->hardreset, ->postreset and
their PMP counterparts propoer ops. Default ops are provided in the
base ops tables and drivers are converted to override individual reset
methods instead of creating custom error_handler.
* ata_std_error_handler() doesn't use sata_std_hardreset() if SCRs
aren't accessible. sata_promise doesn't need to use separate
error_handlers for PATA and SATA anymore.
* softreset is broken for sata_inic162x and sata_sx4. As libata now
always prefers hardreset, this doesn't really matter but the ops are
forced to NULL using ATA_OP_NULL for documentation purpose.
* pata_hpt374 needs to use different prereset for the first and second
PCI functions. This used to be done by branching from
hpt374_error_handler(). The proper way to do this is to use
separate ops and port_info tables for each function. Converted.
Signed-off-by: Tejun Heo <htejun@gmail.com>
2008-03-25 11:22:50 +08:00
|
|
|
hardreset = NULL;
|
|
|
|
|
|
|
|
ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
|
|
|
|
}
|
|
|
|
|
2007-03-02 16:32:47 +08:00
|
|
|
#ifdef CONFIG_PM
|
2006-07-03 15:07:27 +08:00
|
|
|
/**
|
|
|
|
* ata_eh_handle_port_suspend - perform port suspend operation
|
|
|
|
* @ap: port to suspend
|
|
|
|
*
|
|
|
|
* Suspend @ap.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
|
|
|
static void ata_eh_handle_port_suspend(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
int rc = 0;
|
|
|
|
|
|
|
|
/* are we suspending? */
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
|
|
|
|
ap->pm_mesg.event == PM_EVENT_ON) {
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
|
|
|
WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
|
|
|
|
|
2007-05-15 02:28:16 +08:00
|
|
|
/* tell ACPI we're suspending */
|
|
|
|
rc = ata_acpi_on_suspend(ap);
|
|
|
|
if (rc)
|
|
|
|
goto out;
|
|
|
|
|
2006-07-03 15:07:27 +08:00
|
|
|
/* suspend */
|
|
|
|
ata_eh_freeze_port(ap);
|
|
|
|
|
|
|
|
if (ap->ops->port_suspend)
|
|
|
|
rc = ap->ops->port_suspend(ap, ap->pm_mesg);
|
|
|
|
|
2007-11-02 09:32:38 +08:00
|
|
|
ata_acpi_set_state(ap, PMSG_SUSPEND);
|
2007-05-15 02:28:16 +08:00
|
|
|
out:
|
2006-07-03 15:07:27 +08:00
|
|
|
/* report result */
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
|
|
|
|
ap->pflags &= ~ATA_PFLAG_PM_PENDING;
|
|
|
|
if (rc == 0)
|
|
|
|
ap->pflags |= ATA_PFLAG_SUSPENDED;
|
2007-05-15 02:28:16 +08:00
|
|
|
else if (ap->pflags & ATA_PFLAG_FROZEN)
|
2006-07-03 15:07:27 +08:00
|
|
|
ata_port_schedule_eh(ap);
|
|
|
|
|
|
|
|
if (ap->pm_result) {
|
|
|
|
*ap->pm_result = rc;
|
|
|
|
ap->pm_result = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* ata_eh_handle_port_resume - perform port resume operation
|
|
|
|
* @ap: port to resume
|
|
|
|
*
|
|
|
|
* Resume @ap.
|
|
|
|
*
|
|
|
|
* LOCKING:
|
|
|
|
* Kernel thread context (may sleep).
|
|
|
|
*/
|
|
|
|
static void ata_eh_handle_port_resume(struct ata_port *ap)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
2007-05-05 03:27:47 +08:00
|
|
|
int rc = 0;
|
2006-07-03 15:07:27 +08:00
|
|
|
|
|
|
|
/* are we resuming? */
|
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
|
|
|
|
ap->pm_mesg.event != PM_EVENT_ON) {
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
|
2007-05-05 03:27:47 +08:00
|
|
|
WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));
|
2006-07-03 15:07:27 +08:00
|
|
|
|
2007-11-02 09:32:38 +08:00
|
|
|
ata_acpi_set_state(ap, PMSG_ON);
|
|
|
|
|
2006-07-03 15:07:27 +08:00
|
|
|
if (ap->ops->port_resume)
|
|
|
|
rc = ap->ops->port_resume(ap);
|
|
|
|
|
libata: reimplement ACPI invocation
This patch reimplements ACPI invocation such that, instead of
exporting ACPI details to the rest of libata, ACPI event handlers -
ata_acpi_on_resume() and ata_acpi_on_devcfg() - are used. These two
functions are responsible for determining whether specific ACPI method
is used and when.
On resume, _GTF is scheduled by setting ATA_DFLAG_ACPI_PENDING device
flag. This is done this way to avoid performing the action on wrong
device device (device swapping while suspended).
On every ata_dev_configure(), ata_acpi_on_devcfg() is called, which
performs _SDD and _GTF. _GTF is performed only after resuming and, if
SATA, hardreset as the ACPI spec specifies. As _GTF may contain
arbitrary commands, IDENTIFY page is re-read after _GTF taskfiles are
executed.
If one of ACPI methods fails, ata_acpi_on_devcfg() retries on the
first failure. If it fails again on the second try, ACPI is disabled
on the device. Note that successful configuration clears ACPI failed
status.
With all feature checks moved to the above two functions,
do_drive_set_taskfiles() is trivial and thus collapsed into
ata_acpi_exec_tfs(), which is now static and converted to return the
number of executed taskfiles to be used by ata_acpi_on_resume(). As
failures are handled properly, ata_acpi_push_id() now returns -errno
on errors instead of unconditional zero.
Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
2007-05-15 02:28:16 +08:00
|
|
|
/* tell ACPI that we're resuming */
|
|
|
|
ata_acpi_on_resume(ap);
|
|
|
|
|
2007-05-05 03:27:47 +08:00
|
|
|
/* report result */
|
2006-07-03 15:07:27 +08:00
|
|
|
spin_lock_irqsave(ap->lock, flags);
|
|
|
|
ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
|
|
|
|
if (ap->pm_result) {
|
|
|
|
*ap->pm_result = rc;
|
|
|
|
ap->pm_result = NULL;
|
|
|
|
}
|
|
|
|
spin_unlock_irqrestore(ap->lock, flags);
|
|
|
|
}
|
2007-03-02 16:32:47 +08:00
|
|
|
#endif /* CONFIG_PM */
|