One more major cam_periph_error() rewrite to improve error handling and

reporting. It includes:
 - removing of error messages controlled by bootverbose, replacing them
with more universal and informative debugging on CAM_DEBUG_INFO level,
that is now built into the kernel by default;
 - more close following to the arguments submitted by caller, such as
SF_PRINT_ALWAYS, SF_QUIET_IR and SF_NO_PRINT; consumer knows better which
errors are usual/expected at this point and which are really informative;
 - adding two new flags SF_NO_RECOVERY and SF_NO_RETRY to allow caller
specify how much assistance it needs at this point; previously consumers
controlled that by not calling cam_periph_error() at all, but that made
behavior inconsistent and debugging complicated;
 - tuning debug messages and taken actions order to make debugging output
more readable and cause-effect relationships visible;
 - making camperiphdone() (common device recovery completion handler) to
also use cam_periph_error() in most cases, instead of own dumb code;
 - removing manual sense fetching code from cam_periph_error(); I was told
by number of people that it is SIM obligation to fetch sense data, so this
code is useless and only significantly complicates recovery logic;
 - making ada, da and pass driver to use cam_periph_error() with new limited
recovery options to handle error recovery and debugging in common way;
as one of results, CAM_REQUEUE_REQ and other retrying statuses are now
working fine with pass driver, that caused many problems before.
 - reverting r186891 by raj@ to avoid burning few seconds in tight DELAY()
loops on device probe, while device simply loads media; I think that problem
may already be fixed in other way, and even if it is not, solution must be
different.

Sponsored by:	iXsystems, Inc.
MFC after:	2 weeks
This commit is contained in:
Alexander Motin 2012-06-09 13:07:44 +00:00
parent 5d02ed91e9
commit 0191d9b367
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=236814
10 changed files with 271 additions and 545 deletions

View File

@ -584,6 +584,7 @@ adadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t len
struct disk *dp;
uint64_t lba;
uint16_t count;
int error = 0;
dp = arg;
periph = dp->d_drv1;
@ -622,13 +623,16 @@ adadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t len
}
xpt_polled_action(&ccb);
if ((ccb.ataio.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
error = cam_periph_error(&ccb,
0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
printf("Aborting dump due to I/O error.\n");
cam_periph_unlock(periph);
return(EIO);
}
cam_periph_unlock(periph);
return(0);
return (error);
}
if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE) {
@ -636,7 +640,7 @@ adadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t len
ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
cam_fill_ataio(&ccb.ataio,
1,
0,
adadone,
CAM_DIR_NONE,
0,
@ -650,18 +654,16 @@ adadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t len
ata_28bit_cmd(&ccb.ataio, ATA_FLUSHCACHE, 0, 0, 0);
xpt_polled_action(&ccb);
if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
xpt_print(periph->path, "Synchronize cache failed\n");
error = cam_periph_error(&ccb,
0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(ccb.ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
/*timeout*/0,
/*getcount_only*/0);
cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
xpt_print(periph->path, "Synchronize cache failed\n");
}
cam_periph_unlock(periph);
return (0);
return (error);
}
static void
@ -1716,6 +1718,7 @@ adaflush(void)
{
struct cam_periph *periph;
struct ada_softc *softc;
int error;
TAILQ_FOREACH(periph, &adadriver.units, unit_links) {
union ccb ccb;
@ -1739,7 +1742,7 @@ adaflush(void)
ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
cam_fill_ataio(&ccb.ataio,
1,
0,
adadone,
CAM_DIR_NONE,
0,
@ -1753,15 +1756,13 @@ adaflush(void)
ata_28bit_cmd(&ccb.ataio, ATA_FLUSHCACHE, 0, 0, 0);
xpt_polled_action(&ccb);
if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
xpt_print(periph->path, "Synchronize cache failed\n");
error = cam_periph_error(&ccb,
0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(ccb.ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
/*timeout*/0,
/*getcount_only*/0);
cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
xpt_print(periph->path, "Synchronize cache failed\n");
cam_periph_unlock(periph);
}
}
@ -1771,6 +1772,7 @@ adaspindown(uint8_t cmd, int flags)
{
struct cam_periph *periph;
struct ada_softc *softc;
int error;
TAILQ_FOREACH(periph, &adadriver.units, unit_links) {
union ccb ccb;
@ -1795,7 +1797,7 @@ adaspindown(uint8_t cmd, int flags)
ccb.ccb_h.ccb_state = ADA_CCB_DUMP;
cam_fill_ataio(&ccb.ataio,
1,
0,
adadone,
CAM_DIR_NONE | flags,
0,
@ -1806,15 +1808,13 @@ adaspindown(uint8_t cmd, int flags)
ata_28bit_cmd(&ccb.ataio, cmd, 0, 0, 0);
xpt_polled_action(&ccb);
if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP)
xpt_print(periph->path, "Spin-down disk failed\n");
error = cam_periph_error(&ccb,
0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(ccb.ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
/*timeout*/0,
/*getcount_only*/0);
cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
xpt_print(periph->path, "Spin-down disk failed\n");
cam_periph_unlock(periph);
}
}

View File

@ -705,12 +705,9 @@ probedone(struct cam_periph *periph, union ccb *done_ccb)
inq_buf = &path->device->inq_data;
if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
if (softc->restart) {
if (bootverbose) {
cam_error_print(done_ccb,
CAM_ESF_ALL, CAM_EPF_ALL);
}
} else if (cam_periph_error(done_ccb, 0, 0, NULL) == ERESTART)
if (cam_periph_error(done_ccb,
0, softc->restart ? (SF_NO_RECOVERY | SF_NO_RETRY) : 0,
NULL) == ERESTART)
return;
if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) {
/* Don't wedge the queue */

View File

@ -108,6 +108,15 @@ typedef enum {
CAM_RETRY_SELTO = 0x02 /* Retry Selection Timeouts */
} cam_flags;
enum {
SF_RETRY_UA = 0x01, /* Retry UNIT ATTENTION conditions. */
SF_NO_PRINT = 0x02, /* Never print error status. */
SF_QUIET_IR = 0x04, /* Be quiet about Illegal Request reponses */
SF_PRINT_ALWAYS = 0x08, /* Always print error status. */
SF_NO_RECOVERY = 0x10, /* Don't do active error recovery. */
SF_NO_RETRY = 0x20 /* Don't do any retries. */
};
/* CAM Status field values */
typedef enum {
CAM_REQ_INPROG, /* CCB request is in progress */

View File

@ -69,18 +69,22 @@ static void camperiphdone(struct cam_periph *periph,
union ccb *done_ccb);
static void camperiphfree(struct cam_periph *periph);
static int camperiphscsistatuserror(union ccb *ccb,
union ccb **orig_ccb,
cam_flags camflags,
u_int32_t sense_flags,
int *openings,
u_int32_t *relsim_flags,
u_int32_t *timeout,
int *print,
const char **action_string);
static int camperiphscsisenseerror(union ccb *ccb,
union ccb **orig_ccb,
cam_flags camflags,
u_int32_t sense_flags,
int *openings,
u_int32_t *relsim_flags,
u_int32_t *timeout,
int *print,
const char **action_string);
static int nperiph_drivers;
@ -1108,148 +1112,30 @@ cam_release_devq(struct cam_path *path, u_int32_t relsim_flags,
}
#define saved_ccb_ptr ppriv_ptr0
#define recovery_depth ppriv_field1
static void
camperiphsensedone(struct cam_periph *periph, union ccb *done_ccb)
{
union ccb *saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
cam_status status;
int frozen = 0;
int depth = done_ccb->ccb_h.recovery_depth;
status = done_ccb->ccb_h.status;
if (status & CAM_DEV_QFRZN) {
frozen = 1;
/*
* Clear freeze flag now for case of retry,
* freeze will be dropped later.
*/
done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
}
status &= CAM_STATUS_MASK;
switch (status) {
case CAM_REQ_CMP:
{
int error_code, sense_key, asc, ascq;
scsi_extract_sense_len(&saved_ccb->csio.sense_data,
saved_ccb->csio.sense_len -
saved_ccb->csio.sense_resid,
&error_code, &sense_key, &asc, &ascq,
/*show_errors*/ 1);
/*
* If we manually retrieved sense into a CCB and got
* something other than "NO SENSE" send the updated CCB
* back to the client via xpt_done() to be processed via
* the error recovery code again.
*/
if ((sense_key != -1)
&& (sense_key != SSD_KEY_NO_SENSE)) {
saved_ccb->ccb_h.status |= CAM_AUTOSNS_VALID;
} else {
saved_ccb->ccb_h.status &= ~CAM_STATUS_MASK;
saved_ccb->ccb_h.status |= CAM_AUTOSENSE_FAIL;
}
saved_ccb->csio.sense_resid = done_ccb->csio.resid;
bcopy(saved_ccb, done_ccb, sizeof(union ccb));
xpt_free_ccb(saved_ccb);
break;
}
default:
bcopy(saved_ccb, done_ccb, sizeof(union ccb));
xpt_free_ccb(saved_ccb);
done_ccb->ccb_h.status &= ~CAM_STATUS_MASK;
done_ccb->ccb_h.status |= CAM_AUTOSENSE_FAIL;
break;
}
periph->flags &= ~CAM_PERIPH_SENSE_INPROG;
/*
* If it is the end of recovery, drop freeze, taken due to
* CAM_DEV_QFREEZE flag, set on recovery request.
*/
if (depth == 0) {
cam_release_devq(done_ccb->ccb_h.path,
/*relsim_flags*/0,
/*openings*/0,
/*timeout*/0,
/*getcount_only*/0);
}
/*
* Copy frozen flag from recovery request if it is set there
* for some reason.
*/
if (frozen != 0)
done_ccb->ccb_h.status |= CAM_DEV_QFRZN;
(*done_ccb->ccb_h.cbfcnp)(periph, done_ccb);
}
static void
camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
{
union ccb *saved_ccb, *save_ccb;
union ccb *saved_ccb;
cam_status status;
int frozen = 0;
struct scsi_start_stop_unit *scsi_cmd;
u_int32_t relsim_flags, timeout;
scsi_cmd = (struct scsi_start_stop_unit *)
&done_ccb->csio.cdb_io.cdb_bytes;
status = done_ccb->ccb_h.status;
if (status & CAM_DEV_QFRZN) {
frozen = 1;
/*
* Clear freeze flag now for case of retry,
* freeze will be dropped later.
*/
done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
}
timeout = 0;
relsim_flags = 0;
saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
switch (status & CAM_STATUS_MASK) {
case CAM_REQ_CMP:
{
/*
* If we have successfully taken a device from the not
* ready to ready state, re-scan the device and re-get
* the inquiry information. Many devices (mostly disks)
* don't properly report their inquiry information unless
* they are spun up.
*/
scsi_cmd = (struct scsi_start_stop_unit *)
&done_ccb->csio.cdb_io.cdb_bytes;
if (scsi_cmd->opcode == START_STOP_UNIT)
xpt_async(AC_INQ_CHANGED,
done_ccb->ccb_h.path, NULL);
goto final;
}
case CAM_SCSI_STATUS_ERROR:
scsi_cmd = (struct scsi_start_stop_unit *)
&done_ccb->csio.cdb_io.cdb_bytes;
if (status & CAM_AUTOSNS_VALID) {
struct ccb_getdev cgd;
if ((status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
if ((status & CAM_STATUS_MASK) == CAM_SCSI_STATUS_ERROR &&
(status & CAM_AUTOSNS_VALID)) {
struct scsi_sense_data *sense;
int error_code, sense_key, asc, ascq, sense_len;
scsi_sense_action err_action;
sense = &done_ccb->csio.sense_data;
sense_len = done_ccb->csio.sense_len -
done_ccb->csio.sense_resid;
scsi_extract_sense_len(sense, sense_len, &error_code,
&sense_key, &asc, &ascq,
/*show_errors*/ 1);
&sense_key, &asc, &ascq, /*show_errors*/ 1);
/*
* Grab the inquiry data for this device.
*/
xpt_setup_ccb(&cgd.ccb_h, done_ccb->ccb_h.path,
CAM_PRIORITY_NORMAL);
cgd.ccb_h.func_code = XPT_GDEV_TYPE;
xpt_action((union ccb *)&cgd);
err_action = scsi_error_action(&done_ccb->csio,
&cgd.inq_data, 0);
/*
* If the error is "invalid field in CDB",
* If the error is "invalid field in CDB",
* and the load/eject flag is set, turn the
* flag off and try again. This is just in
* case the drive in question barfs on the
@ -1257,106 +1143,53 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
* the load/eject flag by default for
* removable media.
*/
/* XXX KDM
* Should we check to see what the specific
* scsi status is?? Or does it not matter
* since we already know that there was an
* error, and we know what the specific
* error code was, and we know what the
* opcode is..
*/
if ((scsi_cmd->opcode == START_STOP_UNIT) &&
((scsi_cmd->how & SSS_LOEJ) != 0) &&
(asc == 0x24) && (ascq == 0x00) &&
(done_ccb->ccb_h.retry_count > 0)) {
(asc == 0x24) && (ascq == 0x00)) {
scsi_cmd->how &= ~SSS_LOEJ;
if (status & CAM_DEV_QFRZN) {
cam_release_devq(done_ccb->ccb_h.path,
0, 0, 0, 0);
done_ccb->ccb_h.status &=
~CAM_DEV_QFRZN;
}
xpt_action(done_ccb);
} else if ((done_ccb->ccb_h.retry_count > 1)
&& ((err_action & SS_MASK) != SS_FAIL)) {
/*
* In this case, the error recovery
* command failed, but we've got
* some retries left on it. Give
* it another try unless this is an
* unretryable error.
*/
/* set the timeout to .5 sec */
relsim_flags =
RELSIM_RELEASE_AFTER_TIMEOUT;
timeout = 500;
xpt_action(done_ccb);
break;
} else {
/*
* Perform the final retry with the original
* CCB so that final error processing is
* performed by the owner of the CCB.
*/
goto final;
goto out;
}
} else {
save_ccb = xpt_alloc_ccb_nowait();
if (save_ccb == NULL)
goto final;
bcopy(done_ccb, save_ccb, sizeof(*save_ccb));
periph->flags |= CAM_PERIPH_SENSE_INPROG;
/*
* Send a Request Sense to the device. We
* assume that we are in a contingent allegiance
* condition so we do not tag this request.
*/
scsi_request_sense(&done_ccb->csio, /*retries*/1,
camperiphsensedone,
&save_ccb->csio.sense_data,
save_ccb->csio.sense_len,
CAM_TAG_ACTION_NONE,
/*sense_len*/SSD_FULL_SIZE,
/*timeout*/5000);
done_ccb->ccb_h.pinfo.priority--;
done_ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
done_ccb->ccb_h.saved_ccb_ptr = save_ccb;
done_ccb->ccb_h.recovery_depth++;
xpt_action(done_ccb);
}
break;
default:
final:
bcopy(saved_ccb, done_ccb, sizeof(*done_ccb));
xpt_free_ccb(saved_ccb);
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
break;
if (cam_periph_error(done_ccb,
0, SF_RETRY_UA | SF_NO_PRINT, NULL) == ERESTART)
goto out;
if (done_ccb->ccb_h.status & CAM_DEV_QFRZN) {
cam_release_devq(done_ccb->ccb_h.path, 0, 0, 0, 0);
done_ccb->ccb_h.status &= ~CAM_DEV_QFRZN;
}
} else {
/*
* If we have successfully taken a device from the not
* ready to ready state, re-scan the device and re-get
* the inquiry information. Many devices (mostly disks)
* don't properly report their inquiry information unless
* they are spun up.
*/
if (scsi_cmd->opcode == START_STOP_UNIT)
xpt_async(AC_INQ_CHANGED, done_ccb->ccb_h.path, NULL);
}
/* decrement the retry count */
/*
* XXX This isn't appropriate in all cases. Restructure,
* so that the retry count is only decremented on an
* actual retry. Remeber that the orignal ccb had its
* retry count dropped before entering recovery, so
* doing it again is a bug.
* Perform the final retry with the original CCB so that final
* error processing is performed by the owner of the CCB.
*/
if (done_ccb->ccb_h.retry_count > 0)
done_ccb->ccb_h.retry_count--;
/*
* Drop freeze taken due to CAM_DEV_QFREEZE flag set on recovery
* request.
*/
cam_release_devq(done_ccb->ccb_h.path,
/*relsim_flags*/relsim_flags,
/*openings*/0,
/*timeout*/timeout,
/*getcount_only*/0);
/* Drop freeze taken, if this recovery request got error. */
if (frozen != 0) {
cam_release_devq(done_ccb->ccb_h.path,
/*relsim_flags*/0,
/*openings*/0,
/*timeout*/0,
/*getcount_only*/0);
}
saved_ccb = (union ccb *)done_ccb->ccb_h.saved_ccb_ptr;
bcopy(saved_ccb, done_ccb, sizeof(*done_ccb));
xpt_free_ccb(saved_ccb);
if (done_ccb->ccb_h.cbfcnp != camperiphdone)
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
out:
/* Drop freeze taken due to CAM_DEV_QFREEZE flag set. */
cam_release_devq(done_ccb->ccb_h.path, 0, 0, 0, 0);
}
/*
@ -1415,10 +1248,10 @@ cam_periph_freeze_after_event(struct cam_periph *periph,
}
static int
camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
u_int32_t sense_flags,
int *openings, u_int32_t *relsim_flags,
u_int32_t *timeout, const char **action_string)
camperiphscsistatuserror(union ccb *ccb, union ccb **orig_ccb,
cam_flags camflags, u_int32_t sense_flags,
int *openings, u_int32_t *relsim_flags,
u_int32_t *timeout, int *print, const char **action_string)
{
int error;
@ -1431,14 +1264,13 @@ camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
break;
case SCSI_STATUS_CMD_TERMINATED:
case SCSI_STATUS_CHECK_COND:
if (bootverbose)
xpt_print(ccb->ccb_h.path, "SCSI status error\n");
error = camperiphscsisenseerror(ccb,
error = camperiphscsisenseerror(ccb, orig_ccb,
camflags,
sense_flags,
openings,
relsim_flags,
timeout,
print,
action_string);
break;
case SCSI_STATUS_QUEUE_FULL:
@ -1493,9 +1325,6 @@ camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
}
*timeout = 0;
error = ERESTART;
if (bootverbose) {
xpt_print(ccb->ccb_h.path, "Queue full\n");
}
break;
}
/* FALLTHROUGH */
@ -1505,9 +1334,6 @@ camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
* Restart the queue after either another
* command completes or a 1 second timeout.
*/
if (bootverbose) {
xpt_print(ccb->ccb_h.path, "Device busy\n");
}
if (ccb->ccb_h.retry_count > 0) {
ccb->ccb_h.retry_count--;
error = ERESTART;
@ -1519,12 +1345,7 @@ camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
}
break;
case SCSI_STATUS_RESERV_CONFLICT:
xpt_print(ccb->ccb_h.path, "Reservation conflict\n");
error = EIO;
break;
default:
xpt_print(ccb->ccb_h.path, "SCSI status 0x%x\n",
ccb->csio.scsi_status);
error = EIO;
break;
}
@ -1532,18 +1353,18 @@ camperiphscsistatuserror(union ccb *ccb, cam_flags camflags,
}
static int
camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
u_int32_t sense_flags,
int *openings, u_int32_t *relsim_flags,
u_int32_t *timeout, const char **action_string)
camperiphscsisenseerror(union ccb *ccb, union ccb **orig,
cam_flags camflags, u_int32_t sense_flags,
int *openings, u_int32_t *relsim_flags,
u_int32_t *timeout, int *print, const char **action_string)
{
struct cam_periph *periph;
union ccb *orig_ccb = ccb;
int error;
int error, recoveryccb;
periph = xpt_path_periph(ccb->ccb_h.path);
if (periph->flags &
(CAM_PERIPH_RECOVERY_INPROG | CAM_PERIPH_SENSE_INPROG)) {
recoveryccb = (ccb->ccb_h.cbfcnp == camperiphdone);
if ((periph->flags & CAM_PERIPH_RECOVERY_INPROG) && !recoveryccb) {
/*
* If error recovery is already in progress, don't attempt
* to process this error, but requeue it unconditionally
@ -1558,6 +1379,7 @@ camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
* imperitive that we don't violate this assumption.
*/
error = ERESTART;
*print = 0;
} else {
scsi_sense_action err_action;
struct ccb_getdev cgd;
@ -1573,13 +1395,35 @@ camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
err_action = scsi_error_action(&ccb->csio,
&cgd.inq_data,
sense_flags);
else if ((ccb->ccb_h.flags & CAM_DIS_AUTOSENSE) == 0)
err_action = SS_REQSENSE;
else
err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO;
error = err_action & SS_ERRMASK;
/*
* Do not autostart sequential access devices
* to avoid unexpected tape loading.
*/
if ((err_action & SS_MASK) == SS_START &&
SID_TYPE(&cgd.inq_data) == T_SEQUENTIAL) {
*action_string = "Will not autostart a "
"sequential access device";
goto sense_error_done;
}
/*
* Avoid recovery recursion if recovery action is the same.
*/
if ((err_action & SS_MASK) >= SS_START && recoveryccb) {
if (((err_action & SS_MASK) == SS_START &&
ccb->csio.cdb_io.cdb_bytes[0] == START_STOP_UNIT) ||
((err_action & SS_MASK) == SS_TUR &&
(ccb->csio.cdb_io.cdb_bytes[0] == TEST_UNIT_READY))) {
err_action = SS_RETRY|SSQ_DECREMENT_COUNT|EIO;
*relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT;
*timeout = 500;
}
}
/*
* If the recovery action will consume a retry,
* make sure we actually have retries available.
@ -1627,15 +1471,6 @@ camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
case SS_START:
{
int le;
if (SID_TYPE(&cgd.inq_data) == T_SEQUENTIAL) {
xpt_free_ccb(orig_ccb);
ccb->ccb_h.status |= CAM_DEV_QFRZN;
*action_string = "Will not autostart a "
"sequential access device";
err_action = SS_FAIL;
error = EIO;
break;
}
/*
* Send a start unit command to the device, and
@ -1699,24 +1534,6 @@ camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
*timeout = 500;
break;
}
case SS_REQSENSE:
{
*action_string = "Requesting SCSI sense data";
periph->flags |= CAM_PERIPH_SENSE_INPROG;
/*
* Send a Request Sense to the device. We
* assume that we are in a contingent allegiance
* condition so we do not tag this request.
*/
scsi_request_sense(&ccb->csio, /*retries*/1,
camperiphsensedone,
&orig_ccb->csio.sense_data,
orig_ccb->csio.sense_len,
CAM_TAG_ACTION_NONE,
/*sense_len*/SSD_FULL_SIZE,
/*timeout*/5000);
break;
}
default:
panic("Unhandled error action %x", err_action);
}
@ -1733,14 +1550,12 @@ camperiphscsisenseerror(union ccb *ccb, cam_flags camflags,
ccb->ccb_h.pinfo.priority--;
ccb->ccb_h.flags |= CAM_DEV_QFREEZE;
ccb->ccb_h.saved_ccb_ptr = orig_ccb;
ccb->ccb_h.recovery_depth = 0;
error = ERESTART;
*orig = orig_ccb;
}
sense_error_done:
if ((err_action & SSQ_PRINT_SENSE) != 0
&& (ccb->ccb_h.status & CAM_AUTOSNS_VALID) != 0)
cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL);
*print = ((err_action & SSQ_PRINT_SENSE) != 0);
}
return (error);
}
@ -1754,87 +1569,35 @@ int
cam_periph_error(union ccb *ccb, cam_flags camflags,
u_int32_t sense_flags, union ccb *save_ccb)
{
union ccb *orig_ccb;
struct cam_periph *periph;
const char *action_string;
cam_status status;
int frozen;
int error, printed = 0;
int openings;
u_int32_t relsim_flags;
u_int32_t timeout = 0;
int frozen, error, openings, print, lost_device;
u_int32_t relsim_flags, timeout;
print = 1;
periph = xpt_path_periph(ccb->ccb_h.path);
action_string = NULL;
status = ccb->ccb_h.status;
frozen = (status & CAM_DEV_QFRZN) != 0;
status &= CAM_STATUS_MASK;
openings = relsim_flags = 0;
openings = relsim_flags = timeout = lost_device = 0;
orig_ccb = ccb;
switch (status) {
case CAM_REQ_CMP:
error = 0;
print = 0;
break;
case CAM_SCSI_STATUS_ERROR:
error = camperiphscsistatuserror(ccb,
camflags,
sense_flags,
&openings,
&relsim_flags,
&timeout,
&action_string);
error = camperiphscsistatuserror(ccb, &orig_ccb,
camflags, sense_flags, &openings, &relsim_flags,
&timeout, &print, &action_string);
break;
case CAM_AUTOSENSE_FAIL:
xpt_print(ccb->ccb_h.path, "AutoSense failed\n");
error = EIO; /* we have to kill the command */
break;
case CAM_ATA_STATUS_ERROR:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "ATA status error\n");
cam_error_print(ccb, CAM_ESF_ALL, CAM_EPF_ALL);
printed++;
}
/* FALLTHROUGH */
case CAM_REQ_CMP_ERR:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path,
"Request completed with CAM_REQ_CMP_ERR\n");
printed++;
}
/* FALLTHROUGH */
case CAM_CMD_TIMEOUT:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "Command timed out\n");
printed++;
}
/* FALLTHROUGH */
case CAM_UNEXP_BUSFREE:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "Unexpected Bus Free\n");
printed++;
}
/* FALLTHROUGH */
case CAM_UNCOR_PARITY:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path,
"Uncorrected parity error\n");
printed++;
}
/* FALLTHROUGH */
case CAM_DATA_RUN_ERR:
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "Data overrun\n");
printed++;
}
/* decrement the number of retries */
if (ccb->ccb_h.retry_count > 0 &&
(periph->flags & CAM_PERIPH_INVALID) == 0) {
ccb->ccb_h.retry_count--;
error = ERESTART;
} else {
action_string = "Retries exhausted";
error = EIO;
}
break;
case CAM_UA_ABORT:
case CAM_UA_TERMIO:
case CAM_MSG_REJECT_REC:
@ -1845,14 +1608,8 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
if ((camflags & CAM_RETRY_SELTO) != 0) {
if (ccb->ccb_h.retry_count > 0 &&
(periph->flags & CAM_PERIPH_INVALID) == 0) {
ccb->ccb_h.retry_count--;
error = ERESTART;
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path,
"Selection timeout\n");
printed++;
}
/*
* Wait a bit to give the device
@ -1866,38 +1623,10 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
}
/* FALLTHROUGH */
case CAM_DEV_NOT_THERE:
{
struct cam_path *newpath;
lun_id_t lun_id;
error = ENXIO;
/*
* For a selection timeout, we consider all of the LUNs on
* the target to be gone. If the status is CAM_DEV_NOT_THERE,
* then we only get rid of the device(s) specified by the
* path in the original CCB.
*/
if (status == CAM_DEV_NOT_THERE)
lun_id = xpt_path_lun_id(ccb->ccb_h.path);
else
lun_id = CAM_LUN_WILDCARD;
/* Should we do more if we can't create the path?? */
if (xpt_create_path(&newpath, periph,
xpt_path_path_id(ccb->ccb_h.path),
xpt_path_target_id(ccb->ccb_h.path),
lun_id) != CAM_REQ_CMP)
break;
/*
* Let peripheral drivers know that this device has gone
* away.
*/
xpt_async(AC_LOST_DEVICE, newpath, NULL);
xpt_free_path(newpath);
print = 0;
lost_device = 1;
break;
}
case CAM_REQ_INVALID:
case CAM_PATH_INVALID:
case CAM_NO_HBA:
@ -1917,27 +1646,16 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
* these events and should be unconditionally
* retried.
*/
if (bootverbose && printed == 0) {
xpt_print_path(ccb->ccb_h.path);
if (status == CAM_BDR_SENT)
printf("Bus Device Reset sent\n");
else
printf("Bus Reset issued\n");
printed++;
}
/* FALLTHROUGH */
case CAM_REQUEUE_REQ:
/* Unconditional requeue */
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "Request requeued\n");
printed++;
}
if ((periph->flags & CAM_PERIPH_INVALID) == 0)
error = ERESTART;
else {
action_string = "Retries exhausted";
/* Unconditional requeue if device is still there */
if (periph->flags & CAM_PERIPH_INVALID) {
action_string = "Periph was invalidated";
error = EIO;
}
} else if (sense_flags & SF_NO_RETRY) {
error = EIO;
action_string = "Retry was blocked";
} else
error = ERESTART;
break;
case CAM_RESRC_UNAVAIL:
/* Wait a bit for the resource shortage to abate. */
@ -1950,30 +1668,37 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
}
relsim_flags = RELSIM_RELEASE_AFTER_TIMEOUT;
/* FALLTHROUGH */
case CAM_ATA_STATUS_ERROR:
case CAM_REQ_CMP_ERR:
case CAM_CMD_TIMEOUT:
case CAM_UNEXP_BUSFREE:
case CAM_UNCOR_PARITY:
case CAM_DATA_RUN_ERR:
default:
/* decrement the number of retries */
if (ccb->ccb_h.retry_count > 0 &&
(periph->flags & CAM_PERIPH_INVALID) == 0) {
ccb->ccb_h.retry_count--;
error = ERESTART;
if (bootverbose && printed == 0) {
xpt_print(ccb->ccb_h.path, "CAM status 0x%x\n",
status);
printed++;
}
} else {
if (periph->flags & CAM_PERIPH_INVALID) {
error = EIO;
action_string = "Periph was invalidated";
} else if (ccb->ccb_h.retry_count == 0) {
error = EIO;
action_string = "Retries exhausted";
} else if (sense_flags & SF_NO_RETRY) {
error = EIO;
action_string = "Retry was blocked";
} else {
ccb->ccb_h.retry_count--;
error = ERESTART;
}
break;
}
/*
* If we have and error and are booting verbosely, whine
* *unless* this was a non-retryable selection timeout.
*/
if (error != 0 && bootverbose &&
!(status == CAM_SEL_TIMEOUT && (camflags & CAM_RETRY_SELTO) == 0)) {
if ((sense_flags & SF_PRINT_ALWAYS) ||
CAM_DEBUGGED(ccb->ccb_h.path, CAM_DEBUG_INFO))
print = 1;
else if (sense_flags & SF_NO_PRINT)
print = 0;
if (print)
cam_error_print(orig_ccb, CAM_ESF_ALL, CAM_EPF_ALL);
if (error != 0 && print) {
if (error != ERESTART) {
if (action_string == NULL)
action_string = "Unretryable error";
@ -1985,6 +1710,36 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
xpt_print(ccb->ccb_h.path, "Retrying command\n");
}
if (lost_device) {
struct cam_path *newpath;
lun_id_t lun_id;
/*
* For a selection timeout, we consider all of the LUNs on
* the target to be gone. If the status is CAM_DEV_NOT_THERE,
* then we only get rid of the device(s) specified by the
* path in the original CCB.
*/
if (status == CAM_DEV_NOT_THERE)
lun_id = xpt_path_lun_id(ccb->ccb_h.path);
else
lun_id = CAM_LUN_WILDCARD;
/* Should we do more if we can't create the path?? */
if (xpt_create_path(&newpath, periph,
xpt_path_path_id(ccb->ccb_h.path),
xpt_path_target_id(ccb->ccb_h.path),
lun_id) == CAM_REQ_CMP) {
/*
* Let peripheral drivers know that this
* device has gone away.
*/
xpt_async(AC_LOST_DEVICE, newpath, NULL);
xpt_free_path(newpath);
}
}
/* Attempt a retry */
if (error == ERESTART || error == 0) {
if (frozen != 0)

View File

@ -118,7 +118,6 @@ struct cam_periph {
#define CAM_PERIPH_INVALID 0x08
#define CAM_PERIPH_NEW_DEV_FOUND 0x10
#define CAM_PERIPH_RECOVERY_INPROG 0x20
#define CAM_PERIPH_SENSE_INPROG 0x40
#define CAM_PERIPH_FREE 0x80
u_int32_t immediate_priority;
u_int32_t refcount;

View File

@ -2901,11 +2901,17 @@ scsi_error_action(struct ccb_scsiio *csio, struct scsi_inquiry_data *inq_data,
SSQ_PRINT_SENSE;
}
}
if ((action & SS_MASK) >= SS_START &&
(sense_flags & SF_NO_RECOVERY)) {
action &= ~SS_MASK;
action |= SS_FAIL;
} else if ((action & SS_MASK) == SS_RETRY &&
(sense_flags & SF_NO_RETRY)) {
action &= ~SS_MASK;
action |= SS_FAIL;
}
}
#ifdef _KERNEL
if (bootverbose)
sense_flags |= SF_PRINT_ALWAYS;
#endif
if ((sense_flags & SF_PRINT_ALWAYS) != 0)
action |= SSQ_PRINT_SENSE;
else if ((sense_flags & SF_NO_PRINT) != 0)

View File

@ -74,9 +74,6 @@ typedef enum {
SS_TUR = 0x040000, /* Send a Test Unit Ready command to the
* device, then retry the original command.
*/
SS_REQSENSE = 0x050000, /* Send a RequestSense command to the
* device, then retry the original command.
*/
SS_MASK = 0xff0000
} scsi_sense_action;
@ -2196,12 +2193,6 @@ void scsi_sense_print(struct cam_device *device,
struct ccb_scsiio *csio, FILE *ofile);
#endif /* _KERNEL */
#define SF_RETRY_UA 0x01
#define SF_NO_PRINT 0x02
#define SF_QUIET_IR 0x04 /* Be quiet about Illegal Request reponses */
#define SF_PRINT_ALWAYS 0x08
const char * scsi_op_desc(u_int16_t opcode,
struct scsi_inquiry_data *inq_data);
char * scsi_cdb_string(u_int8_t *cdb_ptr, char *cdb_string,

View File

@ -1120,6 +1120,7 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng
u_int secsize;
struct ccb_scsiio csio;
struct disk *dp;
int error = 0;
dp = arg;
periph = dp->d_drv1;
@ -1138,7 +1139,7 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng
xpt_setup_ccb(&csio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
csio.ccb_h.ccb_state = DA_CCB_DUMP;
scsi_read_write(&csio,
/*retries*/1,
/*retries*/0,
dadone,
MSG_ORDERED_Q_TAG,
/*read*/FALSE,
@ -1151,19 +1152,16 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng
/*sense_len*/SSD_FULL_SIZE,
da_default_timeout * 1000);
xpt_polled_action((union ccb *)&csio);
cam_periph_unlock(periph);
if ((csio.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
error = cam_periph_error((union ccb *)&csio,
0, SF_NO_RECOVERY | SF_NO_RETRY, NULL);
if ((csio.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(csio.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
printf("Aborting dump due to I/O error.\n");
if ((csio.ccb_h.status & CAM_STATUS_MASK) ==
CAM_SCSI_STATUS_ERROR)
scsi_sense_print(&csio);
else
printf("status == 0x%x, scsi status == 0x%x\n",
csio.ccb_h.status, csio.scsi_status);
return(EIO);
}
return(0);
cam_periph_unlock(periph);
return (error);
}
/*
@ -1174,7 +1172,7 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng
xpt_setup_ccb(&csio.ccb_h, periph->path, CAM_PRIORITY_NORMAL);
csio.ccb_h.ccb_state = DA_CCB_DUMP;
scsi_synchronize_cache(&csio,
/*retries*/1,
/*retries*/0,
/*cbfcnp*/dadone,
MSG_SIMPLE_Q_TAG,
/*begin_lba*/0,/* Cover the whole disk */
@ -1183,28 +1181,16 @@ dadump(void *arg, void *virtual, vm_offset_t physical, off_t offset, size_t leng
5 * 60 * 1000);
xpt_polled_action((union ccb *)&csio);
if ((csio.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
if ((csio.ccb_h.status & CAM_STATUS_MASK) ==
CAM_SCSI_STATUS_ERROR) {
int asc, ascq;
int sense_key, error_code;
scsi_extract_sense_len(&csio.sense_data,
csio.sense_len - csio.sense_resid,
&error_code, &sense_key, &asc, &ascq,
/*show_errors*/ 1);
if (sense_key != SSD_KEY_ILLEGAL_REQUEST)
scsi_sense_print(&csio);
} else {
xpt_print(periph->path, "Synchronize cache "
"failed, status == 0x%x, scsi status == "
"0x%x\n", csio.ccb_h.status,
csio.scsi_status);
}
}
error = cam_periph_error((union ccb *)&csio,
0, SF_NO_RECOVERY | SF_NO_RETRY | SF_QUIET_IR, NULL);
if ((csio.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(csio.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
xpt_print(periph->path, "Synchronize cache failed\n");
}
cam_periph_unlock(periph);
return (0);
return (error);
}
static int
@ -2678,6 +2664,7 @@ dashutdown(void * arg, int howto)
{
struct cam_periph *periph;
struct da_softc *softc;
int error;
TAILQ_FOREACH(periph, &dadriver.units, unit_links) {
union ccb ccb;
@ -2699,7 +2686,7 @@ dashutdown(void * arg, int howto)
ccb.ccb_h.ccb_state = DA_CCB_DUMP;
scsi_synchronize_cache(&ccb.csio,
/*retries*/1,
/*retries*/0,
/*cbfcnp*/dadone,
MSG_SIMPLE_Q_TAG,
/*begin_lba*/0, /* whole disk */
@ -2709,32 +2696,13 @@ dashutdown(void * arg, int howto)
xpt_polled_action(&ccb);
if ((ccb.ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
if (((ccb.ccb_h.status & CAM_STATUS_MASK) ==
CAM_SCSI_STATUS_ERROR)
&& (ccb.csio.scsi_status == SCSI_STATUS_CHECK_COND)){
int error_code, sense_key, asc, ascq;
scsi_extract_sense(&ccb.csio.sense_data,
&error_code, &sense_key,
&asc, &ascq);
if (sense_key != SSD_KEY_ILLEGAL_REQUEST)
scsi_sense_print(&ccb.csio);
} else {
xpt_print(periph->path, "Synchronize "
"cache failed, status == 0x%x, scsi status "
"== 0x%x\n", ccb.ccb_h.status,
ccb.csio.scsi_status);
}
}
error = cam_periph_error(&ccb,
0, SF_NO_RECOVERY | SF_NO_RETRY | SF_QUIET_IR, NULL);
if ((ccb.ccb_h.status & CAM_DEV_QFRZN) != 0)
cam_release_devq(ccb.ccb_h.path,
/*relsim_flags*/0,
/*reduction*/0,
/*timeout*/0,
/*getcount_only*/0);
cam_release_devq(ccb.ccb_h.path, /*relsim_flags*/0,
/*reduction*/0, /*timeout*/0, /*getcount_only*/0);
if (error != 0)
xpt_print(periph->path, "Synchronize cache failed\n");
cam_periph_unlock(periph);
}
}

View File

@ -624,9 +624,9 @@ passsendccb(struct cam_periph *periph, union ccb *ccb, union ccb *inccb)
* that request. Otherwise, it's up to the user to perform any
* error recovery.
*/
cam_periph_runccb(ccb,
(ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) ? passerror : NULL,
/* cam_flags */ CAM_RETRY_SELTO, /* sense_flags */SF_RETRY_UA,
cam_periph_runccb(ccb, passerror, /* cam_flags */ CAM_RETRY_SELTO,
/* sense_flags */ ((ccb->ccb_h.flags & CAM_PASS_ERR_RECOVER) ?
SF_RETRY_UA : SF_NO_RECOVERY) | SF_NO_PRINT,
softc->device_stats);
if (need_unmap != 0)

View File

@ -747,7 +747,7 @@ again:
case PROBE_DV_EXIT:
{
scsi_test_unit_ready(csio,
/*retries*/10,
/*retries*/4,
probedone,
MSG_SIMPLE_Q_TAG,
SSD_FULL_SIZE,
@ -1596,14 +1596,11 @@ probe_device_check:
break;
}
case PROBE_TUR_FOR_NEGOTIATION:
if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
DELAY(500000);
if (cam_periph_error(done_ccb, 0, SF_RETRY_UA,
NULL) == ERESTART)
return;
}
/* FALLTHROUGH */
case PROBE_DV_EXIT:
if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
cam_periph_error(done_ccb, 0,
SF_NO_PRINT | SF_NO_RECOVERY | SF_NO_RETRY, NULL);
}
if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) {
/* Don't wedge the queue */
xpt_release_devq(done_ccb->ccb_h.path, /*count*/1,
@ -1651,6 +1648,10 @@ probe_device_check:
struct scsi_inquiry_data *nbuf;
struct ccb_scsiio *csio;
if ((done_ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
cam_periph_error(done_ccb, 0,
SF_NO_PRINT | SF_NO_RECOVERY | SF_NO_RETRY, NULL);
}
if ((done_ccb->ccb_h.status & CAM_DEV_QFRZN) != 0) {
/* Don't wedge the queue */
xpt_release_devq(done_ccb->ccb_h.path, /*count*/1,