Fix a bug in the error recovery code. It was possible to have more than

one error recovery action oustanding for a given peripheral.

This is bad for several reasons.  The first problem is that the error
recovery actions would likely be to fix the same problem.  (e.g., we
queue 5 CCBs to a disk, and the first one comes back with 0x04,0x02.  We
start error recovery, and the second one comes back with the same status.
Then the third one comes back, and so on.  Each one causes the drive to get
nailed with a start unit, when we really only need one.)

The other problem is that we only have space to store one CCB while we're
doing error recovery.  The subsequent error recovery actions that got
started were over-writing the CCBs from previous error recovery actions,
but we still tried to call the done routine N times for N error recovery
actions.  Each call to dadone() was done with the same CCB, though.  So on
the second one, we got a "biodone: buffer not busy" panic, since the buffer
in question had already been through biodone().

In any case, this fixes things so that any any given time, there's only one
error recovery action outstanding for any given peripheral driver.

Reviewed by:	gibbs
Reported by:	Philippe Regnauld <regnauld@deepo.prosa.dk>
[ Philippe wins the "bug finder of the week" award ]
This commit is contained in:
Kenneth D. Merry 1998-10-13 21:41:32 +00:00
parent 89041bafbd
commit 60a899a075
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=40318
2 changed files with 59 additions and 2 deletions

View File

@ -26,7 +26,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id: cam_periph.c,v 1.2 1998/09/20 07:14:36 gibbs Exp $
* $Id: cam_periph.c,v 1.3 1998/09/29 09:18:08 bde Exp $
*/
#include <sys/param.h>
@ -903,6 +903,8 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
bcopy(done_ccb->ccb_h.saved_ccb_ptr, done_ccb,
sizeof(union ccb));
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
break;
@ -969,6 +971,8 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
bcopy(done_ccb->ccb_h.saved_ccb_ptr,
done_ccb, sizeof(union ccb));
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
}
} else {
@ -981,6 +985,8 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
bcopy(done_ccb->ccb_h.saved_ccb_ptr,
done_ccb, sizeof(union ccb));
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
}
@ -989,6 +995,8 @@ camperiphdone(struct cam_periph *periph, union ccb *done_ccb)
bcopy(done_ccb->ccb_h.saved_ccb_ptr, done_ccb,
sizeof(union ccb));
periph->flags &= ~CAM_PERIPH_RECOVERY_INPROG;
xpt_action(done_ccb);
break;
@ -1084,6 +1092,28 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
&& save_ccb != NULL
&& ccb->ccb_h.retry_count > 0) {
/*
* Since error recovery is already
* in progress, don't attempt to
* process this error. It is probably
* related to the error that caused
* the currently active error recovery
* action. Also, we only have
* space for one saved CCB, so if we
* had two concurrent error recovery
* actions, we would end up
* over-writing one error recovery
* CCB with another one.
*/
if (periph->flags &
CAM_PERIPH_RECOVERY_INPROG) {
error = ERESTART;
break;
}
periph->flags |=
CAM_PERIPH_RECOVERY_INPROG;
/* decrement the number of retries */
if ((err_action &
SSQ_DECREMENT_COUNT) != 0) {
@ -1154,6 +1184,19 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
&& ccb->ccb_h.retry_count > 0) {
int le;
/*
* Only one error recovery action
* at a time. See above.
*/
if (periph->flags &
CAM_PERIPH_RECOVERY_INPROG) {
error = ERESTART;
break;
}
periph->flags |=
CAM_PERIPH_RECOVERY_INPROG;
/* decrement the number of retries */
retry = 1;
ccb->ccb_h.retry_count--;
@ -1232,6 +1275,19 @@ cam_periph_error(union ccb *ccb, cam_flags camflags,
&& ccb->ccb_h.retry_count > 0) {
int le;
/*
* Only one error recovery action
* at a time. See above.
*/
if (periph->flags &
CAM_PERIPH_RECOVERY_INPROG) {
error = ERESTART;
break;
}
periph->flags |=
CAM_PERIPH_RECOVERY_INPROG;
/* decrement the number of retries */
retry = 1;
ccb->ccb_h.retry_count--;

View File

@ -25,7 +25,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id$
* $Id: cam_periph.h,v 1.1 1998/09/15 06:33:23 gibbs Exp $
*/
#ifndef _CAM_CAM_PERIPH_H
@ -83,6 +83,7 @@ struct cam_periph {
#define CAM_PERIPH_LOCK_WANTED 0x04
#define CAM_PERIPH_INVALID 0x08
#define CAM_PERIPH_NEW_DEV_FOUND 0x10
#define CAM_PERIPH_RECOVERY_INPROG 0x20
u_int32_t immediate_priority;
u_int32_t refcount;
SLIST_HEAD(, ccb_hdr) ccb_list; /* For "immediate" requests */