mirror of
https://git.hardenedbsd.org/hardenedbsd/HardenedBSD.git
synced 2024-11-13 05:41:26 +01:00
Ensure I/O buffers in libufs(3) are 128-byte aligned.
Various disk controllers require their buffers to be aligned to a cache-line size (128 bytes). For buffers allocated in structures, ensure that they are 128-byte aligned. Use aligned_alloc(3) to allocate memory to ensure that the returned memory is 128-byte aligned. While we are here, we replace the dynamically allocated inode buffer with a buffer allocated in the uufsd structure, just as is already done for the superblock and cylinder group buffers. This can be removed if/when the kernel is fixed. Because this problem has existed on one I/O subsystem or another since the 1990s, we are probably stuck with dealing with it forever. The problem most recently showed up in Azure, see: https://reviews.freebsd.org/D41728 https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=267654 Before these fixes were applied, it was confirmed that the changes in this commit also fixed the issue in Azure. Reviewed-by: Warner Losh, kib Tested-by: Souradeep Chakrabarti of Microsoft (earlier version) PR: 267654 Differential Revision: https://reviews.freebsd.org/D41724
This commit is contained in:
parent
415c1c748d
commit
772430dd67
@ -2,7 +2,7 @@
|
||||
PACKAGE= ufs
|
||||
LIB= ufs
|
||||
SHLIBDIR?= /lib
|
||||
SHLIB_MAJOR= 7
|
||||
SHLIB_MAJOR= 8
|
||||
|
||||
SRCS= block.c cgroup.c gsb_crc32.c inode.c sblock.c type.c ffs_subr.c
|
||||
SRCS+= ffs_tables.c
|
||||
|
@ -57,19 +57,10 @@ bread(struct uufsd *disk, ufs2_daddr_t blockno, void *data, size_t size)
|
||||
|
||||
ERROR(disk, NULL);
|
||||
|
||||
p2 = data;
|
||||
/*
|
||||
* XXX: various disk controllers require alignment of our buffer
|
||||
* XXX: which is stricter than struct alignment.
|
||||
* XXX: Bounce the buffer if not 64 byte aligned.
|
||||
* XXX: this can be removed if/when the kernel is fixed
|
||||
*/
|
||||
if (((intptr_t)data) & 0x3f) {
|
||||
p2 = malloc(size);
|
||||
if (p2 == NULL) {
|
||||
ERROR(disk, "allocate bounce buffer");
|
||||
goto fail;
|
||||
}
|
||||
BUF_MALLOC(&p2, data, size);
|
||||
if (p2 == NULL) {
|
||||
ERROR(disk, "allocate bounce buffer");
|
||||
goto fail;
|
||||
}
|
||||
cnt = pread(disk->d_fd, p2, size, (off_t)(blockno * disk->d_bsize));
|
||||
if (cnt == -1) {
|
||||
@ -101,7 +92,7 @@ bwrite(struct uufsd *disk, ufs2_daddr_t blockno, const void *data, size_t size)
|
||||
{
|
||||
ssize_t cnt;
|
||||
int rv;
|
||||
void *p2 = NULL;
|
||||
void *p2;
|
||||
|
||||
ERROR(disk, NULL);
|
||||
|
||||
@ -110,24 +101,15 @@ bwrite(struct uufsd *disk, ufs2_daddr_t blockno, const void *data, size_t size)
|
||||
ERROR(disk, "failed to open disk for writing");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: various disk controllers require alignment of our buffer
|
||||
* XXX: which is stricter than struct alignment.
|
||||
* XXX: Bounce the buffer if not 64 byte aligned.
|
||||
* XXX: this can be removed if/when the kernel is fixed
|
||||
*/
|
||||
if (((intptr_t)data) & 0x3f) {
|
||||
p2 = malloc(size);
|
||||
if (p2 == NULL) {
|
||||
ERROR(disk, "allocate bounce buffer");
|
||||
return (-1);
|
||||
}
|
||||
memcpy(p2, data, size);
|
||||
data = p2;
|
||||
BUF_MALLOC(&p2, data, size);
|
||||
if (p2 == NULL) {
|
||||
ERROR(disk, "allocate bounce buffer");
|
||||
return (-1);
|
||||
}
|
||||
cnt = pwrite(disk->d_fd, data, size, (off_t)(blockno * disk->d_bsize));
|
||||
if (p2 != NULL)
|
||||
if (p2 != data)
|
||||
memcpy(p2, data, size);
|
||||
cnt = pwrite(disk->d_fd, p2, size, (off_t)(blockno * disk->d_bsize));
|
||||
if (p2 != data)
|
||||
free(p2);
|
||||
if (cnt == -1) {
|
||||
ERROR(disk, "write error to block device");
|
||||
@ -137,7 +119,6 @@ bwrite(struct uufsd *disk, ufs2_daddr_t blockno, const void *data, size_t size)
|
||||
ERROR(disk, "short write to block device");
|
||||
return (-1);
|
||||
}
|
||||
|
||||
return (cnt);
|
||||
}
|
||||
|
||||
|
@ -62,18 +62,10 @@ getinode(struct uufsd *disk, union dinodep *dp, ino_t inum)
|
||||
ERROR(disk, "inode number out of range");
|
||||
return (-1);
|
||||
}
|
||||
inoblock = disk->d_inoblock;
|
||||
inoblock = (caddr_t)&disk->d_inos[0];
|
||||
min = disk->d_inomin;
|
||||
max = disk->d_inomax;
|
||||
|
||||
if (inoblock == NULL) {
|
||||
inoblock = malloc(fs->fs_bsize);
|
||||
if (inoblock == NULL) {
|
||||
ERROR(disk, "unable to allocate inode block");
|
||||
return (-1);
|
||||
}
|
||||
disk->d_inoblock = inoblock;
|
||||
}
|
||||
if (inum >= min && inum < max)
|
||||
goto gotit;
|
||||
bread(disk, fsbtodb(fs, ino_to_fsba(fs, inum)), inoblock,
|
||||
@ -107,14 +99,10 @@ putinode(struct uufsd *disk)
|
||||
struct fs *fs;
|
||||
|
||||
fs = &disk->d_fs;
|
||||
if (disk->d_inoblock == NULL) {
|
||||
ERROR(disk, "No inode block allocated");
|
||||
return (-1);
|
||||
}
|
||||
if (disk->d_ufs == 2)
|
||||
ffs_update_dinode_ckhash(fs, disk->d_dp.dp2);
|
||||
if (bwrite(disk, fsbtodb(fs, ino_to_fsba(&disk->d_fs, disk->d_inomin)),
|
||||
disk->d_inoblock, disk->d_fs.fs_bsize) <= 0)
|
||||
(caddr_t)&disk->d_inos[0], disk->d_fs.fs_bsize) <= 0)
|
||||
return (-1);
|
||||
return (0);
|
||||
}
|
||||
|
@ -30,6 +30,13 @@
|
||||
#ifndef __LIBUFS_H__
|
||||
#define __LIBUFS_H__
|
||||
|
||||
/*
|
||||
* Various disk controllers require their buffers to be aligned to the size
|
||||
 * of a cache line. LIBUFS_BUFALIGN defines the required alignment in bytes.
|
||||
* The alignment must be a power of 2.
|
||||
*/
|
||||
#define LIBUFS_BUFALIGN 128
|
||||
|
||||
/*
|
||||
* libufs structures.
|
||||
*/
|
||||
@ -42,39 +49,51 @@ union dinodep {
|
||||
* userland ufs disk.
|
||||
*/
|
||||
struct uufsd {
|
||||
const char *d_name; /* disk name */
|
||||
int d_ufs; /* decimal UFS version */
|
||||
int d_fd; /* raw device file descriptor */
|
||||
long d_bsize; /* device bsize */
|
||||
ufs2_daddr_t d_sblock; /* superblock location */
|
||||
struct fs_summary_info *d_si; /* Superblock summary info */
|
||||
caddr_t d_inoblock; /* inode block */
|
||||
uint32_t d_inomin; /* low ino, not ino_t for ABI compat */
|
||||
uint32_t d_inomax; /* high ino, not ino_t for ABI compat */
|
||||
union dinodep d_dp; /* pointer to currently active inode */
|
||||
union {
|
||||
struct fs d_fs; /* filesystem information */
|
||||
char d_sb[MAXBSIZE]; /* superblock as buffer */
|
||||
} d_sbunion;
|
||||
char d_sb[SBLOCKSIZE]; /* superblock as buffer */
|
||||
} d_sbunion __aligned(LIBUFS_BUFALIGN);
|
||||
union {
|
||||
struct cg d_cg; /* cylinder group */
|
||||
char d_buf[MAXBSIZE]; /* cylinder group storage */
|
||||
} d_cgunion;
|
||||
int d_ccg; /* current cylinder group */
|
||||
int d_lcg; /* last cylinder group (in d_cg) */
|
||||
} d_cgunion __aligned(LIBUFS_BUFALIGN);
|
||||
union {
|
||||
union dinodep d_ino[1]; /* inode block */
|
||||
char d_inos[MAXBSIZE]; /* inode block as buffer */
|
||||
} d_inosunion __aligned(LIBUFS_BUFALIGN);
|
||||
const char *d_name; /* disk name */
|
||||
const char *d_error; /* human readable disk error */
|
||||
ufs2_daddr_t d_sblock; /* superblock location */
|
||||
struct fs_summary_info *d_si; /* Superblock summary info */
|
||||
union dinodep d_dp; /* pointer to currently active inode */
|
||||
ino_t d_inomin; /* low ino */
|
||||
ino_t d_inomax; /* high ino */
|
||||
off_t d_sblockloc; /* where to look for the superblock */
|
||||
int d_lookupflags; /* flags to superblock lookup */
|
||||
int d_mine; /* internal flags */
|
||||
#define d_fs d_sbunion.d_fs
|
||||
#define d_sb d_sbunion.d_sb
|
||||
#define d_cg d_cgunion.d_cg
|
||||
int64_t d_bsize; /* device bsize */
|
||||
int64_t d_lookupflags; /* flags to superblock lookup */
|
||||
int64_t d_mine; /* internal flags */
|
||||
int32_t d_ccg; /* current cylinder group */
|
||||
int32_t d_ufs; /* decimal UFS version */
|
||||
int32_t d_fd; /* raw device file descriptor */
|
||||
int32_t d_lcg; /* last cylinder group (in d_cg) */
|
||||
};
|
||||
#define d_inos d_inosunion.d_inos
|
||||
#define d_fs d_sbunion.d_fs
|
||||
#define d_cg d_cgunion.d_cg
|
||||
|
||||
/*
|
||||
* libufs macros (internal, non-exported).
|
||||
*/
|
||||
#ifdef _LIBUFS
|
||||
/*
 * Ensure that the buffer is aligned to the I/O subsystem requirements.
 *
 * BUF_MALLOC(newbufpp, data, size) stores a buffer pointer in *newbufpp:
 *  - if data is non-NULL and already aligned to LIBUFS_BUFALIGN, data
 *    itself is stored (nothing is allocated, so it must not be freed);
 *  - otherwise a new LIBUFS_BUFALIGN-aligned buffer of at least size bytes
 *    is allocated with aligned_alloc(); *newbufpp is NULL on failure.
 *    The contents of data are NOT copied into the new buffer.
 * A caller can tell the two cases apart by comparing *newbufpp with data.
 *
 * The requested size is rounded up to a multiple of LIBUFS_BUFALIGN since
 * C11 aligned_alloc() requires the size to be a multiple of the alignment.
 * The do/while (0) wrapper makes the macro usable as a single statement,
 * e.g. in an unbraced if/else. Arguments may be evaluated more than once.
 */
#define BUF_MALLOC(newbufpp, data, size) do {				\
	if ((data) != NULL &&						\
	    (((intptr_t)(data)) & (LIBUFS_BUFALIGN - 1)) == 0)		\
		*(newbufpp) = (void *)(data);				\
	else								\
		*(newbufpp) = aligned_alloc(LIBUFS_BUFALIGN,		\
		    ((size_t)(size) + (LIBUFS_BUFALIGN - 1)) &		\
		    ~((size_t)LIBUFS_BUFALIGN - 1));			\
} while (0)
|
||||
/*
|
||||
* Trace steps through libufs, to be used at entry and erroneous return.
|
||||
*/
|
||||
|
@ -228,7 +228,8 @@ use_pread(void *devfd, off_t loc, void **bufp, int size)
|
||||
int fd;
|
||||
|
||||
fd = *(int *)devfd;
|
||||
if ((*bufp = malloc(size)) == NULL)
|
||||
BUF_MALLOC(bufp, NULL, size);
|
||||
if (*bufp == NULL)
|
||||
return (ENOSPC);
|
||||
if (pread(fd, *bufp, size, loc) != size)
|
||||
return (EIO);
|
||||
|
@ -61,10 +61,6 @@ ufs_disk_close(struct uufsd *disk)
|
||||
ERROR(disk, NULL);
|
||||
close(disk->d_fd);
|
||||
disk->d_fd = -1;
|
||||
if (disk->d_inoblock != NULL) {
|
||||
free(disk->d_inoblock);
|
||||
disk->d_inoblock = NULL;
|
||||
}
|
||||
if (disk->d_mine & MINE_NAME) {
|
||||
free((char *)(uintptr_t)disk->d_name);
|
||||
disk->d_name = NULL;
|
||||
@ -155,10 +151,16 @@ again: if ((ret = stat(name, &st)) < 0) {
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (((uintptr_t)disk & ~(LIBUFS_BUFALIGN - 1)) != (uintptr_t)disk) {
|
||||
ERROR(disk, "uufsd structure must be aligned to "
|
||||
"LIBUFS_BUFALIGN byte boundry, see ufs_disk_fillout(3)");
|
||||
close(fd);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
disk->d_bsize = 1;
|
||||
disk->d_ccg = 0;
|
||||
disk->d_fd = fd;
|
||||
disk->d_inoblock = NULL;
|
||||
disk->d_inomin = 0;
|
||||
disk->d_inomax = 0;
|
||||
disk->d_lcg = 0;
|
||||
|
@ -9,7 +9,7 @@
|
||||
.\"
|
||||
.\" This file is in the public domain.
|
||||
.\"
|
||||
.Dd June 4, 2003
|
||||
.Dd November 17, 2023
|
||||
.Dt UFS_DISK_CLOSE 3
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -51,6 +51,13 @@ functions open a disk specified by
|
||||
.Fa name
|
||||
and populate the structure pointed to by
|
||||
.Fa disk .
|
||||
The structure referenced by the
|
||||
.Fa disk
|
||||
pointer must be aligned to at least the alignment specified by
|
||||
.Dv LIBUFS_BUFALIGN
|
||||
that is defined in the
|
||||
.In libufs.h
|
||||
header file.
|
||||
The disk is opened read-only.
|
||||
The specified
|
||||
.Fa name
|
||||
|
@ -67,6 +67,7 @@
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <libufs.h>
|
||||
|
||||
#include <sys/queue.h>
|
||||
|
||||
@ -424,6 +425,20 @@ Malloc(size_t size)
|
||||
break;
|
||||
return (retval);
|
||||
}
|
||||
/*
|
||||
* Allocate a block of memory to be used as an I/O buffer.
|
||||
* Ensure that the buffer is aligned to the I/O subsystem requirements.
|
||||
*/
|
||||
static inline void*
|
||||
Balloc(size_t size)
|
||||
{
|
||||
void *retval;
|
||||
|
||||
while ((retval = aligned_alloc(LIBUFS_BUFALIGN, size)) == NULL)
|
||||
if (flushentry() == 0)
|
||||
break;
|
||||
return (retval);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper for calloc() that flushes the cylinder group cache to try
|
||||
|
@ -58,7 +58,6 @@ static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95";
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <libufs.h>
|
||||
|
||||
#include "fsck.h"
|
||||
|
||||
@ -189,7 +188,7 @@ bufinit(void)
|
||||
initbarea(&failedbuf, BT_UNKNOWN);
|
||||
failedbuf.b_errs = -1;
|
||||
failedbuf.b_un.b_buf = NULL;
|
||||
if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL)
|
||||
if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL)
|
||||
errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize);
|
||||
initbarea(&cgblk, BT_CYLGRP);
|
||||
numbufs = cachelookups = cachereads = 0;
|
||||
@ -211,7 +210,7 @@ allocbuf(const char *failreason)
|
||||
char *bufp;
|
||||
|
||||
bp = (struct bufarea *)Malloc(sizeof(struct bufarea));
|
||||
bufp = Malloc((unsigned int)sblock.fs_bsize);
|
||||
bufp = Balloc((unsigned int)sblock.fs_bsize);
|
||||
if (bp == NULL || bufp == NULL) {
|
||||
errx(EEXIT, "%s", failreason);
|
||||
/* NOTREACHED */
|
||||
@ -241,7 +240,7 @@ cglookup(int cg)
|
||||
if ((unsigned) cg >= sblock.fs_ncg)
|
||||
errx(EEXIT, "cglookup: out of range cylinder group %d", cg);
|
||||
if (cgbufs == NULL) {
|
||||
cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea));
|
||||
cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea));
|
||||
if (cgbufs == NULL)
|
||||
errx(EEXIT, "Cannot allocate cylinder group buffers");
|
||||
}
|
||||
@ -250,7 +249,7 @@ cglookup(int cg)
|
||||
return (cgbp);
|
||||
cgp = NULL;
|
||||
if (flushtries == 0)
|
||||
cgp = Malloc((unsigned int)sblock.fs_cgsize);
|
||||
cgp = Balloc((unsigned int)sblock.fs_cgsize);
|
||||
if (cgp == NULL) {
|
||||
if (sujrecovery)
|
||||
errx(EEXIT,"Ran out of memory during journal recovery");
|
||||
@ -966,7 +965,7 @@ blzero(int fd, ufs2_daddr_t blk, long size)
|
||||
if (fd < 0)
|
||||
return;
|
||||
if (zero == NULL) {
|
||||
zero = calloc(ZEROBUFSIZE, 1);
|
||||
zero = Balloc(ZEROBUFSIZE);
|
||||
if (zero == NULL)
|
||||
errx(EEXIT, "cannot allocate buffer pool");
|
||||
}
|
||||
|
@ -48,7 +48,6 @@ static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95";
|
||||
#include <pwd.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <libufs.h>
|
||||
|
||||
#include "fsck.h"
|
||||
|
||||
@ -646,7 +645,7 @@ setinodebuf(int cg, ino_t inosused)
|
||||
inobufsize = blkroundup(&sblock,
|
||||
MAX(INOBUFSIZE, sblock.fs_bsize));
|
||||
initbarea(&inobuf, BT_INODES);
|
||||
if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
|
||||
if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL)
|
||||
errx(EEXIT, "cannot allocate space for inode buffer");
|
||||
}
|
||||
fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
|
||||
|
@ -59,7 +59,6 @@ static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95";
|
||||
#include <fstab.h>
|
||||
#include <grp.h>
|
||||
#include <inttypes.h>
|
||||
#include <libufs.h>
|
||||
#include <mntopts.h>
|
||||
#include <paths.h>
|
||||
#include <stdint.h>
|
||||
|
@ -45,7 +45,6 @@ static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95";
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <libufs.h>
|
||||
|
||||
#include "fsck.h"
|
||||
|
||||
|
@ -52,7 +52,6 @@ static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95";
|
||||
#include <limits.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <libufs.h>
|
||||
|
||||
#include "fsck.h"
|
||||
|
||||
@ -214,7 +213,7 @@ setup(char *dev)
|
||||
sbdirty();
|
||||
}
|
||||
if (snapcnt > 0 && copybuf == NULL) {
|
||||
copybuf = Malloc(sblock.fs_bsize);
|
||||
copybuf = Balloc(sblock.fs_bsize);
|
||||
if (copybuf == NULL)
|
||||
errx(EEXIT, "cannot allocate space for snapshot "
|
||||
"copy buffer");
|
||||
@ -501,7 +500,7 @@ sblock_init(void)
|
||||
fsmodified = 0;
|
||||
lfdir = 0;
|
||||
initbarea(&sblk, BT_SUPERBLK);
|
||||
sblk.b_un.b_buf = Malloc(SBLOCKSIZE);
|
||||
sblk.b_un.b_buf = Balloc(SBLOCKSIZE);
|
||||
if (sblk.b_un.b_buf == NULL)
|
||||
errx(EEXIT, "cannot allocate space for superblock");
|
||||
dev_bsize = secsize = DEV_BSIZE;
|
||||
@ -530,7 +529,7 @@ calcsb(char *dev, int devfd, struct fs *fs)
|
||||
*/
|
||||
if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1)
|
||||
return (0);
|
||||
fsrbuf = Malloc(secsize);
|
||||
fsrbuf = Balloc(secsize);
|
||||
if (fsrbuf == NULL)
|
||||
errx(EEXIT, "calcsb: cannot allocate recovery buffer");
|
||||
if (blread(devfd, fsrbuf,
|
||||
@ -573,7 +572,7 @@ chkrecovery(int devfd)
|
||||
rdsize = sblock.fs_fsize;
|
||||
if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1 ||
|
||||
rdsize % secsize != 0 ||
|
||||
(fsrbuf = Malloc(rdsize)) == NULL ||
|
||||
(fsrbuf = Balloc(rdsize)) == NULL ||
|
||||
blread(devfd, fsrbuf, (SBLOCK_UFS2 - rdsize) / dev_bsize,
|
||||
rdsize) != 0) {
|
||||
free(fsrbuf);
|
||||
@ -612,7 +611,7 @@ saverecovery(int readfd, int writefd)
|
||||
if (sblock.fs_magic != FS_UFS2_MAGIC ||
|
||||
ioctl(readfd, DIOCGSECTORSIZE, &secsize) == -1 ||
|
||||
rdsize % secsize != 0 ||
|
||||
(fsrbuf = Malloc(rdsize)) == NULL ||
|
||||
(fsrbuf = Balloc(rdsize)) == NULL ||
|
||||
blread(readfd, fsrbuf, (SBLOCK_UFS2 - rdsize) / dev_bsize,
|
||||
rdsize) != 0) {
|
||||
printf("RECOVERY DATA COULD NOT BE CREATED\n");
|
||||
|
@ -47,7 +47,6 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <libufs.h>
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
#include <sysexits.h>
|
||||
@ -2274,7 +2273,7 @@ suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
|
||||
static void
|
||||
suj_read(void)
|
||||
{
|
||||
uint8_t block[1 * 1024 * 1024];
|
||||
uint8_t block[1 * 1024 * 1024] __aligned(LIBUFS_BUFALIGN);
|
||||
struct suj_seg *seg;
|
||||
struct jsegrec *recn;
|
||||
struct jsegrec *rec;
|
||||
|
Loading…
Reference in New Issue
Block a user