From d8f4dd34d67a47dd797c123bceff57f7901a4099 Mon Sep 17 00:00:00 2001 From: Marius Strobl Date: Wed, 10 Sep 2008 20:07:08 +0000 Subject: [PATCH] Work around Cheetah+ erratum 34 (USIII+ erratum #10) by relocating the locked entry in it16 slot 0, which typically is occupied by the PROM, and manually entering locked entries in slots != 0. Thanks to Hubert Feyrer for donating the Blade 2000 this change was developed on. --- sys/boot/sparc64/loader/main.c | 110 ++++++++++++++++++++++++++ sys/sparc64/sparc64/genassym.c | 3 + sys/sparc64/sparc64/mp_locore.S | 134 ++++++++++++++++++++++++++------ sys/sun4v/include/asi.h | 1 + 4 files changed, 226 insertions(+), 22 deletions(-) diff --git a/sys/boot/sparc64/loader/main.c b/sys/boot/sparc64/loader/main.c index 5091caa52237..355160cb5e47 100644 --- a/sys/boot/sparc64/loader/main.c +++ b/sys/boot/sparc64/loader/main.c @@ -6,6 +6,31 @@ * As long as the above copyright statement and this notice remain * unchanged, you can do what ever you want with this file. */ +/*- + * Copyright (c) 2008 Marius Strobl + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ #include __FBSDID("$FreeBSD$"); @@ -65,6 +90,7 @@ static vm_offset_t dtlb_va_to_pa_sun4u(vm_offset_t); static inline u_long itlb_get_data_sun4u(int slot); static void itlb_enter_sun4u(u_long vpn, u_long data); static vm_offset_t itlb_va_to_pa_sun4u(vm_offset_t); +static void itlb_relocate_locked0_sun4u(void); extern vm_offset_t md_load(char *, vm_offset_t *); static int sparc64_autoload(void); static ssize_t sparc64_readin(const int, vm_offset_t, const size_t); @@ -456,9 +482,32 @@ static void itlb_enter_sun4u(u_long vpn, u_long data) { u_long reg; + int i; reg = rdpr(pstate); wrpr(pstate, reg & ~PSTATE_IE, 0); + + if (cpu_impl == CPU_IMPL_ULTRASPARCIIIp) { + /* + * Search an unused slot != 0 and explicitly enter the data + * and tag there in order to avoid Cheetah+ erratum 34. 
+ */ + for (i = 1; i < itlb_slot_max; i++) { + if ((itlb_get_data_sun4u(i) & TD_V) != 0) + continue; + + stxa(AA_IMMU_TAR, ASI_IMMU, + TLB_TAR_VA(vpn) | TLB_TAR_CTX(TLB_CTX_KERNEL)); + stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, data); + flush(KERNBASE); + break; + } + wrpr(pstate, reg, 0); + if (i == itlb_slot_max) + panic("%s: could not find an unused slot", __func__); + return; + } + stxa(AA_IMMU_TAR, ASI_IMMU, TLB_TAR_VA(vpn) | TLB_TAR_CTX(TLB_CTX_KERNEL)); stxa(0, ASI_ITLB_DATA_IN_REG, data); @@ -466,6 +515,48 @@ itlb_enter_sun4u(u_long vpn, u_long data) wrpr(pstate, reg, 0); } +static void +itlb_relocate_locked0_sun4u(void) +{ + u_long data, pstate, tag; + int i; + + if (cpu_impl != CPU_IMPL_ULTRASPARCIIIp) + return; + + pstate = rdpr(pstate); + wrpr(pstate, pstate & ~PSTATE_IE, 0); + + data = itlb_get_data_sun4u(0); + if ((data & (TD_V | TD_L)) != (TD_V | TD_L)) { + wrpr(pstate, pstate, 0); + return; + } + + /* Flush the mapping of slot 0. */ + tag = ldxa(TLB_DAR_SLOT(0), ASI_ITLB_TAG_READ_REG); + stxa(TLB_DEMAP_VA(TLB_TAR_VA(tag)) | TLB_DEMAP_PRIMARY | + TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0); + flush(0); /* The USIII-family ignores the address. */ + + /* + * Search a replacement slot != 0 and enter the data and tag + * that formerly were in slot 0. + */ + for (i = 1; i < itlb_slot_max; i++) { + if ((itlb_get_data_sun4u(i) & TD_V) != 0) + continue; + + stxa(AA_IMMU_TAR, ASI_IMMU, tag); + stxa(TLB_DAR_SLOT(i), ASI_ITLB_DATA_ACCESS_REG, data); + flush(0); /* The USIII-family ignores the address. 
*/ + break; + } + wrpr(pstate, pstate, 0); + if (i == itlb_slot_max) + panic("%s: could not find a replacement slot", __func__); +} + static int mmu_mapin_sun4u(vm_offset_t va, vm_size_t len) { @@ -603,6 +694,25 @@ tlb_init_sun4u(void) OF_getprop(child, "#itlb-entries", &itlb_slot_max, sizeof(itlb_slot_max)) == -1) panic("%s: can't get TLB slot max.", __func__); + + if (cpu_impl == CPU_IMPL_ULTRASPARCIIIp) { +#ifdef LOADER_DEBUG + printf("pre fixup:\n"); + pmap_print_tlb_sun4u(); +#endif + + /* + * Relocate the locked entry in it16 slot 0 (if existent) + * as part of working around Cheetah+ erratum 34. + */ + itlb_relocate_locked0_sun4u(); + +#ifdef LOADER_DEBUG + printf("post fixup:\n"); + pmap_print_tlb_sun4u(); +#endif + } + dtlb_store = malloc(dtlb_slot_max * sizeof(*dtlb_store)); itlb_store = malloc(itlb_slot_max * sizeof(*itlb_store)); if (dtlb_store == NULL || itlb_store == NULL) diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index d3dac32dc12a..09867e8c8242 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -123,15 +123,18 @@ ASSYM(TTE_SHIFT, TTE_SHIFT); ASSYM(TTE_VPN, offsetof(struct tte, tte_vpn)); ASSYM(TTE_DATA, offsetof(struct tte, tte_data)); +ASSYM(TD_V, TD_V); ASSYM(TD_EXEC, TD_EXEC); ASSYM(TD_REF, TD_REF); ASSYM(TD_SW, TD_SW); +ASSYM(TD_L, TD_L); ASSYM(TD_CP, TD_CP); ASSYM(TD_CV, TD_CV); ASSYM(TD_W, TD_W); ASSYM(TS_MIN, TS_MIN); ASSYM(TS_MAX, TS_MAX); +ASSYM(TLB_DAR_SLOT_SHIFT, TLB_DAR_SLOT_SHIFT); ASSYM(TLB_PCXR_PGSZ_MASK, TLB_PCXR_PGSZ_MASK); ASSYM(TLB_DIRECT_TO_TTE_MASK, TLB_DIRECT_TO_TTE_MASK); ASSYM(TV_SIZE_BITS, TV_SIZE_BITS); diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S index c17c68e6d0b1..e8b2a9526335 100644 --- a/sys/sparc64/sparc64/mp_locore.S +++ b/sys/sparc64/sparc64/mp_locore.S @@ -1,5 +1,6 @@ /*- * Copyright (c) 2002 Jake Burkholder. + * Copyright (c) 2008 Marius Strobl * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -42,12 +43,78 @@ __FBSDID("$FreeBSD$"); .text _ALIGN_TEXT -1: rd %pc, %l0 - ldx [%l0 + (4f-1b)], %l1 - add %l0, (6f-1b), %l2 + /* + * Initialize misc. state to known values: interrupts disabled, + * normal globals, no clean windows, PIL 0, and floating point + * disabled. + */ +1: wrpr %g0, PSTATE_NORMAL, %pstate + wrpr %g0, 0, %cleanwin + wrpr %g0, 0, %pil + wr %g0, 0, %fprs + + rdpr %ver, %l7 + srlx %l7, VER_IMPL_SHIFT, %l7 + sll %l7, VER_IMPL_SIZE, %l7 + srl %l7, VER_IMPL_SIZE, %l7 + cmp %l7, CPU_IMPL_ULTRASPARCIIIp + bne %icc, 3f + nop + + /* + * Relocate the locked entry in it16 slot 0 (if existent) + * as part of working around Cheetah+ erratum 34. + */ + + setx TD_V | TD_L, %l1, %l0 + /* + * We read ASI_ITLB_DATA_ACCESS_REG twice in order to work + * around errata of USIII and beyond. + */ + ldxa [%g0] ASI_ITLB_DATA_ACCESS_REG, %g0 + ldxa [%g0] ASI_ITLB_DATA_ACCESS_REG, %l6 + and %l6, %l0, %l1 + cmp %l0, %l1 + bne %xcc, 3f + nop + + /* Flush the mapping of slot 0. */ + ldxa [%g0] ASI_ITLB_TAG_READ_REG, %l5 + srlx %l5, TAR_VPN_SHIFT, %l0 + sllx %l0, TAR_VPN_SHIFT, %l0 + or %l0, TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, %l0 + stxa %g0, [%l0] ASI_IMMU_DEMAP + /* The USIII-family ignores the address. */ + flush %g0 + + /* + * Search a replacement slot != 0 and enter the data and tag + * that formerly were in slot 0. + */ + mov (1 << TLB_DAR_SLOT_SHIFT), %l4 + setx TD_V, %l1, %l0 + /* + * We read ASI_ITLB_DATA_ACCESS_REG twice in order to work + * around errata of USIII and beyond. + */ +2: ldxa [%l4] ASI_ITLB_DATA_ACCESS_REG, %g0 + ldxa [%l4] ASI_ITLB_DATA_ACCESS_REG, %l1 + and %l1, %l0, %l1 + cmp %l0, %l1 + be,a %xcc, 2b + add %l4, (1 << TLB_DAR_SLOT_SHIFT), %l4 + wr %g0, ASI_IMMU, %asi + stxa %l5, [%g0 + AA_IMMU_TAR] %asi + stxa %l6, [%l4] ASI_ITLB_DATA_ACCESS_REG + /* The USIII-family ignores the address. 
*/ + flush %g0 + +3: rd %pc, %l6 + ldx [%l6 + (9f-3b)], %l1 + add %l6, (11f-3b), %l2 clr %l3 -2: cmp %l3, %l1 - be %xcc, 3f +4: cmp %l3, %l1 + be %xcc, 8f nop ldx [%l2 + TTE_VPN], %l4 ldx [%l2 + TTE_DATA], %l5 @@ -56,41 +123,64 @@ __FBSDID("$FreeBSD$"); wr %g0, ASI_DMMU, %asi stxa %l4, [%g0 + AA_DMMU_TAR] %asi stxa %l5, [%g0] ASI_DTLB_DATA_IN_REG - wr %g0, ASI_IMMU, %asi + membar #Sync + + cmp %l7, CPU_IMPL_ULTRASPARCIIIp + bne %icc, 6f + wr %g0, ASI_IMMU, %asi + + /* + * Search an unused slot != 0 and explicitly enter the data + * and tag there in order to avoid Cheetah+ erratum 34. + */ + mov (1 << TLB_DAR_SLOT_SHIFT), %l0 + setx TD_V, %o1, %o0 + /* + * We read ASI_ITLB_DATA_ACCESS_REG twice in order to work + * around errata of USIII and beyond. + */ +5: ldxa [%l0] ASI_ITLB_DATA_ACCESS_REG, %g0 + ldxa [%l0] ASI_ITLB_DATA_ACCESS_REG, %o1 + and %o1, %o0, %o1 + cmp %o0, %o1 + be,a %xcc, 5b + add %l0, (1 << TLB_DAR_SLOT_SHIFT), %l0 + sethi %hi(KERNBASE), %o0 + stxa %l4, [%g0 + AA_IMMU_TAR] %asi + stxa %l5, [%l0] ASI_ITLB_DATA_ACCESS_REG + flush %o0 + ba %xcc, 7f + nop + +6: sethi %hi(KERNBASE), %l0 stxa %l4, [%g0 + AA_IMMU_TAR] %asi stxa %l5, [%g0] ASI_ITLB_DATA_IN_REG - membar #Sync - flush %l4 - add %l2, 1 << TTE_SHIFT, %l2 + flush %l0 +7: add %l2, 1 << TTE_SHIFT, %l2 add %l3, 1, %l3 - ba %xcc, 2b + ba %xcc, 4b nop -3: ldx [%l0 + (5f-1b)], %l1 +8: ldx [%l6 + (10f-3b)], %l1 jmpl %l1, %g0 nop _ALIGN_DATA -4: .xword 0x0 -5: .xword 0x0 -6: +9: .xword 0x0 +10: .xword 0x0 +11: DATA(mp_tramp_code) .xword 1b DATA(mp_tramp_code_len) - .xword 6b-1b + .xword 11b-1b DATA(mp_tramp_tlb_slots) - .xword 4b-1b + .xword 9b-1b DATA(mp_tramp_func) - .xword 5b-1b + .xword 10b-1b /* * void mp_startup(void) */ ENTRY(mp_startup) - wrpr %g0, PSTATE_NORMAL, %pstate - wrpr %g0, 0, %cleanwin - wrpr %g0, 0, %pil - wr %g0, 0, %fprs - SET(cpu_start_args, %l1, %l0) mov CPU_TICKSYNC, %l1 diff --git a/sys/sun4v/include/asi.h b/sys/sun4v/include/asi.h index d2a2b1c7b9af..07b097b93383 100644 --- 
a/sys/sun4v/include/asi.h +++ b/sys/sun4v/include/asi.h @@ -162,6 +162,7 @@ #define ASI_ITLB_DATA_IN_REG 0x54 #define ASI_ITLB_DATA_ACCESS_REG 0x55 #define ASI_ITLB_TAG_READ_REG 0x56 +#define ASI_IMMU_DEMAP 0x57 #define ASI_DMMU 0x58 #define ASI_DTLB_DATA_IN_REG 0x5c #define ASI_DTLB_DATA_ACCESS_REG 0x5d