From 0e1b66baddd34593cc80bf340ce7cc65966a840f Mon Sep 17 00:00:00 2001 From: purplerain Date: Thu, 11 Apr 2024 04:04:03 +0000 Subject: [PATCH] sync with OpenBSD -current --- gnu/usr.bin/binutils-2.17/bfd/elf.c | 18 +- sys/dev/pci/drm/amd/amdgpu/amdgpu.h | 1 + sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c | 41 +- sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c | 11 +- .../display/dc/dce110/dce110_hw_sequencer.c | 3 +- .../drm/amd/display/dc/dcn20/dcn20_hwseq.c | 11 +- sys/dev/pci/drm/amd/include/amd_shared.h | 1 + sys/dev/pci/drm/i915/display/intel_cursor.c | 6 +- .../drm/i915/display/intel_display_types.h | 1 + sys/dev/pci/drm/i915/display/intel_fb_pin.c | 10 + .../drm/i915/display/skl_universal_plane.c | 5 +- sys/dev/pci/drm/i915/gem/i915_gem_create.c | 4 +- sys/dev/pci/drm/i915/gt/gen8_engine_cs.c | 10 +- sys/dev/pci/drm/i915/gt/intel_engine_cs.c | 4 +- sys/dev/pci/drm/i915/gt/intel_engine_pm.c | 2 +- .../drm/i915/gt/intel_execlists_submission.c | 4 +- sys/dev/pci/drm/i915/gt/intel_gt.h | 31 ++ sys/dev/pci/drm/i915/gt/intel_gt_mcr.c | 7 +- sys/dev/pci/drm/i915/gt/intel_lrc.c | 38 +- sys/dev/pci/drm/i915/gt/intel_mocs.c | 23 +- sys/dev/pci/drm/i915/gt/intel_rc6.c | 6 +- sys/dev/pci/drm/i915/gt/intel_reset.c | 20 +- sys/dev/pci/drm/i915/gt/intel_reset.h | 2 + sys/dev/pci/drm/i915/gt/intel_rps.c | 2 +- sys/dev/pci/drm/i915/gt/intel_workarounds.c | 392 +++++------------- sys/dev/pci/drm/i915/gt/uc/intel_guc.c | 26 +- .../pci/drm/i915/gt/uc/intel_guc_submission.c | 6 +- sys/dev/pci/drm/i915/i915_debugfs.c | 2 +- sys/dev/pci/drm/i915/i915_drv.h | 4 - sys/dev/pci/drm/i915/i915_perf.c | 11 +- sys/dev/pci/drm/i915/intel_clock_gating.c | 8 - sys/dev/pv/if_vio.c | 117 ++++-- sys/netinet/tcp_debug.c | 43 +- sys/netinet/tcp_input.c | 28 +- 34 files changed, 418 insertions(+), 480 deletions(-) diff --git a/gnu/usr.bin/binutils-2.17/bfd/elf.c b/gnu/usr.bin/binutils-2.17/bfd/elf.c index b23e3a66e..5246b48fd 100644 --- a/gnu/usr.bin/binutils-2.17/bfd/elf.c +++ b/gnu/usr.bin/binutils-2.17/bfd/elf.c @@ -5358,6 +5358,14 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) && ((bfd_vma) s->filepos + s->size \ <= p->p_offset + p->p_filesz)) + /* Special case: OpenBSD pinsyscalls(2) information. */ +#define IS_PINSYSCALL_DATA(p, s) \ + (p->p_type == PT_OPENBSD_SYSCALLS \ + && s->vma == 0 && s->lma == 0 \ + && (bfd_vma) s->filepos >= p->p_offset \ + && ((bfd_vma) s->filepos + s->size \ + <= p->p_offset + p->p_filesz)) + /* The complicated case when p_vaddr is 0 is to handle the Solaris linker, which generates a PT_INTERP section with p_vaddr and p_memsz set to 0. */ @@ -5376,7 +5384,7 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) A section will be included if: 1. It is within the address space of the segment -- we use the LMA if that is set for the segment and the VMA otherwise, - 2. It is an allocated segment, + 2. It is an allocated segment, or part of PT_OPENBSD_SYSCALLS, 3. There is an output section associated with it, 4. The section has not already been allocated to a previous segment. 5. PT_GNU_STACK segments do not include any sections. @@ -5389,7 +5397,8 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) ? IS_CONTAINED_BY_LMA (section, segment, segment->p_paddr) \ : IS_CONTAINED_BY_VMA (section, segment)) \ && (section->flags & SEC_ALLOC) != 0) \ - || IS_COREFILE_NOTE (segment, section)) \ + || IS_COREFILE_NOTE (segment, section) \ + || IS_PINSYSCALL_DATA (segment, section)) \ && section->output_section != NULL \ && segment->p_type != PT_GNU_STACK \ && (segment->p_type != PT_TLS \ @@ -5655,6 +5664,7 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) LMA address of the output section. */ if (IS_CONTAINED_BY_LMA (output_section, segment, map->p_paddr) || IS_COREFILE_NOTE (segment, section) + || IS_PINSYSCALL_DATA (segment, section) || (bed->want_p_paddr_set_to_zero && IS_CONTAINED_BY_VMA (output_section, segment)) ) @@ -5751,7 +5761,8 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) BFD_ASSERT (output_section != NULL); if (IS_CONTAINED_BY_LMA (output_section, segment, map->p_paddr) - || IS_COREFILE_NOTE (segment, section)) + || IS_COREFILE_NOTE (segment, section) + || IS_PINSYSCALL_DATA (segment, section)) { if (map->count == 0) { @@ -5868,6 +5879,7 @@ rewrite_elf_program_header (bfd *ibfd, bfd *obfd) #undef IS_CONTAINED_BY_VMA #undef IS_CONTAINED_BY_LMA #undef IS_COREFILE_NOTE +#undef IS_PINSYSCALL_DATA #undef IS_SOLARIS_PT_INTERP #undef INCLUDE_SECTION_IN_SEGMENT #undef SEGMENT_AFTER_SEGMENT diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu.h b/sys/dev/pci/drm/amd/amdgpu/amdgpu.h index afac02445..38a424f16 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu.h +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu.h @@ -1398,6 +1398,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c index 7901aeb4d..1363d4c36 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_device.c @@ -1568,6 +1568,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, } else { pr_info("switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); amdgpu_device_cache_pci_state(pdev); /* Shut down the device */ @@ -4205,6 +4206,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) /* * Suspend & resume. */ +/** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int i, r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->prepare_suspend) + continue; + r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + if (r) + return r; + } + + return 0; +} + /** * amdgpu_device_suspend - initiate device suspend * @@ -4230,11 +4266,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) adev->in_suspend = true; - /* Evict the majority of BOs before grabbing the full access */ - r = amdgpu_device_evict_resources(adev); - if (r) - return r; - if (amdgpu_sriov_vf(adev)) { amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); diff --git a/sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c b/sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c index cbee3cc31..95d545522 100644 --- a/sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c +++ b/sys/dev/pci/drm/amd/amdgpu/amdgpu_drv.c @@ -2393,8 +2393,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2403,7 +2404,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev) @@ -2605,6 +2606,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_boco(drm_dev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_prepare(drm_dev); + if (ret) + return ret; ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; @@ -3667,6 +3671,7 @@ amdgpu_activate(struct device *self, int act) switch (act) { case DVACT_QUIESCE: rv = config_activate_children(self, act); + amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); break; case DVACT_SUSPEND: diff --git a/sys/dev/pci/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/sys/dev/pci/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index f035a0732..cdb63b90e 100644 --- a/sys/dev/pci/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/sys/dev/pci/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1179,9 +1179,10 @@ void dce110_disable_stream(struct pipe_ctx *pipe_ctx) dto_params.timing = &pipe_ctx->stream->timing; dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; if (dccg) { - dccg->funcs->set_dtbclk_dto(dccg, &dto_params); dccg->funcs->disable_symclk32_se(dccg, dp_hpo_inst); dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst, dp_hpo_inst); + if (dccg && dccg->funcs->set_dtbclk_dto) + dccg->funcs->set_dtbclk_dto(dccg, &dto_params); } } else if (dccg && dccg->funcs->disable_symclk_se) { dccg->funcs->disable_symclk_se(dccg, stream_enc->stream_enc_inst, diff --git a/sys/dev/pci/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/sys/dev/pci/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 1e3803739..12af28590 100644 --- a/sys/dev/pci/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/sys/dev/pci/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2728,18 +2728,17 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx) } if (dc->link_srv->dp_is_128b_132b_signal(pipe_ctx)) { - dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; - dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); - - phyd32clk = get_phyd32clk_src(link); - dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); - dto_params.otg_inst = tg->inst; dto_params.pixclk_khz = pipe_ctx->stream->timing.pix_clk_100hz / 10; dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx); dto_params.timing = &pipe_ctx->stream->timing; dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr); dccg->funcs->set_dtbclk_dto(dccg, &dto_params); + dp_hpo_inst = pipe_ctx->stream_res.hpo_dp_stream_enc->inst; + dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst, dp_hpo_inst); + + phyd32clk = get_phyd32clk_src(link); + dccg->funcs->enable_symclk32_se(dccg, dp_hpo_inst, phyd32clk); } else { } if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { diff --git a/sys/dev/pci/drm/amd/include/amd_shared.h b/sys/dev/pci/drm/amd/include/amd_shared.h index e67d7d704..56e7d3d60 100644 --- a/sys/dev/pci/drm/amd/include/amd_shared.h +++ b/sys/dev/pci/drm/amd/include/amd_shared.h @@ -295,6 +295,7 @@ struct amd_ip_funcs { int (*hw_init)(void *handle); int (*hw_fini)(void *handle); void (*late_fini)(void *handle); + int (*prepare_suspend)(void *handle); int (*suspend)(void *handle); int (*resume)(void *handle); bool (*is_idle)(void *handle); diff --git a/sys/dev/pci/drm/i915/display/intel_cursor.c b/sys/dev/pci/drm/i915/display/intel_cursor.c index b342fad18..61df6cd3f 100644 --- a/sys/dev/pci/drm/i915/display/intel_cursor.c +++ b/sys/dev/pci/drm/i915/display/intel_cursor.c @@ -23,6 +23,8 @@ #include "intel_psr.h" #include "skl_watermark.h" +#include "gem/i915_gem_object.h" + /* Cursor formats */ static const u32 intel_cursor_formats[] = { DRM_FORMAT_ARGB8888, @@ -32,12 +34,10 @@ static u32 intel_cursor_base(const struct intel_plane_state *plane_state) { struct drm_i915_private *dev_priv = to_i915(plane_state->uapi.plane->dev); - const struct drm_framebuffer *fb = plane_state->hw.fb; - const struct drm_i915_gem_object *obj = intel_fb_obj(fb); u32 base; if (DISPLAY_INFO(dev_priv)->cursor_needs_physical) - base = sg_dma_address(obj->mm.pages->sgl); + base = plane_state->phys_dma_addr; else base = intel_plane_ggtt_offset(plane_state); diff --git a/sys/dev/pci/drm/i915/display/intel_display_types.h b/sys/dev/pci/drm/i915/display/intel_display_types.h index 310d718f7..50fa55381 100644 --- a/sys/dev/pci/drm/i915/display/intel_display_types.h +++ b/sys/dev/pci/drm/i915/display/intel_display_types.h @@ -701,6 +701,7 @@ struct intel_plane_state { #define PLANE_HAS_FENCE BIT(0) struct intel_fb_view view; + u32 phys_dma_addr; /* for cursor_needs_physical */ /* Plane pxp decryption state */ bool decrypt; diff --git a/sys/dev/pci/drm/i915/display/intel_fb_pin.c b/sys/dev/pci/drm/i915/display/intel_fb_pin.c index fffd56807..a13165675 100644 --- a/sys/dev/pci/drm/i915/display/intel_fb_pin.c +++ b/sys/dev/pci/drm/i915/display/intel_fb_pin.c @@ -254,6 +254,16 @@ int intel_plane_pin_fb(struct intel_plane_state *plane_state) return PTR_ERR(vma); plane_state->ggtt_vma = vma; + + /* + * Pre-populate the dma address before we enter the vblank + * evade critical section as i915_gem_object_get_dma_address() + * will trigger might_sleep() even if it won't actually sleep, + * which is the case when the fb has already been pinned. + */ + if (phys_cursor) + plane_state->phys_dma_addr = + i915_gem_object_get_dma_address(intel_fb_obj(fb), 0); } else { struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); diff --git a/sys/dev/pci/drm/i915/display/skl_universal_plane.c b/sys/dev/pci/drm/i915/display/skl_universal_plane.c index ffc15d278..d557ecd4e 100644 --- a/sys/dev/pci/drm/i915/display/skl_universal_plane.c +++ b/sys/dev/pci/drm/i915/display/skl_universal_plane.c @@ -20,6 +20,7 @@ #include "skl_scaler.h" #include "skl_universal_plane.h" #include "skl_watermark.h" +#include "gt/intel_gt.h" #include "pxp/intel_pxp.h" static const u32 skl_plane_formats[] = { @@ -2169,8 +2170,8 @@ static bool skl_plane_has_rc_ccs(struct drm_i915_private *i915, enum pipe pipe, enum plane_id plane_id) { /* Wa_14017240301 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(to_gt(i915), IP_VER(12, 71), STEP_A0, STEP_B0)) return false; /* Wa_22011186057 */ diff --git a/sys/dev/pci/drm/i915/gem/i915_gem_create.c b/sys/dev/pci/drm/i915/gem/i915_gem_create.c index d24c0ce88..19156ba4b 100644 --- a/sys/dev/pci/drm/i915/gem/i915_gem_create.c +++ b/sys/dev/pci/drm/i915/gem/i915_gem_create.c @@ -405,8 +405,8 @@ static int ext_set_pat(struct i915_user_extension __user *base, void *data) BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) != offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd)); - /* Limiting the extension only to Meteor Lake */ - if (!IS_METEORLAKE(i915)) + /* Limiting the extension only to Xe_LPG and beyond */ + if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 70)) return -ENODEV; if (copy_from_user(&ext, base, sizeof(ext))) diff --git a/sys/dev/pci/drm/i915/gt/gen8_engine_cs.c b/sys/dev/pci/drm/i915/gt/gen8_engine_cs.c index 7ad36198a..cddf8c16e 100644 --- a/sys/dev/pci/drm/i915/gt/gen8_engine_cs.c +++ b/sys/dev/pci/drm/i915/gt/gen8_engine_cs.c @@ -4,9 +4,9 @@ */ #include "gen8_engine_cs.h" -#include "i915_drv.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || + IS_DG2(rq->i915)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -808,6 +808,7 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | @@ -818,8 +819,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) PIPE_CONTROL_FLUSH_ENABLE); /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c index 70a2557d3..8dab7b76f 100644 --- a/sys/dev/pci/drm/i915/gt/intel_engine_cs.c +++ b/sys/dev/pci/drm/i915/gt/intel_engine_cs.c @@ -1620,9 +1620,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/sys/dev/pci/drm/i915/gt/intel_engine_pm.c b/sys/dev/pci/drm/i915/gt/intel_engine_pm.c index a95615b34..5a3a5b29d 100644 --- a/sys/dev/pci/drm/i915/gt/intel_engine_pm.c +++ b/sys/dev/pci/drm/i915/gt/intel_engine_pm.c @@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (IS_METEORLAKE(i915) && engine->id == GSC0) { + if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { intel_uncore_write(engine->gt->uncore, RC_PSMI_CTRL_GSCCS, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); diff --git a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c index c3afa0084..8bb1609b1 100644 --- a/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c +++ b/sys/dev/pci/drm/i915/gt/intel_execlists_submission.c @@ -3006,9 +3006,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); diff --git a/sys/dev/pci/drm/i915/gt/intel_gt.h b/sys/dev/pci/drm/i915/gt/intel_gt.h index 6c34547b5..6e63b4668 100644 --- a/sys/dev/pci/drm/i915/gt/intel_gt.h +++ b/sys/dev/pci/drm/i915/gt/intel_gt.h @@ -14,6 +14,37 @@ struct drm_i915_private; struct drm_printer; +/* + * Check that the GT is a graphics GT and has an IP version within the + * specified range (inclusive). + */ +#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt)->type != GT_MEDIA && \ + GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ + GRAPHICS_VER_FULL((gt)->i915) <= (until))) + +/* + * Check that the GT is a graphics GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. E.g., + * + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) + * + * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper + * stepping bound for the specified IP version. + */ +#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ diff --git a/sys/dev/pci/drm/i915/gt/intel_gt_mcr.c b/sys/dev/pci/drm/i915/gt/intel_gt_mcr.c index 91e4f9746..0ef2f046a 100644 --- a/sys/dev/pci/drm/i915/gt/intel_gt_mcr.c +++ b/sys/dev/pci/drm/i915/gt/intel_gt_mcr.c @@ -3,8 +3,7 @@ * Copyright © 2022 Intel Corporation */ -#include "i915_drv.h" - +#include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -166,8 +165,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { /* Wa_14016747170 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, intel_uncore_read(gt->uncore, MTL_GT_ACTIVITY_FACTOR)); diff --git a/sys/dev/pci/drm/i915/gt/intel_lrc.c b/sys/dev/pci/drm/i915/gt/intel_lrc.c index 0566c6749..881c3e58d 100644 --- a/sys/dev/pci/drm/i915/gt/intel_lrc.c +++ b/sys/dev/pci/drm/i915/gt/intel_lrc.c @@ -1321,29 +1321,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) return cs; } -/* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - /* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the @@ -1368,21 +1345,15 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); cs = gen12_emit_aux_table_inv(ce->engine, cs); /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); @@ -1396,8 +1367,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, diff --git a/sys/dev/pci/drm/i915/gt/intel_mocs.c b/sys/dev/pci/drm/i915/gt/intel_mocs.c index 2c0144072..07269ff3b 100644 --- a/sys/dev/pci/drm/i915/gt/intel_mocs.c +++ b/sys/dev/pci/drm/i915/gt/intel_mocs.c @@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct drm_i915_mocs_entry pvc_mocs_table[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(&i915->gt0, IP_VER(12, 70), IP_VER(12, 71))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->wb_index = 2; table->unused_entries_index = 2; } else if (IS_DG2(i915)) { - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); - table->table = dg2_mocs_table_g10_ax; - } else { - table->size = ARRAY_SIZE(dg2_mocs_table); - table->table = dg2_mocs_table; - } + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; diff --git a/sys/dev/pci/drm/i915/gt/intel_rc6.c b/sys/dev/pci/drm/i915/gt/intel_rc6.c index ccdc1afbf..9e113e947 100644 --- a/sys/dev/pci/drm/i915/gt/intel_rc6.c +++ b/sys/dev/pci/drm/i915/gt/intel_rc6.c @@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_EI_MODE(1); /* - * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * BSpec 52698 - Render powergating must be off. * FIXME BSpec is outdated, disabling powergating for MTL is just * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915) || - IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; diff --git a/sys/dev/pci/drm/i915/gt/intel_reset.c b/sys/dev/pci/drm/i915/gt/intel_reset.c index a099dd676..eb8c90aad 100644 --- a/sys/dev/pci/drm/i915/gt/intel_reset.c +++ b/sys/dev/pci/drm/i915/gt/intel_reset.c @@ -705,7 +705,7 @@ static int __reset_guc(struct intel_gt *gt) static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) { - if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) return false; if (!__HAS_ENGINE(engine_mask, GSC0)) @@ -1646,6 +1646,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) w->gt = NULL; } +/* + * Wa_22011802037 requires that we (or the GuC) ensure that no command + * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. + */ +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) < 11) + return false; + + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) + return true; + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" #include "selftest_hangcheck.c" diff --git a/sys/dev/pci/drm/i915/gt/intel_reset.h b/sys/dev/pci/drm/i915/gt/intel_reset.h index 25c975b6e..f615b30b8 100644 --- a/sys/dev/pci/drm/i915/gt/intel_reset.h +++ b/sys/dev/pci/drm/i915/gt/intel_reset.h @@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); + #endif /* I915_RESET_H */ diff --git a/sys/dev/pci/drm/i915/gt/intel_rps.c b/sys/dev/pci/drm/i915/gt/intel_rps.c index 37f70f034..1f90be957 100644 --- a/sys/dev/pci/drm/i915/gt/intel_rps.c +++ b/sys/dev/pci/drm/i915/gt/intel_rps.c @@ -1163,7 +1163,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c { struct drm_i915_private *i915 = rps_to_i915(rps); - if (IS_METEORLAKE(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) return mtl_get_freq_caps(rps, caps); else return __gen6_rps_get_freq_caps(rps, caps); diff --git a/sys/dev/pci/drm/i915/gt/intel_workarounds.c b/sys/dev/pci/drm/i915/gt/intel_workarounds.c index 3ae0dbd39..0ea52f77b 100644 --- a/sys/dev/pci/drm/i915/gt/intel_workarounds.c +++ b/sys/dev/pci/drm/i915/gt/intel_workarounds.c @@ -764,39 +764,15 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, { dg2_ctx_gt_tuning_init(engine, wal); - /* Wa_16011186671:dg2_g11 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); - } - - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010469329:dg2_g10 */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); - - /* - * Wa_22010465075:dg2_g10 - * Wa_22010613112:dg2_g10 - * Wa_14010698770:dg2_g10 - */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - } - /* Wa_16013271637:dg2 */ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_18018764978:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); + wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_15010599737:dg2 */ wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); @@ -805,27 +781,32 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } -static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; dg2_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + /* + * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in + * gen12_emit_indirect_ctx_rcs() rather than here on some early + * steppings. + */ + if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } -static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - mtl_ctx_gt_tuning_init(engine, wal); + xelpg_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014947963 */ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); @@ -931,8 +912,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_METEORLAKE(i915)) - mtl_ctx_workarounds_init(engine, wal); + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) + xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) @@ -1606,31 +1587,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - struct intel_engine_cs *engine; - int id; - xehp_init_mcr(gt, wal); /* Wa_14011060649:dg2 */ wa_14011060649(gt, wal); - /* - * Although there are per-engine instances of these registers, - * they technically exist outside the engine itself and are not - * impacted by engine resets. Furthermore, they're part of the - * GuC blacklist so trying to treat them as engine workarounds - * will result in GuC initialization failure and a wedged GPU. - */ - for_each_engine(engine, gt, id) { - if (engine->class != VIDEO_DECODE_CLASS) - continue; - - /* Wa_16010515920:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), - ALNUNIT_CLKGATE_DIS); - } - if (IS_DG2_G10(gt->i915)) { /* Wa_22010523718:dg2 */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1641,65 +1602,6 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) DSS_ROUTER_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010948348:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); - - /* Wa_14011037102:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); - - /* Wa_14011371254:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); - - /* Wa_14011431319:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - - /* Wa_14010569222:dg2_g10 */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - GAMEDIA_CLKGATE_DIS); - - /* Wa_14011028019:dg2_g10 */ - wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); - - /* Wa_14010680813:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS); - } - /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -1741,14 +1643,15 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - /* Wa_14018778641 / Wa_18018781329 */ + /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -1791,10 +1694,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(gt->i915)) { - if (gt->type != GT_MEDIA) - wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1826,7 +1727,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2242,29 +2143,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: - /* - * Wa_1507100340:dg2_g10 - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); break; - case COMPUTE_CLASS: - /* Wa_16011157294:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - break; default: break; } @@ -2294,7 +2176,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) blacklist_trtt(engine); } -static void mtl_whitelist_build(struct intel_engine_cs *engine) +static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2316,8 +2198,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_METEORLAKE(i915)) - mtl_whitelist_build(engine); + if (engine->gt->type == GT_MEDIA) + ; /* none yet */ + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) + xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) @@ -2415,62 +2299,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } -static bool needs_wa_1308578152(struct intel_engine_cs *engine) -{ - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= - GEN_DSS_PER_GSLICE; -} - static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_22014600077 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915) || - IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); } - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14013392000:dg2_g11 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012419201:dg2 */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); - } - - /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && - needs_wa_1308578152(engine)) { - wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, - GEN12_REPLAY_MODE_GRANULARITY); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_DG2(i915)) { /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2479,34 +2336,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_1608949956:dg2_g10 - * Wa_14010198302:dg2_g10 - */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - /* Wa_22010430635:dg2 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); - - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || - IS_DG2_G10(i915)) { + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), @@ -2514,6 +2344,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) true); } + if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ @@ -2527,19 +2370,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - } - if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* - * Wa_1606700617:tgl,dg1,adl-p - * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p - * Wa_14010826681:tgl,dg1,rkl,adl-p - * Wa_18019627453:dg2 - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || @@ -2566,14 +2401,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_mcr_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2975,10 +2802,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * function invoked by __intel_engine_init_ctx_wa(). */ static void -add_render_compute_tuning_settings(struct drm_i915_private *i915, +add_render_compute_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(i915) || IS_DG2(i915)) + struct drm_i915_private *i915 = gt->i915; + + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -3007,8 +2836,9 @@ static void general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - add_render_compute_tuning_settings(i915, wal); + add_render_compute_tuning_settings(gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as @@ -3029,13 +2859,14 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* * Wa_14017066071 * Wa_14017654203 @@ -3043,37 +2874,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); - if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* Wa_22015279794 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_PREFETCH_INTO_IC); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2(i915)) { - /* Wa_18017747507 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(i915)) { + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + + if (IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -3085,18 +2926,18 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013059131:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT); - } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* - * Wa_14010918519:dg2_g10 + * Wa_22012654132 * - * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, - * so ignoring verification. + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. */ - wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); } if (IS_XEHPSDV(i915)) { @@ -3114,35 +2955,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - - if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { - /* Wa_14015227452:dg2,pvc */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { - /* - * Wa_16011620976:dg2_g11 - * Wa_22015475538:dg2 - */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) - /* - * Wa_22012654132 - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } static void diff --git a/sys/dev/pci/drm/i915/gt/uc/intel_guc.c b/sys/dev/pci/drm/i915/gt/uc/intel_guc.c index 633aa606e..c18cee961 100644 --- a/sys/dev/pci/drm/i915/gt/uc/intel_guc.c +++ b/sys/dev/pci/drm/i915/gt/uc/intel_guc.c @@ -272,18 +272,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253:dg2_g10:a0 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* - * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 - * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * Wa_14012197797 + * Wa_22011391025 * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. @@ -292,22 +288,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(gt)) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198:dg2 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) - * Wa_22012727685:dg2_g11[a0..) + * Wa_22012727170 + * Wa_22012727685 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438 */ diff --git a/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c b/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c index 2723b7af5..2c9de9a67 100644 --- a/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c +++ b/sys/dev/pci/drm/i915/gt/uc/intel_guc_submission.c @@ -1690,9 +1690,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { intel_engine_stop_cs(engine); intel_engine_wait_for_pending_mi_fw(engine); } @@ -4299,7 +4297,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) /* Wa_14014475959:dg2 */ if (engine->class == COMPUTE_CLASS) - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; diff --git a/sys/dev/pci/drm/i915/i915_debugfs.c b/sys/dev/pci/drm/i915/i915_debugfs.c index 4de44cf10..7a90a2e32 100644 --- a/sys/dev/pci/drm/i915/i915_debugfs.c +++ b/sys/dev/pci/drm/i915/i915_debugfs.c @@ -144,7 +144,7 @@ static const char *i915_cache_level_str(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = obj_to_i915(obj); - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { switch (obj->pat_index) { case 0: return " WB"; case 1: return " WT"; diff --git a/sys/dev/pci/drm/i915/i915_drv.h b/sys/dev/pci/drm/i915/i915_drv.h index f2ccc7dcb..577617ead 100644 --- a/sys/dev/pci/drm/i915/i915_drv.h +++ b/sys/dev/pci/drm/i915/i915_drv.h @@ -735,10 +735,6 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_XEHPSDV_GRAPHICS_STEP(__i915, since, until) \ (IS_XEHPSDV(__i915) && IS_GRAPHICS_STEP(__i915, since, until)) -#define IS_MTL_GRAPHICS_STEP(__i915, variant, since, until) \ - (IS_SUBPLATFORM(__i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_##variant) && \ - IS_GRAPHICS_STEP(__i915, since, until)) - #define IS_MTL_DISPLAY_STEP(__i915, since, until) \ (IS_METEORLAKE(__i915) && \ IS_DISPLAY_STEP(__i915, since, until)) diff --git a/sys/dev/pci/drm/i915/i915_perf.c b/sys/dev/pci/drm/i915/i915_perf.c index 92a6050cd..523681070 100644 --- a/sys/dev/pci/drm/i915/i915_perf.c +++ b/sys/dev/pci/drm/i915/i915_perf.c @@ -3285,11 +3285,10 @@ get_sseu_config(struct intel_sseu *out_sseu, */ u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915) { - /* - * Wa_18013179988:dg2 - * Wa_14015846243:mtl - */ - if (IS_DG2(i915) || IS_METEORLAKE(i915)) { + struct intel_gt *gt = to_gt(i915); + + /* Wa_18013179988 */ + if (IS_DG2(i915) || IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { intel_wakeref_t wakeref; u32 reg, shift; @@ -4607,7 +4606,7 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - if (IS_METEORLAKE(perf->i915)) + if (GRAPHICS_VER_FULL(perf->i915) >= IP_VER(12, 70)) return reg_in_range_table(addr, mtl_oa_mux_regs); else return reg_in_range_table(addr, gen12_oa_mux_regs); diff --git a/sys/dev/pci/drm/i915/intel_clock_gating.c b/sys/dev/pci/drm/i915/intel_clock_gating.c index 81a4d3273..c66eb6abd 100644 --- a/sys/dev/pci/drm/i915/intel_clock_gating.c +++ b/sys/dev/pci/drm/i915/intel_clock_gating.c @@ -396,14 +396,6 @@ static void dg2_init_clock_gating(struct drm_i915_private *i915) /* Wa_22010954014:dg2 */ intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS); - - /* - * Wa_14010733611:dg2_g10 - * Wa_22010146351:dg2_g10 - */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - intel_uncore_rmw(&i915->uncore, XEHP_CLOCK_GATE_DIS, 0, - SGR_DIS | SGGI_DIS); } static void pvc_init_clock_gating(struct drm_i915_private *i915) diff --git a/sys/dev/pv/if_vio.c b/sys/dev/pv/if_vio.c index db399e719..9ad8e36e3 100644 --- a/sys/dev/pv/if_vio.c +++ b/sys/dev/pv/if_vio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_vio.c,v 1.31 2024/02/14 22:41:48 bluhm Exp $ */ +/* $OpenBSD: if_vio.c,v 1.32 2024/04/10 19:55:50 jan Exp $ */ /* * Copyright (c) 2012 Stefan Fritsch, Alexander Fiveg. @@ -43,12 +43,12 @@ #include #include +#include -#include #include -#include -#include #include +#include +#include #include #if NBPFILTER > 0 @@ -537,6 +537,9 @@ vio_attach(struct device *parent, struct device *self, void *aux) VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_CSUM | VIRTIO_F_RING_EVENT_IDX | VIRTIO_NET_F_GUEST_CSUM; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO4; + vsc->sc_driver_features |= VIRTIO_NET_F_HOST_TSO6; + virtio_negotiate_features(vsc, virtio_net_feature_names); if (virtio_has_feature(vsc, VIRTIO_NET_F_MAC)) { vio_get_lladdr(&sc->sc_ac, vsc); @@ -553,9 +556,9 @@ vio_attach(struct device *parent, struct device *self, void *aux) sc->sc_hdr_size = offsetof(struct virtio_net_hdr, num_buffers); } if (virtio_has_feature(vsc, VIRTIO_NET_F_MRG_RXBUF)) - ifp->if_hardmtu = 16000; /* arbitrary limit */ + ifp->if_hardmtu = MAXMCLBYTES; else - ifp->if_hardmtu = MCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN; + ifp->if_hardmtu = MAXMCLBYTES - sc->sc_hdr_size - ETHER_HDR_LEN; if (virtio_alloc_vq(vsc, &sc->sc_vq[VQRX], 0, MCLBYTES, 2, "rx") != 0) goto err; @@ -595,6 +598,10 @@ vio_attach(struct device *parent, struct device *self, void *aux) if (virtio_has_feature(vsc, VIRTIO_NET_F_CSUM)) ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4| IFCAP_CSUM_TCPv6|IFCAP_CSUM_UDPv6; + if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO4)) + ifp->if_capabilities |= IFCAP_TSOv4; + if (virtio_has_feature(vsc, VIRTIO_NET_F_HOST_TSO6)) + ifp->if_capabilities |= IFCAP_TSOv6; ifq_init_maxlen(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1); ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status); ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL); @@ -714,6 +721,85 @@ vio_stop(struct ifnet *ifp, int disable) } } +static inline uint16_t +vio_cksum_update(uint32_t cksum, uint16_t paylen) +{ + /* Add payload length */ + cksum += paylen; + + /* Fold back to 16 bit */ + cksum += cksum >> 16; + + return (uint16_t)(cksum); +} + +void +vio_tx_offload(struct virtio_net_hdr *hdr, struct mbuf *m) +{ + struct ether_extracted ext; + + /* + * Checksum Offload + */ + + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT) && + !ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT)) + return; + + ether_extract_headers(m, &ext); + + /* Consistency Checks */ + if ((!ext.ip4 && !ext.ip6) || (!ext.tcp && !ext.udp)) + return; + + if ((ext.tcp && !ISSET(m->m_pkthdr.csum_flags, M_TCP_CSUM_OUT)) || + (ext.udp && !ISSET(m->m_pkthdr.csum_flags, M_UDP_CSUM_OUT))) + return; + + hdr->csum_start = sizeof(*ext.eh); +#if NVLAN > 0 + if (ext.evh) + hdr->csum_start = sizeof(*ext.evh); +#endif + hdr->csum_start += ext.iphlen; + + if (ext.tcp) + hdr->csum_offset = offsetof(struct tcphdr, th_sum); + else if (ext.udp) + hdr->csum_offset = offsetof(struct udphdr, uh_sum); + + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + + /* + * TCP Segmentation Offload + */ + + if (!ISSET(m->m_pkthdr.csum_flags, M_TCP_TSO)) + return; + + if (!ext.tcp) { + tcpstat_inc(tcps_outbadtso); + return; + } + + hdr->hdr_len = hdr->csum_start + ext.tcphlen; + hdr->gso_size = m->m_pkthdr.ph_mss; + + if (ext.ip4) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; +#ifdef INET6 + else if (ext.ip6) + hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; +#endif + + /* VirtIO-Net need pseudo header cksum with IP-payload length for TSO */ + ext.tcp->th_sum = vio_cksum_update(ext.tcp->th_sum, + htons(ext.iplen - ext.iphlen)); + + tcpstat_add(tcps_outpkttso, + (ext.paylen + m->m_pkthdr.ph_mss - 1) / m->m_pkthdr.ph_mss); +} + void vio_start(struct ifnet *ifp) { @@ -750,24 +836,7 @@ again: hdr = &sc->sc_tx_hdrs[slot]; memset(hdr, 0, sc->sc_hdr_size); - if (m->m_pkthdr.csum_flags & (M_TCP_CSUM_OUT|M_UDP_CSUM_OUT)) { - struct ether_extracted ext; - - ether_extract_headers(m, &ext); - hdr->csum_start = sizeof(*ext.eh); -#if NVLAN > 0 - if (ext.evh) - hdr->csum_start = sizeof(*ext.evh); -#endif - if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) - hdr->csum_offset = offsetof(struct tcphdr, th_sum); - else - hdr->csum_offset = offsetof(struct udphdr, uh_sum); - - if (ext.ip4 || ext.ip6) - hdr->csum_start += ext.iphlen; - hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - } + vio_tx_offload(hdr, m); r = vio_encap(sc, slot, m); if (r != 0) { diff --git a/sys/netinet/tcp_debug.c b/sys/netinet/tcp_debug.c index ae784dcec..751e69a89 100644 --- a/sys/netinet/tcp_debug.c +++ b/sys/netinet/tcp_debug.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_debug.c,v 1.31 2024/01/11 13:49:49 bluhm Exp $ */ +/* $OpenBSD: tcp_debug.c,v 1.32 2024/04/10 22:24:07 bluhm Exp $ */ /* $NetBSD: tcp_debug.c,v 1.10 1996/02/13 23:43:36 christos Exp $ */ /* @@ -79,6 +79,7 @@ #include #include #include +#include #include #include @@ -98,12 +99,23 @@ #include #endif /* INET6 */ +#ifdef TCPDEBUG +#include +#endif + +/* + * Locks used to protect struct members in this file: + * D TCP debug global mutex + */ + +struct mutex tcp_debug_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); + #ifdef TCPDEBUG int tcpconsdebug = 0; #endif -struct tcp_debug tcp_debug[TCP_NDEBUG]; -int tcp_debx; +struct tcp_debug tcp_debug[TCP_NDEBUG]; /* [D] */ +int tcp_debx; /* [D] */ /* * Tcp debug routines @@ -113,14 +125,20 @@ tcp_trace(short act, short ostate, struct tcpcb *tp, struct tcpcb *otp, caddr_t headers, int req, int len) { #ifdef TCPDEBUG + struct tcphdr *th; tcp_seq seq, ack; int flags; #endif int pf = PF_UNSPEC; - struct tcp_debug *td = &tcp_debug[tcp_debx++]; - struct tcpiphdr *ti = (struct tcpiphdr *)headers; - struct tcpipv6hdr *ti6 = (struct tcpipv6hdr *)headers; - struct tcphdr *th; + struct tcp_debug *td; + struct tcpiphdr *ti; + struct tcpipv6hdr *ti6; + + mtx_enter(&tcp_debug_mtx); + + td = &tcp_debug[tcp_debx++]; + ti = (struct tcpiphdr *)headers; + ti6 = (struct tcpipv6hdr *)headers; if (tcp_debx == TCP_NDEBUG) tcp_debx = 0; @@ -153,13 +171,17 @@ tcp_trace(short act, short ostate, struct tcpcb *tp, struct tcpcb *otp, switch (pf) { #ifdef INET6 case PF_INET6: +#ifdef TCPDEBUG th = &ti6->ti6_t; +#endif td->td_ti6 = *ti6; td->td_ti6.ti6_plen = len; break; #endif /* INET6 */ case PF_INET: +#ifdef TCPDEBUG th = &ti->ti_t; +#endif td->td_ti = *ti; td->td_ti.ti_len = len; break; @@ -172,7 +194,7 @@ tcp_trace(short act, short ostate, struct tcpcb *tp, struct tcpcb *otp, td->td_req = req; #ifdef TCPDEBUG if (tcpconsdebug == 0) - return; + goto done; if (otp) printf("%p %s:", otp, tcpstates[ostate]); else @@ -218,11 +240,14 @@ tcp_trace(short act, short ostate, struct tcpcb *tp, struct tcpcb *otp, /* print out internal state of tp !?! */ printf("\n"); if (tp == NULL) - return; + goto done; printf("\trcv_(nxt,wnd,up) (%x,%lx,%x) snd_(una,nxt,max) (%x,%x,%x)\n", tp->rcv_nxt, tp->rcv_wnd, tp->rcv_up, tp->snd_una, tp->snd_nxt, tp->snd_max); printf("\tsnd_(wl1,wl2,wnd) (%x,%x,%lx)\n", tp->snd_wl1, tp->snd_wl2, tp->snd_wnd); + + done: #endif /* TCPDEBUG */ + mtx_leave(&tcp_debug_mtx); } diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index a828508b1..7bd9e07e3 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_input.c,v 1.401 2024/02/13 12:22:09 bluhm Exp $ */ +/* $OpenBSD: tcp_input.c,v 1.402 2024/04/10 22:10:03 bluhm Exp $ */ /* $NetBSD: tcp_input.c,v 1.23 1996/02/13 23:43:44 christos Exp $ */ /* @@ -100,8 +100,6 @@ #include #endif -struct tcpiphdr tcp_saveti; - int tcp_mss_adv(struct mbuf *, int); int tcp_flush_queue(struct tcpcb *); @@ -109,8 +107,6 @@ int tcp_flush_queue(struct tcpcb *); #include #include -struct tcpipv6hdr tcp_saveti6; - /* for the packet header length in the mbuf */ #define M_PH_LEN(m) (((struct mbuf *)(m))->m_pkthdr.len) #define M_V6_LEN(m) (M_PH_LEN(m) - sizeof(struct ip6_hdr)) @@ -373,7 +369,13 @@ tcp_input(struct mbuf **mp, int *offp, int proto, int af) int todrop, acked, ourfinisacked; int hdroptlen = 0; short ostate; - caddr_t saveti; + union { + struct tcpiphdr tcpip; +#ifdef INET6 + struct tcpipv6hdr tcpip6; +#endif + char caddr; + } saveti; tcp_seq iss, *reuse = NULL; uint64_t now; u_long tiwin; @@ -672,15 +674,13 @@ findpcb: switch (af) { #ifdef INET6 case AF_INET6: - saveti = (caddr_t) &tcp_saveti6; - memcpy(&tcp_saveti6.ti6_i, ip6, sizeof(*ip6)); - memcpy(&tcp_saveti6.ti6_t, th, sizeof(*th)); + saveti.tcpip6.ti6_i = *ip6; + saveti.tcpip6.ti6_t = *th; break; #endif case AF_INET: - saveti = (caddr_t) &tcp_saveti; - memcpy(&tcp_saveti.ti_i, ip, sizeof(*ip)); - memcpy(&tcp_saveti.ti_t, th, sizeof(*th)); + memcpy(&saveti.tcpip.ti_i, ip, sizeof(*ip)); + saveti.tcpip.ti_t = *th; break; } } @@ -2031,7 +2031,7 @@ dodata: /* XXX */ } } if (otp) - tcp_trace(TA_INPUT, ostate, tp, otp, saveti, 0, tlen); + tcp_trace(TA_INPUT, ostate, tp, otp, &saveti.caddr, 0, tlen); /* * Return any desired output. @@ -2110,7 +2110,7 @@ drop: * Drop space held by incoming segment and return. */ if (otp) - tcp_trace(TA_DROP, ostate, tp, otp, saveti, 0, tlen); + tcp_trace(TA_DROP, ostate, tp, otp, &saveti.caddr, 0, tlen); m_freem(m); in_pcbunref(inp);