Source-Changes-HG archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
[xsrc/xorg]: xsrc/external/mit/MesaLib/dist/src Import mesa 19.1.7
details: https://anonhg.NetBSD.org/xsrc/rev/b7e5e7c8c532
branches: xorg
changeset: 10381:b7e5e7c8c532
user: maya <maya%NetBSD.org@localhost>
date: Tue Sep 24 16:49:45 2019 +0000
description:
Import mesa 19.1.7
New features in mesa 19.1.0:
GL_ARB_parallel_shader_compile on all drivers.
GL_EXT_gpu_shader4 on all GL 3.1 drivers.
GL_EXT_shader_image_load_formatted on radeonsi.
GL_EXT_texture_buffer_object on all GL 3.1 drivers.
GL_EXT_texture_compression_s3tc_srgb on Gallium drivers and i965 (ES extension).
GL_NV_compute_shader_derivatives on iris and i965.
GL_KHR_parallel_shader_compile on all drivers.
VK_EXT_buffer_device_address on Intel and RADV.
VK_EXT_depth_clip_enable on Intel and RADV.
VK_KHR_ycbcr_image_arrays on Intel.
VK_EXT_inline_uniform_block on Intel and RADV.
VK_EXT_external_memory_host on Intel.
VK_EXT_host_query_reset on Intel and RADV.
VK_KHR_surface_protected_capabilities on Intel and RADV.
VK_EXT_pipeline_creation_feedback on Intel and RADV.
VK_KHR_8bit_storage on RADV.
VK_AMD_gpu_shader_int16 on RADV.
VK_AMD_gpu_shader_half_float on RADV.
VK_NV_compute_shader_derivatives on Intel.
VK_KHR_shader_float16_int8 on Intel and RADV (RADV only supports int8).
VK_KHR_shader_atomic_int64 on Intel.
VK_EXT_descriptor_indexing on Intel.
VK_KHR_shader_float16_int8 on Intel and RADV.
GL_INTEL_conservative_rasterization on iris.
VK_EXT_memory_budget on Intel.
New features in mesa 19.0.0:
GL_AMD_texture_texture4 on all GL 4.0 drivers.
GL_EXT_shader_implicit_conversions on all drivers (ES extension).
GL_EXT_texture_compression_bptc on all GL 4.0 drivers (ES extension).
GL_EXT_texture_compression_rgtc on all GL 3.0 drivers (ES extension).
GL_EXT_render_snorm on gallium drivers (ES extension).
GL_EXT_texture_view on drivers supporting texture views (ES extension).
GL_OES_texture_view on drivers supporting texture views (ES extension).
GL_NV_shader_atomic_float on nvc0 (Fermi/Kepler only).
Shader-based software implementations of GL_ARB_gpu_shader_fp64, GL_ARB_gpu_shader_int64, GL_ARB_vertex_attrib_64bit, and GL_ARB_shader_ballot on i965.
VK_ANDROID_external_memory_android_hardware_buffer on Intel
Fixed and re-exposed VK_EXT_pci_bus_info on Intel and RADV
VK_EXT_scalar_block_layout on Intel and RADV
VK_KHR_depth_stencil_resolve on Intel
VK_KHR_draw_indirect_count on Intel
VK_EXT_conditional_rendering on Intel
VK_EXT_memory_budget on RADV
Also, bug fixes.
diffstat:
external/mit/MesaLib/dist/src/freedreno/ir3/ir3_ra.c | 1163 ++
external/mit/MesaLib/dist/src/freedreno/ir3/ir3_sched.c | 928 +
external/mit/MesaLib/dist/src/freedreno/ir3/ir3_shader.c | 472 +
external/mit/MesaLib/dist/src/freedreno/ir3/ir3_shader.h | 674 +
external/mit/MesaLib/dist/src/freedreno/ir3/ir3_sun.c | 111 +
external/mit/MesaLib/dist/src/freedreno/ir3/meson.build | 76 +
external/mit/MesaLib/dist/src/freedreno/registers/a2xx.xml.h | 3017 +++++
external/mit/MesaLib/dist/src/freedreno/registers/a3xx.xml.h | 3239 +++++
external/mit/MesaLib/dist/src/freedreno/registers/a4xx.xml.h | 4257 +++++++
external/mit/MesaLib/dist/src/freedreno/registers/a5xx.xml.h | 5226 +++++++++
external/mit/MesaLib/dist/src/freedreno/registers/a6xx.xml.h | 5772 ++++++++++
external/mit/MesaLib/dist/src/freedreno/registers/adreno_common.xml.h | 536 +
external/mit/MesaLib/dist/src/freedreno/registers/adreno_pm4.xml.h | 1570 ++
external/mit/MesaLib/dist/src/freedreno/registers/update-headers.sh | 14 +
external/mit/MesaLib/dist/src/freedreno/vulkan/.clang-format | 31 +
external/mit/MesaLib/dist/src/freedreno/vulkan/TODO | 1 +
external/mit/MesaLib/dist/src/freedreno/vulkan/meson.build | 145 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_android.c | 382 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_cmd_buffer.c | 2637 ++++
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_cs.c | 368 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_cs.h | 200 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_descriptor_set.c | 570 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_descriptor_set.h | 102 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_device.c | 2071 +++
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_drm.c | 194 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_entrypoints_gen.py | 509 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_extensions.py | 279 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_fence.c | 381 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_formats.c | 998 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_icd.py | 47 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_image.c | 380 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_meta_blit.c | 39 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_meta_buffer.c | 19 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_meta_clear.c | 53 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_meta_copy.c | 690 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_meta_resolve.c | 41 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_pass.c | 416 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_pipeline.c | 1896 +++
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_pipeline_cache.c | 422 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_private.h | 1556 ++
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_query.c | 122 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_shader.c | 339 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_util.c | 117 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_util.h | 11 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_wsi.c | 272 +
external/mit/MesaLib/dist/src/freedreno/vulkan/tu_wsi_wayland.c | 59 +
external/mit/MesaLib/dist/src/freedreno/vulkan/vk_format.h | 577 +
external/mit/MesaLib/dist/src/freedreno/vulkan/vk_format_layout.csv | 188 +
external/mit/MesaLib/dist/src/freedreno/vulkan/vk_format_parse.py | 388 +
external/mit/MesaLib/dist/src/freedreno/vulkan/vk_format_table.py | 173 +
external/mit/MesaLib/dist/src/gallium/Android.common.mk | 5 +-
external/mit/MesaLib/dist/src/gallium/Android.mk | 70 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/Android.mk | 29 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/Makefile.sources | 17 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/cso_cache/cso_context.c | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_context.c | 2 +
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_gs.c | 198 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_gs.h | 21 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pipe_stipple.c | 26 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 9 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pt.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pt.h | 1 +
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 29 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 14 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_pt_so_emit.c | 62 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_vbuf.h | 1 +
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_vs.h | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_vs_exec.c | 12 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_vs_llvm.c | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/draw/draw_vs_variant.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_ddebug/dd_context.c | 57 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_ddebug/dd_draw.c | 240 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_ddebug/dd_pipe.h | 27 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_ddebug/dd_screen.c | 22 +
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_ddebug/dd_util.h | 66 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_rbug/rbug_screen.c | 15 +
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_trace/tr_context.c | 51 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/driver_trace/tr_screen.c | 39 +
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_arit.c | 20 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_debug.h | 9 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_format.h | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 5 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_format_s3tc.c | 2266 +++
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_init.c | 1 +
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_intr.c | 10 +
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 18 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 530 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 9 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 41 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/hud/hud_cpu.c | 45 +
external/mit/MesaLib/dist/src/gallium/auxiliary/hud/hud_cpufreq.c | 1 +
external/mit/MesaLib/dist/src/gallium/auxiliary/hud/hud_diskstat.c | 8 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/hud/hud_nic.c | 12 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/hud/hud_sensors_temp.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/indices/u_indices_gen.py | 20 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/meson.build | 20 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/nir/tgsi_to_nir.c | 529 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/nir/tgsi_to_nir.h | 7 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/Android.mk | 46 +
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/pipe_loader.c | 21 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/pipe_loader.h | 10 -
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 65 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c | 7 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_buffer.h | 129 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 48 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/pipebuffer/pb_validate.h | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/target-helpers/drm_helper.h | 161 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/target-helpers/drm_helper_public.h | 19 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_exec.c | 397 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_exec.h | 25 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_info_opcodes.h | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_scan.c | 32 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_scan.h | 8 +
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_strings.c | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/tgsi/tgsi_util.c | 1 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_bitmask.c | 145 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_bitmask.h | 40 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_blit.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_blitter.c | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_compute.c | 166 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_compute.h | 45 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_debug_memory.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_debug_stack_android.cpp | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_debug_symbol.c | 8 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_dump_defines.c | 2 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_dump_state.c | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_file.h | 58 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format.c | 125 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format.csv | 11 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format.h | 30 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format_pack.py | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format_parse.py | 20 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format_table.py | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format_tests.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_format_zs.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_framebuffer.c | 10 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_helpers.c | 63 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_helpers.h | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_inlines.h | 22 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_log.c | 6 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_prim_restart.c | 2 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_range.h | 3 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_screen.c | 40 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_screen.h | 8 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_tests.c | 74 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_threaded_context.c | 11 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_upload_mgr.c | 33 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_upload_mgr.h | 4 +
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_vbuf.c | 10 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/util/u_vbuf.h | 3 +
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor.c | 808 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor.h | 10 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor_cs.c | 469 +
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor_cs.h | 56 +
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor_gfx.c | 726 +
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_compositor_gfx.h | 88 +
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 6 -
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_mpeg12_decoder.c | 4 +-
external/mit/MesaLib/dist/src/gallium/auxiliary/vl/vl_winsys_dri3.c | 18 +-
external/mit/MesaLib/dist/src/gallium/docs/source/context.rst | 10 +
external/mit/MesaLib/dist/src/gallium/docs/source/screen.rst | 67 +-
external/mit/MesaLib/dist/src/gallium/docs/source/tgsi.rst | 33 +-
external/mit/MesaLib/dist/src/gallium/drivers/etnaviv/Android.mk | 41 +
external/mit/MesaLib/dist/src/gallium/drivers/etnaviv/Makefile.sources | 2 +
external/mit/MesaLib/dist/src/gallium/drivers/etnaviv/etnaviv_blend.c | 15 +-
external/mit/MesaLib/dist/src/gallium/drivers/etnaviv/etnaviv_blt.c | 2 +
external/mit/MesaLib/dist/src/gallium/meson.build | 41 +-
175 files changed, 50295 insertions(+), 2405 deletions(-)
diffs (truncated from 56938 to 300 lines):
diff -r 0719bd2ba458 -r b7e5e7c8c532 external/mit/MesaLib/dist/src/freedreno/ir3/ir3_ra.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/external/mit/MesaLib/dist/src/freedreno/ir3/ir3_ra.c Tue Sep 24 16:49:45 2019 +0000
@@ -0,0 +1,1163 @@
+/*
+ * Copyright (C) 2014 Rob Clark <robclark%freedesktop.org@localhost>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark <robclark%freedesktop.org@localhost>
+ */
+
+#include "util/u_math.h"
+#include "util/register_allocate.h"
+#include "util/ralloc.h"
+#include "util/bitset.h"
+
+#include "ir3.h"
+#include "ir3_compiler.h"
+
+/*
+ * Register Assignment:
+ *
+ * Uses the register_allocate util, which implements graph coloring
+ * algo with interference classes. To handle the cases where we need
+ * consecutive registers (for example, texture sample instructions),
+ * we model these as larger (double/quad/etc) registers which conflict
+ * with the corresponding registers in other classes.
+ *
+ * Additionally we create additional classes for half-regs, which
+ * do not conflict with the full-reg classes. We do need at least
+ * sizes 1-4 (to deal w/ texture sample instructions output to half-
+ * reg). At the moment we don't create the higher order half-reg
+ * classes as half-reg frequently does not have enough precision
+ * for texture coords at higher resolutions.
+ *
+ * There are some additional cases that we need to handle specially,
+ * as the graph coloring algo doesn't understand "partial writes".
+ * For example, a sequence like:
+ *
+ * add r0.z, ...
+ * sam (f32)(xy)r0.x, ...
+ * ...
+ * sam (f32)(xyzw)r0.w, r0.x, ... ; 3d texture, so r0.xyz are coord
+ *
+ * In this scenario, we treat r0.xyz as class size 3, which is written
+ * (from a use/def perspective) at the 'add' instruction and ignore the
+ * subsequent partial writes to r0.xy. So the 'add r0.z, ...' is the
+ * defining instruction, as it is the first to partially write r0.xyz.
+ *
+ * Note i965 has a similar scenario, which they solve with a virtual
+ * LOAD_PAYLOAD instruction which gets turned into multiple MOV's after
+ * register assignment. But for us that is horrible from a scheduling
+ * standpoint. Instead what we do is use the idea of a 'definer' instruction.
+ * Ie. the first instruction (lowest ip) to write to the variable is the
+ * one we consider from use/def perspective when building interference
+ * graph. (Other instructions which write other variable components
+ * just define the variable some more.)
+ *
+ * Arrays of arbitrary size are handled via pre-coloring a consecutive
+ * sequence of registers. Additional scalar (single component) reg
+ * names are allocated starting at ctx->class_base[total_class_count]
+ * (see arr->base), which are pre-colored. In the use/def graph direct
+ * access is treated as a single element use/def, and indirect access
+ * is treated as use or def of all array elements. (Only the first
+ * def is tracked, in case of multiple indirect writes, etc.)
+ *
+ * TODO arrays that fit in one of the pre-defined class sizes should
+ * not need to be pre-colored, but instead could be given a normal
+ * vreg name. (Ignoring this for now since it is a good way to work
+ * out the kinks with arbitrary sized arrays.)
+ *
+ * TODO might be easier for debugging to split this into two passes,
+ * the first assigning vreg names in a way that we could ir3_print()
+ * the result.
+ */
+
+static const unsigned class_sizes[] = { /* full-reg class sizes, in scalar regs */
+ 1, 2, 3, 4,
+ 4 + 4, /* txd + 1d/2d */
+ 4 + 6, /* txd + 3d */
+};
+#define class_count ARRAY_SIZE(class_sizes)
+
+static const unsigned half_class_sizes[] = { /* half-reg class sizes */
+ 1, 2, 3, 4, /* ir3_ra_alloc_reg_set() debug-asserts (a6xx+ path) that
+ * these equal the leading entries of class_sizes[] */
+};
+#define half_class_count ARRAY_SIZE(half_class_sizes)
+
+/* Class sizes for the "high" registers (see NUM_HIGH_REGS). These seem
+ * to just be used for compute shaders? Seems like vec1 and vec3 are
+ * sufficient (for now?)
+ */
+static const unsigned high_class_sizes[] = {
+ 1, 3,
+};
+#define high_class_count ARRAY_SIZE(high_class_sizes)
+
+#define total_class_count (class_count + half_class_count + high_class_count) /* full + half + high */
+
+/* Below a0.x are normal regs. RA doesn't need to assign a0.x/p0.x. */
+#define NUM_REGS (4 * 48) /* r0 to r47 */
+#define NUM_HIGH_REGS (4 * 8) /* r48 to r55 */
+#define FIRST_HIGH_REG (4 * 48) /* same value as NUM_REGS */
+/* Number of virtual regs in a given class. A register of size N may start
+ * at any scalar position that still leaves room for N consecutive scalars,
+ * so there are (register count - (N - 1)) possible start positions:
+ */
+#define CLASS_REGS(i) (NUM_REGS - (class_sizes[i] - 1))
+#define HALF_CLASS_REGS(i) (NUM_REGS - (half_class_sizes[i] - 1))
+#define HIGH_CLASS_REGS(i) (NUM_HIGH_REGS - (high_class_sizes[i] - 1))
+
+#define HALF_OFFSET (class_count) /* flat class index of the first half-reg class */
+#define HIGH_OFFSET (class_count + half_class_count) /* flat class index of the first high-reg class */
+
+/* register-set, created one time, used for all shaders. Filled in by
+ * ir3_ra_alloc_reg_set():
+ */
+struct ir3_ra_reg_set {
+ struct ra_regs *regs; /* result of ra_alloc_reg_set() */
+ unsigned int classes[class_count]; /* ra_alloc_reg_class() result per class_sizes[] entry */
+ unsigned int half_classes[half_class_count]; /* same, per half_class_sizes[] entry */
+ unsigned int high_classes[high_class_count]; /* same, per high_class_sizes[] entry */
+ /* maps flat virtual register space to base gpr. The stored value is
+ * the start position counted from the beginning of that class's own
+ * register range (the setup loops store their 0-based index):
+ */
+ uint16_t *ra_reg_to_gpr;
+ /* maps cls,gpr to flat virtual register space. The first index is the
+ * flat class index: full classes first, then HALF_OFFSET + i, then
+ * HIGH_OFFSET + i:
+ */
+ uint16_t **gpr_to_ra_reg;
+};
+
+static void
+build_q_values(unsigned int **q_values, unsigned off,
+ const unsigned *sizes, unsigned count)
+{ /* Allocates rows [off, off + count) of the q_values table, each
+ * total_class_count entries wide, and fills in the entries for
+ * the class group described by sizes[0..count). Only columns
+ * [off, off + count) of each row are written here; the other
+ * columns keep whatever rzalloc_array() left in them (presumably
+ * zero -- confirm against util/ralloc.h).
+ */
+ for (unsigned i = 0; i < count; i++) {
+ q_values[i + off] = rzalloc_array(q_values, unsigned, total_class_count);
+
+ /* From register_allocate.c:
+ *
+ * q(B,C) (indexed by C, B is this register class) in
+ * Runeson/Nyström paper. This is "how many registers of B could
+ * the worst choice register from C conflict with".
+ *
+ * If we just let the register allocation algorithm compute these
+ * values, it is extremely expensive. However, since all of our
+ * registers are laid out, we can very easily compute them
+ * ourselves. View the register from C as fixed starting at GRF n
+ * somewhere in the middle, and the register from B as sliding back
+ * and forth. Then the first register to conflict from B is the
+ * one starting at n - class_size[B] + 1 and the last register to
+ * conflict will start at n + class_size[B] - 1. Therefore, the
+ * number of conflicts from B is class_size[B] + class_size[C] - 1.
+ *
+ * +-+-+-+-+-+-+ +-+-+-+-+-+-+
+ * B | | | | | |n| --> | | | | | | |
+ * +-+-+-+-+-+-+ +-+-+-+-+-+-+
+ * +-+-+-+-+-+
+ * C |n| | | | |
+ * +-+-+-+-+-+
+ *
+ * (Idea copied from brw_fs_reg_allocate.cpp)
+ */
+ for (unsigned j = 0; j < count; j++)
+ q_values[i + off][j + off] = sizes[i] + sizes[j] - 1;
+ }
+}
+
+/* One-time setup of RA register-set, which describes all the possible
+ * "virtual" registers and their interferences. Ie. double register
+ * occupies (and conflicts with) two single registers, and so forth.
+ * Since registers do not need to be aligned to their class size, they
+ * can conflict with other registers in the same class too. Ie:
+ *
+ * Single (base) | Double
+ * --------------+---------------
+ * R0 | D0
+ * R1 | D0 D1
+ * R2 | D1 D2
+ * R3 | D2
+ * .. and so on..
+ *
+ * (NOTE the disassembler uses notation like r0.x/y/z/w but those are
+ * really just four scalar registers. Don't let that confuse you.)
+ */
+struct ir3_ra_reg_set *
+ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
+{
+ struct ir3_ra_reg_set *set = rzalloc(compiler, struct ir3_ra_reg_set);
+ unsigned ra_reg_count, reg, first_half_reg, first_high_reg, base;
+ unsigned int **q_values;
+
+ /* calculate # of regs across all classes: */
+ ra_reg_count = 0;
+ for (unsigned i = 0; i < class_count; i++)
+ ra_reg_count += CLASS_REGS(i);
+ for (unsigned i = 0; i < half_class_count; i++)
+ ra_reg_count += HALF_CLASS_REGS(i);
+ for (unsigned i = 0; i < high_class_count; i++)
+ ra_reg_count += HIGH_CLASS_REGS(i);
+
+ /* allocate and populate q_values: */
+ q_values = ralloc_array(set, unsigned *, total_class_count);
+
+ build_q_values(q_values, 0, class_sizes, class_count);
+ build_q_values(q_values, HALF_OFFSET, half_class_sizes, half_class_count);
+ build_q_values(q_values, HIGH_OFFSET, high_class_sizes, high_class_count);
+
+ /* allocate the reg-set.. */
+ set->regs = ra_alloc_reg_set(set, ra_reg_count, true);
+ set->ra_reg_to_gpr = ralloc_array(set, uint16_t, ra_reg_count);
+ set->gpr_to_ra_reg = ralloc_array(set, uint16_t *, total_class_count);
+
+ /* .. and classes */
+ reg = 0;
+ for (unsigned i = 0; i < class_count; i++) {
+ set->classes[i] = ra_alloc_reg_class(set->regs);
+
+ set->gpr_to_ra_reg[i] = ralloc_array(set, uint16_t, CLASS_REGS(i));
+
+ for (unsigned j = 0; j < CLASS_REGS(i); j++) {
+ ra_class_add_reg(set->regs, set->classes[i], reg);
+
+ set->ra_reg_to_gpr[reg] = j;
+ set->gpr_to_ra_reg[i][j] = reg;
+
+ for (unsigned br = j; br < j + class_sizes[i]; br++)
+ ra_add_transitive_reg_conflict(set->regs, br, reg);
+
+ reg++;
+ }
+ }
+
+ first_half_reg = reg;
+ base = HALF_OFFSET;
+
+ for (unsigned i = 0; i < half_class_count; i++) {
+ set->half_classes[i] = ra_alloc_reg_class(set->regs);
+
+ set->gpr_to_ra_reg[base + i] =
+ ralloc_array(set, uint16_t, HALF_CLASS_REGS(i));
+
+ for (unsigned j = 0; j < HALF_CLASS_REGS(i); j++) {
+ ra_class_add_reg(set->regs, set->half_classes[i], reg);
+
+ set->ra_reg_to_gpr[reg] = j;
+ set->gpr_to_ra_reg[base + i][j] = reg;
+
+ for (unsigned br = j; br < j + half_class_sizes[i]; br++)
+ ra_add_transitive_reg_conflict(set->regs, br + first_half_reg, reg);
+
+ reg++;
+ }
+ }
+
+ first_high_reg = reg;
+ base = HIGH_OFFSET;
+
+ for (unsigned i = 0; i < high_class_count; i++) {
+ set->high_classes[i] = ra_alloc_reg_class(set->regs);
+
+ set->gpr_to_ra_reg[base + i] =
+ ralloc_array(set, uint16_t, HIGH_CLASS_REGS(i));
+
+ for (unsigned j = 0; j < HIGH_CLASS_REGS(i); j++) {
+ ra_class_add_reg(set->regs, set->high_classes[i], reg);
+
+ set->ra_reg_to_gpr[reg] = j;
+ set->gpr_to_ra_reg[base + i][j] = reg;
+
+ for (unsigned br = j; br < j + high_class_sizes[i]; br++)
+ ra_add_transitive_reg_conflict(set->regs, br + first_high_reg, reg);
+
+ reg++;
+ }
+ }
+
+ /* starting a6xx, half precision regs conflict w/ full precision regs: */
+ if (compiler->gpu_id >= 600) {
+ /* because of transitivity, we can get away with just setting up
+ * conflicts between the first class of full and half regs:
+ */
+ for (unsigned i = 0; i < half_class_count; i++) {
+ /* NOTE there are fewer half class sizes, but they match the
+ * first N full class sizes.. but assert in case that ever
+ * accidentally changes:
+ */
+ debug_assert(class_sizes[i] == half_class_sizes[i]);
+ for (unsigned j = 0; j < CLASS_REGS(i) / 2; j++) {
+ unsigned freg = set->gpr_to_ra_reg[i][j];
Home |
Main Index |
Thread Index |
Old Index