Provide IGC API option descriptions

amielcza · igcbot · commit 1a98d9f98617 · 2025-08-08T11:38:39.000+02:00
diff --git a/IGC/Options/include/igc/Options/CommonApiOptions.td b/IGC/Options/include/igc/Options/CommonApiOptions.td
@@ -26,13 +26,15 @@ defm gtpin_scratch_area_size : CommonSeparate<"gtpin-scratch-area-size">,
 defm : CommonJoined<"gtpin-scratch-area-size=">, Alias<gtpin_scratch_area_size_common>,
   HelpText<"Alias for -ze-gtpin-scratch-area-size">;
 
-defm gtpin_indir_ref : CommonFlag<"gtpin-indir-ref">;
+defm gtpin_indir_ref : CommonFlag<"gtpin-indir-ref">,
+  HelpText<"Ask finalizer to provide a list of registers used by indirect operand per %ip">;
 
 defm skip_fde : CommonFlag<"skip-fde">;
 
 defm no_fusedCallWA : CommonFlag<"no-fusedCallWA">;
 
-defm disable_compaction : CommonFlag<"disable-compaction">;
+defm disable_compaction : CommonFlag<"disable-compaction">,
+  HelpText<"Disable compaction pass in finalizer. This pass is used to decide whether to use a compacted, i.e. shorter, encoding of machine instructions wherever ISA allows.">;
 
 def emit_debug : PlainFlag<"g">,
   HelpText<"Enable generation of debug information and enables kernel debug">;
@@ -44,8 +46,10 @@ defm opt_disable : CommonFlag<"opt-disable">,
 defm library_compilation : CommonFlag<"library-compilation">;
 
 // -library-compile-simd=[8|16|32]
-defm library_compile_simd : CommonSeparate<"library-compile-simd">;
-defm : CommonJoined<"library-compile-simd=">, Alias<library_compile_simd_common>;
+defm library_compile_simd : CommonSeparate<"library-compile-simd">,
+  HelpText<"Select SIMD size for library compilations [8|16|32]">;
+defm : CommonJoined<"library-compile-simd=">, Alias<library_compile_simd_common>,
+  HelpText<"Select SIMD size for library compilations [8|16|32]">;
 
 defm exp_register_file_size : CommonSeparate<"exp-register-file-size">,
   HelpText<"Set amount of registers used by regalloc">;
diff --git a/IGC/Options/include/igc/Options/IGCApiOptions.td b/IGC/Options/include/igc/Options/IGCApiOptions.td
@@ -14,13 +14,16 @@ let Flags = [IGCApiOption] in {
 // Backend API options {{
 
 // -cl-fp32-correctly-rounded-divide-sqrt, -ze-fp32-correctly-rounded-divide-sqrt
-defm fp32_correctly_rounded_divide_sqrt : CommonFlag<"fp32-correctly-rounded-divide-sqrt">;
+defm fp32_correctly_rounded_divide_sqrt : CommonFlag<"fp32-correctly-rounded-divide-sqrt">,
+  HelpText<"Allows an application to specify that single precision floating-point divide (x/y and 1/x) and sqrt used in the program source are correctly rounded.">;
 
 // -cl-no-subgroup-ifp, -ze-no-subgroup-ifp
-defm no_subgroup_ifp : CommonFlag<"no-subgroup-ifp">;
+defm no_subgroup_ifp : CommonFlag<"no-subgroup-ifp">,
+  HelpText<"This indicates that kernels in this program do not require sub-groups to make independent forward progress.">;
 
 // -cl-uniform-work-group-size, -ze-uniform-work-group-size
-defm uniform_work_group_size : CommonFlag<"uniform-work-group-size">;
+defm uniform_work_group_size : CommonFlag<"uniform-work-group-size">,
+  HelpText<"This requires that the global work-size be a multiple of the work-group size specified to clEnqueueNDRangeKernel.">;
 
 // -cl-take-global-address, -ze-take-global-address
 defm take_global_address : CommonFlag<"take-global-address">;
@@ -32,10 +35,12 @@ defm required_thread_count : CommonSeparate<"reqd-eu-thread-count">;
 defm : CommonJoined<"reqd-eu-thread-count=">, Alias<required_thread_count_common>;
 
 // -ze-opt-large-grf-kernel
-defm large_grf_kernel : CommonSeparate<"large-grf-kernel">;
+defm large_grf_kernel : CommonSeparate<"large-grf-kernel">,
+  HelpText<"-ze-opt-large-grf-kernel <string> tells IGC to use large GRF size if kernel name contains <string> regardless of module-level options.">;
 
 // -ze-opt-regular-grf-kernel
-defm regular_grf_kernel : CommonSeparate<"regular-grf-kernel">;
+defm regular_grf_kernel : CommonSeparate<"regular-grf-kernel">,
+  HelpText<"-ze-opt-regular-grf-kernel <string> tells IGC to use regular GRF size if kernel name contains <string> regardless of module-level options.">;
 
 // -cl-intel-no-local-to-generic, -ze-opt-no-local-to-generic
 defm no_local_to_generic : CommonFlag<"no-local-to-generic">;
@@ -47,14 +52,15 @@ defm 128_grf_per_thread : CommonFlag<"128-GRF-per-thread">;
 defm 256_grf_per_thread : CommonFlag<"256-GRF-per-thread">;
 
 // -cl-intel-greater-than-4GB-buffer-required, -ze-opt-greater-than-4GB-buffer-required
-defm greater_than_4GB_buffer_required : CommonFlag<"greater-than-4GB-buffer-required">;
+defm greater_than_4GB_buffer_required : CommonFlag<"greater-than-4GB-buffer-required">,
+  HelpText<"When this flag is present, it indicates that any OpenCL buffers can be more than 4GB in size. If it is absent, all buffers are not more than 4GB in size.">;
 
 // -cl-poison-unsupported-fp64-kernels -ze-poison-unsupported-fp64-kernels
 defm poison_unsupported_fp64_kernels : CommonFlag<"poison-unsupported-fp64-kernels">;
 
 // -cl-intel-enable-ieee-float-exception-trap
-// This flags enables the IEEE exception trap bit in Control Register in the prolog of the kernel.
-defm enable_ieee_float_exception_trap : CommonFlag<"enable-ieee-float-exception-trap">;
+defm enable_ieee_float_exception_trap : CommonFlag<"enable-ieee-float-exception-trap">,
+  HelpText<"This flag enables the IEEE exception trap bit in Control Register in the prolog of the kernel.">;
 
 // -cl-fp64-gen-emu  -ze-fp64-gen-emu
 defm fp64_gen_emu : CommonFlag<"fp64-gen-emu">;
@@ -63,46 +69,64 @@ defm fp64_gen_emu : CommonFlag<"fp64-gen-emu">;
 defm fp64_gen_conv_emu : CommonFlag<"fp64-gen-conv-emu">;
 
 // -cl-intel-profile-guided-trimming, -ze-opt-profile-guided-trimming
-defm static_profile_guided_trimming : CommonFlag<"static-profile-guided-trimming">;
+defm static_profile_guided_trimming : CommonFlag<"static-profile-guided-trimming">,
+  HelpText<"Enable static analysis in the kernel trimming.">;
 
 // }} Backend API options
 
 // API options from source translation {{
-def cl_std : ClJoined<"std=">;
+def cl_std : ClJoined<"std=">,
+  HelpText<"Determine the language version to be accepted by the compiler.">;
 
-def single_precision_constant : ClFlag<"single-precision-constant">;
+def single_precision_constant : ClFlag<"single-precision-constant">,
+  HelpText<"Forces implicit conversions of double-precision floating-point literals to single precision.">;
 
-def denorms_are_zero : ClFlag<"denorms-are-zero">;
+def denorms_are_zero : ClFlag<"denorms-are-zero">,
+  HelpText<"Controls how single precision and double precision denormalized numbers are handled. If specified as a build option, the single precision denormalized numbers may be flushed to zero; double precision denormalized numbers may also be flushed to zero if the optional extension for double precision is supported.">;
 
-def strict_aliasing : ClFlag<"strict-aliasing">;
+def strict_aliasing : ClFlag<"strict-aliasing">,
+  HelpText<"Allows the compiler to assume the strictest aliasing rules.">;
 
-def mad_enable : ClFlag<"mad-enable">;
+def mad_enable : ClFlag<"mad-enable">,
+  HelpText<"Allow a * b + c to be replaced by a mad instruction.">;
 
-def no_signed_zeros : ClFlag<"no-signed-zeros">;
+def no_signed_zeros : ClFlag<"no-signed-zeros">,
+  HelpText<"Allow optimizations for floating-point arithmetic that ignore the signedness of zero.">;
 
-def unsafe_math_optimizations : ClFlag<"unsafe-math-optimizations">;
+def unsafe_math_optimizations : ClFlag<"unsafe-math-optimizations">,
+  HelpText<"Allow optimizations for floating-point arithmetic that (a) assume that arguments and results are valid, (b) may violate the IEEE 754 standard, (c) assume relaxed OpenCL numerical compliance requirements as defined in the unsafe math optimization section of the OpenCL C or OpenCL SPIR-V Environment specifications, and (d) may violate edge case behavior in the OpenCL C or OpenCL SPIR-V Environment specifications.">;
 
-def finite_math_only : ClFlag<"finite-math-only">;
+def finite_math_only : ClFlag<"finite-math-only">,
+  HelpText<"Allow optimizations for floating-point arithmetic that assume that arguments and results are not NaNs, +Inf, -Inf. This option may violate the OpenCL numerical compliance requirements for single precision and double precision floating-point, as well as edge case behavior.">;
 
-def fast_relaxed_math : ClFlag<"fast-relaxed-math">;
+def fast_relaxed_math : ClFlag<"fast-relaxed-math">,
+  HelpText<"Sets the optimization options -cl-finite-math-only and -cl-unsafe-math-optimizations. This option causes the preprocessor macro __FAST_RELAXED_MATH__ to be defined in the OpenCL program.">;
 
 def match_sincospi : ClFlag<"match-sincospi">;
 
-def oclfe_w : PlainFlag<"w">;
+def oclfe_w : PlainFlag<"w">,
+  HelpText<"Remove all warning messages.">;
 
-def oclfe_werror : PlainFlag<"Werror">;
+def oclfe_werror : PlainFlag<"Werror">,
+  HelpText<"Treat every warning as an error.">;
 
-def kernel_arg_info : ClFlag<"kernel-arg-info">;
+def kernel_arg_info : ClFlag<"kernel-arg-info">,
+  HelpText<"Allow the compiler to store information about the arguments of a kernel(s) in the program executable. The argument information stored includes the argument name, its type, the address space and access qualifiers used.">;
 
-def oclfe_x : PlainSeparate<"x">;
+def oclfe_x : PlainSeparate<"x">,
+  HelpText<"Manually provide type of file. Takes only spir or spir64 as argument.">;
 
-def oclfe_D : PlainJoinedOrSeparate<"D">;
+def oclfe_D : PlainJoinedOrSeparate<"D">,
+  HelpText<"Manually define macros.">;
 
-def oclfe_I : PlainJoinedOrSeparate<"I">;
+def oclfe_I : PlainJoinedOrSeparate<"I">,
+  HelpText<"Add directory to the list of directories which will be searched for header files.">;
 
-def oclfe_spir_std : PlainJoined<"spir-std=">;
+def oclfe_spir_std : PlainJoined<"spir-std=">,
+  HelpText<"Specify the SPIR version.">;
 
-def oclfe_gline_tables_only : PlainFlag<"gline-tables-only">;
+def oclfe_gline_tables_only : PlainFlag<"gline-tables-only">,
+  HelpText<"Generate only line table debug information.">;
 
 def oclfe_triple : PlainSeparate<"triple">;
 
@@ -112,15 +136,17 @@ def oclfe_dwarf_column_info : PlainFlag<"dwarf-column-info">;
 def debug_info : ClFlag<"intel-debug-info">;
 
 def disable_a64wa : ClFlag<"intel-disable-a64WA">;
-
 def oclfe_profiler : PlainFlag<"profiler">;
 
-def oclfe_s : PlainSeparate<"s">;
+def oclfe_s : PlainSeparate<"s">,
+  HelpText<"Strip all symbol table and debug information from the output binary.">;
 
 // Additional debug options.
-def oclfe_igc_opts : PlainJoinedOrSeparate<"igc_opts">;
+def oclfe_igc_opts : PlainJoinedOrSeparate<"igc_opts">,
+  HelpText<"Pass IGC options delimited by ',' or ' '.">;
 
-def oclfe_dump_opt_llvm : PlainJoined<"dump-opt-llvm=">;
+def oclfe_dump_opt_llvm : PlainJoined<"dump-opt-llvm=">,
+  HelpText<"Dump the llvm output to the specified file.">;
 // }} API options from source translation
 
 }
diff --git a/IGC/Options/include/igc/Options/IGCInternalOptions.td b/IGC/Options/include/igc/Options/IGCInternalOptions.td
@@ -14,7 +14,8 @@ let Flags = [IGCInternalOption] in {
 // Backend internal options {{
 
 // -cl-replace-global-offsets-by-zero, -ze-replace-global-offsets-by-zero
-defm replace_global_offsets_by_zero : CommonFlag<"replace-global-offsets-by-zero">;
+defm replace_global_offsets_by_zero : CommonFlag<"replace-global-offsets-by-zero">,
+  HelpText<"OpenCL's global IDs are assumed to start from the origin at global offsets (offset_x, offset_y, offset_z). When this flag is present, it indicates that the global offsets are (0,0,0).">;
 
 defm kernel_debug_enable : CommonFlag<"kernel-debug-enable">;
 
@@ -28,16 +29,20 @@ defm include_sip_kernel_local_debug : CommonFlag<"include-sip-kernel-local-debug
 defm use_32_bit_ptr_arith : CommonFlag<"use-32bit-ptr-arith">;
 
 // -cl-intel-greater-than-4GB-buffer-required, -ze-opt-greater-than-4GB-buffer-required
-defm greater_than_4GB_buffer_required : CommonFlag<"greater-than-4GB-buffer-required">;
+defm greater_than_4GB_buffer_required : CommonFlag<"greater-than-4GB-buffer-required">,
+  HelpText<"When this flag is present, it indicates that any OpenCL buffers can be more than 4GB in size. If it is absent, all buffers are not more than 4GB in size.">;
 
 // -cl-intel-has-buffer-offset-arg, -ze-opt-has-buffer-offset-arg, -ze-intel-has-buffer-offset-arg
-defm has_buffer_offset_arg : CommonFlag<"has-buffer-offset-arg">;
+defm has_buffer_offset_arg : CommonFlag<"has-buffer-offset-arg">,
+  HelpText<"This flag, together with *[-cl-intel|-ze-opt]-greater-than-4GB-buffer-required* is used to convert stateless memory accesses, called messages or load/store, into stateful ones. The OpenCL runtime can create a surface whose base is either *buffer_base* or *buffer_base + buffer_offset*, based on whether *buffer_offset* is used.">;
 
 // -cl-intel-buffer-offset-arg-required, -ze-opt-buffer-offset-arg-required
-defm buffer_offset_arg_required : CommonFlag<"buffer-offset-arg-required">;
+defm buffer_offset_arg_required : CommonFlag<"buffer-offset-arg-required">,
+  HelpText<"Tell IGC to always use buffer offset. It is valid only if -intel-has-buffer-offset-arg is present.">;
 
 // -cl-intel-has-positive-pointer-offset, -ze-opt-has-positive-pointer-offset
-defm has_positive_pointer_offset : CommonFlag<"has-positive-pointer-offset">;
+defm has_positive_pointer_offset : CommonFlag<"has-positive-pointer-offset">,
+  HelpText<"For any load and store (aka message) whose address = *ptrArg + offset*, where *ptrArg* is a kernel pointer argument, offset is assumed to be non-negative if this flag is present.">;
 
 // -cl-intel-disable-a64WA
 defm disable_a64wa : CommonFlag<"disable-a64WA">;
@@ -49,12 +54,13 @@ defm force_enable_a64wa : CommonFlag<"force-enable-a64WA">;
 defm no_prera_scheduling : CommonFlag<"no-prera-scheduling">;
 
 // (All start with -cl-intel or -ze-opt)
-defm num_thread_per_eu : CommonSeparate<"num-thread-per-eu">;
+defm num_thread_per_eu : CommonSeparate<"num-thread-per-eu">,
+  HelpText<"Overrides the current number of threads value defined by the user's command line option for the entire module or the compiler choice by heuristics.">;
 
 defm exp_register_file_size : CommonSeparate<"exp-register-file-size">,
-  HelpText<"Set amount of registers used by regalloc">;
+  HelpText<"Set amount of registers used by regalloc.">;
 defm : CommonJoined<"exp-register-file-size=">, Alias<exp_register_file_size_common>,
-  HelpText<"Alias for -ze-exp-register-file-size">;
+  HelpText<"Alias for -ze-exp-register-file-size.">;
 
 // -cl-intel-128-GRF-per-thread
 defm 128_grf_per_thread : CommonFlag<"128-GRF-per-thread">;
@@ -64,7 +70,7 @@ defm 256_grf_per_thread : CommonFlag<"256-GRF-per-thread">;
 
 // -cl-intel-enable-auto-large-GRF-mode, -ze-opt-enable-auto-large-GRF-mode
 defm enable_auto_large_GRF_mode : CommonFlag<"enable-auto-large-GRF-mode">,
-  HelpText<"Use compiler heuristics to determine number of GRF">;
+  HelpText<"Use compiler heuristics to determine number of GRF.">;
 
 // -cl-intel-force-global-mem-allocation, -cl-force-global-mem-allocation, -ze-force-global-mem-allocation
 defm force_global_mem_allocation : CommonFlag<"force-global-mem-allocation">;
@@ -76,10 +82,12 @@ defm force_global_mem_allocation : CommonFlag<"force-global-mem-allocation">;
 defm disable_recompilation : CommonFlag<"disable-recompilation">;
 
 // -cl-intel-force-emu-int32divrem
-defm force_emu_int32divrem : CommonFlag<"force-emu-int32divrem">;
+defm force_emu_int32divrem : CommonFlag<"force-emu-int32divrem">,
+  HelpText<"Use fp64-based emulation functions if fp64 is supported natively.">;
 
 // -cl-intel-force-emu-sp-int32divrem
-defm force_emu_sp_int32divrem : CommonFlag<"force-emu-sp-int32divrem">;
+defm force_emu_sp_int32divrem : CommonFlag<"force-emu-sp-int32divrem">,
+  HelpText<"Force the fp64-based emulation regardless of native support.">;
 
 // -cl-intel-force-disable-4GB-buffer
 defm force_disable_4GB_buffer : CommonFlag<"force-disable-4GB-buffer">;
@@ -101,7 +109,8 @@ defm vector_coalescing : CommonSeparate<"vector-coalesing">;
 defm : CommonJoined<"vector-coalesing=">, Alias<vector_coalescing_common>;
 
 // -cl-intel-exclude-ir-from-zebin
-defm exclude_ir_from_zebin : CommonFlag<"exclude-ir-from-zebin">;
+defm exclude_ir_from_zebin : CommonFlag<"exclude-ir-from-zebin">,
+  HelpText<"Exclude SPIR-V section from files generated in ZEBIN format.">;
 
 // -cl-intel-no-spill
 defm no_spill : CommonFlag<"no-spill">;
@@ -110,10 +119,12 @@ defm no_spill : CommonFlag<"no-spill">;
 defm disable_noMaskWA : CommonFlag<"disable-noMaskWA">;
 
 // -cl-intel-ignoreBFRounding, -ze-intel-ignoreBFRounding
-defm ignoreBFRounding : CommonFlag<"ignoreBFRounding">;
+defm ignoreBFRounding : CommonFlag<"ignoreBFRounding">,
+  HelpText<"Folds BF operands into mul/add/cmp operations.">;
 
 // -cl-compile-one-at-time
-defm compile_one_at_time : CommonFlag<"compile-one-at-time">;
+defm compile_one_at_time : CommonFlag<"compile-one-at-time">,
+  HelpText<"Enables llvm::module splitting to compile only one kernel at a time.">;
 
 // -cl-skip-reloc-add
 defm skip_reloc_add : CommonFlag<"skip-reloc-add">;
@@ -135,18 +146,23 @@ defm store_cache_default : CommonSeparate<"store-cache-default">;
 defm : CommonJoined<"store-cache-default=">, Alias<store_cache_default_common>;
 
 // -cl-fp64-gen-emu  -ze-fp64-gen-emu
-defm fp64_gen_emu : CommonFlag<"fp64-gen-emu">;
+defm fp64_gen_emu : CommonFlag<"fp64-gen-emu">,
+  HelpText<"Enable full FP64 emulation.">;
 
 // *-private-memory-minimal-size-per-thread <SIZE>
-defm private_memory_minimal_size_per_thread : CommonSeparate<"private-memory-minimal-size-per-thread">;
+defm private_memory_minimal_size_per_thread : CommonSeparate<"private-memory-minimal-size-per-thread">,
+  HelpText<"When this flag is present, it guarantees that the size of private memory allocated per thread cannot be less than the given value. Constraint: <SIZE> >= 0.">;
 
 // *-scratch-space-private-memory-minimal-size-per-thread <SIZE>
-defm scratch_space_private_memory_minimal_size_per_thread : CommonSeparate<"scratch-space-private-memory-minimal-size-per-thread">;
+defm scratch_space_private_memory_minimal_size_per_thread : CommonSeparate<"scratch-space-private-memory-minimal-size-per-thread">,
+  HelpText<"When this flag is present, it guarantees that the size of scratch space private memory allocated per thread cannot be less than the given value. Constraint: <SIZE> >= 0.">;
 
-defm enable_divergent_barrier_handling : CommonFlag<"enable-divergent-barrier-handling">;
+defm enable_divergent_barrier_handling : CommonFlag<"enable-divergent-barrier-handling">,
+  HelpText<"Enable the divergent barrier pass.">;
 
 // -cl-intel-high-accuracy-nolut-math
-defm high_accuracy_nolut_math : CommonFlag<"high-accuracy-nolut-math">;
+defm high_accuracy_nolut_math : CommonFlag<"high-accuracy-nolut-math">,
+  HelpText<"Enables experimental high accuracy implementations of transcendentals.">;
 
 // -[cl-intel|ze-opt]-ldstcombine=[0|1]
 // -[cl-intel|ze-opt]-ldstcombine-max-storebytes=[4|8|16|32]
@@ -163,13 +179,15 @@ defm : CommonJoined<"ldstcombine-max-loadbytes=">, Alias<ldstcombine_max_loadbyt
 // Internal options from source translation {{
 def oclfe_ocl_version : PlainJoined<"ocl-version=">;
 
-def oclfe_force_cl_std : PlainFlag<"force-cl-std">;
+def oclfe_force_cl_std : PlainFlag<"force-cl-std">,
+  HelpText<"Force a specific OpenCL C version.">;
 
 def oclfe_32bit : PlainFlag<"m32">;
 
 def oclfe_64bit : PlainFlag<"m64">;
 
-def oclfe_D : PlainJoinedOrSeparate<"D">;
+def oclfe_D : PlainJoinedOrSeparate<"D">,
+  HelpText<"Manually define macros.">;
 
 def oclfe_cl_ext : ClJoined<"ext=">;
 // }} Internal options from source translation