Linux sh52.ich-4.com 5.14.0-611.26.1.el9_7.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Jan 29 05:24:47 EST 2026 x86_64
LiteSpeed
Server IP : 198.143.147.58 & Your IP : 216.73.217.21
Domains :
Cant Read [ /etc/named.conf ]
User : actualbuzz
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
lib /
clang /
20 /
include /
Delete
Unzip
Name
Size
Permission
Date
Action
cuda_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
fuzzer
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
llvm_libc_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
llvm_offload_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
openmp_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
orc
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
ppc_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
profile
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
sanitizer
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
xray
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
zos_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
__clang_cuda_builtin_vars.h
4.78
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_cmath.h
18.06
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_complex_builtins.h
9.36
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_device_functions.h
57.36
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_intrinsics.h
29.87
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_libdevice_declares.h
21.89
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_math.h
16.21
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_math_forward_declares.h
8.27
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_runtime_wrapper.h
17.6
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_texture_intrinsics.h
31.86
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_cmath.h
26.48
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_libdevice_declares.h
19.87
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_math.h
32.78
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_runtime_wrapper.h
4.94
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_stdlib.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
__stdarg___gnuc_va_list.h
467
B
-rw-r--r--
2025-07-08 23:06
__stdarg___va_copy.h
445
B
-rw-r--r--
2025-07-08 23:06
__stdarg_header_macro.h
417
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_arg.h
793
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_copy.h
451
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_list.h
448
B
-rw-r--r--
2025-07-08 23:06
__stddef_header_macro.h
417
B
-rw-r--r--
2025-07-08 23:06
__stddef_max_align_t.h
857
B
-rw-r--r--
2025-07-08 23:06
__stddef_null.h
875
B
-rw-r--r--
2025-07-08 23:06
__stddef_nullptr_t.h
958
B
-rw-r--r--
2025-07-08 23:06
__stddef_offsetof.h
708
B
-rw-r--r--
2025-07-08 23:06
__stddef_ptrdiff_t.h
717
B
-rw-r--r--
2025-07-08 23:06
__stddef_rsize_t.h
710
B
-rw-r--r--
2025-07-08 23:06
__stddef_size_t.h
708
B
-rw-r--r--
2025-07-08 23:06
__stddef_unreachable.h
735
B
-rw-r--r--
2025-07-08 23:06
__stddef_wchar_t.h
853
B
-rw-r--r--
2025-07-08 23:06
__stddef_wint_t.h
443
B
-rw-r--r--
2025-07-08 23:06
__wmmintrin_aes.h
5.15
KB
-rw-r--r--
2025-07-08 23:06
__wmmintrin_pclmul.h
1.99
KB
-rw-r--r--
2025-07-08 23:06
adcintrin.h
5.43
KB
-rw-r--r--
2025-07-08 23:06
adxintrin.h
3.37
KB
-rw-r--r--
2025-07-08 23:06
altivec.h
698.45
KB
-rw-r--r--
2025-07-08 23:06
amdgpuintrin.h
7.4
KB
-rw-r--r--
2025-07-08 23:06
ammintrin.h
7.56
KB
-rw-r--r--
2025-07-08 23:06
amxavx512intrin.h
12.67
KB
-rw-r--r--
2025-07-08 23:06
amxbf16transposeintrin.h
3.44
KB
-rw-r--r--
2025-07-08 23:06
amxcomplexintrin.h
6.81
KB
-rw-r--r--
2025-07-08 23:06
amxcomplextransposeintrin.h
11.78
KB
-rw-r--r--
2025-07-08 23:06
amxfp16intrin.h
3.25
KB
-rw-r--r--
2025-07-08 23:06
amxfp16transposeintrin.h
3.44
KB
-rw-r--r--
2025-07-08 23:06
amxfp8intrin.h
8.63
KB
-rw-r--r--
2025-07-08 23:06
amxintrin.h
19.83
KB
-rw-r--r--
2025-07-08 23:06
amxmovrsintrin.h
2.24
KB
-rw-r--r--
2025-07-08 23:06
amxmovrstransposeintrin.h
9.18
KB
-rw-r--r--
2025-07-08 23:06
amxtf32intrin.h
3.48
KB
-rw-r--r--
2025-07-08 23:06
amxtf32transposeintrin.h
3.56
KB
-rw-r--r--
2025-07-08 23:06
amxtransposeintrin.h
10.6
KB
-rw-r--r--
2025-07-08 23:06
arm64intr.h
993
B
-rw-r--r--
2025-07-08 23:06
arm_acle.h
29.97
KB
-rw-r--r--
2025-07-08 23:06
arm_bf16.h
548
B
-rw-r--r--
2025-09-17 20:10
arm_cde.h
32.67
KB
-rw-r--r--
2025-09-17 20:10
arm_cmse.h
6.21
KB
-rw-r--r--
2025-07-08 23:06
arm_fp16.h
16.94
KB
-rw-r--r--
2025-09-17 20:10
arm_mve.h
1.48
MB
-rw-r--r--
2025-09-17 20:10
arm_neon.h
2.66
MB
-rw-r--r--
2025-09-17 20:10
arm_neon_sve_bridge.h
9.48
KB
-rw-r--r--
2025-07-08 23:06
arm_sme.h
223.36
KB
-rw-r--r--
2025-09-17 20:10
arm_sve.h
2.03
MB
-rw-r--r--
2025-09-17 20:10
arm_vector_types.h
9.86
KB
-rw-r--r--
2025-09-17 20:10
armintr.h
843
B
-rw-r--r--
2025-07-08 23:06
avx10_2_512bf16intrin.h
22.87
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512convertintrin.h
12.88
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512minmaxintrin.h
7.35
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512niintrin.h
13.71
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512satcvtdsintrin.h
13.48
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512satcvtintrin.h
16.6
KB
-rw-r--r--
2025-07-08 23:06
avx10_2bf16intrin.h
43.63
KB
-rw-r--r--
2025-07-08 23:06
avx10_2convertintrin.h
22.75
KB
-rw-r--r--
2025-07-08 23:06
avx10_2copyintrin.h
2.35
KB
-rw-r--r--
2025-07-08 23:06
avx10_2minmaxintrin.h
13.53
KB
-rw-r--r--
2025-07-08 23:06
avx10_2niintrin.h
16.6
KB
-rw-r--r--
2025-07-08 23:06
avx10_2satcvtdsintrin.h
15.36
KB
-rw-r--r--
2025-07-08 23:06
avx10_2satcvtintrin.h
18.87
KB
-rw-r--r--
2025-07-08 23:06
avx2intrin.h
188.12
KB
-rw-r--r--
2025-07-08 23:06
avx512bf16intrin.h
10.58
KB
-rw-r--r--
2025-07-08 23:06
avx512bitalgintrin.h
2.56
KB
-rw-r--r--
2025-07-08 23:06
avx512bwintrin.h
76.02
KB
-rw-r--r--
2025-07-08 23:06
avx512cdintrin.h
4.23
KB
-rw-r--r--
2025-07-08 23:06
avx512dqintrin.h
58.87
KB
-rw-r--r--
2025-07-08 23:06
avx512fintrin.h
383.45
KB
-rw-r--r--
2025-07-08 23:06
avx512fp16intrin.h
156.92
KB
-rw-r--r--
2025-07-08 23:06
avx512ifmaintrin.h
2.6
KB
-rw-r--r--
2025-07-08 23:06
avx512ifmavlintrin.h
4.6
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmi2intrin.h
13.18
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmiintrin.h
3.83
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmivlintrin.h
7.23
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbf16intrin.h
19.46
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbitalgintrin.h
4.52
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbwintrin.h
121.56
KB
-rw-r--r--
2025-07-08 23:06
avx512vlcdintrin.h
7.95
KB
-rw-r--r--
2025-07-08 23:06
avx512vldqintrin.h
46.71
KB
-rw-r--r--
2025-07-08 23:06
avx512vlfp16intrin.h
85.51
KB
-rw-r--r--
2025-07-08 23:06
avx512vlintrin.h
322.6
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvbmi2intrin.h
26.01
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvnniintrin.h
13.41
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvp2intersectintrin.h
4.67
KB
-rw-r--r--
2025-07-08 23:06
avx512vnniintrin.h
4.32
KB
-rw-r--r--
2025-07-08 23:06
avx512vp2intersectintrin.h
3.03
KB
-rw-r--r--
2025-07-08 23:06
avx512vpopcntdqintrin.h
2.3
KB
-rw-r--r--
2025-07-08 23:06
avx512vpopcntdqvlintrin.h
3.86
KB
-rw-r--r--
2025-07-08 23:06
avxifmaintrin.h
5.75
KB
-rw-r--r--
2025-07-08 23:06
avxintrin.h
197.12
KB
-rw-r--r--
2025-07-08 23:06
avxneconvertintrin.h
14.09
KB
-rw-r--r--
2025-07-08 23:06
avxvnniint16intrin.h
15.84
KB
-rw-r--r--
2025-07-08 23:06
avxvnniint8intrin.h
17.02
KB
-rw-r--r--
2025-07-08 23:06
avxvnniintrin.h
10.44
KB
-rw-r--r--
2025-07-08 23:06
bmi2intrin.h
7.37
KB
-rw-r--r--
2025-07-08 23:06
bmiintrin.h
20.02
KB
-rw-r--r--
2025-07-08 23:06
builtins.h
846
B
-rw-r--r--
2025-07-08 23:06
cet.h
1.49
KB
-rw-r--r--
2025-07-08 23:06
cetintrin.h
3.27
KB
-rw-r--r--
2025-07-08 23:06
cldemoteintrin.h
1.18
KB
-rw-r--r--
2025-07-08 23:06
clflushoptintrin.h
1.17
KB
-rw-r--r--
2025-07-08 23:06
clwbintrin.h
1.2
KB
-rw-r--r--
2025-07-08 23:06
clzerointrin.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
cmpccxaddintrin.h
2.33
KB
-rw-r--r--
2025-07-08 23:06
cpuid.h
11.9
KB
-rw-r--r--
2025-07-08 23:06
crc32intrin.h
3.27
KB
-rw-r--r--
2025-07-08 23:06
emmintrin.h
196.89
KB
-rw-r--r--
2025-07-08 23:06
enqcmdintrin.h
2.12
KB
-rw-r--r--
2025-07-08 23:06
f16cintrin.h
5.39
KB
-rw-r--r--
2025-07-08 23:06
float.h
6.43
KB
-rw-r--r--
2025-07-08 23:06
fma4intrin.h
6.82
KB
-rw-r--r--
2025-07-08 23:06
fmaintrin.h
28.65
KB
-rw-r--r--
2025-07-08 23:06
fxsrintrin.h
2.82
KB
-rw-r--r--
2025-07-08 23:06
gfniintrin.h
9.59
KB
-rw-r--r--
2025-07-08 23:06
gpuintrin.h
9.07
KB
-rw-r--r--
2025-07-08 23:06
hexagon_circ_brev_intrinsics.h
15.59
KB
-rw-r--r--
2025-07-08 23:06
hexagon_protos.h
374.42
KB
-rw-r--r--
2025-07-08 23:06
hexagon_types.h
130.38
KB
-rw-r--r--
2025-07-08 23:06
hresetintrin.h
1.36
KB
-rw-r--r--
2025-07-08 23:06
htmintrin.h
6.14
KB
-rw-r--r--
2025-07-08 23:06
htmxlintrin.h
9.01
KB
-rw-r--r--
2025-07-08 23:06
hvx_hexagon_protos.h
275.18
KB
-rw-r--r--
2025-07-08 23:06
ia32intrin.h
25.38
KB
-rw-r--r--
2025-07-08 23:06
immintrin.h
25.46
KB
-rw-r--r--
2025-07-08 23:06
intrin.h
18.16
KB
-rw-r--r--
2025-07-08 23:06
intrin0.h
13.76
KB
-rw-r--r--
2025-07-08 23:06
inttypes.h
2.37
KB
-rw-r--r--
2025-07-08 23:06
invpcidintrin.h
764
B
-rw-r--r--
2025-07-08 23:06
iso646.h
763
B
-rw-r--r--
2025-07-08 23:06
keylockerintrin.h
17.85
KB
-rw-r--r--
2025-07-08 23:06
larchintrin.h
8.5
KB
-rw-r--r--
2025-07-08 23:06
lasxintrin.h
142.14
KB
-rw-r--r--
2025-07-08 23:06
limits.h
3.71
KB
-rw-r--r--
2025-07-08 23:06
lsxintrin.h
134.7
KB
-rw-r--r--
2025-07-08 23:06
lwpintrin.h
5
KB
-rw-r--r--
2025-07-08 23:06
lzcntintrin.h
3.45
KB
-rw-r--r--
2025-07-08 23:06
mm3dnow.h
729
B
-rw-r--r--
2025-07-08 23:06
mm_malloc.h
1.88
KB
-rw-r--r--
2025-07-08 23:06
mmintrin.h
58.6
KB
-rw-r--r--
2025-07-08 23:06
module.modulemap
6.47
KB
-rw-r--r--
2025-07-08 23:06
movdirintrin.h
1.57
KB
-rw-r--r--
2025-07-08 23:06
movrs_avx10_2_512intrin.h
3.88
KB
-rw-r--r--
2025-07-08 23:06
movrs_avx10_2intrin.h
6.87
KB
-rw-r--r--
2025-07-08 23:06
movrsintrin.h
1.98
KB
-rw-r--r--
2025-07-08 23:06
msa.h
25.01
KB
-rw-r--r--
2025-07-08 23:06
mwaitxintrin.h
2.19
KB
-rw-r--r--
2025-07-08 23:06
nmmintrin.h
709
B
-rw-r--r--
2025-07-08 23:06
nvptxintrin.h
7.42
KB
-rw-r--r--
2025-07-08 23:06
omp-tools.h
50.86
KB
-rw-r--r--
2025-09-17 20:37
omp.h
23.36
KB
-rw-r--r--
2025-09-17 20:37
ompt-multiplex.h
51.11
KB
-rw-r--r--
2025-07-08 23:06
ompt.h
50.86
KB
-rw-r--r--
2025-09-17 20:37
ompx.h
7.38
KB
-rw-r--r--
2025-09-17 20:37
opencl-c-base.h
30.94
KB
-rw-r--r--
2025-07-08 23:06
opencl-c.h
874.98
KB
-rw-r--r--
2025-07-08 23:06
pconfigintrin.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
pkuintrin.h
934
B
-rw-r--r--
2025-07-08 23:06
pmmintrin.h
11.12
KB
-rw-r--r--
2025-07-08 23:06
popcntintrin.h
1.9
KB
-rw-r--r--
2025-07-08 23:06
prfchiintrin.h
2.02
KB
-rw-r--r--
2025-07-08 23:06
prfchwintrin.h
2.06
KB
-rw-r--r--
2025-07-08 23:06
ptrauth.h
15.61
KB
-rw-r--r--
2025-07-08 23:06
ptwriteintrin.h
1.05
KB
-rw-r--r--
2025-07-08 23:06
raointintrin.h
6.59
KB
-rw-r--r--
2025-07-08 23:06
rdpruintrin.h
1.59
KB
-rw-r--r--
2025-07-08 23:06
rdseedintrin.h
2.85
KB
-rw-r--r--
2025-07-08 23:06
riscv_bitmanip.h
5.59
KB
-rw-r--r--
2025-07-08 23:06
riscv_corev_alu.h
3.94
KB
-rw-r--r--
2025-07-08 23:06
riscv_crypto.h
5.09
KB
-rw-r--r--
2025-07-08 23:06
riscv_ntlh.h
744
B
-rw-r--r--
2025-07-08 23:06
riscv_vector.h
17.11
KB
-rw-r--r--
2025-09-17 20:11
rtmintrin.h
1.25
KB
-rw-r--r--
2025-07-08 23:06
s390intrin.h
604
B
-rw-r--r--
2025-07-08 23:06
serializeintrin.h
881
B
-rw-r--r--
2025-07-08 23:06
sgxintrin.h
1.77
KB
-rw-r--r--
2025-07-08 23:06
sha512intrin.h
5.95
KB
-rw-r--r--
2025-07-08 23:06
shaintrin.h
7.37
KB
-rw-r--r--
2025-07-08 23:06
sifive_vector.h
6.88
KB
-rw-r--r--
2025-07-08 23:06
sm3intrin.h
7.29
KB
-rw-r--r--
2025-07-08 23:06
sm4evexintrin.h
1.17
KB
-rw-r--r--
2025-07-08 23:06
sm4intrin.h
8.2
KB
-rw-r--r--
2025-07-08 23:06
smmintrin.h
99.34
KB
-rw-r--r--
2025-07-08 23:06
stdalign.h
756
B
-rw-r--r--
2025-07-08 23:06
stdarg.h
2.44
KB
-rw-r--r--
2025-07-08 23:06
stdatomic.h
8.46
KB
-rw-r--r--
2025-07-08 23:06
stdbool.h
1.14
KB
-rw-r--r--
2025-07-08 23:06
stdckdint.h
1.63
KB
-rw-r--r--
2025-07-08 23:06
stddef.h
4.93
KB
-rw-r--r--
2025-07-08 23:06
stdint.h
30.33
KB
-rw-r--r--
2025-07-08 23:06
stdnoreturn.h
1.29
KB
-rw-r--r--
2025-07-08 23:06
tbmintrin.h
3.42
KB
-rw-r--r--
2025-07-08 23:06
tgmath.h
29.68
KB
-rw-r--r--
2025-07-08 23:06
tmmintrin.h
31.18
KB
-rw-r--r--
2025-07-08 23:06
tsxldtrkintrin.h
1.97
KB
-rw-r--r--
2025-07-08 23:06
uintrintrin.h
4.96
KB
-rw-r--r--
2025-07-08 23:06
unwind.h
11.21
KB
-rw-r--r--
2025-07-08 23:06
usermsrintrin.h
1.54
KB
-rw-r--r--
2025-07-08 23:06
vadefs.h
1.39
KB
-rw-r--r--
2025-07-08 23:06
vaesintrin.h
2.61
KB
-rw-r--r--
2025-07-08 23:06
varargs.h
584
B
-rw-r--r--
2025-07-08 23:06
vecintrin.h
419.23
KB
-rw-r--r--
2025-07-08 23:06
velintrin.h
2.1
KB
-rw-r--r--
2025-07-08 23:06
velintrin_approx.h
3.54
KB
-rw-r--r--
2025-07-08 23:06
velintrin_gen.h
69.06
KB
-rw-r--r--
2025-07-08 23:06
vpclmulqdqintrin.h
1.06
KB
-rw-r--r--
2025-07-08 23:06
waitpkgintrin.h
1.33
KB
-rw-r--r--
2025-07-08 23:06
wasm_simd128.h
81.71
KB
-rw-r--r--
2025-07-08 23:06
wbnoinvdintrin.h
749
B
-rw-r--r--
2025-07-08 23:06
wmmintrin.h
659
B
-rw-r--r--
2025-07-08 23:06
x86gprintrin.h
2.22
KB
-rw-r--r--
2025-07-08 23:06
x86intrin.h
1.38
KB
-rw-r--r--
2025-07-08 23:06
xmmintrin.h
117.43
KB
-rw-r--r--
2025-07-08 23:06
xopintrin.h
19.96
KB
-rw-r--r--
2025-07-08 23:06
xsavecintrin.h
2.51
KB
-rw-r--r--
2025-07-08 23:06
xsaveintrin.h
1.64
KB
-rw-r--r--
2025-07-08 23:06
xsaveoptintrin.h
1
KB
-rw-r--r--
2025-07-08 23:06
xsavesintrin.h
1.24
KB
-rw-r--r--
2025-07-08 23:06
xtestintrin.h
873
B
-rw-r--r--
2025-07-08 23:06
yvals_core.h
687
B
-rw-r--r--
2025-07-08 23:06
Save
Rename
/*===--- __clang_cuda_texture_intrinsics.h - Device-side texture support ---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 *
 * This header provides in-header implementations for NVCC's built-in
 * __nv_tex_surf_handler() which is used by CUDA's texture-related headers. The
 * built-in is unusual as it's actually a set of function overloads that use the
 * first string literal argument as one of the overload parameters.
 */
#ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__
#define __CLANG_CUDA_TEXTURE_INTRINSICS_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

// __nv_tex_surf_handler() provided by this header as a macro.
// The string literal __op is hashed at compile time into a __Tag type that
// selects the matching __tex_fetch overload.
#define __nv_tex_surf_handler(__op, __ptr, ...)                                \
  ::__cuda_tex::__tex_fetch<                                                   \
      ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr,           \
                                                              __VA_ARGS__)

// Save user-visible state of all helper macros defined below; they are
// restored (popped) at the end of the header so we do not leak names.
#pragma push_macro("__ASM_OUT")
#pragma push_macro("__ASM_OUTP")
#pragma push_macro("__Args")
#pragma push_macro("__ID")
#pragma push_macro("__IDV")
#pragma push_macro("__IMPL_2DGATHER")
#pragma push_macro("__IMPL_ALIAS")
#pragma push_macro("__IMPL_ALIASI")
#pragma push_macro("__IMPL_F1")
#pragma push_macro("__IMPL_F3")
#pragma push_macro("__IMPL_F3N")
#pragma push_macro("__IMPL_F3S")
#pragma push_macro("__IMPL_S")
#pragma push_macro("__IMPL_S3")
#pragma push_macro("__IMPL_S3I")
#pragma push_macro("__IMPL_S3N")
#pragma push_macro("__IMPL_S3NI")
#pragma push_macro("__IMPL_S3S")
#pragma push_macro("__IMPL_S3SI")
#pragma push_macro("__IMPL_SI")
#pragma push_macro("__L")
#pragma push_macro("__STRIP_PARENS")

// Put all functions into anonymous namespace so they have internal linkage.
// The device-only function here must be internal in order to avoid ODR
// violations in case they are used from the files compiled with
// -fgpu-rdc. E.g. a library and an app using it may be built with a different
// version of this header file.
namespace {

// Put the implementation into its own namespace so we don't pollute the TU.
namespace __cuda_tex {

// First, we need a perfect hash function and a few constexpr helper functions
// for converting a string literal into a numeric value which can be used to
// parametrize a template. We can not use string literals for that as that would
// require C++20.
//
// The hash function was generated with 'gperf' and then manually converted into
// its constexpr equivalent.
//
// NOTE: the perfect hashing scheme comes with inherent self-test. If the hash
// function has a collision for any of the texture operations, the compilation
// will fail due to an attempt to redefine a tag with the same value. If the
// header compiles, then the hash function is good enough for the job.

// strlen() for strings of up to 32 characters, written as a single return
// expression so it is a valid C++11 constexpr function.
constexpr int __tex_len(const char *s) {
  return (s[0] == 0)    ? 0
         : (s[1] == 0)  ? 1
         : (s[2] == 0)  ? 2
         : (s[3] == 0)  ? 3
         : (s[4] == 0)  ? 4
         : (s[5] == 0)  ? 5
         : (s[6] == 0)  ? 6
         : (s[7] == 0)  ? 7
         : (s[8] == 0)  ? 8
         : (s[9] == 0)  ? 9
         : (s[10] == 0) ? 10
         : (s[11] == 0) ? 11
         : (s[12] == 0) ? 12
         : (s[13] == 0) ? 13
         : (s[14] == 0) ? 14
         : (s[15] == 0) ? 15
         : (s[16] == 0) ? 16
         : (s[17] == 0) ? 17
         : (s[18] == 0) ? 18
         : (s[19] == 0) ? 19
         : (s[20] == 0) ? 20
         : (s[21] == 0) ? 21
         : (s[22] == 0) ? 22
         : (s[23] == 0) ? 23
         : (s[24] == 0) ? 24
         : (s[25] == 0) ? 25
         : (s[26] == 0) ? 26
         : (s[27] == 0) ? 27
         : (s[28] == 0) ? 28
         : (s[29] == 0) ? 29
         : (s[30] == 0) ? 30
         : (s[31] == 0) ? 31
                        : 32;
}

// gperf-generated character-to-weight table for the perfect hash; characters
// not in the table map to 225.
constexpr int __tex_hash_map(int c) {
  return (c == 49)    ? 10
         : (c == 50)  ? 0
         : (c == 51)  ? 100
         : (c == 52)  ? 30
         : (c == 67)  ? 10
         : (c == 68)  ? 0
         : (c == 69)  ? 25
         : (c == 72)  ? 70
         : (c == 77)  ? 0
         : (c == 96)  ? 44
         : (c == 99)  ? 10
         : (c == 100) ? 5
         : (c == 101) ? 60
         : (c == 102) ? 40
         : (c == 103) ? 70
         : (c == 104) ? 25
         : (c == 112) ? 0
         : (c == 114) ? 45
         : (c == 117) ? 5
         : (c == 118) ? 85
         : (c == 120) ? 20
                      : 225;
}

// Perfect hash of an operation name: string length plus the mapped weights of
// the characters at positions 7 (+1), 6, 5 and the last character.
constexpr int __tex_op_hash(const char *str) {
  return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) +
         __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]);
}

// Tag type to identify particular texture operation.
template <int N> struct __Tag;
#define __ID(__op) __Tag<__tex_op_hash(__op)>
// Tags for variants of particular operation. E.g. tex2Dgather can translate
// into 4 different instructions.
#define __IDV(__op, __variant)                                                 \
  __Tag<10000 + __tex_op_hash(__op) * 100 + __variant>

// Helper classes for figuring out key data types for derived types.
// E.g. char2 has __base_t = char, __fetch_t = char4
template <class> struct __TypeInfoT;
// Type info for the fundamental types.
template <> struct __TypeInfoT<float> {
  using __base_t = float;
  using __fetch_t = float4;
};
template <> struct __TypeInfoT<char> {
  using __base_t = char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<signed char> {
  using __base_t = signed char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned char> {
  using __base_t = unsigned char;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<short> {
  using __base_t = short;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned short> {
  using __base_t = unsigned short;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<int> {
  using __base_t = int;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned int> {
  using __base_t = unsigned int;
  using __fetch_t = uint4;
};

// Derived base/fetch types for N-element vectors.
template <class __T> struct __TypeInfoT {
  using __base_t = decltype(__T::x);
  using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t;
};

// Classes that implement specific texture ops.
template <class __op> struct __tex_fetch_v4;

// Helper macros to strip parens from a macro argument.
#define __Args(...) __VA_ARGS__
#define __STRIP_PARENS(__X) __X
#define __L(__X) __STRIP_PARENS(__Args __X)

// Construct inline assembly output args.
// Results are stored in a temp var __r.
// isResident bool is pointed to by __ir
// Asm args for return values. It's a 4-element vector
#define __ASM_OUT(__t)                                                         \
  ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w))
// .. possibly combined with a predicate.
#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir))

// Implements a single variant of texture fetch instruction.
// NOTE: expands to a specialization of the member template __run, so it is
// only valid inside a __tex_fetch_v4<> specialization (see __IMPL_S3I below).
#define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args)        \
  template <>                                                                  \
  __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) {        \
    __rt __r;                                                                  \
    asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args));             \
    return __r;                                                                \
  }

// Implements texture fetch instructions for int4/uint4/float4 data types.
#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)        \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"),       \
            __asm_args)

// Implements 'sparse' texture fetch instructions for int4/uint4/float4 data
// types. Similar to above, but returns a boolean 'isPresent' value in addition
// to texture data,
#define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"),      \
            __asm_args)

// Similar to F3, but for integer data which is returned as normalized floats.
// Only instantiates fetch functions for int4/uint4.
#define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, uint4, __args,                                             \
            __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"),       \
            __asm_args)

// Instantiates __tex_fetch_v4 with regular fetch functions.
#define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)            \
  }

// Same, but for sparse ops. Only available on sm_60+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }
#else
// On older GPUs sparse ops expand to nothing.
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#endif

// Same, but for normalized float ops.
#define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args));    \
    __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }

// Regular and normalized float ops share a lot of similarities. This macro
// instantiates both variants -- normal for __op and normalized for __opn.
#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,       \
                  __asm_args)                                                  \
  __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args);      \
  __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)

// Convenience macros which converts string literal __op into a __Tag,
#define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)  \
  __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,        \
                 __asm_args)                                                   \
  __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \
            __asm_args)

// CUDA headers have some 'legacy' texture operations that duplicate
// functionality. So, we just inherit it, instead of refining a copy.
#define __IMPL_ALIASI(__op, __opn)                                             \
  template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {}
#define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn))

// Now we can instantiate everything we need for each specific texture fetch
// variant.
// 1D
__IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x)));
__IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4",
         "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x)));
__IMPL_ALIAS("__itex1D", "__tex1D_v2");
__IMPL_ALIAS("__itex1Dfetch", "__tex1Dfetch_v2");
__IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2",
         (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};",
         ("f"(__x), "f"(__dPdx), "f"(__dPdy)));
__IMPL_ALIAS("__itex1DGrad", "__tex1DGrad_v2");
__IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2",
         (float __x, int __layer), "tex.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x)));
__IMPL_ALIAS("__itex1DLayered", "__tex1DLayered_v2");
__IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2",
         (float __x, int __layer, float __dPdx, float __dPdy),
         "tex.grad.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};",
         ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)));
__IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2");
__IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2",
         (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("r"(__layer), "f"(__x), "f"(__level)));
__IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2");
__IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level),
         "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;",
         ("f"(__x), "f"(__level)));
__IMPL_ALIAS("__itex1DLod", "__tex1DLod_v2");

// 2D
__IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), "tex.2d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));
__IMPL_ALIAS("__itex2D", "__tex2D_v2");
// Sparse fetches wrap the tex instruction in a predicate that is converted to
// the 'isResident' byte written through __ir (asm operand %4).
__IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
           " selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y)));
__IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2",
         (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy),
         "tex.grad.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};",
         ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
          "f"(__dPdy->y)));
__IMPL_ALIAS("__itex2DGrad_v2", "__tex2DGrad_v2");
__IMPL_S3S("__itex2DGrad_sparse",
           (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
            "f"(__dPdy->y)));
__IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2",
         (float __x, float __y, int __layer), "tex.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_ALIAS("__itex2DLayered", "__tex2DLayered_v2");
__IMPL_S3S("__itex2DLayered_sparse",
           (float __x, float __y, int __layer, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.a2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2",
         (float __x, float __y, int __layer, const float2 *__dPdx,
          const float2 *__dPdy),
         "tex.grad.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y)));
__IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2");
__IMPL_S3S(
    "__itex2DLayeredGrad_sparse",
    (float __x, float __y, int __layer, const float2 *__dPdx,
     const float2 *__dPdy, unsigned char *__ir),
    "{.reg .pred %%p0;\n\t"
    "tex.grad.a2d.v4", "f32",
    "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t"
    "selp.u16 %4, 1, 0, %%p0; }",
    ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
     "f"(__dPdy->x), "f"(__dPdy->y)));
__IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2",
         (float __x, float __y, int __layer, float __level), "tex.level.a2d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2");
__IMPL_S3S("__itex2DLayeredLod_sparse",
           (float __x, float __y, int __layer, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.a2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2",
         (float __x, float __y, float __level), "tex.level.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("f"(__x), "f"(__y), "f"(__level)));
__IMPL_ALIAS("__itex2DLod", "__tex2DLod_v2");
__IMPL_S3S("__itex2DLod_sparse",
           (float __x, float __y, float __level, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__level)));

// 2D gather is special. Unlike other variants that translate into exactly one
// asm instruction, it uses one of the four different instructions selected by
// __comp. We implement each instruction variant separately, and dispatch the
// right one from the manually implemented 'umbrella' fetch.
// Emits the three fetch implementations (regular, rmnf, sparse) for one gather
// component. The numeric `variant` is appended to the base op name via __IDV,
// so each tld4.{r,g,b,a} instruction gets its own __tex_fetch_v4
// specialization. No // comments may appear inside the macro body (they would
// swallow the continuation backslashes), hence this block comment up front.
#define __IMPL_2DGATHER(variant, instr)                                        \
  __IMPL_SI(__IDV("__tex2Dgather_v2", variant),                                \
            __IDV("__tex2Dgather_rmnf_v2", variant),                           \
            (float __x, float __y, int __comp), instr, "f32",                  \
            "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));        \
  __IMPL_ALIASI(__IDV("__itex2Dgather", variant),                              \
                __IDV("__tex2Dgather_v2", variant));                           \
  __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant),                         \
              (float __x, float __y, unsigned char *__ir, int __comp),         \
              "{.reg .pred %%p0;\n\t" instr, "f32",                            \
              "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"                     \
              "selp.u16 %4, 1, 0, %%p0; }",                                    \
              ("f"(__x), "f"(__y)));
// Instantiate the four per-component variants; the numeric suffix matches the
// __comp value the umbrella dispatchers below switch on (0=r, 1=g, 2=b, 3=a).
__IMPL_2DGATHER(0, "tld4.r.2d.v4");
__IMPL_2DGATHER(1, "tld4.g.2d.v4");
__IMPL_2DGATHER(2, "tld4.b.2d.v4");
__IMPL_2DGATHER(3, "tld4.a.2d.v4");

// Umbrella dispatcher -- calls into specific 2Dgather variant.
// NOTE(review): the switch has no default case, so __comp outside [0,3] falls
// off the end of a non-void function (UB). Callers are expected to always pass
// 0..3 -- confirm against the CUDA headers that expand __nv_tex_surf_handler.
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};
__IMPL_ALIAS("__itex2Dgather", "__tex2Dgather_v2");

// Same dispatch for the read-mode-normalized-float flavor; note the fixed
// float4 return type regardless of __T.
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> {
  template <class __T>
  __device__ static float4 __run(cudaTextureObject_t __obj, float __x,
                                 float __y, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
// Sparse gather dispatcher. Guarded so it only exists for sm_60+ device
// compilations (and for the host pass, where __CUDA_ARCH__ is undefined).
template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              unsigned char *__ir, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    }
  }
};
#endif

// 3D
__IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z),
         "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itex3D", "__tex3D_v2");
__IMPL_S3S("__itex3D_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
// 3D with derivatives: note float4 (not float3) gradient pointers; only the
// x/y/z fields are read, and the last asm vector slot is duplicated padding.
__IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.3d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_ALIAS("__itex3DGrad_v2", "__tex3DGrad_v2");
__IMPL_S3S("__itex3DGrad_sparse",
           (float __x, float __y, float __z, const float4 *__dPdx,
            const float4 *__dPdy, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
           "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
            "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.3d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itex3DLod", "__tex3DLod_v2");
__IMPL_S3S("__itex3DLod_sparse",
           (float __x, float __y, float __z, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));

// Cubemap
__IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2",
         (float __x, float __y, float __z), "tex.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itexCubemap", "__texCubemap_v2");
__IMPL_S3S("__itexCubemap_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.cube.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2");
// Layered cubemap (tex.acube): layer index occupies the 4th address slot, so
// no slot duplication is needed here.
__IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2",
         (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2");
__IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2",
         (float __x, float __y, float __z, int __layer, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
         "{%9, %10, %11, %11}, {%12, %13, %14, %14};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2");
__IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2",
         (float __x, float __y, float __z, int __layer, float __level),
         "tex.level.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2");
__IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.cube.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itexCubemapLod", "__texCubemapLod_v2");

// Helper class for extracting slice of data from V4 fetch results.
// __NElements defaults to the number of base elements that fit in __DestT,
// e.g. 2 for float2 with a float base type, so the right specialization is
// picked automatically. Uses in-class explicit specialization of a member
// template, which clang accepts in CUDA compilation mode.
template <class __DestT, class __SrcT> struct __convert {
  template <int __NElements = sizeof(__DestT) /
                              sizeof(typename __TypeInfoT<__DestT>::__base_t)>
  __device__ static __DestT __run(__SrcT __v);
  template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; }
  template <> __device__ static __DestT __run<2>(__SrcT __v) {
    return {__v.x, __v.y};
  }
  template <> __device__ static __DestT __run<3>(__SrcT __v) {
    return {__v.x, __v.y, __v.z};
  }
  template <> __device__ static __DestT __run<4>(__SrcT __v) {
    return {__v.x, __v.y, __v.z, __v.w};
  }
};

// These are the top-level function overloads the __nv_tex_surf_handler expands
// to. Each overload deals with one of the several ways __nv_tex_surf_handler
// is called by CUDA headers. In the end, each of the overloads does the same
// job -- it figures out which `__tex_fetch_v4::run` variant should be used to
// fetch texture data and which `__convert::run` is needed to convert it into
// appropriate return type.

// __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...);
// Data type and return type are based on ret.
template <class __op, class __T, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
}

#if CUDA_VERSION < 12000
// texture<> objects get magically converted into a texture reference. However,
// there's no way to convert them to cudaTextureObject_t on C++ level. So, we
// cheat a bit and use inline assembly to do it. It costs us an extra register
// and a move, but that is easy for ptxas to optimize away.
template <class __T>
__device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __obj;
  asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle));
  return __obj;
}

// __nv_tex_surf_handler ("__tex...", &ret, textureReference, args...);
// Data type and return type are based on ret.
template <class __op, class __T, class __HandleT, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, __HandleT __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}

// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
// cudaReadModeNormalizedFloat fetches always return float4, so conversion to
// __RetT goes through __convert<__RetT, float4>.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, float4>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}

// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
// For cudaReadModeElementType fetch return type is based on type_dummy.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeElementType> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
#endif // CUDA_VERSION
} // namespace __cuda_tex
} // namespace

// Restore all helper macros pushed at the top of this header so they do not
// leak into user code.
#pragma pop_macro("__ASM_OUT")
#pragma pop_macro("__ASM_OUTP")
#pragma pop_macro("__Args")
#pragma pop_macro("__ID")
#pragma pop_macro("__IDV")
#pragma pop_macro("__IMPL_2DGATHER")
#pragma pop_macro("__IMPL_ALIAS")
#pragma pop_macro("__IMPL_ALIASI")
#pragma pop_macro("__IMPL_F1")
#pragma pop_macro("__IMPL_F3")
#pragma pop_macro("__IMPL_F3N")
#pragma pop_macro("__IMPL_F3S")
#pragma pop_macro("__IMPL_S")
#pragma pop_macro("__IMPL_S3")
#pragma pop_macro("__IMPL_S3I")
#pragma pop_macro("__IMPL_S3N")
#pragma pop_macro("__IMPL_S3NI")
#pragma pop_macro("__IMPL_S3S")
#pragma pop_macro("__IMPL_S3SI")
#pragma pop_macro("__IMPL_SI")
#pragma pop_macro("__L")
#pragma pop_macro("__STRIP_PARENS")
#endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__