Linux sh52.ich-4.com 5.14.0-611.26.1.el9_7.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Jan 29 05:24:47 EST 2026 x86_64
LiteSpeed
Server IP : 198.143.147.58 & Your IP : 216.73.217.21
Domains :
Cant Read [ /etc/named.conf ]
User : actualbuzz
Terminal
Auto Root
Create File
Create Folder
Localroot Suggester
Backdoor Destroyer
Readme
/
lib /
clang /
20 /
include /
Delete
Unzip
Name
Size
Permission
Date
Action
cuda_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
fuzzer
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
llvm_libc_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
llvm_offload_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
openmp_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
orc
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
ppc_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
profile
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
sanitizer
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
xray
[ DIR ]
drwxr-xr-x
2026-02-03 15:08
zos_wrappers
[ DIR ]
drwxr-xr-x
2026-02-03 15:09
__clang_cuda_builtin_vars.h
4.78
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_cmath.h
18.06
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_complex_builtins.h
9.36
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_device_functions.h
57.36
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_intrinsics.h
29.87
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_libdevice_declares.h
21.89
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_math.h
16.21
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_math_forward_declares.h
8.27
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_runtime_wrapper.h
17.6
KB
-rw-r--r--
2025-07-08 23:06
__clang_cuda_texture_intrinsics.h
31.86
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_cmath.h
26.48
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_libdevice_declares.h
19.87
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_math.h
32.78
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_runtime_wrapper.h
4.94
KB
-rw-r--r--
2025-07-08 23:06
__clang_hip_stdlib.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
__stdarg___gnuc_va_list.h
467
B
-rw-r--r--
2025-07-08 23:06
__stdarg___va_copy.h
445
B
-rw-r--r--
2025-07-08 23:06
__stdarg_header_macro.h
417
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_arg.h
793
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_copy.h
451
B
-rw-r--r--
2025-07-08 23:06
__stdarg_va_list.h
448
B
-rw-r--r--
2025-07-08 23:06
__stddef_header_macro.h
417
B
-rw-r--r--
2025-07-08 23:06
__stddef_max_align_t.h
857
B
-rw-r--r--
2025-07-08 23:06
__stddef_null.h
875
B
-rw-r--r--
2025-07-08 23:06
__stddef_nullptr_t.h
958
B
-rw-r--r--
2025-07-08 23:06
__stddef_offsetof.h
708
B
-rw-r--r--
2025-07-08 23:06
__stddef_ptrdiff_t.h
717
B
-rw-r--r--
2025-07-08 23:06
__stddef_rsize_t.h
710
B
-rw-r--r--
2025-07-08 23:06
__stddef_size_t.h
708
B
-rw-r--r--
2025-07-08 23:06
__stddef_unreachable.h
735
B
-rw-r--r--
2025-07-08 23:06
__stddef_wchar_t.h
853
B
-rw-r--r--
2025-07-08 23:06
__stddef_wint_t.h
443
B
-rw-r--r--
2025-07-08 23:06
__wmmintrin_aes.h
5.15
KB
-rw-r--r--
2025-07-08 23:06
__wmmintrin_pclmul.h
1.99
KB
-rw-r--r--
2025-07-08 23:06
adcintrin.h
5.43
KB
-rw-r--r--
2025-07-08 23:06
adxintrin.h
3.37
KB
-rw-r--r--
2025-07-08 23:06
altivec.h
698.45
KB
-rw-r--r--
2025-07-08 23:06
amdgpuintrin.h
7.4
KB
-rw-r--r--
2025-07-08 23:06
ammintrin.h
7.56
KB
-rw-r--r--
2025-07-08 23:06
amxavx512intrin.h
12.67
KB
-rw-r--r--
2025-07-08 23:06
amxbf16transposeintrin.h
3.44
KB
-rw-r--r--
2025-07-08 23:06
amxcomplexintrin.h
6.81
KB
-rw-r--r--
2025-07-08 23:06
amxcomplextransposeintrin.h
11.78
KB
-rw-r--r--
2025-07-08 23:06
amxfp16intrin.h
3.25
KB
-rw-r--r--
2025-07-08 23:06
amxfp16transposeintrin.h
3.44
KB
-rw-r--r--
2025-07-08 23:06
amxfp8intrin.h
8.63
KB
-rw-r--r--
2025-07-08 23:06
amxintrin.h
19.83
KB
-rw-r--r--
2025-07-08 23:06
amxmovrsintrin.h
2.24
KB
-rw-r--r--
2025-07-08 23:06
amxmovrstransposeintrin.h
9.18
KB
-rw-r--r--
2025-07-08 23:06
amxtf32intrin.h
3.48
KB
-rw-r--r--
2025-07-08 23:06
amxtf32transposeintrin.h
3.56
KB
-rw-r--r--
2025-07-08 23:06
amxtransposeintrin.h
10.6
KB
-rw-r--r--
2025-07-08 23:06
arm64intr.h
993
B
-rw-r--r--
2025-07-08 23:06
arm_acle.h
29.97
KB
-rw-r--r--
2025-07-08 23:06
arm_bf16.h
548
B
-rw-r--r--
2025-09-17 20:10
arm_cde.h
32.67
KB
-rw-r--r--
2025-09-17 20:10
arm_cmse.h
6.21
KB
-rw-r--r--
2025-07-08 23:06
arm_fp16.h
16.94
KB
-rw-r--r--
2025-09-17 20:10
arm_mve.h
1.48
MB
-rw-r--r--
2025-09-17 20:10
arm_neon.h
2.66
MB
-rw-r--r--
2025-09-17 20:10
arm_neon_sve_bridge.h
9.48
KB
-rw-r--r--
2025-07-08 23:06
arm_sme.h
223.36
KB
-rw-r--r--
2025-09-17 20:10
arm_sve.h
2.03
MB
-rw-r--r--
2025-09-17 20:10
arm_vector_types.h
9.86
KB
-rw-r--r--
2025-09-17 20:10
armintr.h
843
B
-rw-r--r--
2025-07-08 23:06
avx10_2_512bf16intrin.h
22.87
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512convertintrin.h
12.88
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512minmaxintrin.h
7.35
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512niintrin.h
13.71
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512satcvtdsintrin.h
13.48
KB
-rw-r--r--
2025-07-08 23:06
avx10_2_512satcvtintrin.h
16.6
KB
-rw-r--r--
2025-07-08 23:06
avx10_2bf16intrin.h
43.63
KB
-rw-r--r--
2025-07-08 23:06
avx10_2convertintrin.h
22.75
KB
-rw-r--r--
2025-07-08 23:06
avx10_2copyintrin.h
2.35
KB
-rw-r--r--
2025-07-08 23:06
avx10_2minmaxintrin.h
13.53
KB
-rw-r--r--
2025-07-08 23:06
avx10_2niintrin.h
16.6
KB
-rw-r--r--
2025-07-08 23:06
avx10_2satcvtdsintrin.h
15.36
KB
-rw-r--r--
2025-07-08 23:06
avx10_2satcvtintrin.h
18.87
KB
-rw-r--r--
2025-07-08 23:06
avx2intrin.h
188.12
KB
-rw-r--r--
2025-07-08 23:06
avx512bf16intrin.h
10.58
KB
-rw-r--r--
2025-07-08 23:06
avx512bitalgintrin.h
2.56
KB
-rw-r--r--
2025-07-08 23:06
avx512bwintrin.h
76.02
KB
-rw-r--r--
2025-07-08 23:06
avx512cdintrin.h
4.23
KB
-rw-r--r--
2025-07-08 23:06
avx512dqintrin.h
58.87
KB
-rw-r--r--
2025-07-08 23:06
avx512fintrin.h
383.45
KB
-rw-r--r--
2025-07-08 23:06
avx512fp16intrin.h
156.92
KB
-rw-r--r--
2025-07-08 23:06
avx512ifmaintrin.h
2.6
KB
-rw-r--r--
2025-07-08 23:06
avx512ifmavlintrin.h
4.6
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmi2intrin.h
13.18
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmiintrin.h
3.83
KB
-rw-r--r--
2025-07-08 23:06
avx512vbmivlintrin.h
7.23
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbf16intrin.h
19.46
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbitalgintrin.h
4.52
KB
-rw-r--r--
2025-07-08 23:06
avx512vlbwintrin.h
121.56
KB
-rw-r--r--
2025-07-08 23:06
avx512vlcdintrin.h
7.95
KB
-rw-r--r--
2025-07-08 23:06
avx512vldqintrin.h
46.71
KB
-rw-r--r--
2025-07-08 23:06
avx512vlfp16intrin.h
85.51
KB
-rw-r--r--
2025-07-08 23:06
avx512vlintrin.h
322.6
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvbmi2intrin.h
26.01
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvnniintrin.h
13.41
KB
-rw-r--r--
2025-07-08 23:06
avx512vlvp2intersectintrin.h
4.67
KB
-rw-r--r--
2025-07-08 23:06
avx512vnniintrin.h
4.32
KB
-rw-r--r--
2025-07-08 23:06
avx512vp2intersectintrin.h
3.03
KB
-rw-r--r--
2025-07-08 23:06
avx512vpopcntdqintrin.h
2.3
KB
-rw-r--r--
2025-07-08 23:06
avx512vpopcntdqvlintrin.h
3.86
KB
-rw-r--r--
2025-07-08 23:06
avxifmaintrin.h
5.75
KB
-rw-r--r--
2025-07-08 23:06
avxintrin.h
197.12
KB
-rw-r--r--
2025-07-08 23:06
avxneconvertintrin.h
14.09
KB
-rw-r--r--
2025-07-08 23:06
avxvnniint16intrin.h
15.84
KB
-rw-r--r--
2025-07-08 23:06
avxvnniint8intrin.h
17.02
KB
-rw-r--r--
2025-07-08 23:06
avxvnniintrin.h
10.44
KB
-rw-r--r--
2025-07-08 23:06
bmi2intrin.h
7.37
KB
-rw-r--r--
2025-07-08 23:06
bmiintrin.h
20.02
KB
-rw-r--r--
2025-07-08 23:06
builtins.h
846
B
-rw-r--r--
2025-07-08 23:06
cet.h
1.49
KB
-rw-r--r--
2025-07-08 23:06
cetintrin.h
3.27
KB
-rw-r--r--
2025-07-08 23:06
cldemoteintrin.h
1.18
KB
-rw-r--r--
2025-07-08 23:06
clflushoptintrin.h
1.17
KB
-rw-r--r--
2025-07-08 23:06
clwbintrin.h
1.2
KB
-rw-r--r--
2025-07-08 23:06
clzerointrin.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
cmpccxaddintrin.h
2.33
KB
-rw-r--r--
2025-07-08 23:06
cpuid.h
11.9
KB
-rw-r--r--
2025-07-08 23:06
crc32intrin.h
3.27
KB
-rw-r--r--
2025-07-08 23:06
emmintrin.h
196.89
KB
-rw-r--r--
2025-07-08 23:06
enqcmdintrin.h
2.12
KB
-rw-r--r--
2025-07-08 23:06
f16cintrin.h
5.39
KB
-rw-r--r--
2025-07-08 23:06
float.h
6.43
KB
-rw-r--r--
2025-07-08 23:06
fma4intrin.h
6.82
KB
-rw-r--r--
2025-07-08 23:06
fmaintrin.h
28.65
KB
-rw-r--r--
2025-07-08 23:06
fxsrintrin.h
2.82
KB
-rw-r--r--
2025-07-08 23:06
gfniintrin.h
9.59
KB
-rw-r--r--
2025-07-08 23:06
gpuintrin.h
9.07
KB
-rw-r--r--
2025-07-08 23:06
hexagon_circ_brev_intrinsics.h
15.59
KB
-rw-r--r--
2025-07-08 23:06
hexagon_protos.h
374.42
KB
-rw-r--r--
2025-07-08 23:06
hexagon_types.h
130.38
KB
-rw-r--r--
2025-07-08 23:06
hresetintrin.h
1.36
KB
-rw-r--r--
2025-07-08 23:06
htmintrin.h
6.14
KB
-rw-r--r--
2025-07-08 23:06
htmxlintrin.h
9.01
KB
-rw-r--r--
2025-07-08 23:06
hvx_hexagon_protos.h
275.18
KB
-rw-r--r--
2025-07-08 23:06
ia32intrin.h
25.38
KB
-rw-r--r--
2025-07-08 23:06
immintrin.h
25.46
KB
-rw-r--r--
2025-07-08 23:06
intrin.h
18.16
KB
-rw-r--r--
2025-07-08 23:06
intrin0.h
13.76
KB
-rw-r--r--
2025-07-08 23:06
inttypes.h
2.37
KB
-rw-r--r--
2025-07-08 23:06
invpcidintrin.h
764
B
-rw-r--r--
2025-07-08 23:06
iso646.h
763
B
-rw-r--r--
2025-07-08 23:06
keylockerintrin.h
17.85
KB
-rw-r--r--
2025-07-08 23:06
larchintrin.h
8.5
KB
-rw-r--r--
2025-07-08 23:06
lasxintrin.h
142.14
KB
-rw-r--r--
2025-07-08 23:06
limits.h
3.71
KB
-rw-r--r--
2025-07-08 23:06
lsxintrin.h
134.7
KB
-rw-r--r--
2025-07-08 23:06
lwpintrin.h
5
KB
-rw-r--r--
2025-07-08 23:06
lzcntintrin.h
3.45
KB
-rw-r--r--
2025-07-08 23:06
mm3dnow.h
729
B
-rw-r--r--
2025-07-08 23:06
mm_malloc.h
1.88
KB
-rw-r--r--
2025-07-08 23:06
mmintrin.h
58.6
KB
-rw-r--r--
2025-07-08 23:06
module.modulemap
6.47
KB
-rw-r--r--
2025-07-08 23:06
movdirintrin.h
1.57
KB
-rw-r--r--
2025-07-08 23:06
movrs_avx10_2_512intrin.h
3.88
KB
-rw-r--r--
2025-07-08 23:06
movrs_avx10_2intrin.h
6.87
KB
-rw-r--r--
2025-07-08 23:06
movrsintrin.h
1.98
KB
-rw-r--r--
2025-07-08 23:06
msa.h
25.01
KB
-rw-r--r--
2025-07-08 23:06
mwaitxintrin.h
2.19
KB
-rw-r--r--
2025-07-08 23:06
nmmintrin.h
709
B
-rw-r--r--
2025-07-08 23:06
nvptxintrin.h
7.42
KB
-rw-r--r--
2025-07-08 23:06
omp-tools.h
50.86
KB
-rw-r--r--
2025-09-17 20:37
omp.h
23.36
KB
-rw-r--r--
2025-09-17 20:37
ompt-multiplex.h
51.11
KB
-rw-r--r--
2025-07-08 23:06
ompt.h
50.86
KB
-rw-r--r--
2025-09-17 20:37
ompx.h
7.38
KB
-rw-r--r--
2025-09-17 20:37
opencl-c-base.h
30.94
KB
-rw-r--r--
2025-07-08 23:06
opencl-c.h
874.98
KB
-rw-r--r--
2025-07-08 23:06
pconfigintrin.h
1.19
KB
-rw-r--r--
2025-07-08 23:06
pkuintrin.h
934
B
-rw-r--r--
2025-07-08 23:06
pmmintrin.h
11.12
KB
-rw-r--r--
2025-07-08 23:06
popcntintrin.h
1.9
KB
-rw-r--r--
2025-07-08 23:06
prfchiintrin.h
2.02
KB
-rw-r--r--
2025-07-08 23:06
prfchwintrin.h
2.06
KB
-rw-r--r--
2025-07-08 23:06
ptrauth.h
15.61
KB
-rw-r--r--
2025-07-08 23:06
ptwriteintrin.h
1.05
KB
-rw-r--r--
2025-07-08 23:06
raointintrin.h
6.59
KB
-rw-r--r--
2025-07-08 23:06
rdpruintrin.h
1.59
KB
-rw-r--r--
2025-07-08 23:06
rdseedintrin.h
2.85
KB
-rw-r--r--
2025-07-08 23:06
riscv_bitmanip.h
5.59
KB
-rw-r--r--
2025-07-08 23:06
riscv_corev_alu.h
3.94
KB
-rw-r--r--
2025-07-08 23:06
riscv_crypto.h
5.09
KB
-rw-r--r--
2025-07-08 23:06
riscv_ntlh.h
744
B
-rw-r--r--
2025-07-08 23:06
riscv_vector.h
17.11
KB
-rw-r--r--
2025-09-17 20:11
rtmintrin.h
1.25
KB
-rw-r--r--
2025-07-08 23:06
s390intrin.h
604
B
-rw-r--r--
2025-07-08 23:06
serializeintrin.h
881
B
-rw-r--r--
2025-07-08 23:06
sgxintrin.h
1.77
KB
-rw-r--r--
2025-07-08 23:06
sha512intrin.h
5.95
KB
-rw-r--r--
2025-07-08 23:06
shaintrin.h
7.37
KB
-rw-r--r--
2025-07-08 23:06
sifive_vector.h
6.88
KB
-rw-r--r--
2025-07-08 23:06
sm3intrin.h
7.29
KB
-rw-r--r--
2025-07-08 23:06
sm4evexintrin.h
1.17
KB
-rw-r--r--
2025-07-08 23:06
sm4intrin.h
8.2
KB
-rw-r--r--
2025-07-08 23:06
smmintrin.h
99.34
KB
-rw-r--r--
2025-07-08 23:06
stdalign.h
756
B
-rw-r--r--
2025-07-08 23:06
stdarg.h
2.44
KB
-rw-r--r--
2025-07-08 23:06
stdatomic.h
8.46
KB
-rw-r--r--
2025-07-08 23:06
stdbool.h
1.14
KB
-rw-r--r--
2025-07-08 23:06
stdckdint.h
1.63
KB
-rw-r--r--
2025-07-08 23:06
stddef.h
4.93
KB
-rw-r--r--
2025-07-08 23:06
stdint.h
30.33
KB
-rw-r--r--
2025-07-08 23:06
stdnoreturn.h
1.29
KB
-rw-r--r--
2025-07-08 23:06
tbmintrin.h
3.42
KB
-rw-r--r--
2025-07-08 23:06
tgmath.h
29.68
KB
-rw-r--r--
2025-07-08 23:06
tmmintrin.h
31.18
KB
-rw-r--r--
2025-07-08 23:06
tsxldtrkintrin.h
1.97
KB
-rw-r--r--
2025-07-08 23:06
uintrintrin.h
4.96
KB
-rw-r--r--
2025-07-08 23:06
unwind.h
11.21
KB
-rw-r--r--
2025-07-08 23:06
usermsrintrin.h
1.54
KB
-rw-r--r--
2025-07-08 23:06
vadefs.h
1.39
KB
-rw-r--r--
2025-07-08 23:06
vaesintrin.h
2.61
KB
-rw-r--r--
2025-07-08 23:06
varargs.h
584
B
-rw-r--r--
2025-07-08 23:06
vecintrin.h
419.23
KB
-rw-r--r--
2025-07-08 23:06
velintrin.h
2.1
KB
-rw-r--r--
2025-07-08 23:06
velintrin_approx.h
3.54
KB
-rw-r--r--
2025-07-08 23:06
velintrin_gen.h
69.06
KB
-rw-r--r--
2025-07-08 23:06
vpclmulqdqintrin.h
1.06
KB
-rw-r--r--
2025-07-08 23:06
waitpkgintrin.h
1.33
KB
-rw-r--r--
2025-07-08 23:06
wasm_simd128.h
81.71
KB
-rw-r--r--
2025-07-08 23:06
wbnoinvdintrin.h
749
B
-rw-r--r--
2025-07-08 23:06
wmmintrin.h
659
B
-rw-r--r--
2025-07-08 23:06
x86gprintrin.h
2.22
KB
-rw-r--r--
2025-07-08 23:06
x86intrin.h
1.38
KB
-rw-r--r--
2025-07-08 23:06
xmmintrin.h
117.43
KB
-rw-r--r--
2025-07-08 23:06
xopintrin.h
19.96
KB
-rw-r--r--
2025-07-08 23:06
xsavecintrin.h
2.51
KB
-rw-r--r--
2025-07-08 23:06
xsaveintrin.h
1.64
KB
-rw-r--r--
2025-07-08 23:06
xsaveoptintrin.h
1
KB
-rw-r--r--
2025-07-08 23:06
xsavesintrin.h
1.24
KB
-rw-r--r--
2025-07-08 23:06
xtestintrin.h
873
B
-rw-r--r--
2025-07-08 23:06
yvals_core.h
687
B
-rw-r--r--
2025-07-08 23:06
Save
Rename
/*===--- __clang_cuda_texture_intrinsics.h - Device-side texture support ---===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 *
 * This header provides in-header implementations for NVCC's built-in
 * __nv_tex_surf_handler() which is used by CUDA's texture-related headers. The
 * built-in is unusual as it's actually a set of function overloads that use the
 * first string literal argument as one of the overload parameters.
 */
#ifndef __CLANG_CUDA_TEXTURE_INTRINSICS_H__
#define __CLANG_CUDA_TEXTURE_INTRINSICS_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

// __nv_tex_surf_handler() provided by this header as a macro.
// The string literal __op is hashed at compile time into a __Tag type that
// selects the matching __tex_fetch overload.
#define __nv_tex_surf_handler(__op, __ptr, ...)                                \
  ::__cuda_tex::__tex_fetch<                                                   \
      ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash(__op)>>(__ptr,           \
                                                              __VA_ARGS__)

// Save user-visible state of all helper macros defined below; they are
// restored (popped) at the end of the header so we do not leak names.
#pragma push_macro("__ASM_OUT")
#pragma push_macro("__ASM_OUTP")
#pragma push_macro("__Args")
#pragma push_macro("__ID")
#pragma push_macro("__IDV")
#pragma push_macro("__IMPL_2DGATHER")
#pragma push_macro("__IMPL_ALIAS")
#pragma push_macro("__IMPL_ALIASI")
#pragma push_macro("__IMPL_F1")
#pragma push_macro("__IMPL_F3")
#pragma push_macro("__IMPL_F3N")
#pragma push_macro("__IMPL_F3S")
#pragma push_macro("__IMPL_S")
#pragma push_macro("__IMPL_S3")
#pragma push_macro("__IMPL_S3I")
#pragma push_macro("__IMPL_S3N")
#pragma push_macro("__IMPL_S3NI")
#pragma push_macro("__IMPL_S3S")
#pragma push_macro("__IMPL_S3SI")
#pragma push_macro("__IMPL_SI")
#pragma push_macro("__L")
#pragma push_macro("__STRIP_PARENS")

// Put all functions into anonymous namespace so they have internal linkage.
// The device-only function here must be internal in order to avoid ODR
// violations in case they are used from the files compiled with
// -fgpu-rdc. E.g. a library and an app using it may be built with a different
// version of this header file.
namespace {

// Put the implementation into its own namespace so we don't pollute the TU.
namespace __cuda_tex {

// First, we need a perfect hash function and a few constexpr helper functions
// for converting a string literal into a numeric value which can be used to
// parametrize a template. We can not use string literals for that as that would
// require C++20.
//
// The hash function was generated with 'gperf' and then manually converted into
// its constexpr equivalent.
//
// NOTE: the perfect hashing scheme comes with inherent self-test. If the hash
// function has a collision for any of the texture operations, the compilation
// will fail due to an attempt to redefine a tag with the same value. If the
// header compiles, then the hash function is good enough for the job.

// strlen() for strings of up to 32 characters, written as a single return
// expression so it is a valid C++11 constexpr function.
constexpr int __tex_len(const char *s) {
  return (s[0] == 0)    ? 0
         : (s[1] == 0)  ? 1
         : (s[2] == 0)  ? 2
         : (s[3] == 0)  ? 3
         : (s[4] == 0)  ? 4
         : (s[5] == 0)  ? 5
         : (s[6] == 0)  ? 6
         : (s[7] == 0)  ? 7
         : (s[8] == 0)  ? 8
         : (s[9] == 0)  ? 9
         : (s[10] == 0) ? 10
         : (s[11] == 0) ? 11
         : (s[12] == 0) ? 12
         : (s[13] == 0) ? 13
         : (s[14] == 0) ? 14
         : (s[15] == 0) ? 15
         : (s[16] == 0) ? 16
         : (s[17] == 0) ? 17
         : (s[18] == 0) ? 18
         : (s[19] == 0) ? 19
         : (s[20] == 0) ? 20
         : (s[21] == 0) ? 21
         : (s[22] == 0) ? 22
         : (s[23] == 0) ? 23
         : (s[24] == 0) ? 24
         : (s[25] == 0) ? 25
         : (s[26] == 0) ? 26
         : (s[27] == 0) ? 27
         : (s[28] == 0) ? 28
         : (s[29] == 0) ? 29
         : (s[30] == 0) ? 30
         : (s[31] == 0) ? 31
                        : 32;
}

// gperf-generated character-to-weight table for the perfect hash; characters
// not in the table map to 225.
constexpr int __tex_hash_map(int c) {
  return (c == 49)    ? 10
         : (c == 50)  ? 0
         : (c == 51)  ? 100
         : (c == 52)  ? 30
         : (c == 67)  ? 10
         : (c == 68)  ? 0
         : (c == 69)  ? 25
         : (c == 72)  ? 70
         : (c == 77)  ? 0
         : (c == 96)  ? 44
         : (c == 99)  ? 10
         : (c == 100) ? 5
         : (c == 101) ? 60
         : (c == 102) ? 40
         : (c == 103) ? 70
         : (c == 104) ? 25
         : (c == 112) ? 0
         : (c == 114) ? 45
         : (c == 117) ? 5
         : (c == 118) ? 85
         : (c == 120) ? 20
                      : 225;
}

// Perfect hash of an operation name: string length plus the mapped weights of
// the characters at positions 7 (+1), 6, 5 and the last character.
constexpr int __tex_op_hash(const char *str) {
  return __tex_len(str) + __tex_hash_map(str[7] + 1) + __tex_hash_map(str[6]) +
         __tex_hash_map(str[5]) + __tex_hash_map(str[__tex_len(str) - 1]);
}

// Tag type to identify particular texture operation.
template <int N> struct __Tag;
#define __ID(__op) __Tag<__tex_op_hash(__op)>
// Tags for variants of particular operation. E.g. tex2Dgather can translate
// into 4 different instructions.
#define __IDV(__op, __variant)                                                 \
  __Tag<10000 + __tex_op_hash(__op) * 100 + __variant>

// Helper classes for figuring out key data types for derived types.
// E.g. char2 has __base_t = char, __fetch_t = char4
template <class> struct __TypeInfoT;
// Type info for the fundamental types.
template <> struct __TypeInfoT<float> {
  using __base_t = float;
  using __fetch_t = float4;
};
template <> struct __TypeInfoT<char> {
  using __base_t = char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<signed char> {
  using __base_t = signed char;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned char> {
  using __base_t = unsigned char;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<short> {
  using __base_t = short;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned short> {
  using __base_t = unsigned short;
  using __fetch_t = uint4;
};
template <> struct __TypeInfoT<int> {
  using __base_t = int;
  using __fetch_t = int4;
};
template <> struct __TypeInfoT<unsigned int> {
  using __base_t = unsigned int;
  using __fetch_t = uint4;
};

// Derived base/fetch types for N-element vectors.
template <class __T> struct __TypeInfoT {
  using __base_t = decltype(__T::x);
  using __fetch_t = typename __TypeInfoT<__base_t>::__fetch_t;
};

// Classes that implement specific texture ops.
template <class __op> struct __tex_fetch_v4;

// Helper macros to strip parens from a macro argument.
#define __Args(...) __VA_ARGS__
#define __STRIP_PARENS(__X) __X
#define __L(__X) __STRIP_PARENS(__Args __X)

// Construct inline assembly output args.
// Results are stored in a temp var __r.
// isResident bool is pointed to by __ir
// Asm args for return values. It's a 4-element vector
#define __ASM_OUT(__t)                                                         \
  ("=" __t(__r.x), "=" __t(__r.y), "=" __t(__r.z), "=" __t(__r.w))
// .. possibly combined with a predicate.
#define __ASM_OUTP(__t) (__L(__ASM_OUT(__t)), "=h"(*__ir))

// Implements a single variant of texture fetch instruction.
// NOTE: expands to a specialization of the member template __run, so it is
// only valid inside a __tex_fetch_v4<> specialization (see __IMPL_S3I below).
#define __IMPL_F1(__rt, __dt, __args, __asm_op, __asm_outs, __asm_args)        \
  template <>                                                                  \
  __device__ __rt __run<__dt>(cudaTextureObject_t __obj, __L(__args)) {        \
    __rt __r;                                                                  \
    asm(__asm_op : __L(__asm_outs) : "l"(__obj), __L(__asm_args));             \
    return __r;                                                                \
  }

// Implements texture fetch instructions for int4/uint4/float4 data types.
#define __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)        \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUT("f"),       \
            __asm_args)

// Implements 'sparse' texture fetch instructions for int4/uint4/float4 data
// types. Similar to above, but returns a boolean 'isPresent' value in addition
// to texture data,
#define __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(int4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args,   \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(uint4, uint4, __args, __asm_op ".u32." __ctype "\t" __asm_op_args, \
            __ASM_OUTP("r"), __asm_args)                                       \
  __IMPL_F1(float4, float4, __args,                                            \
            __asm_op ".f32." __ctype "\t" __asm_op_args, __ASM_OUTP("f"),      \
            __asm_args)

// Similar to F3, but for integer data which is returned as normalized floats.
// Only instantiates fetch functions for int4/uint4.
#define __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)       \
  __IMPL_F1(float4, int4, __args, __asm_op ".s32." __ctype "\t" __asm_op_args, \
            __ASM_OUT("r"), __asm_args)                                        \
  __IMPL_F1(float4, uint4, __args,                                             \
            __asm_op ".u32." __ctype "\t" __asm_op_args, __ASM_OUT("r"),       \
            __asm_args)

// Instantiates __tex_fetch_v4 with regular fetch functions.
#define __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3(__args, __asm_op, __ctype, __asm_op_args, __asm_args)            \
  }

// Same, but for sparse ops. Only available on sm_60+
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static T __run(cudaTextureObject_t __obj, __L(__args));         \
    __IMPL_F3S(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }
#else
// On older GPUs sparse ops expand to nothing.
#define __IMPL_S3SI(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#endif

// Same, but for normalized float ops.
#define __IMPL_S3NI(__op, __args, __asm_op, __ctype, __asm_op_args,            \
                    __asm_args)                                                \
  template <> struct __tex_fetch_v4<__op> {                                    \
    template <class T>                                                         \
    __device__ static float4 __run(cudaTextureObject_t __obj, __L(__args));    \
    __IMPL_F3N(__args, __asm_op, __ctype, __asm_op_args, __asm_args)           \
  }

// Regular and normalized float ops share a lot of similarities. This macro
// instantiates both variants -- normal for __op and normalized for __opn.
#define __IMPL_SI(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,       \
                  __asm_args)                                                  \
  __IMPL_S3I(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args);      \
  __IMPL_S3NI(__opn, __args, __asm_op, __ctype, __asm_op_args, __asm_args)

// Convenience macros which converts string literal __op into a __Tag,
#define __IMPL_S3(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args)  \
  __IMPL_S3I(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3S(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3SI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S3N(__op, __args, __asm_op, __ctype, __asm_op_args, __asm_args) \
  __IMPL_S3NI(__ID(__op), __args, __asm_op, __ctype, __asm_op_args, __asm_args)
#define __IMPL_S(__op, __opn, __args, __asm_op, __ctype, __asm_op_args,        \
                 __asm_args)                                                   \
  __IMPL_SI(__ID(__op), __ID(__opn), __args, __asm_op, __ctype, __asm_op_args, \
            __asm_args)

// CUDA headers have some 'legacy' texture operations that duplicate
// functionality. So, we just inherit it, instead of refining a copy.
#define __IMPL_ALIASI(__op, __opn)                                             \
  template <> struct __tex_fetch_v4<__op> : __tex_fetch_v4<__opn> {}
#define __IMPL_ALIAS(__op, __opn) __IMPL_ALIASI(__ID(__op), __ID(__opn))

// Now we can instantiate everything we need for each specific texture fetch
// variant.
// 1D
__IMPL_S("__tex1D_v2", "__tex1D_rmnf_v2", (float __x), "tex.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}];", ("f"(__x)));
__IMPL_S("__tex1Dfetch_v2", "__tex1Dfetch_rmnf_v2", (int __x), "tex.1d.v4",
         "s32", "{%0, %1, %2, %3}, [%4, {%5}];", ("r"(__x)));
__IMPL_ALIAS("__itex1D", "__tex1D_v2");
__IMPL_ALIAS("__itex1Dfetch", "__tex1Dfetch_v2");
__IMPL_S("__tex1DGrad_v2", "__tex1DGrad_rmnf_v2",
         (float __x, float __dPdx, float __dPdy), "tex.grad.1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};",
         ("f"(__x), "f"(__dPdx), "f"(__dPdy)));
__IMPL_ALIAS("__itex1DGrad", "__tex1DGrad_v2");
__IMPL_S("__tex1DLayered_v2", "__tex1DLayered_rmnf_v2",
         (float __x, int __layer), "tex.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("r"(__layer), "f"(__x)));
__IMPL_ALIAS("__itex1DLayered", "__tex1DLayered_v2");
__IMPL_S("__tex1DLayeredGrad_v2", "__tex1DLayeredGrad_rmnf_v2",
         (float __x, int __layer, float __dPdx, float __dPdy),
         "tex.grad.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};",
         ("r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)));
__IMPL_ALIAS("__itex1DLayeredGrad", "__tex1DLayeredGrad_v2");
__IMPL_S("__tex1DLayeredLod_v2", "__tex1DLayeredLod_rmnf_v2",
         (float __x, int __layer, float __level), "tex.level.a1d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("r"(__layer), "f"(__x), "f"(__level)));
__IMPL_ALIAS("__itex1DLayeredLod", "__tex1DLayeredLod_v2");
__IMPL_S("__tex1DLod_v2", "__tex1DLod_rmnf_v2", (float __x, float __level),
         "tex.level.1d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5}], %6;",
         ("f"(__x), "f"(__level)));
__IMPL_ALIAS("__itex1DLod", "__tex1DLod_v2");

// 2D
__IMPL_S("__tex2D_v2", "__tex2D_rmnf_v2", (float __x, float __y), "tex.2d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));
__IMPL_ALIAS("__itex2D", "__tex2D_v2");
// Sparse fetches wrap the tex instruction in a predicate that is converted to
// the 'isResident' byte written through __ir (asm operand %4).
__IMPL_S3S("__itex2D_sparse", (float __x, float __y, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"
           " selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y)));
__IMPL_S("__tex2DGrad_v2", "__tex2DGrad_rmnf_v2",
         (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy),
         "tex.grad.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};",
         ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
          "f"(__dPdy->y)));
__IMPL_ALIAS("__itex2DGrad_v2", "__tex2DGrad_v2");
__IMPL_S3S("__itex2DGrad_sparse",
           (float __x, float __y, const float2 *__dPdx, const float2 *__dPdy,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], {%8, %9}, {%10, %11};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x),
            "f"(__dPdy->y)));
__IMPL_S("__tex2DLayered_v2", "__tex2DLayered_rmnf_v2",
         (float __x, float __y, int __layer), "tex.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_ALIAS("__itex2DLayered", "__tex2DLayered_v2");
__IMPL_S3S("__itex2DLayered_sparse",
           (float __x, float __y, int __layer, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.a2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y)));
__IMPL_S("__tex2DLayeredGrad_v2", "__tex2DLayeredGrad_rmnf_v2",
         (float __x, float __y, int __layer, const float2 *__dPdx,
          const float2 *__dPdy),
         "tex.grad.a2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdy->x), "f"(__dPdy->y)));
__IMPL_ALIAS("__itex2DLayeredGrad_v2", "__tex2DLayeredGrad_v2");
__IMPL_S3S(
    "__itex2DLayeredGrad_sparse",
    (float __x, float __y, int __layer, const float2 *__dPdx,
     const float2 *__dPdy, unsigned char *__ir),
    "{.reg .pred %%p0;\n\t"
    "tex.grad.a2d.v4", "f32",
    "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], {%9, %10}, {%11, %12};\n\t"
    "selp.u16 %4, 1, 0, %%p0; }",
    ("r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y),
     "f"(__dPdy->x), "f"(__dPdy->y)));
__IMPL_S("__tex2DLayeredLod_v2", "__tex2DLayeredLod_rmnf_v2",
         (float __x, float __y, int __layer, float __level), "tex.level.a2d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_ALIAS("__itex2DLayeredLod", "__tex2DLayeredLod_v2");
__IMPL_S3S("__itex2DLayeredLod_sparse",
           (float __x, float __y, int __layer, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.a2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("r"(__layer), "f"(__x), "f"(__y), "f"(__level)));
__IMPL_S("__tex2DLod_v2", "__tex2DLod_rmnf_v2",
         (float __x, float __y, float __level), "tex.level.2d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;",
         ("f"(__x), "f"(__y), "f"(__level)));
__IMPL_ALIAS("__itex2DLod", "__tex2DLod_v2");
__IMPL_S3S("__itex2DLod_sparse",
           (float __x, float __y, float __level, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.2d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}], %8;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__level)));

// 2D gather is special. Unlike other variants that translate into exactly one
// asm instruction, it uses one of the four different instructions selected by
// __comp. We implement each instruction variant separately, and dispatch the
// right one from the manually implemented 'umbrella' fetch.
// Emits the three fetch implementations (regular, rmnf, sparse) for one gather
// component. The numeric `variant` is appended to the base op name via __IDV,
// so each tld4.{r,g,b,a} instruction gets its own __tex_fetch_v4
// specialization. No // comments may appear inside the macro body (they would
// swallow the continuation backslashes), hence this block comment up front.
#define __IMPL_2DGATHER(variant, instr)                                        \
  __IMPL_SI(__IDV("__tex2Dgather_v2", variant),                                \
            __IDV("__tex2Dgather_rmnf_v2", variant),                           \
            (float __x, float __y, int __comp), instr, "f32",                  \
            "{%0, %1, %2, %3}, [%4, {%5, %6}];", ("f"(__x), "f"(__y)));        \
  __IMPL_ALIASI(__IDV("__itex2Dgather", variant),                              \
                __IDV("__tex2Dgather_v2", variant));                           \
  __IMPL_S3SI(__IDV("__itex2Dgather_sparse", variant),                         \
              (float __x, float __y, unsigned char *__ir, int __comp),         \
              "{.reg .pred %%p0;\n\t" instr, "f32",                            \
              "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7}];\n\t"                     \
              "selp.u16 %4, 1, 0, %%p0; }",                                    \
              ("f"(__x), "f"(__y)));
// Instantiate the four per-component variants; the numeric suffix matches the
// __comp value the umbrella dispatchers below switch on (0=r, 1=g, 2=b, 3=a).
__IMPL_2DGATHER(0, "tld4.r.2d.v4");
__IMPL_2DGATHER(1, "tld4.g.2d.v4");
__IMPL_2DGATHER(2, "tld4.b.2d.v4");
__IMPL_2DGATHER(3, "tld4.a.2d.v4");

// Umbrella dispatcher -- calls into specific 2Dgather variant.
// NOTE(review): the switch has no default case, so __comp outside [0,3] falls
// off the end of a non-void function (UB). Callers are expected to always pass
// 0..3 -- confirm against the CUDA headers that expand __nv_tex_surf_handler.
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_v2")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};
__IMPL_ALIAS("__itex2Dgather", "__tex2Dgather_v2");

// Same dispatch for the read-mode-normalized-float flavor; note the fixed
// float4 return type regardless of __T.
template <> struct __tex_fetch_v4<__ID("__tex2Dgather_rmnf_v2")> {
  template <class __T>
  __device__ static float4 __run(cudaTextureObject_t __obj, float __x,
                                 float __y, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 0)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 1)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 2)>::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__tex2Dgather_rmnf_v2", 3)>::__run<__T>(
          __obj, __x, __y, __comp);
    }
  }
};

#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 600)
// Sparse gather dispatcher. Guarded so it only exists for sm_60+ device
// compilations (and for the host pass, where __CUDA_ARCH__ is undefined).
template <> struct __tex_fetch_v4<__ID("__itex2Dgather_sparse")> {
  template <class __T>
  __device__ static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              unsigned char *__ir, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 0)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 1:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 1)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 2:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 2)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    case 3:
      return __tex_fetch_v4<__IDV("__itex2Dgather_sparse", 3)>::__run<__T>(
          __obj, __x, __y, __ir, __comp);
    }
  }
};
#endif

// 3D
__IMPL_S("__tex3D_v2", "__tex3D_rmnf_v2", (float __x, float __y, float __z),
         "tex.3d.v4", "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itex3D", "__tex3D_v2");
__IMPL_S3S("__itex3D_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
// 3D with derivatives: note float4 (not float3) gradient pointers; only the
// x/y/z fields are read, and the last asm vector slot is duplicated padding.
__IMPL_S("__tex3DGrad_v2", "__tex3DGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.3d.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_ALIAS("__itex3DGrad_v2", "__tex3DGrad_v2");
__IMPL_S3S("__itex3DGrad_sparse",
           (float __x, float __y, float __z, const float4 *__dPdx,
            const float4 *__dPdy, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.grad.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], "
           "{%9, %10, %11, %11}, {%12, %13, %14, %14};\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
            "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_S("__tex3DLod_v2", "__tex3DLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.3d.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itex3DLod", "__tex3DLod_v2");
__IMPL_S3S("__itex3DLod_sparse",
           (float __x, float __y, float __z, float __level,
            unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.level.3d.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}], %9;\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));

// Cubemap
__IMPL_S("__texCubemap_v2", "__texCubemap_rmnf_v2",
         (float __x, float __y, float __z), "tex.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];",
         ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itexCubemap", "__texCubemap_v2");
__IMPL_S3S("__itexCubemap_sparse",
           (float __x, float __y, float __z, unsigned char *__ir),
           "{.reg .pred %%p0;\n\t"
           "tex.cube.v4", "f32",
           "{%0, %1, %2, %3}|%%p0, [%5, {%6, %7, %8, %8}];\n\t"
           "selp.u16 %4, 1, 0, %%p0; }",
           ("f"(__x), "f"(__y), "f"(__z)));
__IMPL_S("__texCubemapGrad_v2", "__texCubemapGrad_rmnf_v2",
         (float __x, float __y, float __z, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.cube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], "
         "{%8, %9, %10, %10}, {%11, %12, %13, %13};",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y),
          "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapGrad_v2", "__texCubemapGrad_v2");
// Layered cubemap (tex.acube): layer index occupies the 4th address slot, so
// no slot duplication is needed here.
__IMPL_S("__texCubemapLayered_v2", "__texCubemapLayered_rmnf_v2",
         (float __x, float __y, float __z, int __layer), "tex.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z)));
__IMPL_ALIAS("__itexCubemapLayered", "__texCubemapLayered_v2");
__IMPL_S("__texCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_rmnf_v2",
         (float __x, float __y, float __z, int __layer, const float4 *__dPdx,
          const float4 *__dPdy),
         "tex.grad.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], "
         "{%9, %10, %11, %11}, {%12, %13, %14, %14};",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x),
          "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y),
          "f"(__dPdy->z)));
__IMPL_ALIAS("__itexCubemapLayeredGrad_v2", "__texCubemapLayeredGrad_v2");
__IMPL_S("__texCubemapLayeredLod_v2", "__texCubemapLayeredLod_rmnf_v2",
         (float __x, float __y, float __z, int __layer, float __level),
         "tex.level.acube.v4", "f32",
         "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;",
         ("r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itexCubemapLayeredLod", "__texCubemapLayeredLod_v2");
__IMPL_S("__texCubemapLod_v2", "__texCubemapLod_rmnf_v2",
         (float __x, float __y, float __z, float __level), "tex.level.cube.v4",
         "f32", "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;",
         ("f"(__x), "f"(__y), "f"(__z), "f"(__level)));
__IMPL_ALIAS("__itexCubemapLod", "__texCubemapLod_v2");

// Helper class for extracting slice of data from V4 fetch results.
// __NElements defaults to the number of base elements that fit in __DestT,
// e.g. 2 for float2 with a float base type, so the right specialization is
// picked automatically. Uses in-class explicit specialization of a member
// template, which clang accepts in CUDA compilation mode.
template <class __DestT, class __SrcT> struct __convert {
  template <int __NElements = sizeof(__DestT) /
                              sizeof(typename __TypeInfoT<__DestT>::__base_t)>
  __device__ static __DestT __run(__SrcT __v);
  template <> __device__ static __DestT __run<1>(__SrcT __v) { return {__v.x}; }
  template <> __device__ static __DestT __run<2>(__SrcT __v) {
    return {__v.x, __v.y};
  }
  template <> __device__ static __DestT __run<3>(__SrcT __v) {
    return {__v.x, __v.y, __v.z};
  }
  template <> __device__ static __DestT __run<4>(__SrcT __v) {
    return {__v.x, __v.y, __v.z, __v.w};
  }
};

// These are the top-level function overloads the __nv_tex_surf_handler expands
// to. Each overload deals with one of the several ways __nv_tex_surf_handler
// is called by CUDA headers. In the end, each of the overloads does the same
// job -- it figures out which `__tex_fetch_v4::run` variant should be used to
// fetch texture data and which `__convert::run` is needed to convert it into
// appropriate return type.

// __nv_tex_surf_handler("__tex...", &ret, cudaTextureObject_t handle, args...);
// Data type and return type are based on ret.
template <class __op, class __T, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
}

#if CUDA_VERSION < 12000
// texture<> objects get magically converted into a texture reference. However,
// there's no way to convert them to cudaTextureObject_t on C++ level. So, we
// cheat a bit and use inline assembly to do it. It costs us an extra register
// and a move, but that is easy for ptxas to optimize away.
template <class __T>
__device__ cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __obj;
  asm("mov.b64 %0, %1; " : "=l"(__obj) : "l"(__handle));
  return __obj;
}

// __nv_tex_surf_handler ("__tex...", &ret, textureReference, args...);
// Data type and return type are based on ret.
template <class __op, class __T, class __HandleT, class... __Args>
__device__ static void __tex_fetch(__T *__ptr, __HandleT __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}

// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
// cudaReadModeNormalizedFloat fetches always return float4, so conversion to
// __RetT goes through __convert<__RetT, float4>.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, float4>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}

// __nv_tex_surf_handler ("__tex...", &type_dummy, &ret, texture<...>, args...);
// For cudaReadModeElementType fetch return type is based on type_dummy.
template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__device__ static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeElementType> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
#endif // CUDA_VERSION
} // namespace __cuda_tex
} // namespace

// Restore all helper macros pushed at the top of this header so they do not
// leak into user code.
#pragma pop_macro("__ASM_OUT")
#pragma pop_macro("__ASM_OUTP")
#pragma pop_macro("__Args")
#pragma pop_macro("__ID")
#pragma pop_macro("__IDV")
#pragma pop_macro("__IMPL_2DGATHER")
#pragma pop_macro("__IMPL_ALIAS")
#pragma pop_macro("__IMPL_ALIASI")
#pragma pop_macro("__IMPL_F1")
#pragma pop_macro("__IMPL_F3")
#pragma pop_macro("__IMPL_F3N")
#pragma pop_macro("__IMPL_F3S")
#pragma pop_macro("__IMPL_S")
#pragma pop_macro("__IMPL_S3")
#pragma pop_macro("__IMPL_S3I")
#pragma pop_macro("__IMPL_S3N")
#pragma pop_macro("__IMPL_S3NI")
#pragma pop_macro("__IMPL_S3S")
#pragma pop_macro("__IMPL_S3SI")
#pragma pop_macro("__IMPL_SI")
#pragma pop_macro("__L")
#pragma pop_macro("__STRIP_PARENS")
#endif // __CLANG_CUDA_TEXTURE_INTRINSICS_H__