Skip to content

Commit d507d74

Browse files
committed
Add hexagon support
Signed-off-by: Brian Cain <[email protected]>
1 parent 2731a48 commit d507d74

24 files changed

+3257
-0
lines changed

src/hexagon.rs

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Gate the whole module: it is compiled only when the `no-asm` feature is NOT enabled.
#![cfg(not(feature = "no-asm"))]
2+
3+
// Emits `hexagon/dfaddsub.s` (the `__hexagon_adddf3` / `__hexagon_subdf3` routines) as
// module-level assembly. `options(raw)` turns off `{`/`}` template parsing, which the
// brace-delimited instruction packets in that file require.
mod dfaddsub {
4+
core::arch::global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw));
5+
}
6+
// Emits `hexagon/dfdiv.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod dfdiv {
7+
core::arch::global_asm!(include_str!("hexagon/dfdiv.s"), options(raw));
8+
}
9+
// Emits `hexagon/dffma.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod dffma {
10+
core::arch::global_asm!(include_str!("hexagon/dffma.s"), options(raw));
11+
}
12+
// Emits `hexagon/dfminmax.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod dfminmax {
13+
core::arch::global_asm!(include_str!("hexagon/dfminmax.s"), options(raw));
14+
}
15+
// Emits `hexagon/dfmul.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod dfmul {
16+
core::arch::global_asm!(include_str!("hexagon/dfmul.s"), options(raw));
17+
}
18+
// Emits `hexagon/dfsqrt.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod dfsqrt {
19+
core::arch::global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw));
20+
}
21+
// Emits `hexagon/divdi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod divdi3 {
22+
core::arch::global_asm!(include_str!("hexagon/divdi3.s"), options(raw));
23+
}
24+
// Emits `hexagon/divsi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod divsi3 {
25+
core::arch::global_asm!(include_str!("hexagon/divsi3.s"), options(raw));
26+
}
27+
// Emits `hexagon/fastmath2_dlib_asm.s` as module-level assembly; `options(raw)` turns off
// `{`/`}` template parsing so the file text reaches the assembler unchanged.
mod fastmath2_dlib_asm {
28+
core::arch::global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw));
29+
}
30+
// Emits `hexagon/fastmath2_ldlib_asm.s` as module-level assembly; `options(raw)` turns off
// `{`/`}` template parsing so the file text reaches the assembler unchanged.
mod fastmath2_ldlib_asm {
31+
core::arch::global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw));
32+
}
33+
// Emits `hexagon/memcpy_forward_vp4cp4n2.s` as module-level assembly; `options(raw)` turns
// off `{`/`}` template parsing so the file text reaches the assembler unchanged.
mod memcpy_forward_vp4cp4n2 {
34+
core::arch::global_asm!(
35+
include_str!("hexagon/memcpy_forward_vp4cp4n2.s"),
36+
options(raw)
37+
);
38+
}
39+
// Emits `hexagon/memcpy_likely_aligned.s` as module-level assembly; `options(raw)` turns
// off `{`/`}` template parsing so the file text reaches the assembler unchanged.
mod memcpy_likely_aligned {
40+
core::arch::global_asm!(
41+
include_str!("hexagon/memcpy_likely_aligned.s"),
42+
options(raw)
43+
);
44+
}
45+
// Emits `hexagon/moddi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod moddi3 {
46+
core::arch::global_asm!(include_str!("hexagon/moddi3.s"), options(raw));
47+
}
48+
// Emits `hexagon/modsi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod modsi3 {
49+
core::arch::global_asm!(include_str!("hexagon/modsi3.s"), options(raw));
50+
}
51+
// Emits `hexagon/sfdiv_opt.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod sfdiv_opt {
52+
core::arch::global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw));
53+
}
54+
// Emits `hexagon/sfsqrt_opt.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod sfsqrt_opt {
55+
core::arch::global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw));
56+
}
57+
// Emits `hexagon/udivdi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod udivdi3 {
58+
core::arch::global_asm!(include_str!("hexagon/udivdi3.s"), options(raw));
59+
}
60+
// Emits `hexagon/udivmoddi4.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod udivmoddi4 {
61+
core::arch::global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw));
62+
}
63+
// Emits `hexagon/udivmodsi4.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod udivmodsi4 {
64+
core::arch::global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw));
65+
}
66+
// Emits `hexagon/udivsi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod udivsi3 {
67+
core::arch::global_asm!(include_str!("hexagon/udivsi3.s"), options(raw));
68+
}
69+
// Emits `hexagon/umoddi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod umoddi3 {
70+
core::arch::global_asm!(include_str!("hexagon/umoddi3.s"), options(raw));
71+
}
72+
// Emits `hexagon/umodsi3.s` as module-level assembly; `options(raw)` turns off `{`/`}`
// template parsing so the file text reaches the assembler unchanged.
mod umodsi3 {
73+
core::arch::global_asm!(include_str!("hexagon/umodsi3.s"), options(raw));
74+
}

src/hexagon/dfaddsub.s

+321
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,321 @@
1+
// Everything that follows is placed in the .text section.
.text
2+
// Export the two entry points defined in this file and type them as functions.
.global __hexagon_adddf3
3+
.global __hexagon_subdf3
4+
.type __hexagon_adddf3, @function
5+
.type __hexagon_subdf3, @function
6+
7+
// Additional exported names: each .set makes the alias resolve to the same address
// as the corresponding __hexagon_* symbol.
.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3
8+
.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3
9+
.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3
10+
.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3
11+
.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3
12+
.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3
13+
14+
// __hexagon_adddf3: double-precision add of the value in r1:0 (A) and the value in
// r3:2 (B); the sum is left in r1:0 and control returns through r31.
// This is the fast path for two normal operands; anything else branches to
// .Ladd_abnormal. Scratch registers used: r4-r15, r28, p0-p3.
// NOTE(review): comments below assume Hexagon packet semantics (all instructions in a
// { } packet read register values from before the packet, and two writes to the same
// predicate inside one packet are ANDed) - confirm against the architecture manual.
// Align the entry point to a 32-byte boundary.
.p2align 5
15+
__hexagon_adddf3:
16+
{
17+
// r4 / r5 = 11-bit field at bit 20 of each high word, i.e. the biased exponents of A and B.
r4 = extractu(r1,#11,#20)
18+
r5 = extractu(r3,#11,#20)
19+
// r13:12 = 0x2000000000000000 (bit 61 set); it becomes the implicit leading 1 once the
// 52 mantissa bits are inserted at bits 9..60 below.
r13:12 = combine(##0x20000000,#0)
20+
}
21+
{
22+
// p3 = A is normal AND B is normal (class mask #2; the failing case goes to .Ladd_abnormal).
p3 = dfclass(r1:0,#2)
23+
p3 = dfclass(r3:2,#2)
24+
// Same leading-one template for B's significand.
r9:8 = r13:12
25+
// p2 = B's exponent is larger than A's, so the operands must be swapped.
p2 = cmp.gtu(r5,r4)
26+
}
27+
{
28+
if (!p3) jump .Ladd_abnormal
29+
// Swap A <-> B and their exponents so that A has the larger (or equal) exponent.
if (p2) r1:0 = r3:2
30+
if (p2) r3:2 = r1:0
31+
if (p2) r5:4 = combine(r4,r5)
32+
}
33+
{
34+
// Build fixed-point significands: low 52 bits of each operand placed at bit offset 9 (#11-2).
r13:12 = insert(r1:0,#52,#11 -2)
35+
r9:8 = insert(r3:2,#52,#11 -2)
36+
// r15 = exponent difference (A's exponent minus B's).
r15 = sub(r4,r5)
37+
// r7 = 62 (maximum alignment shift), r6 = 1 (sticky bit).
r7:6 = combine(#62,#1)
38+
}
39+
40+
41+
42+
43+
44+
// Shared tail, also entered from .Ladd_abnormal with r13:12, r9:8, r4, r15 and r7:6 prepared.
.Ladd_continue:
45+
{
46+
// Clamp the alignment shift to 62.
r15 = min(r15,r7)
47+
48+
// Pre-compute the negated A significand for the case where A is negative.
r11:10 = neg(r13:12)
49+
// p2 = sign bit of A is clear.
p2 = cmp.gt(r1,#-1)
50+
// r14 = 0 so that r15:14 describes "width r15, offset 0" for the extractu below
// (NOTE(review): register-form extractu operand layout - confirm).
r14 = #0
51+
}
52+
{
53+
// Apply A's sign to its significand.
if (!p2) r13:12 = r11:10
54+
// r11:10 = the low bits of B's significand that the shift below discards.
r11:10 = extractu(r9:8,r15:14)
55+
// Align B's significand to A's exponent.
r9:8 = ASR(r9:8,r15)
56+
57+
58+
59+
60+
// r15:14 is reused as a 64-bit zero from here on.
r15:14 = #0
61+
}
62+
{
63+
// If any discarded bit was non-zero, fold it into bit 0 of B's significand (sticky bit, r6 = 1).
p1 = cmp.eq(r11:10,r15:14)
64+
if (!p1.new) r8 = or(r8,r6)
65+
// r5 = A's exponent minus 1084; added to the converted result's exponent field at the end.
r5 = add(r4,#-1024 -60)
66+
// p3 = sign bit of B is clear.
p3 = cmp.gt(r3,#-1)
67+
}
68+
{
69+
// Compute both A+B and A-B; the one matching B's sign is selected in the next packet.
r13:12 = add(r13:12,r9:8)
70+
r11:10 = sub(r13:12,r9:8)
71+
// r7 = 54, r6 = 2045: bounds for the exponent range check below.
r7:6 = combine(#54,##2045)
72+
}
73+
{
74+
// p0 = (r4 > 54) AND (r4 <= 2045); outside that range take the slower
// overflow/underflow-aware path.
p0 = cmp.gtu(r4,r7)
75+
p0 = !cmp.gtu(r4,r6)
76+
if (!p0.new) jump:nt .Ladd_ovf_unf
77+
// B negative: use the difference instead of the sum.
if (!p3) r13:12 = r11:10
78+
}
79+
{
80+
// Convert the signed 64-bit fixed-point result to a double.
r1:0 = convert_d2df(r13:12)
81+
// p0 = both halves of the integer result are zero -> exact-zero handling.
p0 = cmp.eq(r13,#0)
82+
p0 = cmp.eq(r12,#0)
83+
if (p0.new) jump:nt .Ladd_zero
84+
}
85+
{
86+
// Add the exponent correction into the exponent field (bit 20 of the high word) and return.
r1 += asl(r5,#20)
87+
jumpr r31
88+
}
89+
// __hexagon_subdf3: double-precision subtract, implemented as A + (-B).
// A is in r1:0 and B in r3:2; the result is produced by the add routine.
.falign
90+
__hexagon_subdf3:
91+
{
92+
// Flip bit 31 of B's high word (the sign bit), turning B into -B.
r3 = togglebit(r3,#31)
93+
// Tail-jump to the add routine (__qdsp_adddf3 is set equal to __hexagon_adddf3 in this file).
jump __qdsp_adddf3
94+
}
95+
96+
97+
// .Ladd_zero: the fixed-point sum was exactly zero. Returns +0.0 in r1:0, or -0.0 when
// the 2-bit USR field at bits 22..23 equals 2.
// NOTE(review): that field is presumably the rounding mode, with 2 = round toward
// negative infinity - confirm against the USR register description.
.falign
98+
.Ladd_zero:
99+
100+
101+
{
102+
r28 = USR
103+
// Start from +0.0.
r1:0 = #0
104+
r3 = #1
105+
}
106+
{
107+
// Isolate USR bits 22..23.
r28 = extractu(r28,#2,#22)
108+
// r3 = 0x80000000, the sign-bit mask for the high word.
r3 = asl(r3,#31)
109+
}
110+
{
111+
// Field value 2: flip the sign bit so the result is -0.0.
p0 = cmp.eq(r28,#2)
112+
if (p0.new) r1 = xor(r1,r3)
113+
jumpr r31
114+
}
115+
// .Ladd_ovf_unf: slow path taken when the larger exponent (r4) is outside 55..2045, so
// the result exponent may overflow or drop to zero or below.
// On entry: r13:12 = signed fixed-point sum, r5 = exponent correction.
// NOTE(review): comments assume instructions in one packet read pre-packet register values.
.falign
116+
.Ladd_ovf_unf:
117+
{
118+
// Convert the signed fixed-point sum to a double; an all-zero sum goes to .Ladd_zero.
r1:0 = convert_d2df(r13:12)
119+
p0 = cmp.eq(r13,#0)
120+
p0 = cmp.eq(r12,#0)
121+
if (p0.new) jump:nt .Ladd_zero
122+
}
123+
{
124+
// r28 = exponent field of the converted value (before the correction is applied).
r28 = extractu(r1,#11,#20)
125+
// Apply the exponent correction to the result's exponent field.
r1 += asl(r5,#20)
126+
}
127+
{
128+
// r5 = corrected exponent as a plain signed integer (may be <= 0 or > 2046).
r5 = add(r5,r28)
129+
// r3:2 = 0x0010000000000000: bit 52, the implicit leading 1 of a double's significand.
r3:2 = combine(##0x00100000,#0)
130+
}
131+
{
132+
// Exponent above 2046 cannot be represented: overflow.
p0 = cmp.gt(r5,##1024 +1024 -2)
133+
if (p0.new) jump:nt .Ladd_ovf
134+
}
135+
{
136+
// Exponent >= 1: r1:0 already holds a valid normal result, return it.
p0 = cmp.gt(r5,#0)
137+
if (p0.new) jumpr:t r31
138+
// Otherwise r28 = 1 - exponent, the right shift needed to express the value with exponent field 0.
r28 = sub(#1,r5)
139+
}
140+
{
141+
// r3:2 = implicit 1 (bit 52) followed by the result's 52 mantissa bits.
r3:2 = insert(r1:0,#52,#0)
142+
// Reload the signed integer sum; only its bit 63 (the sign) survives the final insert.
r1:0 = r13:12
143+
}
144+
{
145+
// Shift the significand down into subnormal position; shifted-out bits are dropped here.
r3:2 = lsr(r3:2,r28)
146+
}
147+
{
148+
// Replace the low 63 bits with the subnormal magnitude, keeping the sign bit, and return.
r1:0 = insert(r3:2,#63,#0)
149+
jumpr r31
150+
}
151+
// .Ladd_ovf: the result exponent is too large. Sets bits 0x28 in USR and returns either
// infinity or the largest finite magnitude, carrying the sign of the fixed-point sum in r13:12.
// NOTE(review): USR bits 0x28 are presumably the overflow and inexact flags, and USR
// bits 22..23 the rounding mode (1 = toward zero, 2 = toward -inf, 3 = toward +inf) -
// confirm against the USR register description.
.falign
152+
.Ladd_ovf:
153+
154+
{
155+
// Keep the signed sum in r1:0; only its sign bit is used below.
r1:0 = r13:12
156+
r28 = USR
157+
// r13:12 = 0x7fefffffffffffff, the largest finite double magnitude.
r13:12 = combine(##0x7fefffff,#-1)
158+
}
159+
{
160+
// r5 = USR bits 22..23.
r5 = extractu(r28,#2,#22)
161+
r28 = or(r28,#0x28)
162+
// r9:8 = 0x7ff0000000000000, the bit pattern of +infinity.
r9:8 = combine(##0x7ff00000,#0)
163+
}
164+
{
165+
// Write the status bits back.
USR = r28
166+
// r5 = mode field XOR result sign bit; r28 = the unmodified mode field (pre-packet r5).
r5 ^= lsr(r1,#31)
167+
r28 = r5
168+
}
169+
{
170+
// Choose infinity unless the mode field is 1, or (mode XOR sign) is 2; in those
// cases the largest finite magnitude is kept.
p0 = !cmp.eq(r28,#1)
171+
p0 = !cmp.eq(r5,#2)
172+
if (p0.new) r13:12 = r9:8
173+
}
174+
{
175+
// Replace the low 63 bits with the chosen magnitude, preserving the sign bit.
r1:0 = insert(r13:12,#63,#0)
176+
}
177+
{
178+
// The predicate result is not consumed before returning.
// NOTE(review): presumably executed only for its effect on FP status - confirm.
p0 = dfcmp.eq(r1:0,r1:0)
179+
jumpr r31
180+
}
181+
182+
// .Ladd_abnormal: reached when at least one operand is not a normal number.
// Orders the operands by magnitude (larger in r1:0), then dispatches on NaN, infinity,
// zero and subnormal cases. The only case handled inline is "normal + subnormal",
// which rejoins the main path at .Ladd_continue.
// NOTE(review): comments assume instructions in one packet read pre-packet register values.
.Ladd_abnormal:
183+
{
184+
// r13:12 / r9:8 = operands with the sign bit stripped (low 63 bits).
r13:12 = extractu(r1:0,#63,#0)
185+
r9:8 = extractu(r3:2,#63,#0)
186+
}
187+
{
188+
// p3 = |A| bits > |B| bits; if not, swap so r1:0 holds the larger magnitude.
p3 = cmp.gtu(r13:12,r9:8)
189+
if (!p3.new) r1:0 = r3:2
190+
if (!p3.new) r3:2 = r1:0
191+
}
192+
{
193+
194+
// Class mask 0x0f excludes NaN here: when the larger operand matches none of those
// classes, control goes to the NaN handler.
p0 = dfclass(r1:0,#0x0f)
195+
if (!p0.new) jump:nt .Linvalid_nan_add
196+
// Mirror the operand swap on the sign-stripped copies.
if (!p3) r13:12 = r9:8
197+
if (!p3) r9:8 = r13:12
198+
}
199+
{
200+
201+
202+
// Larger operand is infinity.
p1 = dfclass(r1:0,#0x08)
203+
if (p1.new) jump:nt .Linf_add
204+
}
205+
{
206+
// Smaller operand is zero.
p2 = dfclass(r3:2,#0x01)
207+
if (p2.new) jump:nt .LB_zero
208+
// r13:12 = 0; .LB_zero compares r1:0 against this zero.
r13:12 = #0
209+
}
210+
211+
{
212+
// Larger operand is subnormal, so both are.
p0 = dfclass(r1:0,#4)
213+
if (p0.new) jump:nt .Ladd_two_subnormal
214+
// Leading-one template (bit 61) for A's significand, as in the main entry path.
r13:12 = combine(##0x20000000,#0)
215+
}
216+
{
217+
// Remaining case: A is normal and B is subnormal.
// r4 = A's biased exponent.
r4 = extractu(r1,#11,#20)
218+
// B's exponent is taken as 1.
r5 = #1
219+
220+
// B's significand: its sign-stripped bits shifted to the same position as A's
// mantissa, with no implicit leading 1.
r9:8 = asl(r9:8,#11 -2)
221+
}
222+
223+
224+
225+
{
226+
// Same setup the main path performs before .Ladd_continue: A's significand, the
// exponent difference, and r7 = 62 / r6 = 1.
r13:12 = insert(r1:0,#52,#11 -2)
227+
r15 = sub(r4,r5)
228+
r7:6 = combine(#62,#1)
229+
jump .Ladd_continue
230+
}
231+
232+
// .Ladd_two_subnormal: both operands are subnormal. Their bit patterns are added as
// signed 64-bit integers and the result is converted back to sign + magnitude in r1:0.
// NOTE(review): comments assume instructions in one packet read pre-packet register values.
.Ladd_two_subnormal:
233+
{
234+
// Strip the sign bits: r13:12 = |A| bits, r9:8 = |B| bits.
r13:12 = extractu(r1:0,#63,#0)
235+
r9:8 = extractu(r3:2,#63,#0)
236+
}
237+
{
238+
// Tentatively negate both magnitudes; p0 / p1 = sign bit of A / B is clear.
r13:12 = neg(r13:12)
239+
r9:8 = neg(r9:8)
240+
p0 = cmp.gt(r1,#-1)
241+
p1 = cmp.gt(r3,#-1)
242+
}
243+
{
244+
// For non-negative operands use the original bits instead of the negated magnitude.
if (p0) r13:12 = r1:0
245+
if (p1) r9:8 = r3:2
246+
}
247+
{
248+
// Signed integer sum.
r13:12 = add(r13:12,r9:8)
249+
}
250+
{
251+
// r9:8 = negated sum (the magnitude when the sum is negative); p0 = sum is non-negative.
r9:8 = neg(r13:12)
252+
p0 = cmp.gt(r13,#-1)
253+
r3:2 = #0
254+
}
255+
{
256+
// r1:0 = magnitude of the sum.
if (!p0) r1:0 = r9:8
257+
if (p0) r1:0 = r13:12
258+
// With r2 = 0 from the previous packet, r3:2 becomes 0x8000000000000000 (sign bit only).
r3 = ##0x80000000
259+
}
260+
{
261+
// Negative sum: set the sign bit in the result.
if (!p0) r1 = or(r1,r3)
262+
// Floating-point compare of the magnitude against r3:2; a match sends the result to
// the zero handler.
p0 = dfcmp.eq(r1:0,r3:2)
263+
if (p0.new) jump:nt .Lzero_plus_zero
264+
}
265+
{
266+
jumpr r31
267+
}
268+
269+
// .Linvalid_nan_add: the larger-magnitude operand (r1:0) is NaN. Returns the all-ones
// bit pattern in r1:0.
// NOTE(review): the two convert_df2sf results are not used afterwards in this block;
// presumably they are executed only so a signaling NaN raises the invalid flag - confirm.
.Linvalid_nan_add:
270+
{
271+
r28 = convert_df2sf(r1:0)
272+
// If B matches class mask 0x0f (the mask whose failure led here for A), replace it
// with A so the conversion below operates on the NaN.
p0 = dfclass(r3:2,#0x0f)
273+
if (p0.new) r3:2 = r1:0
274+
}
275+
{
276+
r2 = convert_df2sf(r3:2)
277+
// Result: all 64 bits set.
r1:0 = #-1
278+
jumpr r31
279+
}
280+
// .LB_zero: the smaller-magnitude operand (r3:2) is zero. The jump that leads here from
// .Ladd_abnormal sets r13:12 to 0 in the same packet, so the compare tests A == 0.0.
// If A is non-zero it is returned unchanged in r1:0; otherwise execution falls through
// to the code that follows (.Lzero_plus_zero).
.falign
281+
.LB_zero:
282+
{
283+
p0 = dfcmp.eq(r13:12,r1:0)
284+
if (!p0.new) jumpr:t r31
285+
}
286+
287+
288+
289+
290+
// .Lzero_plus_zero: the result is a zero and only its sign remains to be decided.
// If r1:0 and r3:2 have identical bit patterns, r1:0 is returned as is. Otherwise the
// result is +0.0, or -0.0 when the 2-bit USR field at bits 22..23 equals 2.
// NOTE(review): that field is presumably the rounding mode, with 2 = round toward
// negative infinity - confirm against the USR register description.
.Lzero_plus_zero:
291+
{
292+
// 64-bit integer compare of the raw bit patterns.
p0 = cmp.eq(r1:0,r3:2)
293+
if (p0.new) jumpr:t r31
294+
}
295+
{
296+
r28 = USR
297+
}
298+
{
299+
// Isolate USR bits 22..23 and start from +0.0.
r28 = extractu(r28,#2,#22)
300+
r1:0 = #0
301+
}
302+
{
303+
// Field value 2: return -0.0 (sign bit set in the high word).
p0 = cmp.eq(r28,#2)
304+
if (p0.new) r1 = ##0x80000000
305+
jumpr r31
306+
}
307+
// .Linf_add: the larger-magnitude operand (r1:0) is infinity.
// If B is not also an infinity with a different high word (i.e. the opposite sign), the
// infinity in r1:0 is returned. Otherwise the result is built by converting the
// single-precision pattern 0x7f800001 (exponent all ones, non-zero mantissa: a NaN) to double.
// NOTE(review): going through convert_sf2df presumably also raises the invalid flag - confirm.
.Linf_add:
308+
309+
{
310+
// p0 = (high words differ) AND (B is infinity).
p0 = !cmp.eq(r1,r3)
311+
p0 = dfclass(r3:2,#8)
312+
if (!p0.new) jumpr:t r31
313+
}
314+
{
315+
r2 = ##0x7f800001
316+
}
317+
{
318+
r1:0 = convert_sf2df(r2)
319+
jumpr r31
320+
}
321+
// Record the symbol size as the distance from the __hexagon_adddf3 label to this point.
.size __hexagon_adddf3,.-__hexagon_adddf3

0 commit comments

Comments
 (0)