1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
// MIT License, Copyright (c) 2020 Marvin Borner
#include <math.h>
/**
 * Round x up to the nearest integral value.
 *
 * @param x Value to round
 * @return The smallest integral value that is not less than x
 */
f64 mceil(f64 x)
{
	f64 rounded;

	// Zero (of either sign) is already integral; hand it back untouched.
	if (x == 0.0)
		return x;

	// frndint rounds ST(0) to an integer using the FPU's current rounding mode.
	__asm__ volatile("frndint\n" : "=t"(rounded) : "0"(x));

	// If the rounding went downwards, step up to the next integer.
	return (rounded < x) ? rounded + 1.0 : rounded;
}
/**
 * Round x down to the nearest integral value.
 *
 * @param x Value to round
 * @return The largest integral value that is not greater than x
 */
f64 mfloor(f64 x)
{
	f64 rounded;

	// Zero (of either sign) is already integral; hand it back untouched.
	if (x == 0.0)
		return x;

	// frndint rounds ST(0) to an integer using the FPU's current rounding mode.
	__asm__ volatile("frndint\n" : "=t"(rounded) : "0"(x));

	// If the rounding went upwards, step down to the previous integer.
	return (rounded > x) ? rounded - 1.0 : rounded;
}
/**
 * e raised to the power exp, evaluated on the x87 FPU as 2^(exp * log2(e)).
 *
 * With y = exp * log2(e), fprem extracts the fractional part of y for f2xm1,
 * and fscale then multiplies by 2 to the truncated integer part of y.
 *
 * The sequence temporarily occupies two stack slots beyond its operand, so
 * st(1) and st(2) are listed as clobbers; this makes the compiler keep two
 * x87 registers free across the statement.
 *
 * @param exp Exponent
 * @return e^exp
 */
f64 mexp(f64 exp)
{
	f64 out;
	__asm__ volatile("fldl2e\n" // ST0 = log2(e), ST1 = exp
			 "fmulp\n" // ST0 = y = exp * log2(e)
			 "fld1\n" // ST0 = 1, ST1 = y
			 "fld %%st(1)\n" // ST0 = y, ST1 = 1, ST2 = y
			 "fprem\n" // ST0 = remainder of y / 1 (fractional part)
			 "f2xm1\n" // ST0 = 2^frac - 1
			 "faddp\n" // ST0 = 2^frac, ST1 = y
			 "fscale\n" // ST0 = 2^frac * 2^trunc(y)
			 "fstp %%st(1)" // drop y; the result stays in ST0
			 : "=t"(out)
			 : "0"(exp)
			 : "st(1)", "st(2)");
	return out;
}
/**
 * 2 raised to the power exp, evaluated on the x87 FPU.
 *
 * fprem extracts the fractional part of exp for f2xm1, and fscale then
 * multiplies by 2 to the truncated integer part of exp.
 *
 * The sequence temporarily occupies two stack slots beyond its operand, so
 * st(1) and st(2) are listed as clobbers; this makes the compiler keep two
 * x87 registers free across the statement.
 *
 * @param exp Exponent
 * @return 2^exp
 */
f64 mexp2(f64 exp)
{
	f64 out;
	__asm__ volatile("fld1\n" // ST0 = 1, ST1 = exp
			 "fld %%st(1)\n" // ST0 = exp, ST1 = 1, ST2 = exp
			 "fprem\n" // ST0 = remainder of exp / 1 (fractional part)
			 "f2xm1\n" // ST0 = 2^frac - 1
			 "faddp\n" // ST0 = 2^frac, ST1 = exp
			 "fscale\n" // ST0 = 2^frac * 2^trunc(exp)
			 "fstp %%st(1)" // drop exp; the result stays in ST0
			 : "=t"(out)
			 : "0"(exp)
			 : "st(1)", "st(2)");
	return out;
}
/**
 * Natural logarithm of x, evaluated on the x87 FPU as ln(2) * log2(x).
 *
 * The sequence temporarily occupies two stack slots beyond its operand, so
 * st(1) and st(2) are listed as clobbers; this makes the compiler keep two
 * x87 registers free across the statement.
 *
 * @param x Input value
 * @return ln(x)
 */
f64 mlog(f64 x)
{
	f64 out;
	__asm__ volatile("fldln2\n" // ST0 = ln(2), ST1 = x
			 "fld %%st(1)\n" // ST0 = x, ST1 = ln(2), ST2 = x
			 "fyl2x\n" // ST0 = ln(2) * log2(x), ST1 = x
			 "fstp %%st(1)" // drop x; the result stays in ST0
			 : "=t"(out)
			 : "0"(x)
			 : "st(1)", "st(2)");
	return out;
}
/**
 * Base-2 logarithm of x, evaluated on the x87 FPU as 1 * log2(x).
 *
 * The sequence temporarily occupies two stack slots beyond its operand, so
 * st(1) and st(2) are listed as clobbers; this makes the compiler keep two
 * x87 registers free across the statement.
 *
 * @param x Input value
 * @return log2(x)
 */
f64 mlog2(f64 x)
{
	f64 out;
	__asm__ volatile("fld1\n" // ST0 = 1, ST1 = x
			 "fld %%st(1)\n" // ST0 = x, ST1 = 1, ST2 = x
			 "fyl2x\n" // ST0 = 1 * log2(x), ST1 = x
			 "fstp %%st(1)" // drop x; the result stays in ST0
			 : "=t"(out)
			 : "0"(x)
			 : "st(1)", "st(2)");
	return out;
}
/**
 * Raise base to the power exp.
 *
 * Integral exponents that fit into an s32 are handled by repeated squaring;
 * every other exponent goes through mexp2(exp * mlog2(base)).
 *
 * Note: a zero base yields 0 for every exponent other than 0, including
 * negative ones.
 *
 * @param base Base
 * @param exp Exponent
 * @return base^exp
 */
f64 mpow(f64 base, f64 exp)
{
	if (exp == 0)
		return 1;
	if (exp == 1)
		return base;
	if (base == 0)
		return 0;

	// Converting a floating-point value that does not fit the target integer
	// type is undefined behaviour, so only cast when exp is in range (this
	// also keeps NaN away from the cast, as both comparisons are false then).
	// Assumes s32 is a 32-bit signed integer - TODO confirm.
	if (exp >= -2147483648.0 && exp <= 2147483647.0) {
		s32 whole = (s32)exp;
		if (exp == (f64)whole) {
			// Unsigned negation so the most negative s32 does not overflow.
			u32 remaining = whole < 0 ? 0u - (u32)whole : (u32)whole;
			f64 out = 1.0;
			f64 square = base;

			// Square-and-multiply: O(log |exp|) multiplications instead
			// of |exp| - 1.
			while (remaining) {
				if (remaining & 1u)
					out *= square;
				remaining >>= 1;
				// Skip the final, unused squaring.
				if (remaining)
					square *= square;
			}

			if (whole < 0)
				out = 1.0 / out;
			return out;
		}
	}

	return mexp2(exp * mlog2(base));
}
/**
 * Square root of num, computed with the x87 fsqrt instruction.
 *
 * A dedicated instruction is used instead of 2^(0.5 * log2(num)) so that
 * perfect squares come out exact and no transcendental steps are needed.
 * For negative input the FPU reports an invalid operation; with that
 * exception masked the result is NaN.
 *
 * @param num Input value
 * @return The square root of num
 */
f64 msqrt(f64 num)
{
	f64 root;
	__asm__ volatile("fsqrt" : "=t"(root) : "0"(num));
	return root;
}
/**
 * Evaluate the cubic polynomial a*x^3 + b*x^2 + c*x + d.
 *
 * @param x Point at which the polynomial is evaluated
 * @param a Coefficient of the cubic term
 * @param b Coefficient of the quadratic term
 * @param c Coefficient of the linear term
 * @param d Constant term
 * @return The value of the polynomial at x
 */
f64 mcubic(f64 x, f64 a, f64 b, f64 c, f64 d)
{
	// Terms are summed left to right, highest power first.
	return (a * mpow(x, 3.0)) + (b * mpow(x, 2.0)) + (c * x) + d;
}
/**
* Interpolations
*/
/**
 * Linear interpolation between two values.
 *
 * Yields from when trans is 0 and from + (to - from) when trans is 1;
 * trans is not clamped.
 *
 * @param from Start value
 * @param to End value
 * @param trans Interpolation factor
 * @return from moved towards to by the fraction trans
 */
f64 mlerp(f64 from, f64 to, f64 trans)
{
	return from + trans * (to - from);
}
/**
 * Bilinear interpolation built from three linear interpolations.
 *
 * a..b and c..d are each interpolated by transx, and the two results are
 * then interpolated by transy.
 *
 * @param a Start of the first pair
 * @param b End of the first pair
 * @param c Start of the second pair
 * @param d End of the second pair
 * @param transx Interpolation factor applied within each pair
 * @param transy Interpolation factor applied between the two pair results
 * @return The interpolated value
 */
f64 mblerp(f64 a, f64 b, f64 c, f64 d, f64 transx, f64 transy)
{
	f64 along_ab = mlerp(a, b, transx);
	f64 along_cd = mlerp(c, d, transx);

	return mlerp(along_ab, along_cd, transy);
}
/**
* Trigonometric functions
*/
/**
 * Sine of angle, computed with the x87 fsin instruction.
 *
 * NOTE(review): fsin is documented to leave its operand unchanged for
 * magnitudes of 2^63 and above - confirm callers stay within that range.
 *
 * @param angle Angle in radians
 * @return sin(angle)
 */
f64 msin(f64 angle)
{
	f64 sine;

	// Operand and result both live in ST(0).
	__asm__ volatile("fsin" : "=t"(sine) : "0"(angle));

	return sine;
}
/**
 * Cosine of angle, computed with the x87 fcos instruction.
 *
 * The dedicated instruction is used rather than a sine shifted by pi/2,
 * because adding pi/2 to the angle first rounds the argument and costs
 * accuracy as |angle| grows.
 *
 * NOTE(review): fcos is documented to leave its operand unchanged for
 * magnitudes of 2^63 and above - confirm callers stay within that range.
 *
 * @param angle Angle in radians
 * @return cos(angle)
 */
f64 mcos(f64 angle)
{
	f64 cosine;

	// Operand and result both live in ST(0).
	__asm__ volatile("fcos" : "=t"(cosine) : "0"(angle));

	return cosine;
}
/**
 * Tangent of angle, computed with the x87 fptan instruction.
 *
 * fptan replaces ST(0) with the tangent and then pushes 1.0, so afterwards
 * the constant sits in ST(0) and the tangent in ST(1).
 *
 * NOTE(review): fptan is documented to leave the stack untouched for
 * magnitudes of 2^63 and above - confirm callers stay within that range.
 *
 * @param angle Angle in radians
 * @return tan(angle)
 */
f64 mtan(f64 angle)
{
	f64 pushed_one;
	f64 tangent;

	__asm__ volatile("fptan" : "=t"(pushed_one), "=u"(tangent) : "0"(angle));

	// The pushed 1.0 only has to be taken off the FPU stack; it is not used.
	(void)pushed_one;

	return tangent;
}
|