1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
|
/* Half-float conversion routines.
Copyright (C) 2008-2024 Free Software Foundation, Inc.
Contributed by CodeSourcery.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Field layout of a binary floating-point encoding.  The conversion
   code reads every layout parameter of its source operand from one of
   these descriptors.  */
struct format
{
  unsigned long long size;         /* Total width of the encoding, in bits.  */
  unsigned long long bias;         /* Exponent bias.  */
  unsigned long long exponent;     /* Width of the exponent field, in bits.  */
  unsigned long long significand;  /* Explicitly stored significand bits.  */
};

/* Descriptor for the 32-bit format.  */
static const struct format binary32 =
{
  .size = 32,
  .bias = 127,
  .exponent = 8,
  .significand = 23
};

/* Descriptor for the 64-bit format.  */
static const struct format binary64 =
{
  .size = 64,
  .bias = 1023,
  .exponent = 11,
  .significand = 52
};
/* Function prototypes.  Every argument and result is the raw bit pattern
   of a floating-point value carried in an unsigned integer: an unsigned
   short for half precision, an unsigned int for the 32-bit format and an
   unsigned long long for the 64-bit format.  The "ieee" entry points use
   the half-precision encoding that has infinities and NaNs; the
   "alternative" entry points use the encoding that has neither.  */
/* 32-bit -> half and half -> 32-bit, IEEE encoding.  */
unsigned short __gnu_f2h_ieee (unsigned int a);
unsigned int __gnu_h2f_ieee (unsigned short a);
/* 32-bit -> half and half -> 32-bit, alternative encoding.  */
unsigned short __gnu_f2h_alternative (unsigned int x);
unsigned int __gnu_h2f_alternative (unsigned short a);
/* 64-bit -> half, IEEE and alternative encodings respectively.  */
unsigned short __gnu_d2h_ieee (unsigned long long a);
unsigned short __gnu_d2h_alternative (unsigned long long x);
/* Narrow the floating-point bit pattern A, laid out as described by FMT,
   to a 16-bit half-precision bit pattern.

   IEEE selects the target encoding.  Nonzero produces the encoding in
   which exponent field 31 holds infinities and NaNs; overflow yields an
   infinity.  Zero produces the alternative encoding, which has no
   infinities or NaNs: overflow saturates to 0x7fff and a source
   infinity or NaN becomes a zero.  In every case the source sign bit is
   carried over.

   Discarded bits are rounded to nearest, ties going to the even result.
   Magnitudes that round below the smallest half subnormal (2^-24)
   become a signed zero.  */
static inline unsigned short
__gnu_float2h_internal (const struct format* fmt,
                        unsigned long long a, int ieee)
{
  /* Bit just above the stored fraction, i.e. the implicit leading one.  */
  const unsigned long long hidden = 1ULL << fmt->significand;
  /* How many source fraction bits do not fit in the 10-bit half fraction.  */
  const unsigned long long narrow = fmt->significand - 10;
  /* Source sign bit, moved down to bit 15.  */
  const unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
  unsigned long long frac = a & (hidden - 1);
  unsigned long long mask = (1 << fmt->exponent) - 1;
  unsigned long long lost;
  unsigned long long bump;
  int e = (a >> fmt->significand) & mask;

  /* An all-ones exponent field marks an infinity or a NaN.  */
  if ((unsigned int) e == mask)
    {
      /* The alternative encoding cannot represent either one.  */
      if (!ieee)
        return sign;
      if (frac == 0)
        return sign | 0x7c00;
      /* NaN: keep the top fraction bits and force the quiet bit so a
         signalling NaN comes out quiet.  */
      return sign | 0x7e00 | (frac >> narrow);
    }

  /* A signed zero stays a signed zero.  */
  if (e == 0 && frac == 0)
    return sign;

  /* Remove the bias and set the bit above the stored fraction.  */
  e -= fmt->bias;
  frac |= hidden;

  /* Build a mask of the low bits the result cannot hold.  A normal
     result drops everything below the ten kept fraction bits; a
     subnormal result drops more the further the exponent is below -14,
     and below 2^-25 every bit is dropped.  */
  if (e >= -14)
    mask = (hidden - 1) >> 10;
  else
    {
      mask = hidden | (hidden - 1);
      if (e >= -25)
        mask >>= 25 + e;
    }

  /* Round to nearest, ties to even.  */
  lost = frac & mask;
  if (lost != 0)
    {
      bump = (mask + 1) >> 1;
      /* Exactly half way: add the half only when the lowest kept bit
         is set, so that the result ends up even.  */
      if (lost == bump)
        bump = frac & (bump << 1);
      frac += bump;
      /* Rounding carried out of the leading one; renormalise.  */
      if (frac >= (hidden << 1))
        {
          frac >>= 1;
          e++;
        }
    }

  /* Overflow: infinity for IEEE, saturation for the alternative
     encoding, whose largest exponent is one higher.  */
  if (ieee && e > 15)
    return sign | 0x7c00;
  if (!ieee && e > 16)
    return sign | 0x7fff;

  /* Underflow to zero.  */
  if (e < -24)
    return sign;

  /* Subnormal result: shift the significand down to the fixed
     exponent of -14.  */
  if (e < -14)
    {
      frac >>= -14 - e;
      e = -14;
    }

  /* Pack the result.  After dropping the surplus fraction bits a
     normalised significand still carries its leading one in bit 10,
     i.e. in the lowest bit of the exponent field.  The exponent is
     therefore biased by 14 rather than 15, and the significand is added
     instead of or-ed in so that the leading one supplies the missing
     unit.  For a subnormal result that bit is clear and the exponent
     field stays zero.  */
  return sign | (((e + 14) << 10) + (frac >> narrow));
}
/* Convert the 32-bit floating-point bit pattern A to half precision
   using the binary32 layout descriptor.  IEEE is passed through
   unchanged to select the target half-precision encoding.  */
static inline unsigned short
__gnu_f2h_internal (unsigned int a, int ieee)
{
  /* Zero-extend the operand to the width the generic routine takes.  */
  const unsigned long long bits = a;

  return __gnu_float2h_internal (&binary32, bits, ieee);
}
/* Convert the 64-bit floating-point bit pattern A to half precision
   using the binary64 layout descriptor.  IEEE is passed through
   unchanged to select the target half-precision encoding.  */
static inline unsigned short
__gnu_d2h_internal (unsigned long long a, int ieee)
{
  const struct format *const layout = &binary64;

  return __gnu_float2h_internal (layout, a, ieee);
}
/* Widen the half-precision bit pattern A to a 32-bit floating-point bit
   pattern.  When IEEE is nonzero an exponent field of 31 is decoded as
   an infinity or NaN; when it is zero that exponent is decoded like any
   other, as the alternative encoding requires.  */
static inline unsigned int
__gnu_h2f_internal(unsigned short a, int ieee)
{
  /* Sign bit moved from bit 15 to bit 31.  */
  const unsigned int sign_bit = (unsigned int) (a & 0x8000) << 16;
  unsigned int frac = a & 0x3ff;
  int e = (a >> 10) & 0x1f;

  /* Infinity or NaN: all-ones exponent, fraction widened in place.  */
  if (ieee && e == 0x1f)
    return sign_bit | 0x7f800000 | (frac << 13);

  /* Signed zero.  */
  if (e == 0 && frac == 0)
    return sign_bit;

  /* Subnormal: shift the highest set fraction bit up to bit 10 and
     lower the exponent to match.  The constant 21 assumes a 32-bit
     unsigned int.  */
  if (e == 0)
    {
      const int lead = __builtin_clz (frac) - 21;

      frac <<= lead;
      e = -lead;
    }

  /* 0x70 is the difference between the two exponent biases (127 - 15).
     The fraction is added rather than or-ed so that the explicit
     leading one of a normalised subnormal carries into the exponent.  */
  return sign_bit | (((e + 0x70) << 23) + (frac << 13));
}
/* Convert the 32-bit floating-point bit pattern A to a half-precision
   bit pattern in the IEEE encoding.  */
unsigned short
__gnu_f2h_ieee (unsigned int a)
{
  const int ieee = 1;

  return __gnu_f2h_internal (a, ieee);
}
/* Convert the half-precision bit pattern A, read in the IEEE encoding,
   to a 32-bit floating-point bit pattern.  */
unsigned int
__gnu_h2f_ieee (unsigned short a)
{
  const int ieee = 1;

  return __gnu_h2f_internal (a, ieee);
}
/* Convert the 32-bit floating-point bit pattern X to a half-precision
   bit pattern in the alternative encoding.  */
unsigned short
__gnu_f2h_alternative (unsigned int x)
{
  const int ieee = 0;

  return __gnu_f2h_internal (x, ieee);
}
/* Convert the half-precision bit pattern A, read in the alternative
   encoding, to a 32-bit floating-point bit pattern.  */
unsigned int
__gnu_h2f_alternative (unsigned short a)
{
  const int ieee = 0;

  return __gnu_h2f_internal (a, ieee);
}
/* Convert the 64-bit floating-point bit pattern A to a half-precision
   bit pattern in the IEEE encoding.  */
unsigned short
__gnu_d2h_ieee (unsigned long long a)
{
  const int ieee = 1;

  return __gnu_d2h_internal (a, ieee);
}
/* Convert the 64-bit floating-point bit pattern X to a half-precision
   bit pattern in the alternative encoding.  */
unsigned short
__gnu_d2h_alternative (unsigned long long x)
{
  const int ieee = 0;

  return __gnu_d2h_internal (x, ieee);
}
|