// Collapse the four 32-bit lanes of the 128-bit value at `address` into a
// single word with bitwise OR; the combined value ends up in lane 0.
58 v128_t x1, x0 = wasm_v128_load(address);
// x1 = { x0[2], x0[3], x0[2], x0[3] }: bring the upper half down.
59 x1 = wasm_i32x4_shuffle(x0, x0, 2, 3, 2, 3);
// Now lane 0 = x0[0] | x0[2] and lane 1 = x0[1] | x0[3].
60 x0 = wasm_v128_or(x0, x1);
// Broadcast lane 1 so it can be merged into lane 0.
61 x1 = wasm_i32x4_shuffle(x0, x0, 1, 1, 1, 1);
// Lane 0 now holds the OR of all four original lanes.
62 x0 = wasm_v128_or(x0, x1);
// Pull lane 0 out as an unsigned 32-bit scalar.
63 ui32 t = (
ui32)wasm_i32x4_extract_lane(x0, 0);
// Collapse the two 64-bit lanes of the 128-bit value at `address` into a
// single word with bitwise OR; the combined value ends up in lane 0.
70 v128_t x1, x0 = wasm_v128_load(address);
// Broadcast lane 1 into both lanes of x1.
71 x1 = wasm_i64x2_shuffle(x0, x0, 1, 1);
// Lane 0 = x0[0] | x0[1].
72 x0 = wasm_v128_or(x0, x1);
// Pull lane 0 out as an unsigned 64-bit scalar.
73 ui64 t = (
ui64)wasm_i64x2_extract_lane(x0, 0);
79 float delta_inv,
ui32 count,
ui32* max_val)
// Converts 32-bit two's-complement samples read through `p` into
// sign-magnitude words written to `dp`: the magnitude is shifted left by
// `shift` and the sign is placed in bit 31. Every shifted magnitude is also
// OR-ed into the four 32-bit accumulators loaded from `max_val`.
// NOTE(review): `p`, `dp` and `K_max` are declared outside the visible
// lines, and `delta_inv` is not referenced by any visible statement.
84 ui32 shift = 31 - K_max;
// m0 has only the sign bit set in each lane (INT_MIN == 0x80000000).
85 v128_t m0 = wasm_i32x4_splat(INT_MIN);
86 v128_t zero = wasm_i32x4_splat(0);
87 v128_t one = wasm_i32x4_splat(1);
// Start from the accumulators already stored at max_val (128 bits are read).
88 v128_t tmax = wasm_v128_load(max_val);
// Main loop: four samples per iteration while at least four remain.
90 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
92 v128_t v = wasm_v128_load(p);
// sign is all-ones in negative lanes and zero elsewhere.
93 v128_t sign = wasm_i32x4_lt(v, zero);
// (v ^ sign) + (sign & 1) negates the negative lanes, giving the magnitude.
94 v128_t val = wasm_v128_xor(v, sign);
95 v128_t ones = wasm_v128_and(sign, one);
96 val = wasm_i32x4_add(val, ones);
// Reduce the all-ones mask to just the sign bit.
97 sign = wasm_v128_and(sign, m0);
98 val = wasm_i32x4_shl(val, shift);
// Accumulate the shifted magnitude before the sign bit is merged in.
99 tmax = wasm_v128_or(tmax, val);
100 val = wasm_v128_or(val, sign);
101 wasm_v128_store(dp, val);
// Remainder handling: the same conversion as the loop body.
// NOTE(review): the guard around these statements is not visible here.
105 v128_t v = wasm_v128_load(p);
106 v128_t sign = wasm_i32x4_lt(v, zero);
107 v128_t val = wasm_v128_xor(v, sign);
108 v128_t ones = wasm_v128_and(sign, one);
109 val = wasm_i32x4_add(val, ones);
110 sign = wasm_v128_and(sign, m0);
111 val = wasm_i32x4_shl(val, shift);
// NOTE(review): `c` is defined on a line not shown; presumably it holds the
// leftover count in every lane - confirm.
// mask is all-ones in lanes whose index (0..3) is below c, so only those
// lanes contribute to tmax.
114 v128_t idx = wasm_i32x4_make(0, 1, 2, 3);
115 v128_t mask = wasm_i32x4_gt(c, idx);
116 c = wasm_v128_and(val, mask);
117 tmax = wasm_v128_or(tmax, c);
// All four lanes are still written to dp, regardless of the mask.
119 val = wasm_v128_or(val, sign);
120 wasm_v128_store(dp, val);
// Write the accumulators back for the caller.
122 wasm_v128_store(max_val, tmax);
127 float delta_inv,
ui32 count,
ui32* max_val)
// Scales float samples read from `sp` by `delta_inv`, converts them to
// 32-bit integers, and writes them to `dp` in sign-magnitude form (sign in
// bit 31, magnitude unshifted). Every magnitude is also OR-ed into the four
// 32-bit accumulators loaded from `max_val`.
// NOTE(review): `sp` and `dp` are declared outside the visible lines.
133 v128_t d = wasm_f32x4_splat(delta_inv);
134 v128_t zero = wasm_i32x4_splat(0);
135 v128_t one = wasm_i32x4_splat(1);
// Start from the accumulators already stored at max_val (128 bits are read).
136 v128_t tmax = wasm_v128_load(max_val);
137 float *p = (
float*)sp;
// Main loop: four samples per iteration while at least four remain.
138 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
140 v128_t vf = wasm_v128_load(p);
141 vf = wasm_f32x4_mul(vf, d);
// Float to int32 conversion that truncates and saturates.
142 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf);
// sign is all-ones in negative lanes and zero elsewhere.
143 v128_t sign = wasm_i32x4_lt(val, zero);
// (val ^ sign) + (sign & 1) negates the negative lanes, giving the magnitude.
144 val = wasm_v128_xor(val, sign);
145 v128_t ones = wasm_v128_and(sign, one);
146 val = wasm_i32x4_add(val, ones);
// Accumulate the magnitude before the sign bit is merged in.
147 tmax = wasm_v128_or(tmax, val);
// Shifting the all-ones mask left by 31 leaves only bit 31 set.
148 sign = wasm_i32x4_shl(sign, 31);
149 val = wasm_v128_or(val, sign);
150 wasm_v128_store(dp, val);
// Remainder handling: the same conversion as the loop body.
// NOTE(review): the guard around these statements is not visible here.
154 v128_t vf = wasm_v128_load(p);
155 vf = wasm_f32x4_mul(vf, d);
156 v128_t val = wasm_i32x4_trunc_sat_f32x4(vf);
157 v128_t sign = wasm_i32x4_lt(val, zero);
158 val = wasm_v128_xor(val, sign);
159 v128_t ones = wasm_v128_and(sign, one);
160 val = wasm_i32x4_add(val, ones);
// NOTE(review): `c` is defined on a line not shown; presumably it holds the
// leftover count in every lane - confirm.
// mask is all-ones in lanes whose index (0..3) is below c, so only those
// lanes contribute to tmax.
163 v128_t idx = wasm_i32x4_make(0, 1, 2, 3);
164 v128_t mask = wasm_i32x4_gt(c, idx);
165 c = wasm_v128_and(val, mask);
166 tmax = wasm_v128_or(tmax, c);
// All four lanes are still written to dp, regardless of the mask.
168 sign = wasm_i32x4_shl(sign, 31);
169 val = wasm_v128_or(val, sign);
170 wasm_v128_store(dp, val);
// Write the accumulators back for the caller.
172 wasm_v128_store(max_val, tmax);
177 float delta,
ui32 count)
// Converts 32-bit sign-magnitude words (sign in bit 31, magnitude shifted
// left by `shift`) into two's-complement integers stored through `p`.
// NOTE(review): `sp`, `p`, `K_max` and the load that defines `v` are on
// lines not shown; `delta` is not referenced by any visible statement.
180 ui32 shift = 31 - K_max;
// m1 clears the sign bit of each lane (INT_MAX == 0x7FFFFFFF).
181 v128_t m1 = wasm_i32x4_splat(INT_MAX);
182 v128_t zero = wasm_i32x4_splat(0);
183 v128_t one = wasm_i32x4_splat(1);
// Four lanes per iteration. The last iteration stores a full vector even
// when count is not a multiple of 4.
// NOTE(review): confirm the buffers are padded to allow this.
185 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
// Drop the sign bit, then shift the magnitude back down.
188 v128_t val = wasm_v128_and(v, m1);
189 val = wasm_i32x4_shr(val, shift);
// sign is all-ones in lanes whose bit 31 was set.
190 v128_t sign = wasm_i32x4_lt(v, zero);
// (val ^ sign) + (sign & 1) negates the lanes that carried a sign bit.
191 val = wasm_v128_xor(val, sign);
192 v128_t ones = wasm_v128_and(sign, one);
193 val = wasm_i32x4_add(val, ones);
194 wasm_v128_store(p, val);
200 float delta,
ui32 count)
// Converts 32-bit sign-magnitude words into floats scaled by `delta` and
// stores them through `dp`, reinterpreted as float*.
// NOTE(review): `sp`, `dp` and the load that defines `v` are on lines not
// shown.
// m1 clears the sign bit of each lane (INT_MAX == 0x7FFFFFFF).
203 v128_t m1 = wasm_i32x4_splat(INT_MAX);
204 v128_t d = wasm_f32x4_splat(delta);
205 float *p = (
float*)dp;
// Four lanes per iteration. The last iteration stores a full vector even
// when count is not a multiple of 4.
// NOTE(review): confirm the buffers are padded to allow this.
206 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
// Magnitude as a non-negative integer, converted to float and scaled.
209 v128_t vali = wasm_v128_and(v, m1);
210 v128_t valf = wasm_f32x4_convert_i32x4(vali);
211 valf = wasm_f32x4_mul(valf, d);
// v & ~m1 isolates bit 31; OR-ing it into the float sets its sign bit.
212 v128_t sign = wasm_v128_andnot(v, m1);
213 valf = wasm_v128_or(valf, sign);
214 wasm_v128_store(p, valf);
220 float delta_inv,
ui32 count,
ui64* max_val)
// Converts 64-bit two's-complement samples read through `p` into
// sign-magnitude words written to `dp`: the magnitude is shifted left by
// `shift` and the sign is placed in bit 63. Every shifted magnitude is also
// OR-ed into the two 64-bit accumulators loaded from `max_val`.
// NOTE(review): `p`, `dp` and `K_max` are declared outside the visible
// lines, and `delta_inv` is not referenced by any visible statement.
225 ui32 shift = 63 - K_max;
// m0 has only the sign bit set in each 64-bit lane.
226 v128_t m0 = wasm_i64x2_splat(LLONG_MIN);
227 v128_t zero = wasm_i64x2_splat(0);
228 v128_t one = wasm_i64x2_splat(1);
// Start from the accumulators already stored at max_val (128 bits are read).
229 v128_t tmax = wasm_v128_load(max_val);
// Main loop: two samples per iteration while at least two remain.
231 for ( ; count >= 2; count -= 2, p += 2, dp += 2)
233 v128_t v = wasm_v128_load(p);
// sign is all-ones in negative lanes and zero elsewhere.
234 v128_t sign = wasm_i64x2_lt(v, zero);
// (v ^ sign) + (sign & 1) negates the negative lanes, giving the magnitude.
235 v128_t val = wasm_v128_xor(v, sign);
236 v128_t ones = wasm_v128_and(sign, one);
237 val = wasm_i64x2_add(val, ones);
// Reduce the all-ones mask to just the sign bit.
238 sign = wasm_v128_and(sign, m0);
239 val = wasm_i64x2_shl(val, shift);
// Accumulate the shifted magnitude before the sign bit is merged in.
240 tmax = wasm_v128_or(tmax, val);
241 val = wasm_v128_or(val, sign);
242 wasm_v128_store(dp, val);
// Remainder handling: the same conversion as the loop body.
// NOTE(review): the guard around these statements is not visible here.
246 v128_t v = wasm_v128_load(p);
247 v128_t sign = wasm_i64x2_lt(v, zero);
248 v128_t val = wasm_v128_xor(v, sign);
249 v128_t ones = wasm_v128_and(sign, one);
250 val = wasm_i64x2_add(val, ones);
251 sign = wasm_v128_and(sign, m0);
252 val = wasm_i64x2_shl(val, shift);
// The two low 32-bit lanes of c are all-ones and the two high ones are zero,
// so c selects 64-bit lane 0 only; just that lane contributes to tmax.
254 v128_t c = wasm_i32x4_make((
si32)0xFFFFFFFF, (
si32)0xFFFFFFFF, 0, 0);
255 c = wasm_v128_and(val, c);
256 tmax = wasm_v128_or(tmax, c);
// Both lanes are still written to dp, regardless of the mask.
258 val = wasm_v128_or(val, sign);
259 wasm_v128_store(dp, val);
// Write the accumulators back for the caller.
262 wasm_v128_store(max_val, tmax);
267 float delta,
ui32 count)
// Converts 64-bit sign-magnitude words (sign in bit 63, magnitude shifted
// left by `shift`) into two's-complement integers stored through `p`.
// NOTE(review): `sp`, `p`, `K_max` and the load that defines `v` are on
// lines not shown; `delta` is not referenced by any visible statement.
270 ui32 shift = 63 - K_max;
// m1 clears the sign bit of each 64-bit lane.
271 v128_t m1 = wasm_i64x2_splat(LLONG_MAX);
272 v128_t zero = wasm_i64x2_splat(0);
273 v128_t one = wasm_i64x2_splat(1);
// Two lanes per iteration. The last iteration stores a full vector even
// when count is odd.
// NOTE(review): confirm the buffers are padded to allow this.
275 for (
ui32 i = 0; i < count; i += 2, sp += 2, p += 2)
// Drop the sign bit, then shift the magnitude back down.
278 v128_t val = wasm_v128_and(v, m1);
279 val = wasm_i64x2_shr(val, shift);
// sign is all-ones in lanes whose bit 63 was set.
280 v128_t sign = wasm_i64x2_lt(v, zero);
// (val ^ sign) + (sign & 1) negates the lanes that carried a sign bit.
281 val = wasm_v128_xor(val, sign);
282 v128_t ones = wasm_v128_and(sign, one);
283 val = wasm_i64x2_add(val, ones);
284 wasm_v128_store(p, val);