OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_transform.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_transform.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38#include <cstdio>
39#include <mutex>
40
41#include "ojph_arch.h"
42#include "ojph_mem.h"
43#include "ojph_transform.h"
45#include "ojph_params.h"
47
48namespace ojph {
49
50 // defined elsewhere
51 class line_buf;
52
53 namespace local {
54
56 // Reversible functions
58
61 (const lifting_step* s, const line_buf* sig, const line_buf* other,
62 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
63
66 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
67 const line_buf* src, ui32 width, bool even) = NULL;
68
71 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
72 const line_buf* hsrc, ui32 width, bool even) = NULL;
73
75 // Irreversible functions
77
80 (const lifting_step* s, const line_buf* sig, const line_buf* other,
81 const line_buf* aug, ui32 repeat, bool synthesis) = NULL;
82
85 (float K, const line_buf* aug, ui32 repeat) = NULL;
86
89 (const param_atk* atk, const line_buf* ldst, const line_buf* hdst,
90 const line_buf* src, ui32 width, bool even) = NULL;
91
94 (const param_atk* atk, const line_buf* dst, const line_buf* lsrc,
95 const line_buf* hsrc, ui32 width, bool even) = NULL;
96
99 {
100 static std::once_flag wavelet_transform_functions_init_flag;
101 std::call_once(wavelet_transform_functions_init_flag, [](){
102#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
103
107
112
113 #ifndef OJPH_DISABLE_SIMD
114
115 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
116
117 #ifndef OJPH_DISABLE_SSE
119 {
124 }
125 #endif // !OJPH_DISABLE_SSE
126
127 #ifndef OJPH_DISABLE_SSE2
129 {
133 }
134 #endif // !OJPH_DISABLE_SSE2
135
136 #ifndef OJPH_DISABLE_AVX
138 {
143 }
144 #endif // !OJPH_DISABLE_AVX
145
146 #ifndef OJPH_DISABLE_AVX2
148 {
152 }
153 #endif // !OJPH_DISABLE_AVX2
154
155 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
157 {
158 // rev_vert_step = avx512_rev_vert_step;
159 // rev_horz_ana = avx512_rev_horz_ana;
160 // rev_horz_syn = avx512_rev_horz_syn;
161
166 }
167 #endif // !OJPH_DISABLE_AVX512
168
169 #elif defined(OJPH_ARCH_ARM)
170
171 #elif defined(OJPH_ARCH_PPC64LE)
172
174 {
175 // 128-bit VSX kernels; see ojph_simd_vsx.h
179
184 }
185
186 #endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
187
188 #endif // !OJPH_DISABLE_SIMD
189
190#else // OJPH_ENABLE_WASM_SIMD
194
199#endif // !OJPH_ENABLE_WASM_SIMD
200 });
201 }
202
204
205#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
206
208 static
209 void gen_rev_vert_step32(const lifting_step* s, const line_buf* sig,
210 const line_buf* other, const line_buf* aug,
211 ui32 repeat, bool synthesis)
212 {
213 const si32 a = s->rev.Aatk;
214 const si32 b = s->rev.Batk;
215 const ui8 e = s->rev.Eatk;
216
217 si32* dst = aug->i32;
218 const si32* src1 = sig->i32, * src2 = other->i32;
219 // The general definition of the wavelet in Part 2 is slightly
220 // different to part 2, although they are mathematically equivalent
221 // here, we identify the simpler form from Part 1 and employ them
222 if (a == 1)
223 { // 5/3 update and any case with a == 1
224 if (synthesis)
225 for (ui32 i = repeat; i > 0; --i)
226 *dst++ -= (b + *src1++ + *src2++) >> e;
227 else
228 for (ui32 i = repeat; i > 0; --i)
229 *dst++ += (b + *src1++ + *src2++) >> e;
230 }
231 else if (a == -1 && b == 1 && e == 1)
232 { // 5/3 predict
233 if (synthesis)
234 for (ui32 i = repeat; i > 0; --i)
235 *dst++ += (*src1++ + *src2++) >> e;
236 else
237 for (ui32 i = repeat; i > 0; --i)
238 *dst++ -= (*src1++ + *src2++) >> e;
239 }
240 else if (a == -1)
241 { // any case with a == -1, which is not 5/3 predict
242 if (synthesis)
243 for (ui32 i = repeat; i > 0; --i)
244 *dst++ -= (b - (*src1++ + *src2++)) >> e;
245 else
246 for (ui32 i = repeat; i > 0; --i)
247 *dst++ += (b - (*src1++ + *src2++)) >> e;
248 }
249 else { // general case
250 if (synthesis)
251 for (ui32 i = repeat; i > 0; --i)
252 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
253 else
254 for (ui32 i = repeat; i > 0; --i)
255 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
256 }
257 }
258
260 static
261 void gen_rev_vert_step64(const lifting_step* s, const line_buf* sig,
262 const line_buf* other, const line_buf* aug,
263 ui32 repeat, bool synthesis)
264 {
265 const si64 a = s->rev.Aatk;
266 const si64 b = s->rev.Batk;
267 const ui8 e = s->rev.Eatk;
268
269 si64* dst = aug->i64;
270 const si64* src1 = sig->i64, * src2 = other->i64;
271 // The general definition of the wavelet in Part 2 is slightly
272 // different to part 2, although they are mathematically equivalent
273 // here, we identify the simpler form from Part 1 and employ them
274 if (a == 1)
275 { // 5/3 update and any case with a == 1
276 if (synthesis)
277 for (ui32 i = repeat; i > 0; --i)
278 *dst++ -= (b + *src1++ + *src2++) >> e;
279 else
280 for (ui32 i = repeat; i > 0; --i)
281 *dst++ += (b + *src1++ + *src2++) >> e;
282 }
283 else if (a == -1 && b == 1 && e == 1)
284 { // 5/3 predict
285 if (synthesis)
286 for (ui32 i = repeat; i > 0; --i)
287 *dst++ += (*src1++ + *src2++) >> e;
288 else
289 for (ui32 i = repeat; i > 0; --i)
290 *dst++ -= (*src1++ + *src2++) >> e;
291 }
292 else if (a == -1)
293 { // any case with a == -1, which is not 5/3 predict
294 if (synthesis)
295 for (ui32 i = repeat; i > 0; --i)
296 *dst++ -= (b - (*src1++ + *src2++)) >> e;
297 else
298 for (ui32 i = repeat; i > 0; --i)
299 *dst++ += (b - (*src1++ + *src2++)) >> e;
300 }
301 else { // general case
302 if (synthesis)
303 for (ui32 i = repeat; i > 0; --i)
304 *dst++ -= (b + a * (*src1++ + *src2++)) >> e;
305 else
306 for (ui32 i = repeat; i > 0; --i)
307 *dst++ += (b + a * (*src1++ + *src2++)) >> e;
308 }
309 }
310
312 void gen_rev_vert_step(const lifting_step* s, const line_buf* sig,
313 const line_buf* other, const line_buf* aug,
314 ui32 repeat, bool synthesis)
315 {
316 if (((sig != NULL) && (sig->flags & line_buf::LFT_32BIT)) ||
317 ((aug != NULL) && (aug->flags & line_buf::LFT_32BIT)) ||
318 ((other != NULL) && (other->flags & line_buf::LFT_32BIT)))
319 {
320 assert((sig == NULL || sig->flags & line_buf::LFT_32BIT) &&
321 (other == NULL || other->flags & line_buf::LFT_32BIT) &&
322 (aug == NULL || aug->flags & line_buf::LFT_32BIT));
323 gen_rev_vert_step32(s, sig, other, aug, repeat, synthesis);
324 }
325 else
326 {
327 assert((sig == NULL || sig->flags & line_buf::LFT_64BIT) &&
328 (other == NULL || other->flags & line_buf::LFT_64BIT) &&
329 (aug == NULL || aug->flags & line_buf::LFT_64BIT));
330 gen_rev_vert_step64(s, sig, other, aug, repeat, synthesis);
331 }
332 }
333
335 static
336 void gen_rev_horz_ana32(const param_atk* atk, const line_buf* ldst,
337 const line_buf* hdst, const line_buf* src,
338 ui32 width, bool even)
339 {
340 if (width > 1)
341 {
342 // combine both lsrc and hsrc into dst
343 si32* dph = hdst->i32;
344 si32* dpl = ldst->i32;
345 si32* sp = src->i32;
346 ui32 w = width;
347 if (!even)
348 {
349 *dph++ = *sp++; --w;
350 }
351 for (; w > 1; w -= 2)
352 {
353 *dpl++ = *sp++; *dph++ = *sp++;
354 }
355 if (w)
356 {
357 *dpl++ = *sp++; --w;
358 }
359
360 si32* hp = hdst->i32, * lp = ldst->i32;
361 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
362 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
363 ui32 num_steps = atk->get_num_steps();
364 for (ui32 j = num_steps; j > 0; --j)
365 {
366 // first lifting step
367 const lifting_step* s = atk->get_step(j - 1);
368 const si32 a = s->rev.Aatk;
369 const si32 b = s->rev.Batk;
370 const ui8 e = s->rev.Eatk;
371
372 // extension
373 lp[-1] = lp[0];
374 lp[l_width] = lp[l_width - 1];
375 // lifting step
376 const si32* sp = lp + (even ? 1 : 0);
377 si32* dp = hp;
378 if (a == 1)
379 { // 5/3 update and any case with a == 1
380 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
381 *dp += (b + (sp[-1] + sp[0])) >> e;
382 }
383 else if (a == -1 && b == 1 && e == 1)
384 { // 5/3 predict
385 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
386 *dp -= (sp[-1] + sp[0]) >> e;
387 }
388 else if (a == -1)
389 { // any case with a == -1, which is not 5/3 predict
390 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
391 *dp += (b - (sp[-1] + sp[0])) >> e;
392 }
393 else {
394 // general case
395 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
396 *dp += (b + a * (sp[-1] + sp[0])) >> e;
397 }
398
399 // swap buffers
400 si32* t = lp; lp = hp; hp = t;
401 even = !even;
402 ui32 w = l_width; l_width = h_width; h_width = w;
403 }
404 }
405 else {
406 if (even)
407 ldst->i32[0] = src->i32[0];
408 else
409 hdst->i32[0] = src->i32[0] << 1;
410 }
411 }
412
414 static
415 void gen_rev_horz_ana64(const param_atk* atk, const line_buf* ldst,
416 const line_buf* hdst, const line_buf* src,
417 ui32 width, bool even)
418 {
419 if (width > 1)
420 {
421 // combine both lsrc and hsrc into dst
422 si64* dph = hdst->i64;
423 si64* dpl = ldst->i64;
424 si64* sp = src->i64;
425 ui32 w = width;
426 if (!even)
427 {
428 *dph++ = *sp++; --w;
429 }
430 for (; w > 1; w -= 2)
431 {
432 *dpl++ = *sp++; *dph++ = *sp++;
433 }
434 if (w)
435 {
436 *dpl++ = *sp++; --w;
437 }
438
439 si64* hp = hdst->i64, * lp = ldst->i64;
440 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
441 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
442 ui32 num_steps = atk->get_num_steps();
443 for (ui32 j = num_steps; j > 0; --j)
444 {
445 // first lifting step
446 const lifting_step* s = atk->get_step(j - 1);
447 const si64 a = s->rev.Aatk;
448 const si64 b = s->rev.Batk;
449 const ui8 e = s->rev.Eatk;
450
451 // extension
452 lp[-1] = lp[0];
453 lp[l_width] = lp[l_width - 1];
454 // lifting step
455 const si64* sp = lp + (even ? 1 : 0);
456 si64* dp = hp;
457 if (a == 1)
458 { // 5/3 update and any case with a == 1
459 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
460 *dp += (b + (sp[-1] + sp[0])) >> e;
461 }
462 else if (a == -1 && b == 1 && e == 1)
463 { // 5/3 predict
464 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
465 *dp -= (sp[-1] + sp[0]) >> e;
466 }
467 else if (a == -1)
468 { // any case with a == -1, which is not 5/3 predict
469 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
470 *dp += (b - (sp[-1] + sp[0])) >> e;
471 }
472 else {
473 // general case
474 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
475 *dp += (b + a * (sp[-1] + sp[0])) >> e;
476 }
477
478 // swap buffers
479 si64* t = lp; lp = hp; hp = t;
480 even = !even;
481 ui32 w = l_width; l_width = h_width; h_width = w;
482 }
483 }
484 else {
485 if (even)
486 ldst->i64[0] = src->i64[0];
487 else
488 hdst->i64[0] = src->i64[0] << 1;
489 }
490 }
491
493 void gen_rev_horz_ana(const param_atk* atk, const line_buf* ldst,
494 const line_buf* hdst, const line_buf* src,
495 ui32 width, bool even)
496 {
497 if (src->flags & line_buf::LFT_32BIT)
498 {
499 assert((ldst == NULL || ldst->flags & line_buf::LFT_32BIT) &&
500 (hdst == NULL || hdst->flags & line_buf::LFT_32BIT));
501 gen_rev_horz_ana32(atk, ldst, hdst, src, width, even);
502 }
503 else
504 {
505 assert((ldst == NULL || ldst->flags & line_buf::LFT_64BIT) &&
506 (hdst == NULL || hdst->flags & line_buf::LFT_64BIT) &&
507 (src == NULL || src->flags & line_buf::LFT_64BIT));
508 gen_rev_horz_ana64(atk, ldst, hdst, src, width, even);
509 }
510 }
511
513 static
514 void gen_rev_horz_syn32(const param_atk* atk, const line_buf* dst,
515 const line_buf* lsrc, const line_buf* hsrc,
516 ui32 width, bool even)
517 {
518 if (width > 1)
519 {
520 bool ev = even;
521 si32* oth = hsrc->i32, * aug = lsrc->i32;
522 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
523 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
524 ui32 num_steps = atk->get_num_steps();
525 for (ui32 j = 0; j < num_steps; ++j)
526 {
527 const lifting_step* s = atk->get_step(j);
528 const si32 a = s->rev.Aatk;
529 const si32 b = s->rev.Batk;
530 const ui8 e = s->rev.Eatk;
531
532 // extension
533 oth[-1] = oth[0];
534 oth[oth_width] = oth[oth_width - 1];
535 // lifting step
536 const si32* sp = oth + (ev ? 0 : 1);
537 si32* dp = aug;
538 if (a == 1)
539 { // 5/3 update and any case with a == 1
540 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
541 *dp -= (b + (sp[-1] + sp[0])) >> e;
542 }
543 else if (a == -1 && b == 1 && e == 1)
544 { // 5/3 predict
545 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
546 *dp += (sp[-1] + sp[0]) >> e;
547 }
548 else if (a == -1)
549 { // any case with a == -1, which is not 5/3 predict
550 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
551 *dp -= (b - (sp[-1] + sp[0])) >> e;
552 }
553 else {
554 // general case
555 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
556 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
557 }
558
559 // swap buffers
560 si32* t = aug; aug = oth; oth = t;
561 ev = !ev;
562 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
563 }
564
565 // combine both lsrc and hsrc into dst
566 si32* sph = hsrc->i32;
567 si32* spl = lsrc->i32;
568 si32* dp = dst->i32;
569 ui32 w = width;
570 if (!even)
571 {
572 *dp++ = *sph++; --w;
573 }
574 for (; w > 1; w -= 2)
575 {
576 *dp++ = *spl++; *dp++ = *sph++;
577 }
578 if (w)
579 {
580 *dp++ = *spl++; --w;
581 }
582 }
583 else {
584 if (even)
585 dst->i32[0] = lsrc->i32[0];
586 else
587 dst->i32[0] = hsrc->i32[0] >> 1;
588 }
589 }
590
592 static
593 void gen_rev_horz_syn64(const param_atk* atk, const line_buf* dst,
594 const line_buf* lsrc, const line_buf* hsrc,
595 ui32 width, bool even)
596 {
597 if (width > 1)
598 {
599 bool ev = even;
600 si64* oth = hsrc->i64, * aug = lsrc->i64;
601 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
602 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
603 ui32 num_steps = atk->get_num_steps();
604 for (ui32 j = 0; j < num_steps; ++j)
605 {
606 const lifting_step* s = atk->get_step(j);
607 const si64 a = s->rev.Aatk;
608 const si64 b = s->rev.Batk;
609 const ui8 e = s->rev.Eatk;
610
611 // extension
612 oth[-1] = oth[0];
613 oth[oth_width] = oth[oth_width - 1];
614 // lifting step
615 const si64* sp = oth + (ev ? 0 : 1);
616 si64* dp = aug;
617 if (a == 1)
618 { // 5/3 update and any case with a == 1
619 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
620 *dp -= (b + (sp[-1] + sp[0])) >> e;
621 }
622 else if (a == -1 && b == 1 && e == 1)
623 { // 5/3 predict
624 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
625 *dp += (sp[-1] + sp[0]) >> e;
626 }
627 else if (a == -1)
628 { // any case with a == -1, which is not 5/3 predict
629 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
630 *dp -= (b - (sp[-1] + sp[0])) >> e;
631 }
632 else {
633 // general case
634 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
635 *dp -= (b + a * (sp[-1] + sp[0])) >> e;
636 }
637
638 // swap buffers
639 si64* t = aug; aug = oth; oth = t;
640 ev = !ev;
641 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
642 }
643
644 // combine both lsrc and hsrc into dst
645 si64* sph = hsrc->i64;
646 si64* spl = lsrc->i64;
647 si64* dp = dst->i64;
648 ui32 w = width;
649 if (!even)
650 {
651 *dp++ = *sph++; --w;
652 }
653 for (; w > 1; w -= 2)
654 {
655 *dp++ = *spl++; *dp++ = *sph++;
656 }
657 if (w)
658 {
659 *dp++ = *spl++; --w;
660 }
661 }
662 else {
663 if (even)
664 dst->i64[0] = lsrc->i64[0];
665 else
666 dst->i64[0] = hsrc->i64[0] >> 1;
667 }
668 }
669
671 void gen_rev_horz_syn(const param_atk* atk, const line_buf* dst,
672 const line_buf* lsrc, const line_buf* hsrc,
673 ui32 width, bool even)
674 {
675 if (dst->flags & line_buf::LFT_32BIT)
676 {
677 assert((lsrc == NULL || lsrc->flags & line_buf::LFT_32BIT) &&
678 (hsrc == NULL || hsrc->flags & line_buf::LFT_32BIT));
679 gen_rev_horz_syn32(atk, dst, lsrc, hsrc, width, even);
680 }
681 else
682 {
683 assert((dst == NULL || dst->flags & line_buf::LFT_64BIT) &&
684 (lsrc == NULL || lsrc->flags & line_buf::LFT_64BIT) &&
685 (hsrc == NULL || hsrc->flags & line_buf::LFT_64BIT));
686 gen_rev_horz_syn64(atk, dst, lsrc, hsrc, width, even);
687 }
688 }
689
691 void gen_irv_vert_step(const lifting_step* s, const line_buf* sig,
692 const line_buf* other, const line_buf* aug,
693 ui32 repeat, bool synthesis)
694 {
695 float a = s->irv.Aatk;
696
697 if (synthesis)
698 a = -a;
699
700 float* dst = aug->f32;
701 const float* src1 = sig->f32, * src2 = other->f32;
702 for (ui32 i = repeat; i > 0; --i)
703 *dst++ += a * (*src1++ + *src2++);
704 }
705
707 void gen_irv_vert_times_K(float K, const line_buf* aug, ui32 repeat)
708 {
709 float* dst = aug->f32;
710 for (ui32 i = repeat; i > 0; --i)
711 *dst++ *= K;
712 }
713
715 void gen_irv_horz_ana(const param_atk* atk, const line_buf* ldst,
716 const line_buf* hdst, const line_buf* src,
717 ui32 width, bool even)
718 {
719 if (width > 1)
720 {
721 // split src into ldst and hdst
722 float* dph = hdst->f32;
723 float* dpl = ldst->f32;
724 float* sp = src->f32;
725 ui32 w = width;
726 if (!even)
727 {
728 *dph++ = *sp++; --w;
729 }
730 for (; w > 1; w -= 2)
731 {
732 *dpl++ = *sp++; *dph++ = *sp++;
733 }
734 if (w)
735 {
736 *dpl++ = *sp++; --w;
737 }
738
739 float* hp = hdst->f32, * lp = ldst->f32;
740 ui32 l_width = (width + (even ? 1 : 0)) >> 1; // low pass
741 ui32 h_width = (width + (even ? 0 : 1)) >> 1; // high pass
742 ui32 num_steps = atk->get_num_steps();
743 for (ui32 j = num_steps; j > 0; --j)
744 {
745 const lifting_step* s = atk->get_step(j - 1);
746 const float a = s->irv.Aatk;
747
748 // extension
749 lp[-1] = lp[0];
750 lp[l_width] = lp[l_width - 1];
751 // lifting step
752 const float* sp = lp + (even ? 1 : 0);
753 float* dp = hp;
754 for (ui32 i = h_width; i > 0; --i, sp++, dp++)
755 *dp += a * (sp[-1] + sp[0]);
756
757 // swap buffers
758 float* t = lp; lp = hp; hp = t;
759 even = !even;
760 ui32 w = l_width; l_width = h_width; h_width = w;
761 }
762
763 {
764 float K = atk->get_K();
765 float K_inv = 1.0f / K;
766 float* dp;
767
768 dp = lp;
769 for (ui32 i = l_width; i > 0; --i)
770 *dp++ *= K_inv;
771
772 dp = hp;
773 for (ui32 i = h_width; i > 0; --i)
774 *dp++ *= K;
775 }
776 }
777 else {
778 if (even)
779 ldst->f32[0] = src->f32[0];
780 else
781 hdst->f32[0] = src->f32[0] * 2.0f;
782 }
783 }
784
786 void gen_irv_horz_syn(const param_atk* atk, const line_buf* dst,
787 const line_buf* lsrc, const line_buf* hsrc,
788 ui32 width, bool even)
789 {
790 if (width > 1)
791 {
792 bool ev = even;
793 float* oth = hsrc->f32, * aug = lsrc->f32;
794 ui32 aug_width = (width + (even ? 1 : 0)) >> 1; // low pass
795 ui32 oth_width = (width + (even ? 0 : 1)) >> 1; // high pass
796
797 {
798 float K = atk->get_K();
799 float K_inv = 1.0f / K;
800 float* dp;
801
802 dp = aug;
803 for (ui32 i = aug_width; i > 0; --i)
804 *dp++ *= K;
805
806 dp = oth;
807 for (ui32 i = oth_width; i > 0; --i)
808 *dp++ *= K_inv;
809 }
810
811 ui32 num_steps = atk->get_num_steps();
812 for (ui32 j = 0; j < num_steps; ++j)
813 {
814 const lifting_step* s = atk->get_step(j);
815 const float a = s->irv.Aatk;
816
817 // extension
818 oth[-1] = oth[0];
819 oth[oth_width] = oth[oth_width - 1];
820 // lifting step
821 const float* sp = oth + (ev ? 0 : 1);
822 float* dp = aug;
823 for (ui32 i = aug_width; i > 0; --i, sp++, dp++)
824 *dp -= a * (sp[-1] + sp[0]);
825
826 // swap buffers
827 float* t = aug; aug = oth; oth = t;
828 ev = !ev;
829 ui32 w = aug_width; aug_width = oth_width; oth_width = w;
830 }
831
832 // combine both lsrc and hsrc into dst
833 float* sph = hsrc->f32;
834 float* spl = lsrc->f32;
835 float* dp = dst->f32;
836 ui32 w = width;
837 if (!even)
838 { *dp++ = *sph++; --w; }
839 for (; w > 1; w -= 2)
840 { *dp++ = *spl++; *dp++ = *sph++; }
841 if (w)
842 { *dp++ = *spl++; --w; }
843 }
844 else {
845 if (even)
846 dst->f32[0] = lsrc->f32[0];
847 else
848 dst->f32[0] = hsrc->f32[0] * 0.5f;
849 }
850 }
851
852#endif // !OJPH_ENABLE_WASM_SIMD
853
854 }
855}
float * f32
Definition ojph_mem.h:187
void(* rev_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void gen_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_syn32(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
static void gen_rev_vert_step64(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx512_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void vsx_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void gen_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_vert_step32(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
static void gen_rev_horz_ana64(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* irv_vert_times_K)(float K, const line_buf *aug, ui32 repeat)
void gen_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void(* irv_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void sse_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx2_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void init_wavelet_transform_functions()
void wasm_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
static void gen_rev_horz_syn64(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void wasm_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void wasm_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void avx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void sse_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx512_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void avx512_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void avx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx2_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void vsx_irv_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_rev_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void sse2_rev_vert_step(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void wasm_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void(* irv_horz_ana)(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void(* rev_vert_step)(const lifting_step *s, const line_buf *sig, const line_buf *other, const line_buf *aug, ui32 repeat, bool synthesis)
void vsx_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void gen_irv_horz_ana(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void avx_irv_vert_times_K(float K, const line_buf *aug, ui32 repeat)
void(* irv_horz_syn)(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void wasm_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
void vsx_rev_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
static void gen_rev_horz_ana32(const param_atk *atk, const line_buf *ldst, const line_buf *hdst, const line_buf *src, ui32 width, bool even)
void vsx_irv_horz_syn(const param_atk *atk, const line_buf *dst, const line_buf *lsrc, const line_buf *hsrc, ui32 width, bool even)
int64_t si64
Definition ojph_defs.h:57
@ PPC_CPU_EXT_LEVEL_ARCH_3_00
Definition ojph_arch.h:180
OJPH_EXPORT int get_cpu_ext_level()
int32_t si32
Definition ojph_defs.h:55
uint32_t ui32
Definition ojph_defs.h:54
uint8_t ui8
Definition ojph_defs.h:50
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:163
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:162
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:165
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:157
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:156
const lifting_step * get_step(ui32 s) const