40#include "ojph_simd_vsx.h"
51 for (
size_t i = 0; i < count; i += 16, addr = (
char*)addr + 16)
79 float delta_inv,
ui32 count,
ui32* max_val)
84 ui32 shift = 31 - K_max;
90 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
127 float delta_inv,
ui32 count,
ui32* max_val)
137 float *p = (
float*)sp;
138 for ( ; count >= 4; count -= 4, p += 4, dp += 4)
177 float delta,
ui32 count)
180 ui32 shift = 31 - K_max;
185 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
200 float delta,
ui32 count)
205 float *p = (
float*)dp;
206 for (
ui32 i = 0; i < count; i += 4, sp += 4, p += 4)
220 float delta_inv,
ui32 count,
ui64* max_val)
225 ui32 shift = 63 - K_max;
231 for ( ; count >= 2; count -= 2, p += 2, dp += 2)
267 float delta,
ui32 count)
270 ui32 shift = 63 - K_max;
275 for (
ui32 i = 0; i < count; i += 2, sp += 2, p += 2)
ui32 vsx_find_max_val32(ui32 *address)
void vsx_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
void vsx_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
ui64 vsx_find_max_val64(ui64 *address)
void vsx_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void vsx_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void vsx_mem_clear(void *addr, size_t count)
static v128_t vsx_i32x4_make(int a, int b, int c, int d)
static v128_t vsx_f32x4_mul(v128_t a, v128_t b)
static v128_t vsx_i64x2_lt(v128_t a, v128_t b)
static v128_t vsx_f32x4_convert_i32x4(v128_t a)
static v128_t vsx_v128_xor(v128_t a, v128_t b)
static v128_t vsx_f32x4_splat(float x)
#define vsx_i64x2_extract_lane(a, i)
static v128_t vsx_i32x4_shl(v128_t a, int n)
static v128_t vsx_i32x4_add(v128_t a, v128_t b)
__vector unsigned char v128_t
static v128_t vsx_v128_andnot(v128_t a, v128_t b)
static void vsx_v128_store(void *p, v128_t a)
static v128_t vsx_v128_and(v128_t a, v128_t b)
static v128_t vsx_i64x2_shl(v128_t a, int n)
static v128_t vsx_v128_or(v128_t a, v128_t b)
static v128_t vsx_i32x4_lt(v128_t a, v128_t b)
#define vsx_i64x2_shuffle(a, b, c0, c1)
#define vsx_i32x4_shuffle(a, b, c0, c1, c2, c3)
static v128_t vsx_i64x2_splat(long long x)
static v128_t vsx_i64x2_shr(v128_t a, int n)
static v128_t vsx_i64x2_add(v128_t a, v128_t b)
static v128_t vsx_i32x4_splat(int x)
static v128_t vsx_v128_load(const void *p)
static v128_t vsx_i32x4_shr(v128_t a, int n)
#define vsx_i32x4_extract_lane(a, i)
static v128_t vsx_i32x4_trunc_sat_f32x4(v128_t a)
static v128_t vsx_i32x4_gt(v128_t a, v128_t b)