1155 ui32 missing_msbs,
ui32 num_passes,
1160 static bool insufficient_precision =
false;
1161 static bool modify_code =
false;
1162 static bool truncate_spp_mrp =
false;
1164 if (num_passes > 1 && lengths2 == 0)
1166 OJPH_WARN(0x00010001,
"A malformed codeblock that has more than "
1167 "one coding pass, but zero length for "
1168 "2nd and potential 3rd pass.\n");
1174 OJPH_WARN(0x00010002,
"We do not support more than 3 coding passes; "
1175 "This codeblocks has %d passes.\n",
1180 if (missing_msbs > 30)
1182 if (insufficient_precision ==
false)
1184 insufficient_precision =
true;
1185 OJPH_WARN(0x00010003,
"32 bits are not enough to decode this "
1186 "codeblock. This message will not be "
1187 "displayed again.\n");
1191 else if (missing_msbs == 30)
1193 if (modify_code ==
false) {
1195 OJPH_WARN(0x00010004,
"Not enough precision to decode the cleanup "
1196 "pass. The code can be modified to support "
1197 "this case. This message will not be "
1198 "displayed again.\n");
1202 else if (missing_msbs == 29)
1204 if (num_passes > 1) {
1206 if (truncate_spp_mrp ==
false) {
1207 truncate_spp_mrp =
true;
1208 OJPH_WARN(0x00010005,
"Not enough precision to decode the SgnProp "
1209 "nor MagRef passes; both will be skipped. "
1210 "This message will not be displayed "
1215 ui32 p = 30 - missing_msbs;
1221 OJPH_WARN(0x00010006,
"Wrong codeblock length.\n");
1227 lcup = (int)lengths1;
1229 scup = (((int)coded_data[lcup-1]) << 4) + (coded_data[lcup-2] & 0xF);
1230 if (scup < 2 || scup > lcup || scup > 4079)
1248 ui16 scratch[8 * 513] = {0};
1256 ui32 sstr = ((width + 2u) + 7u) & ~7u;
1258 assert((stride & 0x3) == 0);
1260 ui32 mmsbp2 = missing_msbs + 2;
1272 mel_init(&mel, coded_data, lcup, scup);
1274 rev_init(&vlc, coded_data, lcup, scup);
1284 for (
ui32 x = 0; x < width; sp += 4)
1303 t0 = (run == -1) ? t0 : 0;
1317 c_q = ((t0 & 0x10U) << 3) | ((t0 & 0xE0U) << 2);
1326 t1 =
vlc_tbl0[c_q + (vlc_val & 0x7F)];
1329 if (c_q == 0 && x < width)
1334 t1 = (run == -1) ? t1 : 0;
1339 t1 = x < width ? t1 : 0;
1348 c_q = ((t1 & 0x10U) << 3) | ((t1 & 0xE0U) << 2);
1356 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1357 if (uvlc_mode == 0xc0)
1361 uvlc_mode += (run == -1) ? 0x40 : 0;
1378 ui32 len = uvlc_entry & 0xF;
1379 ui32 tmp = vlc_val & ((1 << len) - 1);
1383 len = uvlc_entry & 0x7;
1385 ui16 u_q = (
ui16)(1 + (uvlc_entry&7) + (tmp&~(0xFFU<<len)));
1387 u_q = (
ui16)(1 + (uvlc_entry >> 3) + (tmp >> len));
1393 for (
ui32 y = 2; y < height; y += 2)
1396 ui16 *sp = scratch + (y >> 1) * sstr;
1398 for (
ui32 x = 0; x < width; sp += 4)
1404 c_q |= ((sp[0 - (
si32)sstr] & 0xA0U) << 2);
1405 c_q |= ((sp[2 - (
si32)sstr] & 0x20U) << 4);
1421 t0 = (run == -1) ? t0 : 0;
1436 c_q = ((t0 & 0x40U) << 2) | ((t0 & 0x80U) << 1);
1438 c_q |= sp[0 - (
si32)sstr] & 0x80;
1440 c_q |= ((sp[2 - (
si32)sstr] & 0xA0U) << 2);
1441 c_q |= ((sp[4 - (
si32)sstr] & 0x20U) << 4);
1450 t1 =
vlc_tbl1[ c_q + (vlc_val & 0x7F)];
1453 if (c_q == 0 && x < width)
1458 t1 = (run == -1) ? t1 : 0;
1463 t1 = x < width ? t1 : 0;
1473 c_q = ((t1 & 0x40U) << 2) | ((t1 & 0x80U) << 1);
1475 c_q |= sp[2 - (
si32)sstr] & 0x80;
1483 ui32 uvlc_mode = ((t0 & 0x8U) << 3) | ((t1 & 0x8U) << 4);
1489 ui32 len = uvlc_entry & 0xF;
1490 ui32 tmp = vlc_val & ((1 << len) - 1);
1494 len = uvlc_entry & 0x7;
1496 ui16 u_q = (
ui16)((uvlc_entry & 7) + (tmp & ~(0xFU << len)));
1498 u_q = (
ui16)((uvlc_entry >> 3) + (tmp >> len));
1521 const int v_n_size = 512 + 8;
1522 ui32 v_n_scratch[2 * v_n_size] = {0};
1529 ui32 *vp = v_n_scratch;
1530 ui32 *dp = decoded_data;
1533 for (
ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1568 for (
ui32 y = 2; y < height; y += 2)
1572 ui32 *vp = v_n_scratch;
1574 31, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
1577 31, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1583 for (
ui32 x = 0; x <= width; x += 8, vp += 4)
1607 ui32 *vp = v_n_scratch;
1608 ui16 *sp = scratch + (y >> 1) * sstr;
1609 ui32 *dp = decoded_data + y * stride;
1612 for (
ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1619 v128_t gamma, emax, kappa, u_q;
1674 const int v_n_size = 512 + 8;
1675 ui16 v_n_scratch[2 * v_n_size] = {0};
1679 const ui32 dbuf_cap = 4096 * 15 / 8;
1680 ui8 dbuf[dbuf_cap + 72];
1687 ui16 *vp = v_n_scratch;
1688 ui32 *dp = decoded_data;
1691 for (
ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1717 -1, 0x0908, -1, 0x0D0C));
1721 -1, 0x0B0A, -1, 0x0F0E));
1726 for (
ui32 y = 2; y < height; y += 2)
1730 ui16 *vp = v_n_scratch;
1732 15, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
1735 15, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1740 for (
ui32 x = 0; x <= width; x += 16, vp += 8)
1760 ui16 *vp = v_n_scratch;
1761 ui16 *sp = scratch + (y >> 1) * sstr;
1762 ui32 *dp = decoded_data + y * stride;
1765 for (
ui32 x = 0; x < width; x += 4, sp += 4, vp += 2, dp += 4)
1772 v128_t gamma, emax, kappa, u_q;
1787 0x0504, -1, 0x0706, -1));
1811 -1, 0x0908, -1, 0x0D0C));
1815 -1, 0x0B0A, -1, 0x0F0E));
1830 ui16*
const sigma = scratch;
1832 ui32 mstr = (width + 3u) >> 2;
1834 mstr = ((mstr + 2u) + 7u) & ~7u;
1845 for (y = 0; y < height; y += 4)
1847 ui16* sp = scratch + (y >> 1) * sstr;
1848 ui16* dp = sigma + (y >> 2) * mstr;
1849 for (
ui32 x = 0; x < width; x += 8, sp += 8, dp += 2)
1851 v128_t s0, s1, u3, uC, t0, t1;
1875 ui16* dp = sigma + (y >> 2) * mstr;
1877 for (
ui32 x = 0; x < width; x += 32, dp += 8)
1894 ui16 prev_row_sig[256 + 8] = {0};
1897 frwd_init<0>(&sigprop, coded_data + lengths1, (
int)lengths2);
1899 for (
ui32 y = 0; y < height; y += 4)
1901 ui32 pattern = 0xFFFFu;
1902 if (height - y < 4) {
1904 if (height - y < 3) {
1914 ui16 *prev_sig = prev_row_sig;
1915 ui16 *cur_sig = sigma + (y >> 2) * mstr;
1916 ui32 *dpp = decoded_data + y * stride;
1917 for (
ui32 x = 0; x < width; x += 4, dpp += 4, ++cur_sig, ++prev_sig)
1922 pattern = pattern >> (s * 4);
1936 ui32 ps; memcpy(&ps, prev_sig,
sizeof(ps));
1937 ui32 ns; memcpy(&ns, cur_sig + mstr,
sizeof(ns));
1938 ui32 u = (ps & 0x88888888) >> 3;
1940 u |= (ns & 0x11111111) << 3;
1942 ui32 cs; memcpy(&cs, cur_sig,
sizeof(cs));
1945 mbr |= (cs & 0x77777777) << 1;
1946 mbr |= (cs & 0xEEEEEEEE) >> 1;
1966 ui32 col_mask = 0xFu;
1967 ui32 inv_sig = ~cs & pattern;
1968 for (
int i = 0; i < 16; i += 4, col_mask <<= 4)
1970 if ((col_mask & new_sig) == 0)
1974 ui32 sample_mask = 0x1111u & col_mask;
1975 if (new_sig & sample_mask)
1977 new_sig &= ~sample_mask;
1980 ui32 t = 0x33u << i;
1981 new_sig |= t & inv_sig;
1987 if (new_sig & sample_mask)
1989 new_sig &= ~sample_mask;
1992 ui32 t = 0x76u << i;
1993 new_sig |= t & inv_sig;
1999 if (new_sig & sample_mask)
2001 new_sig &= ~sample_mask;
2004 ui32 t = 0xECu << i;
2005 new_sig |= t & inv_sig;
2011 if (new_sig & sample_mask)
2013 new_sig &= ~sample_mask;
2016 ui32 t = 0xC8u << i;
2017 new_sig |= t & inv_sig;
2029 vsx_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2037 v128_t ex_sum, shfl, inc_sum = new_sig_vec;
2040 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2043 7, 8, 9, 10, 11, 12, 13, 14);
2054 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2060 vsx_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2073 0,-1,-1,-1,4,-1,-1,-1,8,-1,-1,-1,12,-1,-1,-1);
2076 for (
int c = 0; c < 4; ++ c) {
2077 v128_t s0, s0_ns, s0_val;
2106 *prev_sig = (
ui16)(new_sig);
2110 new_sig |= (t & 0x7777) << 1;
2111 new_sig |= (t & 0xEEEE) >> 1;
2124 rev_init_mrp(&magref, coded_data, (
int)lengths1, (
int)lengths2);
2126 for (
ui32 y = 0; y < height; y += 4)
2128 ui16 *cur_sig = sigma + (y >> 2) * mstr;
2129 ui32 *dpp = decoded_data + y * stride;
2130 for (
ui32 i = 0; i < width; i += 4, dpp += 4)
2135 ui16 sig = *cur_sig++;
2145 vsx_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2154 v128_t ex_sum, shfl, inc_sum = sig_vec;
2156 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2159 7, 8, 9, 10, 11, 12, 13, 14);
2170 15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
2179 vsx_i8x16_const(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1));
2192 8,-1,-1,-1,12,-1,-1,-1);
2194 for (
int c = 0; c < 4; ++c) {
2195 v128_t s0, s0_sig, s0_idx, s0_val;