takehiro.cpp

/*	MP3 huffman table selecting and bit counting
 *	Copyright (c) 1999-2005 Takehiro TOMINAGA
 *	Copyright (c) 2002-2005 Gabriel Bouvigne
 */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif


#include "lame.h"
#include "machine.h"
#include "encoder.h"
#include "util.h"
#include "quantize_pvt.h"
#include "tables.h"


/* Starting region0_count / region1_count values, indexed by a number of
 * scalefactor bands (0..22).  huffman_init() reads these as the initial
 * guesses that it then walks downward while filling sv_qnt.bv_scf. */
static const struct {
  char region0_count;   /* initial region0_count for this band count */
  char region1_count;   /* initial region1_count for this band count */
}subdv_table[23] = {
    {0, 0},              /* 0 bands */
    {0, 0},              /* 1 bands */
    {0, 0},              /* 2 bands */
    {0, 0},              /* 3 bands */
    {0, 0},              /* 4 bands */
    {0, 1},              /* 5 bands */
    {1, 1},              /* 6 bands */
    {1, 1},              /* 7 bands */
    {1, 2},              /* 8 bands */
    {2, 2},              /* 9 bands */
    {2, 3},              /* 10 bands */
    {2, 3},              /* 11 bands */
    {3, 4},              /* 12 bands */
    {3, 4},              /* 13 bands */
    {3, 4},              /* 14 bands */
    {4, 5},              /* 15 bands */
    {4, 5},              /* 16 bands */
    {4, 6},              /* 17 bands */
    {5, 6},              /* 18 bands */
    {5, 6},              /* 19 bands */
    {5, 7},              /* 20 bands */
    {6, 7},              /* 21 bands */
    {6, 7},              /* 22 bands */
};

/*********************************************************************
 * nonlinear quantization of xr 
 * More accurate formula than the ISO formula.  Takes into account
 * the fact that we are quantizing xr -> ix, but we want ix^4/3 to be 
 * as close as possible to x^4/3.  (taking the nearest int would mean
 * ix is as close as possible to xr, which is different.)
 *
 * From Segher Boessenkool <segher@eastsite.nl>  11/1999
 *
 * 09/2000: ASM code removed in favor of IEEE754 hack by Takehiro
 * Tominaga. If you need the ASM code, check CVS circa Aug 2000.
 *
 * 01/2004: Optimizations by Gabriel Bouvigne
 *********************************************************************/

static void quantize_lines_xrpow_01(unsigned int l, FLOAT istep, const FLOAT *xr, int *ix) {
    const FLOAT compareval0 = (1.0f - 0.4054f) / istep;
    unsigned int i;

    assert(l > 0);
    assert(l % 2 == 0);
    for (i = 0; i < l; i += 2) {
        FLOAT const xr_0 = xr[i+0];
        FLOAT const xr_1 = xr[i+1];
        int const ix_0 = (compareval0 > xr_0) ? 0 : 1;
        int const ix_1 = (compareval0 > xr_1) ? 0 : 1;
        ix[i+0] = ix_0;
        ix[i+1] = ix_1;
    }
}



#ifdef TAKEHIRO_IEEE754_HACK

/* Overlay of a float and an int on the same storage, used by the
 * TAKEHIRO_IEEE754_HACK quantizer to store a value as a float and read
 * back its bit pattern as an integer.
 * NOTE(review): presumes float is IEEE-754 binary32 and int is 32 bits
 * wide -- confirm for the target platform. */
union fi_union{
  float   f;
  int     i;
};

/* 65536 * 128 == 8388608 == 2^23 */
#define MAGIC_FLOAT (65536*(128))
/* bit pattern of the binary32 value 2^23 (exponent field 0x96, zero mantissa) */
#define MAGIC_INT 0x4b000000


/*
 * Quantize lines with the float bit-pattern trick (TAKEHIRO_IEEE754_HACK).
 *
 *   l      number of lines; l >> 1 pairs are processed, so an odd trailing
 *          line is not touched
 *   istep  multiplier applied to every input line
 *   xp     input lines
 *   pi     output integers; the buffer is also used as float scratch space
 *          through the fi_union overlay
 *
 * For each line: x = istep * xp[k] + MAGIC_FLOAT is stored as a float, its
 * bit pattern minus MAGIC_INT indexes adj43asm[], the looked-up adjustment
 * is added to x, the sum is stored as a float again, and MAGIC_INT is then
 * subtracted from the bit pattern, leaving the integer result in pi[k].
 * NOTE(review): the trick presumably relies on 0 <= istep * xp[k] < 2^23
 * and round-to-nearest float stores -- confirm against the callers.
 */
static void quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT * xp, int *pi) {
    fi_union *fi;
    unsigned int remaining;

    assert(l > 0);

    /* view the int output buffer through the float/int overlay */
    fi = (fi_union *) pi;

    l = l >> 1;             /* number of pairs */
    remaining = l % 2;      /* nonzero when one pair is left after the groups of four */
    l = l >> 1;             /* number of groups of four lines */
    while (l--) {
        double  x0 = istep * xp[0];
        double  x1 = istep * xp[1];
        double  x2 = istep * xp[2];
        double  x3 = istep * xp[3];

        /* first pass: store x + 2^23 as float so the bit pattern can be
           turned into a table index */
        x0 += MAGIC_FLOAT;
        fi[0].f = x0;
        x1 += MAGIC_FLOAT;
        fi[1].f = x1;
        x2 += MAGIC_FLOAT;
        fi[2].f = x2;
        x3 += MAGIC_FLOAT;
        fi[3].f = x3;

        /* second pass: add the adj43asm correction selected by that index */
        fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
        fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];
        fi[2].f = x2 + adj43asm[fi[2].i - MAGIC_INT];
        fi[3].f = x3 + adj43asm[fi[3].i - MAGIC_INT];

        /* strip the magic offset: what remains in each slot is the int result */
        fi[0].i -= MAGIC_INT;
        fi[1].i -= MAGIC_INT;
        fi[2].i -= MAGIC_INT;
        fi[3].i -= MAGIC_INT;
        fi += 4;
        xp += 4;
    };
    /* same three steps for the single left-over pair, if any */
    if (remaining) {
        double  x0 = istep * xp[0];
        double  x1 = istep * xp[1];

        x0 += MAGIC_FLOAT;
        fi[0].f = x0;
        x1 += MAGIC_FLOAT;
        fi[1].f = x1;

        fi[0].f = x0 + adj43asm[fi[0].i - MAGIC_INT];
        fi[1].f = x1 + adj43asm[fi[1].i - MAGIC_INT];

        fi[0].i -= MAGIC_INT;
        fi[1].i -= MAGIC_INT;
    }

}


#else

/*********************************************************************
 * XRPOW_FTOI is a macro to convert floats to ints.
 * if XRPOW_FTOI(x) = nearest_int(x), then QUANTFAC(x)=adj43asm[x]
 *                                         ROUNDFAC= -0.0946
 *
 * if XRPOW_FTOI(x) = floor(x), then QUANTFAC(x)=adj43[x]
 *                                   ROUNDFAC=0.4054
 *
 * Note: using floor() or (int) is extremely slow. On machines where
 * the TAKEHIRO_IEEE754_HACK code above does not work, it is worthwhile
 * to write some ASM for XRPOW_FTOI().
 *********************************************************************/
#define XRPOW_FTOI(src,dest) ((dest) = (int)(src))
#define QUANTFAC(rx)  adj43[rx]
#define ROUNDFAC 0.4054


static void quantize_lines_xrpow(unsigned int l, FLOAT istep, const FLOAT *xr, int *ix) {
    unsigned int remaining;

    assert(l > 0);

    l = l >> 1;
    remaining = l % 2;
    l = l >> 1;
    while (l--) {
        FLOAT   x0, x1, x2, x3;
        int     rx0, rx1, rx2, rx3;

        x0 = *xr++ * istep;
        x1 = *xr++ * istep;
        XRPOW_FTOI(x0, rx0);
        x2 = *xr++ * istep;
        XRPOW_FTOI(x1, rx1);
        x3 = *xr++ * istep;
        XRPOW_FTOI(x2, rx2);
        x0 += QUANTFAC(rx0);
        XRPOW_FTOI(x3, rx3);
        x1 += QUANTFAC(rx1);
        XRPOW_FTOI(x0, *ix++);
        x2 += QUANTFAC(rx2);
        XRPOW_FTOI(x1, *ix++);
        x3 += QUANTFAC(rx3);
        XRPOW_FTOI(x2, *ix++);
        XRPOW_FTOI(x3, *ix++);
    };
    if (remaining) {
        FLOAT   x0, x1;
        int     rx0, rx1;

        x0 = *xr++ * istep;
        x1 = *xr++ * istep;
        XRPOW_FTOI(x0, rx0);
        XRPOW_FTOI(x1, rx1);
        x0 += QUANTFAC(rx0);
        x1 += QUANTFAC(rx1);
        XRPOW_FTOI(x0, *ix++);
        XRPOW_FTOI(x1, *ix++);
    }

}



#endif



/*********************************************************************
 * Quantization function
 * This function will select which lines to quantize and call the
 * proper quantization function
 *********************************************************************/

/*
 * Walk the scalefactor bands of one granule and quantize the lines that
 * need it.
 *
 *   xp          input lines, advanced band by band
 *   pi          output integers, advanced in step with xp
 *   istep       multiplier passed through to the line quantizers
 *   cod_info    granule description; block_type, global_gain, scalefac[],
 *               preflag, scalefac_scale, subblock_gain[], window[], width[]
 *               and max_nonzero_coeff are read
 *   prev_noise  may be null; when present its global_gain, step[] and
 *               sfb_count1 decide which bands are skipped or sent to the
 *               0/1 quantizer
 *
 * Consecutive bands that use the same quantizer are accumulated and handed
 * to quantize_lines_xrpow() or quantize_lines_xrpow_01() in a single call.
 */
static void quantize_xrpow(const FLOAT * xp, int *pi, FLOAT istep, gr_info const & cod_info,
               calc_noise_data const *prev_noise) {
    /* quantize on xr^(3/4) instead of xr */
    int     sfb;
    int     sfbmax;
    int     j = 0;              /* index of the first line of the current band */
    int     prev_data_use;
    int    *iData;              /* output position of the current band */
    int     accumulate = 0;     /* pending line count for quantize_lines_xrpow */
    int     accumulate01 = 0;   /* pending line count for quantize_lines_xrpow_01 */
    int    *acc_iData;          /* output position where the pending run starts */
    const FLOAT *acc_xp;        /* input position where the pending run starts */

    iData = pi;
    acc_xp = xp;
    acc_iData = iData;


    /* Reusing previously computed data does not seem to work if the global
       gain is changed. Finding out why it behaves this way would allow the
       use of a cache of previously computed values (say 10 cached values
       per sfb), which would probably provide a noticeable speedup. */
    prev_data_use = (prev_noise && (cod_info.global_gain == prev_noise->global_gain));

    /* last band index visited: 38 for short blocks, 21 otherwise */
    if (cod_info.block_type == SHORT_TYPE)
        sfbmax = 38;
    else
        sfbmax = 21;

    for (sfb = 0; sfb <= sfbmax; sfb++) {
        int     step = -1;

        /* the band's step is only computed when it can be compared with
           prev_noise, or for NORM_TYPE blocks; otherwise it stays -1 */
        if (prev_data_use || cod_info.block_type == NORM_TYPE) {
            step =
                cod_info.global_gain
                - ((cod_info.scalefac[sfb] + (cod_info.preflag ? pretab[sfb] : 0))
                   << (cod_info.scalefac_scale + 1))
                - cod_info.subblock_gain[cod_info.window[sfb]] * 8;
        }
        assert(cod_info.width[sfb] >= 0);
        if (prev_data_use && (prev_noise->step[sfb] == step)) {
            /* do not recompute this part,
               but compute accumulated lines */
            if (accumulate) {
                quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
                accumulate = 0;
            }
            if (accumulate01) {
                quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
                accumulate01 = 0;
            }
        }
        else {          /*should compute this part */
            int     l;
            l = cod_info.width[sfb];

            if ((j + cod_info.width[sfb]) > cod_info.max_nonzero_coeff) {
                /*do not compute upper zero part */
                int     usefullsize;
                /* lines of this band from j up to and including max_nonzero_coeff */
                usefullsize = cod_info.max_nonzero_coeff - j + 1;
                /* zero pi[max_nonzero_coeff .. 575]; the pending run is
                   quantized after this memset */
                memset(&pi[cod_info.max_nonzero_coeff], 0,
                       sizeof(int) * (576 - cod_info.max_nonzero_coeff));
                l = usefullsize;

                if (l < 0) {
                    l = 0;
                }

                /* no need to compute higher sfb values */
                sfb = sfbmax + 1;
            }

            /*accumulate lines to quantize */
            if (!accumulate && !accumulate01) {
                /* nothing pending: a new run starts at this band */
                acc_iData = iData;
                acc_xp = xp;
            }
            /* bands at or above prev_noise->sfb_count1 whose step did not
               drop below the previous (positive) step go to the 0/1 quantizer */
            if (prev_noise &&
                prev_noise->sfb_count1 > 0 &&
                sfb >= prev_noise->sfb_count1 &&
                prev_noise->step[sfb] > 0 && step >= prev_noise->step[sfb]) {

                if (accumulate) {
                    /* flush the pending full-quantizer run before switching */
                    quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
                    accumulate = 0;
                    acc_iData = iData;
                    acc_xp = xp;
                }
                accumulate01 += l;
            }
            else {
                if (accumulate01) {
                    /* flush the pending 0/1 run before switching */
                    quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
                    accumulate01 = 0;
                    acc_iData = iData;
                    acc_xp = xp;
                }
                accumulate += l;
            }

            if (l <= 0) {
                /*  rh: 20040215
                 *  may happen due to "prev_data_use" optimization 
                 */
                if (accumulate01) {
                    quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
                    accumulate01 = 0;
                }
                if (accumulate) {
                    quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
                    accumulate = 0;
                }

                break;  /* ends for-loop */
            }
        }
        /* advance to the next band, unless sfb was pushed past sfbmax above */
        if (sfb <= sfbmax) {
            iData += cod_info.width[sfb];
            xp += cod_info.width[sfb];
            j += cod_info.width[sfb];
        }
    }
    if (accumulate) {   /*last data part */
        quantize_lines_xrpow(accumulate, istep, acc_xp, acc_iData);
        accumulate = 0;
    }
    if (accumulate01) { /*last data part */
        quantize_lines_xrpow_01(accumulate01, istep, acc_xp, acc_iData);
        accumulate01 = 0;
    }

}


/*************************************************************************/
/*	      ix_max							 */
/*************************************************************************/

/*
 * Largest value in [ix, end), never less than 0.
 *
 * Values are consumed in pairs, and the first pair is always read, so the
 * range must hold at least two ints.
 */
static int ix_max(const int *ix, const int *end) {
    int     even_peak = 0;      /* running maximum of ix[0], ix[2], ... */
    int     odd_peak = 0;       /* running maximum of ix[1], ix[3], ... */

    do {
        if (ix[0] > even_peak)
            even_peak = ix[0];
        if (ix[1] > odd_peak)
            odd_peak = ix[1];
        ix += 2;
    } while (ix < end);

    return (even_peak < odd_peak) ? odd_peak : even_peak;
}


/*
 * Bit count for a region coded with an ESC (linbits) table, evaluated for
 * two candidate tables at once.
 *
 *   ix, end  region to count, consumed as pairs (at least one pair is read)
 *   t1, t2   the two candidate tables
 *   s        running bit total; the cheaper candidate's cost is added
 *
 * Returns t1, or t2 when t2 is strictly cheaper.
 *
 * The cost for t1 is accumulated in the upper 16 bits of one unsigned and
 * the cost for t2 in the lower 16 bits.
 */
static int count_bit_ESC(const int *ix, const int *const end, int t1, const int t2, unsigned &s) {
    /* ESC-table is used */
    unsigned const esc_cost = ht[t1].xlen * 65536u + ht[t2].xlen;
    unsigned packed = 0;

    do {
        unsigned hi = ix[0];
        unsigned lo = ix[1];
        ix += 2;

        /* values of 15 and above are clamped to 15 and pay the extra cost */
        if (hi >= 15u) {
            packed += esc_cost;
            hi = 15u;
        }
        if (lo >= 15u) {
            packed += esc_cost;
            lo = 15u;
        }
        packed += largetbl[(hi << 4u) + lo];
    } while (ix < end);

    unsigned const bits_t1 = packed >> 16u;
    unsigned const bits_t2 = packed & 0xffffu;

    if (bits_t1 > bits_t2) {
        s += bits_t2;
        return t2;
    }
    s += bits_t1;
    return t1;
}


/*
 * Bit count for a region coded with table 1.
 *
 *   ix, end  region to count, consumed as pairs (at least one pair is read)
 *   mx       unused; kept so the signature matches count_fnc
 *   s        running bit total; this region's cost is added
 *
 * Always returns 1.
 */
static int count_bit_noESC(const int *ix, const int *end, int mx, unsigned &s) {
    /* No ESC-words */
    (void) mx;
    const uint8_t *const lengths = ht[1].hlen;
    unsigned int bits = 0;

    do {
        unsigned int const first = ix[0];
        unsigned int const second = ix[1];
        bits += lengths[first * 2u + second];
        ix += 2;
    } while (ix < end);

    s += bits;
    return 1;
}


/* Indexed by (max - 1) for a region whose largest value is max (1..15):
 * the first Huffman table tried for that maximum.
 * count_bit_noESC_from2() also tries the next table and
 * count_bit_noESC_from3() the next two. */
static const int huf_tbl_noESC[] = {
    1, 2, 5, 7, 7, 10, 10, 13, 13, 13, 13, 13, 13, 13, 13
};


static int count_bit_noESC_from2(const int *ix, const int *end, int max, unsigned &s)
{
    int t1 = huf_tbl_noESC[max - 1];
    /* No ESC-words */
    const unsigned int xlen = ht[t1].xlen;
    uint32_t const* table = (t1 == 2) ? &table23[0] : &table56[0];
    unsigned int sum = 0, sum2;

    do {
        unsigned int const x0 = *ix++;
        unsigned int const x1 = *ix++;
        sum += table[ x0 * xlen + x1 ];
    } while (ix < end);

    sum2 = sum & 0xffffu;
    sum >>= 16u;

    if (sum > sum2) {
        sum = sum2;
        t1++;
    }

    s += sum;
    return t1;
}


/*
 * Bit count for a region where three consecutive tables are candidates.
 *
 *   ix, end  region to count, consumed as pairs (at least one pair is read)
 *   max      largest value in the region; selects the first candidate
 *            through huf_tbl_noESC[max - 1]
 *   s        running bit total; the cheapest candidate's cost is added
 *
 * Returns the cheapest of the three tables; on a tie the lower-numbered
 * table wins.
 */
inline static int count_bit_noESC_from3(const int *ix, const int *end, int max, unsigned &s)
{
    int const base = huf_tbl_noESC[max - 1];
    /* No ESC-words */
    unsigned int const row_len = ht[base].xlen;
    const uint8_t *const len_a = ht[base].hlen;
    const uint8_t *const len_b = ht[base + 1].hlen;
    const uint8_t *const len_c = ht[base + 2].hlen;
    unsigned int bits_a = 0;
    unsigned int bits_b = 0;
    unsigned int bits_c = 0;

    do {
        unsigned int const hi = ix[0];
        unsigned int const lo = ix[1];
        unsigned int const idx = hi * row_len + lo;
        ix += 2;
        bits_a += len_a[idx];
        bits_b += len_b[idx];
        bits_c += len_c[idx];
    } while (ix < end);

    int     best = base;
    unsigned int best_bits = bits_a;
    if (best_bits > bits_b) {
        best_bits = bits_b;
        best = base + 1;
    }
    if (best_bits > bits_c) {
        best_bits = bits_c;
        best = base + 2;
    }
    s += best_bits;

    return best;
}


/*************************************************************************/
/*	      choose table						 */
/*************************************************************************/

/*
  Choose the Huffman table that will encode ix[begin..end] with
  the fewest bits.

  Note: This code contains knowledge about the sizes and characteristics
  of the Huffman tables as defined in the IS (Table B.7), and will not work
  with any arbitrary tables.
*/
/* Counter for an all-zero region (entry 0 of count_fncs): it adds nothing
 * to the bit total and reports table 0. */
static int count_bit_null(const int * /*ix*/, const int * /*end*/, int /*max*/, unsigned & /*s*/) {
    int const table = 0;
    return table;
}

/* Signature shared by the non-ESC bit counters: (begin, end, max value,
 * running bit total) -> chosen table. */
typedef int (*count_fnc)(const int*, const int*, int, unsigned &);

/* Dispatch table indexed by the largest value in the region (0..15);
 * choose_table_nonMMX() calls count_fncs[max] when max <= 15. */
static const count_fnc count_fncs[] = {
  count_bit_null,           /* max == 0 */
  count_bit_noESC,          /* max == 1 */
  count_bit_noESC_from2,    /* max == 2 */
  count_bit_noESC_from2,    /* max == 3 */
  count_bit_noESC_from3,    /* max == 4 .. 15 */
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3,
  count_bit_noESC_from3};

/*
 * Pick the Huffman table for [ix, end) and account for its cost.
 *
 *   ix, end  region to code (at least one pair)
 *   _s       running bit total; the region's cost is added to it, or it is
 *            set to LARGE_BITS when the region cannot be coded
 *
 * Returns the chosen table number, or -1 when the largest value exceeds
 * IXMAX_VAL.
 */
static int choose_table_nonMMX(const int *ix, const int *const end, int &_s)
{
    /* the counters work on an unsigned view of the caller's total */
    unsigned &bits = *(unsigned*)&_s;
    unsigned int peak = ix_max(ix, end);

    if (peak <= 15) {
      /* no linbits needed: dispatch on the maximum */
      return count_fncs[peak](ix, end, peak, bits);
    }
    /* try tables with linbits */
    if (peak > IXMAX_VAL) {
        bits = LARGE_BITS;
        return -1;
    }

    unsigned int const linmax_needed = peak - 15u;

    /* first table in 24..31 whose linmax covers the excess */
    int     wide = 24;
    while (wide < 32 && !(ht[wide].linmax >= linmax_needed)) {
        ++wide;
    }

    /* matching candidate in the lower group, searched from wide - 8 up to 23 */
    int     narrow = wide - 8;
    while (narrow < 24 && !(ht[narrow].linmax >= linmax_needed)) {
        ++narrow;
    }

    return count_bit_ESC(ix, end, narrow, wide, bits);
}



/*************************************************************************/
/*	      count_bit							 */
/*************************************************************************/
/*
 * Count the Huffman bits for already-quantized data in gi.l3_enc and fill
 * in the region layout of gi.
 *
 *   gi          granule; l3_enc, max_nonzero_coeff and block_type are read;
 *               count1, count1bits, count1table_select, big_values,
 *               region0_count, region1_count and table_select[] are written
 *               (best_huffman_divide() may rewrite more when enabled)
 *   prev_noise  may be null; when present its sfb_count1 is reset to 0 and,
 *               for NORM_TYPE blocks, set at the end
 *
 * Returns the number of bits counted.
 */
int lame_internal_flags::noquant_count_bits(gr_info &gi, calc_noise_data *prev_noise)const{
//  SessionConfig_t const & cfg = gfc.cfg;
  int     bits = 0;
  int     i, a1, a2;
  int const * ix = gi.l3_enc;

  /* start at the even index just above max_nonzero_coeff, capped at 576 */
  i = Min(576, ((gi.max_nonzero_coeff + 2) >> 1) << 1);
  if (prev_noise) prev_noise->sfb_count1 = 0;
    /* Determine count1 region */
  for (; i > 1; i -= 2) if (ix[i - 1] | ix[i - 2]) break;
  gi.count1 = i;
    /* Determines the number of bits to encode the quadruples. */
  a1 = a2 = 0;
  for (; i > 3; i -= 4) {
    int x4 = ix[i-4];
    int x3 = ix[i-3];
    int x2 = ix[i-2];
    int x1 = ix[i-1];
    int     p;
        /* hack to check if all values <= 1 */
    if ((unsigned int) (x4 | x3 | x2 | x1) > 1) break;
    /* 4-bit index built from the quadruple, x4 being the top bit */
    p = ((x4 * 2 + x3) * 2 + x2) * 2 + x1;
    a1 += t32l[p];
    a2 += t33l[p];
  }
  /* keep whichever of the two quadruple tables is cheaper */
  bits = a1;
  gi.count1table_select = 0;
  if (a1 > a2) {
    bits = a2;
    gi.count1table_select = 1;
  }
  gi.count1bits = bits;
  /* everything below i is coded as big values */
  gi.big_values = i;
  if (!i) return bits;
  if (gi.block_type == SHORT_TYPE) {
    /* two regions: split at 3 * scalefac_band.s[3], capped at big_values */
    a1 = 3 * scalefac_band.s[3];
    if (a1 > gi.big_values) a1 = gi.big_values;
    a2 = gi.big_values;
  }else if (gi.block_type == NORM_TYPE) {
    assert(i <= 576); /* bv_scf has 576 entries (0..575) */
    /* region counts come from the table filled by huffman_init() */
    a1 = gi.region0_count = sv_qnt.bv_scf[i - 2];
    a2 = gi.region1_count = sv_qnt.bv_scf[i - 1];
    assert(a1 + a2 + 2 < SBPSY_l);
    /* convert the region counts into line indices */
    a2 = scalefac_band.l[a1 + a2 + 2];
    a1 = scalefac_band.l[a1 + 1];
    if (a2 < i)
     gi.table_select[2] = choose_table(ix + a2, ix + i, bits);
  }else{
    /* remaining block types: fixed region counts */
    gi.region0_count = 7;
        /*gi->region1_count = SBPSY_l - 7 - 1; */
    gi.region1_count = SBMAX_l - 1 - 7 - 1;
    a1 = scalefac_band.l[7 + 1];
    a2 = i;
    if (a1 > a2) a1 = a2;
  }
   /* have to allow for the case when bigvalues < region0 < region1 */
    /* (and region0, region1 are ignored) */
  a1 = Min(a1, i);
  a2 = Min(a2, i);
  assert(a1 >= 0);
  assert(a2 >= 0);
    /* Count the number of bits necessary to code the bigvalues region. */
  if (0 < a1) gi.table_select[0] = choose_table(ix, ix + a1, bits);
  if (a1 < a2) gi.table_select[1] = choose_table(ix + a1, ix + a2, bits);
  if (cfg.use_best_huffman == 2) {
    /* let best_huffman_divide() look for a cheaper division; it works on
       gi.part2_3_length */
    gi.part2_3_length = bits;
    best_huffman_divide(gi);
    bits = gi.part2_3_length;
  }
  if (prev_noise) {
    if (gi.block_type == NORM_TYPE) {
      /* first long band whose start is at or above big_values */
      int     sfb = 0;
      while (scalefac_band.l[sfb] < gi.big_values) sfb++;
      prev_noise->sfb_count1 = sfb;
    }
  }
  return bits;
}

int lame_internal_flags::count_bits(const FLOAT * const xr, gr_info & gi, calc_noise_data * prev_noise)const{
  int *const ix = gi.l3_enc;

    /* since quantize_xrpow uses table lookup, we need to check this first: */
  FLOAT const w = (IXMAX_VAL) / IPOW20(gi.global_gain);

  if (gi.xrpow_max > w) return LARGE_BITS;
  quantize_xrpow(xr, ix, IPOW20(gi.global_gain), gi, prev_noise);
  if (sv_qnt.substep_shaping & 2) {
    int     sfb, j = 0;
        /* 0.634521682242439 = 0.5946*2**(.5*0.1875) */
    int const gain = gi.global_gain + gi.scalefac_scale;
    const FLOAT roundfac = 0.634521682242439 / IPOW20(gain);
    for (sfb = 0; sfb < gi.sfbmax; sfb++) {
      int const width = gi.width[sfb];
      assert(width >= 0);
      if (!sv_qnt.pseudohalf[sfb]) {
        j += width;
      }else{
        int k;
        for (k = j, j += width; k < j; ++k) {
          ix[k] = (xr[k] >= roundfac) ? ix[k] : 0;
        }
      }
    }
  }
  return noquant_count_bits(gi, prev_noise);
}

/***********************************************************************
  re-calculate the best scalefac_compress using scfsi
  the saved bits are kept in the bit reservoir.
 **********************************************************************/


/*
 * Precompute, for every combined region count r0 + r1 (0..22), the cheapest
 * way to code regions 0 and 1.
 *
 *   cod_info  granule; only big_values is read
 *   ix        quantized data
 *   r01_bits  out: best bit count per r0 + r1, LARGE_BITS when not reachable
 *   r01_div   out: the r0 that achieved that count
 *   r0_tbl    out: table chosen for region 0
 *   r1_tbl    out: table chosen for region 1
 *
 * The last three arrays are only written at slots where r01_bits improved.
 */
void lame_internal_flags::recalc_divide_init(gr_info const &cod_info,
                   int const *const ix, int r01_bits[], int r01_div[], int r0_tbl[], int r1_tbl[])const{
    int const bigv = cod_info.big_values;

    for (int slot = 0; slot <= 7 + 15; ++slot) {
        r01_bits[slot] = LARGE_BITS;
    }

    for (int r0 = 0; r0 < 16; ++r0) {
        int const a1 = scalefac_band.l[r0 + 1];
        if (a1 >= bigv) {
            break;          /* region 0 would already cover all big values */
        }

        int     r0bits = 0;
        int const r0t = choose_table(ix, ix + a1, r0bits);

        for (int r1 = 0; r1 < 8; ++r1) {
            int const a2 = scalefac_band.l[r0 + r1 + 2];
            if (a2 >= bigv) {
                break;
            }

            /* the region 1 cost is added on top of the region 0 cost */
            int     bits = r0bits;
            int const r1t = choose_table(ix + a1, ix + a2, bits);
            int const slot = r0 + r1;

            if (r01_bits[slot] > bits) {
                r01_bits[slot] = bits;
                r01_div[slot] = r0;
                r0_tbl[slot] = r0t;
                r1_tbl[slot] = r1t;
            }
        }
    }
}

/*
 * Try every start of region 2 and keep the division that lowers
 * gi.part2_3_length.
 *
 *   cod_info2  candidate granule; big_values and count1bits are read, and
 *              it is copied into gi whenever a cheaper division is found
 *   gi         current best granule; updated in place
 *   ix         quantized data
 *   r01_bits, r01_div, r0_tbl, r1_tbl
 *              tables produced by recalc_divide_init()
 */
void lame_internal_flags::recalc_divide_sub(
 const gr_info * cod_info2, gr_info & gi,
 const int *const ix,
 const int r01_bits[], const int r01_div[], const int r0_tbl[], const int r1_tbl[])const{
    int const bigv = cod_info2->big_values;

    for (int r2 = 2; r2 < SBMAX_l + 1; ++r2) {
        int const a2 = scalefac_band.l[r2];
        if (a2 >= bigv) {
            break;
        }

        int const slot = r2 - 2;
        int     bits = r01_bits[slot] + cod_info2->count1bits;
        if (gi.part2_3_length <= bits) {
            break;          /* regions 0+1 alone already cost too much */
        }

        int const r2t = choose_table(ix + a2, ix + bigv, bits);
        if (gi.part2_3_length > bits) {
            /* cheaper: adopt the candidate and record this division */
            memcpy(&gi, cod_info2, sizeof(gr_info));
            gi.part2_3_length = bits;
            gi.region0_count = r01_div[slot];
            gi.region1_count = slot - r01_div[slot];
            gi.table_select[0] = r0_tbl[slot];
            gi.table_select[1] = r1_tbl[slot];
            gi.table_select[2] = r2t;
        }
    }
}

/*
 * Search for a cheaper region division of gi and, when the last big-value
 * pair only holds 0/1 values, also try moving the big_values / count1
 * boundary.  gi is overwritten whenever a candidate has a smaller
 * part2_3_length.
 *
 * Does nothing for SHORT_TYPE blocks when cfg.mode_gr == 1.
 */
void lame_internal_flags::best_huffman_divide(gr_info & gi)const{
//    SessionConfig_t const & cfg = gfc.cfg;
    int     i, a1, a2;
    gr_info cod_info2;          /* working copy / candidate granule */
    int const *const ix = gi.l3_enc;

    /* per-(r0 + r1) tables filled by recalc_divide_init(); they are only
       initialised in the NORM_TYPE branch below */
    int     r01_bits[7 + 15 + 1];
    int     r01_div[7 + 15 + 1];
    int     r0_tbl[7 + 15 + 1];
    int     r1_tbl[7 + 15 + 1];


    /* SHORT BLOCK stuff fails for MPEG2 */
    if (gi.block_type == SHORT_TYPE && cfg.mode_gr == 1)
        return;


    memcpy(&cod_info2, &gi, sizeof(gr_info));
    if (gi.block_type == NORM_TYPE) {
        /* first pass: best division with the current big_values */
        recalc_divide_init(gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
        recalc_divide_sub(&cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
    }

    /* the second pass only makes sense when the last big-value pair holds
       nothing larger than 1 */
    i = cod_info2.big_values;
    if (i == 0 || (unsigned int) (ix[i - 2] | ix[i - 1]) > 1)
        return;

    /* candidate: count1 region extended by one pair */
    i = gi.count1 + 2;
    if (i > 576)
        return;

    /* Determines the number of bits to encode the quadruples. */
    memcpy(&cod_info2, &gi, sizeof(gr_info));
    cod_info2.count1 = i;
    a1 = a2 = 0;

    assert(i <= 576);

    /* recount the quadruples from the new count1 down to big_values */
    for (; i > cod_info2.big_values; i -= 4) {
        int const p = ((ix[i - 4] * 2 + ix[i - 3]) * 2 + ix[i - 2]) * 2 + ix[i - 1];
        a1 += t32l[p];
        a2 += t33l[p];
    }
    cod_info2.big_values = i;

    /* keep the cheaper of the two quadruple tables */
    cod_info2.count1table_select = 0;
    if (a1 > a2) {
        a1 = a2;
        cod_info2.count1table_select = 1;
    }

    cod_info2.count1bits = a1;

    if (cod_info2.block_type == NORM_TYPE)
        recalc_divide_sub(&cod_info2, gi, ix, r01_bits, r01_div, r0_tbl, r1_tbl);
    else {
        /* Count the number of bits necessary to code the bigvalues region. */
        cod_info2.part2_3_length = a1;
        a1 = scalefac_band.l[7 + 1];
        if (a1 > i) {
            a1 = i;
        }
        if (a1 > 0)
            cod_info2.table_select[0] =
                choose_table(ix, ix + a1, *(int *) &cod_info2.part2_3_length);
        if (i > a1)
            cod_info2.table_select[1] =
                choose_table(ix + a1, ix + i, *(int *) &cod_info2.part2_3_length);
        /* adopt the candidate only when it is strictly cheaper */
        if (gi.part2_3_length > cod_info2.part2_3_length)
            memcpy(&gi, &cod_info2, sizeof(gr_info));
    }
}

/* All four tables are indexed by scalefac_compress (0..15).
 * slen1_tab / slen2_tab give the bit widths of the two scalefactor groups;
 * slen1_n / slen2_n equal 1 << slenN_tab[i], i.e. the exclusive upper bound
 * on a scalefactor that fits in that many bits. */
static const int slen1_n[16] = { 1, 1, 1, 1, 8, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 };
static const int slen2_n[16] = { 1, 2, 4, 8, 1, 2, 4, 8, 2, 4, 8, 2, 4, 8, 4, 8 };
const int slen1_tab[16] = { 0, 0, 0, 0, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 };
const int slen2_tab[16] = { 0, 1, 2, 3, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 3 };

static void scfsi_calc(int ch, III_side_info_t & l3_side)
{
    unsigned int i;
    int     s1, s2, c1, c2;
    int     sfb;
    gr_info & gi = l3_side.tt[1][ch];
    gr_info const *const g0 = &l3_side.tt[0][ch];

    for (i = 0; i < (sizeof(scfsi_band) / sizeof(int)) - 1; i++) {
        for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
            if (g0->scalefac[sfb] != gi.scalefac[sfb]
                && gi.scalefac[sfb] >= 0)
                break;
        }
        if (sfb == scfsi_band[i + 1]) {
            for (sfb = scfsi_band[i]; sfb < scfsi_band[i + 1]; sfb++) {
                gi.scalefac[sfb] = -1;
            }
            l3_side.scfsi[ch][i] = 1;
        }
    }

    s1 = c1 = 0;
    for (sfb = 0; sfb < 11; sfb++) {
        if (gi.scalefac[sfb] == -1)
            continue;
        c1++;
        if (s1 < gi.scalefac[sfb])
            s1 = gi.scalefac[sfb];
    }

    s2 = c2 = 0;
    for (; sfb < SBPSY_l; sfb++) {
        if (gi.scalefac[sfb] == -1)
            continue;
        c2++;
        if (s2 < gi.scalefac[sfb])
            s2 = gi.scalefac[sfb];
    }

    for (i = 0; i < 16; i++) {
        if (s1 < slen1_n[i] && s2 < slen2_n[i]) {
            int const c = slen1_tab[i] * c1 + slen2_tab[i] * c2;
            if (gi.part2_length > c) {
                gi.part2_length = c;
                gi.scalefac_compress = (int)i;
            }
        }
    }
}

/*
Find the optimal way to store the scalefactors.
Only call this routine after final scalefactors have been
chosen and the channel/granule will not be re-encoded.
 */
/*
 * Post-process the scalefactors of granule gr / channel ch so they are
 * stored as cheaply as possible:
 *   1. bands whose quantized lines are all zero get a "don't care" marker,
 *   2. scalefac_scale is switched on when every positive scalefactor is even,
 *   3. preflag is switched on when every band 11..SBPSY_l-1 is at least its
 *      pretab value (long blocks, cfg.mode_gr == 2 only),
 *   4. scfsi sharing is evaluated for granule 1 when neither granule of the
 *      channel is a short block,
 *   5. remaining markers become 0, and scale_bitcount() is re-run if
 *      anything changed and step 4 did not run.
 */
void lame_internal_flags::best_scalefac_store(const int gr, const int ch, III_side_info_t & l3_side) const {
//    SessionConfig_t const & cfg = gfc.cfg;
    /* use scalefac_scale if we can */
    gr_info & gi = l3_side.tt[gr][ch];
    int     sfb, i, j, l;
    int     recalc = 0;     /* nonzero when scale_bitcount() must be re-run */

    /* remove scalefacs from bands with ix=0.  This idea comes
     * from the AAC ISO docs.  added mt 3/00 */
    /* check if l3_enc=0 */
    j = 0;
    for (sfb = 0; sfb < gi.sfbmax; sfb++) {
        int const width = gi.width[sfb];
        assert(width >= 0);
        /* scan the band; l == j afterwards means no nonzero line was found */
        for (l = j, j += width; l < j; ++l) {
            if (gi.l3_enc[l] != 0)
                break;
        }
        if (l == j)
            gi.scalefac[sfb] = recalc = -2; /* anything goes. */
        /*  only best_scalefac_store and calc_scfsi 
         *  know--and only they should know--about the magic number -2. 
         */
    }

    if (!gi.scalefac_scale && !gi.preflag) {
        /* OR all positive scalefactors together: bit 0 clear means all even */
        int     s = 0;
        for (sfb = 0; sfb < gi.sfbmax; sfb++)
            if (gi.scalefac[sfb] > 0)
                s |= gi.scalefac[sfb];

        if (!(s & 1) && s != 0) {
            /* halve them and compensate with scalefac_scale */
            for (sfb = 0; sfb < gi.sfbmax; sfb++)
                if (gi.scalefac[sfb] > 0)
                    gi.scalefac[sfb] >>= 1;

            gi.scalefac_scale = recalc = 1;
        }
    }

    if (!gi.preflag && gi.block_type != SHORT_TYPE && cfg.mode_gr == 2) {
        /* preflag is usable when no band (other than "anything goes" bands)
           is below its pretab value */
        for (sfb = 11; sfb < SBPSY_l; sfb++)
            if (gi.scalefac[sfb] < pretab[sfb] && gi.scalefac[sfb] != -2)
                break;
        if (sfb == SBPSY_l) {
            for (sfb = 11; sfb < SBPSY_l; sfb++)
                if (gi.scalefac[sfb] > 0)
                    gi.scalefac[sfb] -= pretab[sfb];

            gi.preflag = recalc = 1;
        }
    }

    /* scfsi starts cleared; scfsi_calc() may set entries below */
    for (i = 0; i < 4; i++)
        l3_side.scfsi[ch][i] = 0;

    if (cfg.mode_gr == 2 && gr == 1
        && l3_side.tt[0][ch].block_type != SHORT_TYPE
        && l3_side.tt[1][ch].block_type != SHORT_TYPE) {
        scfsi_calc(ch, l3_side);
        /* scfsi_calc() updates part2_length itself, so no recount is needed */
        recalc = 0;
    }
    for (sfb = 0; sfb < gi.sfbmax; sfb++) {
        if (gi.scalefac[sfb] == -2) {
            gi.scalefac[sfb] = 0; /* if anything goes, then 0 is a good choice */
        }
    }
  if (recalc) scale_bitcount(gi);
}


#ifndef NDEBUG
/* Debug helper: returns 1 when none of scalefac[0 .. n-1] is negative
 * (also when n <= 0), and 0 otherwise. */
static int all_scalefactors_not_negative(int const *scalefac, int n)
{
    int     idx = 0;

    /* advance past every non-negative entry */
    while (idx < n && scalefac[idx] >= 0) {
        ++idx;
    }
    return (idx < n) ? 0 : 1;
}
#endif


/* number of bits used to encode scalefacs */

/* The three tables below are indexed by scalefac_compress (0..15) and give
 * the part2_length that mpeg1_scale_bitcount() assigns for that setting. */

/* short blocks: 18*slen1_tab[i] + 18*slen2_tab[i] */
static const int scale_short[16] = {
    0, 18, 36, 54, 54, 36, 54, 72, 54, 72, 90, 72, 90, 108, 108, 126
};

/* short blocks with mixed_block_flag set: 17*slen1_tab[i] + 18*slen2_tab[i] */
static const int scale_mixed[16] = {
    0, 18, 36, 54, 51, 35, 53, 71, 52, 70, 88, 69, 87, 105, 104, 122
};

/* all other block types: 11*slen1_tab[i] + 10*slen2_tab[i] */
static const int scale_long[16] = {
    0, 10, 20, 30, 33, 21, 31, 41, 32, 42, 52, 43, 53, 63, 64, 74
};


/*************************************************************************/
/*            scale_bitcount                                             */
/*************************************************************************/

/* Also calculates the number of bits necessary to code the scalefactors. */

/*
 * Choose scalefac_compress for the cod_info.mode_gr == 2 path and store the
 * resulting scalefactor bit count in cod_info.part2_length.
 *
 * For non-short blocks without preflag, preflag is switched on (and pretab
 * subtracted from bands 11..SBPSY_l-1) when every one of those bands is at
 * least its pretab value.
 *
 * Returns nonzero when no scalefac_compress value can represent the
 * scalefactors; part2_length is LARGE_BITS in that case.
 */
int lame_internal_flags::mpeg1_scale_bitcount(gr_info &cod_info) const {
    int    *const scalefac = cod_info.scalefac;

    assert(all_scalefactors_not_negative(scalefac, cod_info.sfbmax));

    /* cost table matching the block layout */
    const int *tab;
    if (cod_info.block_type != SHORT_TYPE) {
        /* block_type == 1,2,or 3 */
        tab = scale_long;
        if (!cod_info.preflag) {
            int     probe = 11;
            while (probe < SBPSY_l && !(scalefac[probe] < pretab[probe])) {
                ++probe;
            }
            if (probe == SBPSY_l) {
                cod_info.preflag = 1;
                for (probe = 11; probe < SBPSY_l; ++probe) {
                    scalefac[probe] -= pretab[probe];
                }
            }
        }
    }
    else {
        tab = cod_info.mixed_block_flag ? scale_mixed : scale_short;
    }

    /* largest scalefactor below and from sfbdivide on */
    int     max_slen1 = 0;
    int     max_slen2 = 0;
    int     sfb = 0;
    for (; sfb < cod_info.sfbdivide; ++sfb) {
        if (max_slen1 < scalefac[sfb])
            max_slen1 = scalefac[sfb];
    }
    for (; sfb < cod_info.sfbmax; ++sfb) {
        if (max_slen2 < scalefac[sfb])
            max_slen2 = scalefac[sfb];
    }

    /* from Takehiro TOMINAGA <tominaga@isoternet.org> 10/99
     * loop over *all* possible values of scalefac_compress to find the
     * one which uses the smallest number of bits.  ISO would stop
     * at first valid index */
    cod_info.part2_length = LARGE_BITS;
    for (int k = 0; k < 16; ++k) {
        bool const fits = max_slen1 < slen1_n[k] && max_slen2 < slen2_n[k];
        if (fits && cod_info.part2_length > tab[k]) {
            cod_info.part2_length = tab[k];
            cod_info.scalefac_compress = k;
        }
    }
    return cod_info.part2_length == LARGE_BITS;
}



/*
  table of largest scalefactor values for MPEG2

  Indexed as [table_number][partition]; mpeg2_scale_bitcount() counts a
  partition as "over" when its largest scalefactor exceeds the entry here.
*/
static const int max_range_sfac_tab[6][4] = {
    {15, 15, 7, 7},
    {15, 15, 7, 0},
    {7, 3, 0, 0},
    {15, 31, 31, 0},
    {7, 7, 7, 0},
    {3, 3, 0, 0}
};




/*************************************************************************/
/*            scale_bitcount_lsf                                         */
/*************************************************************************/

/* Also counts the number of bits to encode the scalefacs but for MPEG 2 */
/* Lower sampling frequencies  (24, 22.05 and 16 kHz.)                   */

/*  This is reverse-engineered from section 2.4.3.2 of the MPEG2 IS,     */
/* "Audio Decoding Layer III"                                            */

/*
 * Scalefactor bit count for the cfg.mode_gr != 2 path (see scale_bitcount()).
 *
 * Finds the largest scalefactor in each of the four partitions, checks it
 * against max_range_sfac_tab, and, when every partition is in range, fills
 * in cod_info.sfb_partition_table, slen[], scalefac_compress and
 * part2_length.
 *
 * Returns the number of partitions whose largest scalefactor is out of
 * range (0 means success; cod_info is left untouched otherwise).
 */
int lame_internal_flags::mpeg2_scale_bitcount(gr_info & cod_info) const {
    int     table_number, row_in_table, partition, nr_sfb, window, over;
    int     i, sfb, max_sfac[4];
    const int *partition_table;
    int const *const scalefac = cod_info.scalefac;

    /*
       Set partition table. Note that should try to use table one,
       but do not yet...
     */
    if (cod_info.preflag)
        table_number = 2;
    else
        table_number = 0;

    for (i = 0; i < 4; i++)
        max_sfac[i] = 0;

    if (cod_info.block_type == SHORT_TYPE) {
        row_in_table = 1;
        partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
        for (sfb = 0, partition = 0; partition < 4; partition++) {
            /* the table counts scalefactors; each band has three windows */
            nr_sfb = partition_table[partition] / 3;
            for (i = 0; i < nr_sfb; i++, sfb++)
                for (window = 0; window < 3; window++)
                    if (scalefac[sfb * 3 + window] > max_sfac[partition])
                        max_sfac[partition] = scalefac[sfb * 3 + window];
        }
    }
    else {
        row_in_table = 0;
        partition_table = &nr_of_sfb_block[table_number][row_in_table][0];
        for (sfb = 0, partition = 0; partition < 4; partition++) {
            nr_sfb = partition_table[partition];
            for (i = 0; i < nr_sfb; i++, sfb++)
                if (scalefac[sfb] > max_sfac[partition])
                    max_sfac[partition] = scalefac[sfb];
        }
    }

    /* count the partitions whose maximum does not fit the allowed range */
    for (over = 0, partition = 0; partition < 4; partition++) {
        if (max_sfac[partition] > max_range_sfac_tab[table_number][partition])
            over++;
    }
    if (!over) {
        /*
           Since no bands have been over-amplified, we can set scalefac_compress
           and slen[] for the formatter
         */
        /* number of bits needed to store a value 0..15 */
        static const int log2tab[] = { 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4 };

        int     slen1, slen2, slen3, slen4;

        cod_info.sfb_partition_table = nr_of_sfb_block[table_number][row_in_table];
        for (partition = 0; partition < 4; partition++)
            cod_info.slen[partition] = log2tab[max_sfac[partition]];

        /* set scalefac_compress */
        slen1 = cod_info.slen[0];
        slen2 = cod_info.slen[1];
        slen3 = cod_info.slen[2];
        slen4 = cod_info.slen[3];

        switch (table_number) {
        case 0:
            cod_info.scalefac_compress = (((slen1 * 5) + slen2) << 4)
                + (slen3 << 2)
                + slen4;
            break;

        case 1:
            cod_info.scalefac_compress = 400 + (((slen1 * 5) + slen2) << 2)
                + slen3;
            break;

        case 2:
            cod_info.scalefac_compress = 500 + (slen1 * 3) + slen2;
            break;

        default:
            ERRORF(*this, "intensity stereo not implemented yet\n");
            break;
        }
    }
#ifdef DEBUG
    /* report through *this, like the ERRORF call above; there is no `gfc`
       object in this member function */
    if (over)
        ERRORF(*this, "---WARNING !! Amplification of some bands over limits\n");
#endif
    if (!over) {
        assert(cod_info.sfb_partition_table);
        /* total bits: slen of each partition times its scalefactor count */
        cod_info.part2_length = 0;
        for (partition = 0; partition < 4; partition++)
            cod_info.part2_length +=
                cod_info.slen[partition] * cod_info.sfb_partition_table[partition];
    }
    return over;
}


/* Dispatch the scalefactor bit count: mpeg1_scale_bitcount() when
 * cfg.mode_gr == 2, mpeg2_scale_bitcount() otherwise.  The callee's
 * result is returned unchanged. */
int lame_internal_flags::scale_bitcount(gr_info & cod_info)const{
  return (cfg.mode_gr == 2) ? mpeg1_scale_bitcount(cod_info)
                            : mpeg2_scale_bitcount(cod_info);
}


#ifdef MMX_choose_table
extern int choose_table_MMX(const int *ix, const int *const end, int *const s);
#endif

/*
 * Select the choose_table implementation and fill sv_qnt.bv_scf.
 *
 * For every even big_values count i (2..576), bv_scf[i - 2] receives the
 * region0_count and bv_scf[i - 1] the region1_count that
 * noquant_count_bits() uses for NORM_TYPE blocks.
 */
void lame_internal_flags::huffman_init() {
// gfc == lame_internal_flags
  choose_table = choose_table_nonMMX;

#ifdef MMX_choose_table
  if (CPU_features.MMX) {
    choose_table = choose_table_MMX;
  }
#endif
  for (int i = 2; i <= 576; i += 2) {
    int     scfb_anz = 0, bv_index;
    /* first long band boundary (index >= 1) that is not below i */
    while (scalefac_band.l[++scfb_anz] < i);

    /* start from the table's suggestion and step down while the end of
       region 0 would lie above i */
    bv_index = subdv_table[scfb_anz].region0_count;
    while (scalefac_band.l[bv_index + 1] > i)
     bv_index--;
    if (bv_index < 0) {
            /* this is an indication that everything is going to
               be encoded as region0:  bigvalues < region0 < region1
               so lets set region0, region1 to some value larger
               than bigvalues */
      bv_index = subdv_table[scfb_anz].region0_count;
    }
    sv_qnt.bv_scf[i - 2] = bv_index;
    /* same procedure for region 1, measured from the end of region 0 */
    bv_index = subdv_table[scfb_anz].region1_count;
    while (scalefac_band.l[bv_index + sv_qnt.bv_scf[i - 2] + 2] > i)
     bv_index--;
    if (bv_index < 0) bv_index = subdv_table[scfb_anz].region1_count;
    sv_qnt.bv_scf[i - 1] = bv_index;
  }
}
Detected encoding: ASCII (7 bit)