vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

mdct.c (14680B)


      1 /********************************************************************
      2  *                                                                  *
      3  * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
      4  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
      5  * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
      6  * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
      7  *                                                                  *
      8  * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002             *
      9  * by the XIPHOPHORUS Company http://www.xiph.org/                  *
     10  *                                                                  *
     11  ********************************************************************
     12 
     13  function: normalized modified discrete cosine transform
     14            power of two length transform only [64 <= n ]
     15  last mod: $Id: mdct.c 1919 2005-07-24 14:18:04Z baford $
     16 
     17  Original algorithm adapted long ago from _The use of multirate filter
     18  banks for coding of high quality digital audio_, by T. Sporer,
     19  K. Brandenburg and B. Edler, collection of the European Signal
     20  Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp
     21  211-214
     22 
     23  The below code implements an algorithm that no longer looks much like
     24  that presented in the paper, but the basic structure remains if you
     25  dig deep enough to see it.
     26 
     27  This module DOES NOT INCLUDE code to generate/apply the window
     28  function.  Everybody has their own weird favorite including me... I
     29  happen to like the properties of y=sin(.5PI*sin^2(x)), but others may
     30  vehemently disagree.
     31 
     32  ********************************************************************/
     33 
     34 /* this can also be run as an integer transform by uncommenting a
     35    define in mdct.h; the integerization is a first pass and although
     36    it's likely stable for Vorbis, the dynamic range is constrained and
     37    roundoff isn't done (so it's noisy).  Consider it functional, but
     38    only a starting point.  There's no point on a machine with an FPU */
     39 
     40 #include <stdio.h>
     41 #include <stdlib.h>
     42 #include <string.h>
     43 #include <math.h>
     44 #include "vorbis/codec.h"
     45 #include "mdct.h"
     46 #include "os.h"
     47 #include "misc.h"
     48 
     49 /* build lookups for trig functions; also pre-figure scaling and
     50    some window function algebra. */
     51 
     52 void mdct_init(mdct_lookup *lookup,int n){
     53   int   *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4));
     54   DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4));
     55   
     56   int i;
     57   int n2=n>>1;
     58   int log2n=lookup->log2n=rint(log((float)n)/log(2.f));
     59   lookup->n=n;
     60   lookup->trig=T;
     61   lookup->bitrev=bitrev;
     62 
     63 /* trig lookups... */
     64 
     65   for(i=0;i<n/4;i++){
     66     T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i)));
     67     T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i)));
     68     T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1)));
     69     T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1)));
     70   }
     71   for(i=0;i<n/8;i++){
     72     T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5);
     73     T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5);
     74   }
     75 
     76   /* bitreverse lookup... */
     77 
     78   {
     79     int mask=(1<<(log2n-1))-1,i,j;
     80     int msb=1<<(log2n-2);
     81     for(i=0;i<n/8;i++){
     82       int acc=0;
     83       for(j=0;msb>>j;j++)
     84 	if((msb>>j)&i)acc|=1<<j;
     85       bitrev[i*2]=((~acc)&mask)-1;
     86       bitrev[i*2+1]=acc;
     87 
     88     }
     89   }
     90   lookup->scale=FLOAT_CONV(4.f/n);
     91 }
     92 
     93 /* 8 point butterfly (in place, 4 register) */
     94 STIN void mdct_butterfly_8(DATA_TYPE *x){
     95   REG_TYPE r0   = x[6] + x[2];
     96   REG_TYPE r1   = x[6] - x[2];
     97   REG_TYPE r2   = x[4] + x[0];
     98   REG_TYPE r3   = x[4] - x[0];
     99 
    100 	   x[6] = r0   + r2;
    101 	   x[4] = r0   - r2;
    102 	   
    103 	   r0   = x[5] - x[1];
    104 	   r2   = x[7] - x[3];
    105 	   x[0] = r1   + r0;
    106 	   x[2] = r1   - r0;
    107 	   
    108 	   r0   = x[5] + x[1];
    109 	   r1   = x[7] + x[3];
    110 	   x[3] = r2   + r3;
    111 	   x[1] = r2   - r3;
    112 	   x[7] = r1   + r0;
    113 	   x[5] = r1   - r0;
    114 	   
    115 }
    116 
    117 /* 16 point butterfly (in place, 4 register) */
    118 STIN void mdct_butterfly_16(DATA_TYPE *x){
    119   REG_TYPE r0     = x[1]  - x[9];
    120   REG_TYPE r1     = x[0]  - x[8];
    121 
    122            x[8]  += x[0];
    123            x[9]  += x[1];
    124            x[0]   = MULT_NORM((r0   + r1) * cPI2_8);
    125            x[1]   = MULT_NORM((r0   - r1) * cPI2_8);
    126 
    127            r0     = x[3]  - x[11];
    128            r1     = x[10] - x[2];
    129            x[10] += x[2];
    130            x[11] += x[3];
    131            x[2]   = r0;
    132            x[3]   = r1;
    133 
    134            r0     = x[12] - x[4];
    135            r1     = x[13] - x[5];
    136            x[12] += x[4];
    137            x[13] += x[5];
    138            x[4]   = MULT_NORM((r0   - r1) * cPI2_8);
    139            x[5]   = MULT_NORM((r0   + r1) * cPI2_8);
    140 
    141            r0     = x[14] - x[6];
    142            r1     = x[15] - x[7];
    143            x[14] += x[6];
    144            x[15] += x[7];
    145            x[6]  = r0;
    146            x[7]  = r1;
    147 
    148 	   mdct_butterfly_8(x);
    149 	   mdct_butterfly_8(x+8);
    150 }
    151 
    152 /* 32 point butterfly (in place, 4 register) */
    153 STIN void mdct_butterfly_32(DATA_TYPE *x){
    154   REG_TYPE r0     = x[30] - x[14];
    155   REG_TYPE r1     = x[31] - x[15];
    156 
    157            x[30] +=         x[14];           
    158 	   x[31] +=         x[15];
    159            x[14]  =         r0;              
    160 	   x[15]  =         r1;
    161 
    162            r0     = x[28] - x[12];   
    163 	   r1     = x[29] - x[13];
    164            x[28] +=         x[12];           
    165 	   x[29] +=         x[13];
    166            x[12]  = MULT_NORM( r0 * cPI1_8  -  r1 * cPI3_8 );
    167 	   x[13]  = MULT_NORM( r0 * cPI3_8  +  r1 * cPI1_8 );
    168 
    169            r0     = x[26] - x[10];
    170 	   r1     = x[27] - x[11];
    171 	   x[26] +=         x[10];
    172 	   x[27] +=         x[11];
    173 	   x[10]  = MULT_NORM(( r0  - r1 ) * cPI2_8);
    174 	   x[11]  = MULT_NORM(( r0  + r1 ) * cPI2_8);
    175 
    176 	   r0     = x[24] - x[8];
    177 	   r1     = x[25] - x[9];
    178 	   x[24] += x[8];
    179 	   x[25] += x[9];
    180 	   x[8]   = MULT_NORM( r0 * cPI3_8  -  r1 * cPI1_8 );
    181 	   x[9]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
    182 
    183 	   r0     = x[22] - x[6];
    184 	   r1     = x[7]  - x[23];
    185 	   x[22] += x[6];
    186 	   x[23] += x[7];
    187 	   x[6]   = r1;
    188 	   x[7]   = r0;
    189 
    190 	   r0     = x[4]  - x[20];
    191 	   r1     = x[5]  - x[21];
    192 	   x[20] += x[4];
    193 	   x[21] += x[5];
    194 	   x[4]   = MULT_NORM( r1 * cPI1_8  +  r0 * cPI3_8 );
    195 	   x[5]   = MULT_NORM( r1 * cPI3_8  -  r0 * cPI1_8 );
    196 
    197 	   r0     = x[2]  - x[18];
    198 	   r1     = x[3]  - x[19];
    199 	   x[18] += x[2];
    200 	   x[19] += x[3];
    201 	   x[2]   = MULT_NORM(( r1  + r0 ) * cPI2_8);
    202 	   x[3]   = MULT_NORM(( r1  - r0 ) * cPI2_8);
    203 
    204 	   r0     = x[0]  - x[16];
    205 	   r1     = x[1]  - x[17];
    206 	   x[16] += x[0];
    207 	   x[17] += x[1];
    208 	   x[0]   = MULT_NORM( r1 * cPI3_8  +  r0 * cPI1_8 );
    209 	   x[1]   = MULT_NORM( r1 * cPI1_8  -  r0 * cPI3_8 );
    210 
    211 	   mdct_butterfly_16(x);
    212 	   mdct_butterfly_16(x+16);
    213 
    214 }
    215 
    216 /* N point first stage butterfly (in place, 2 register) */
    217 STIN void mdct_butterfly_first(DATA_TYPE *T,
    218 					DATA_TYPE *x,
    219 					int points){
    220   
    221   DATA_TYPE *x1        = x          + points      - 8;
    222   DATA_TYPE *x2        = x          + (points>>1) - 8;
    223   REG_TYPE   r0;
    224   REG_TYPE   r1;
    225 
    226   do{
    227     
    228                r0      = x1[6]      -  x2[6];
    229 	       r1      = x1[7]      -  x2[7];
    230 	       x1[6]  += x2[6];
    231 	       x1[7]  += x2[7];
    232 	       x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
    233 	       x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
    234 	       
    235 	       r0      = x1[4]      -  x2[4];
    236 	       r1      = x1[5]      -  x2[5];
    237 	       x1[4]  += x2[4];
    238 	       x1[5]  += x2[5];
    239 	       x2[4]   = MULT_NORM(r1 * T[5]  +  r0 * T[4]);
    240 	       x2[5]   = MULT_NORM(r1 * T[4]  -  r0 * T[5]);
    241 	       
    242 	       r0      = x1[2]      -  x2[2];
    243 	       r1      = x1[3]      -  x2[3];
    244 	       x1[2]  += x2[2];
    245 	       x1[3]  += x2[3];
    246 	       x2[2]   = MULT_NORM(r1 * T[9]  +  r0 * T[8]);
    247 	       x2[3]   = MULT_NORM(r1 * T[8]  -  r0 * T[9]);
    248 	       
    249 	       r0      = x1[0]      -  x2[0];
    250 	       r1      = x1[1]      -  x2[1];
    251 	       x1[0]  += x2[0];
    252 	       x1[1]  += x2[1];
    253 	       x2[0]   = MULT_NORM(r1 * T[13] +  r0 * T[12]);
    254 	       x2[1]   = MULT_NORM(r1 * T[12] -  r0 * T[13]);
    255 	       
    256     x1-=8;
    257     x2-=8;
    258     T+=16;
    259 
    260   }while(x2>=x);
    261 }
    262 
    263 /* N/stage point generic N stage butterfly (in place, 2 register) */
    264 STIN void mdct_butterfly_generic(DATA_TYPE *T,
    265 					  DATA_TYPE *x,
    266 					  int points,
    267 					  int trigint){
    268   
    269   DATA_TYPE *x1        = x          + points      - 8;
    270   DATA_TYPE *x2        = x          + (points>>1) - 8;
    271   REG_TYPE   r0;
    272   REG_TYPE   r1;
    273 
    274   do{
    275     
    276                r0      = x1[6]      -  x2[6];
    277 	       r1      = x1[7]      -  x2[7];
    278 	       x1[6]  += x2[6];
    279 	       x1[7]  += x2[7];
    280 	       x2[6]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
    281 	       x2[7]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
    282 	       
    283 	       T+=trigint;
    284 	       
    285 	       r0      = x1[4]      -  x2[4];
    286 	       r1      = x1[5]      -  x2[5];
    287 	       x1[4]  += x2[4];
    288 	       x1[5]  += x2[5];
    289 	       x2[4]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
    290 	       x2[5]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
    291 	       
    292 	       T+=trigint;
    293 	       
    294 	       r0      = x1[2]      -  x2[2];
    295 	       r1      = x1[3]      -  x2[3];
    296 	       x1[2]  += x2[2];
    297 	       x1[3]  += x2[3];
    298 	       x2[2]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
    299 	       x2[3]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
    300 	       
    301 	       T+=trigint;
    302 	       
    303 	       r0      = x1[0]      -  x2[0];
    304 	       r1      = x1[1]      -  x2[1];
    305 	       x1[0]  += x2[0];
    306 	       x1[1]  += x2[1];
    307 	       x2[0]   = MULT_NORM(r1 * T[1]  +  r0 * T[0]);
    308 	       x2[1]   = MULT_NORM(r1 * T[0]  -  r0 * T[1]);
    309 
    310 	       T+=trigint;
    311     x1-=8;
    312     x2-=8;
    313 
    314   }while(x2>=x);
    315 }
    316 
    317 STIN void mdct_butterflies(mdct_lookup *init,
    318 			     DATA_TYPE *x,
    319 			     int points){
    320   
    321   DATA_TYPE *T=init->trig;
    322   int stages=init->log2n-5;
    323   int i,j;
    324   
    325   if(--stages>0){
    326     mdct_butterfly_first(T,x,points);
    327   }
    328 
    329   for(i=1;--stages>0;i++){
    330     for(j=0;j<(1<<i);j++)
    331       mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i);
    332   }
    333 
    334   for(j=0;j<points;j+=32)
    335     mdct_butterfly_32(x+j);
    336 
    337 }
    338 
    339 void mdct_clear(mdct_lookup *l){
    340   if(l){
    341     if(l->trig)_ogg_free(l->trig);
    342     if(l->bitrev)_ogg_free(l->bitrev);
    343     memset(l,0,sizeof(*l));
    344   }
    345 }
    346 
    347 STIN void mdct_bitreverse(mdct_lookup *init, 
    348 			    DATA_TYPE *x){
    349   int        n       = init->n;
    350   int       *bit     = init->bitrev;
    351   DATA_TYPE *w0      = x;
    352   DATA_TYPE *w1      = x = w0+(n>>1);
    353   DATA_TYPE *T       = init->trig+n;
    354 
    355   do{
    356     DATA_TYPE *x0    = x+bit[0];
    357     DATA_TYPE *x1    = x+bit[1];
    358 
    359     REG_TYPE  r0     = x0[1]  - x1[1];
    360     REG_TYPE  r1     = x0[0]  + x1[0];
    361     REG_TYPE  r2     = MULT_NORM(r1     * T[0]   + r0 * T[1]);
    362     REG_TYPE  r3     = MULT_NORM(r1     * T[1]   - r0 * T[0]);
    363 
    364 	      w1    -= 4;
    365 
    366               r0     = HALVE(x0[1] + x1[1]);
    367               r1     = HALVE(x0[0] - x1[0]);
    368       
    369 	      w0[0]  = r0     + r2;
    370 	      w1[2]  = r0     - r2;
    371 	      w0[1]  = r1     + r3;
    372 	      w1[3]  = r3     - r1;
    373 
    374               x0     = x+bit[2];
    375               x1     = x+bit[3];
    376 
    377               r0     = x0[1]  - x1[1];
    378               r1     = x0[0]  + x1[0];
    379               r2     = MULT_NORM(r1     * T[2]   + r0 * T[3]);
    380               r3     = MULT_NORM(r1     * T[3]   - r0 * T[2]);
    381 
    382               r0     = HALVE(x0[1] + x1[1]);
    383               r1     = HALVE(x0[0] - x1[0]);
    384       
    385 	      w0[2]  = r0     + r2;
    386 	      w1[0]  = r0     - r2;
    387 	      w0[3]  = r1     + r3;
    388 	      w1[1]  = r3     - r1;
    389 
    390 	      T     += 4;
    391 	      bit   += 4;
    392 	      w0    += 4;
    393 
    394   }while(w0<w1);
    395 }
    396 
    397 void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
    398   int n=init->n;
    399   int n2=n>>1;
    400   int n4=n>>2;
    401 
    402   /* rotate */
    403 
    404   DATA_TYPE *iX = in+n2-7;
    405   DATA_TYPE *oX = out+n2+n4;
    406   DATA_TYPE *T  = init->trig+n4;
    407 
    408   do{
    409     oX         -= 4;
    410     oX[0]       = MULT_NORM(-iX[2] * T[3] - iX[0]  * T[2]);
    411     oX[1]       = MULT_NORM (iX[0] * T[3] - iX[2]  * T[2]);
    412     oX[2]       = MULT_NORM(-iX[6] * T[1] - iX[4]  * T[0]);
    413     oX[3]       = MULT_NORM (iX[4] * T[1] - iX[6]  * T[0]);
    414     iX         -= 8;
    415     T          += 4;
    416   }while(iX>=in);
    417 
    418   iX            = in+n2-8;
    419   oX            = out+n2+n4;
    420   T             = init->trig+n4;
    421 
    422   do{
    423     T          -= 4;
    424     oX[0]       =  MULT_NORM (iX[4] * T[3] + iX[6] * T[2]);
    425     oX[1]       =  MULT_NORM (iX[4] * T[2] - iX[6] * T[3]);
    426     oX[2]       =  MULT_NORM (iX[0] * T[1] + iX[2] * T[0]);
    427     oX[3]       =  MULT_NORM (iX[0] * T[0] - iX[2] * T[1]);
    428     iX         -= 8;
    429     oX         += 4;
    430   }while(iX>=in);
    431 
    432   mdct_butterflies(init,out+n2,n2);
    433   mdct_bitreverse(init,out);
    434 
    435   /* roatate + window */
    436 
    437   {
    438     DATA_TYPE *oX1=out+n2+n4;
    439     DATA_TYPE *oX2=out+n2+n4;
    440     DATA_TYPE *iX =out;
    441     T             =init->trig+n2;
    442     
    443     do{
    444       oX1-=4;
    445 
    446       oX1[3]  =  MULT_NORM (iX[0] * T[1] - iX[1] * T[0]);
    447       oX2[0]  = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]);
    448 
    449       oX1[2]  =  MULT_NORM (iX[2] * T[3] - iX[3] * T[2]);
    450       oX2[1]  = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]);
    451 
    452       oX1[1]  =  MULT_NORM (iX[4] * T[5] - iX[5] * T[4]);
    453       oX2[2]  = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]);
    454 
    455       oX1[0]  =  MULT_NORM (iX[6] * T[7] - iX[7] * T[6]);
    456       oX2[3]  = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]);
    457 
    458       oX2+=4;
    459       iX    +=   8;
    460       T     +=   8;
    461     }while(iX<oX1);
    462 
    463     iX=out+n2+n4;
    464     oX1=out+n4;
    465     oX2=oX1;
    466 
    467     do{
    468       oX1-=4;
    469       iX-=4;
    470 
    471       oX2[0] = -(oX1[3] = iX[3]);
    472       oX2[1] = -(oX1[2] = iX[2]);
    473       oX2[2] = -(oX1[1] = iX[1]);
    474       oX2[3] = -(oX1[0] = iX[0]);
    475 
    476       oX2+=4;
    477     }while(oX2<iX);
    478 
    479     iX=out+n2+n4;
    480     oX1=out+n2+n4;
    481     oX2=out+n2;
    482     do{
    483       oX1-=4;
    484       oX1[0]= iX[3];
    485       oX1[1]= iX[2];
    486       oX1[2]= iX[1];
    487       oX1[3]= iX[0];
    488       iX+=4;
    489     }while(oX1>oX2);
    490   }
    491 }
    492 
    493 void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){
    494   int n=init->n;
    495   int n2=n>>1;
    496   int n4=n>>2;
    497   int n8=n>>3;
    498   DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */
    499   DATA_TYPE *w2=w+n2;
    500 
    501   /* rotate */
    502 
    503   /* window + rotate + step 1 */
    504   
    505   REG_TYPE r0;
    506   REG_TYPE r1;
    507   DATA_TYPE *x0=in+n2+n4;
    508   DATA_TYPE *x1=x0+1;
    509   DATA_TYPE *T=init->trig+n2;
    510   
    511   int i=0;
    512   
    513   for(i=0;i<n8;i+=2){
    514     x0 -=4;
    515     T-=2;
    516     r0= x0[2] + x1[0];
    517     r1= x0[0] + x1[2];       
    518     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
    519     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
    520     x1 +=4;
    521   }
    522 
    523   x1=in+1;
    524   
    525   for(;i<n2-n8;i+=2){
    526     T-=2;
    527     x0 -=4;
    528     r0= x0[2] - x1[0];
    529     r1= x0[0] - x1[2];       
    530     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
    531     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
    532     x1 +=4;
    533   }
    534     
    535   x0=in+n;
    536 
    537   for(;i<n2;i+=2){
    538     T-=2;
    539     x0 -=4;
    540     r0= -x0[2] - x1[0];
    541     r1= -x0[0] - x1[2];       
    542     w2[i]=   MULT_NORM(r1*T[1] + r0*T[0]);
    543     w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]);
    544     x1 +=4;
    545   }
    546 
    547 
    548   mdct_butterflies(init,w+n2,n2);
    549   mdct_bitreverse(init,w);
    550 
    551   /* roatate + window */
    552 
    553   T=init->trig+n2;
    554   x0=out+n2;
    555 
    556   for(i=0;i<n4;i++){
    557     x0--;
    558     out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale);
    559     x0[0]  =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale);
    560     w+=2;
    561     T+=2;
    562   }
    563 }
    564