mdct.c (14680B)
1 /******************************************************************** 2 * * 3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. * 4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS * 5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * 6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * 7 * * 8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2002 * 9 * by the XIPHOPHORUS Company http://www.xiph.org/ * 10 * * 11 ******************************************************************** 12 13 function: normalized modified discrete cosine transform 14 power of two length transform only [64 <= n ] 15 last mod: $Id: mdct.c 1919 2005-07-24 14:18:04Z baford $ 16 17 Original algorithm adapted long ago from _The use of multirate filter 18 banks for coding of high quality digital audio_, by T. Sporer, 19 K. Brandenburg and B. Edler, collection of the European Signal 20 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 21 211-214 22 23 The below code implements an algorithm that no longer looks much like 24 that presented in the paper, but the basic structure remains if you 25 dig deep enough to see it. 26 27 This module DOES NOT INCLUDE code to generate/apply the window 28 function. Everybody has their own weird favorite including me... I 29 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may 30 vehemently disagree. 31 32 ********************************************************************/ 33 34 /* this can also be run as an integer transform by uncommenting a 35 define in mdct.h; the integerization is a first pass and although 36 it's likely stable for Vorbis, the dynamic range is constrained and 37 roundoff isn't done (so it's noisy). Consider it functional, but 38 only a starting point. There's no point on a machine with an FPU */ 39 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <math.h> 44 #include "vorbis/codec.h" 45 #include "mdct.h" 46 #include "os.h" 47 #include "misc.h" 48 49 /* build lookups for trig functions; also pre-figure scaling and 50 some window function algebra. */ 51 52 void mdct_init(mdct_lookup *lookup,int n){ 53 int *bitrev=_ogg_malloc(sizeof(*bitrev)*(n/4)); 54 DATA_TYPE *T=_ogg_malloc(sizeof(*T)*(n+n/4)); 55 56 int i; 57 int n2=n>>1; 58 int log2n=lookup->log2n=rint(log((float)n)/log(2.f)); 59 lookup->n=n; 60 lookup->trig=T; 61 lookup->bitrev=bitrev; 62 63 /* trig lookups... */ 64 65 for(i=0;i<n/4;i++){ 66 T[i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i))); 67 T[i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i))); 68 T[n2+i*2]=FLOAT_CONV(cos((M_PI/(2*n))*(2*i+1))); 69 T[n2+i*2+1]=FLOAT_CONV(sin((M_PI/(2*n))*(2*i+1))); 70 } 71 for(i=0;i<n/8;i++){ 72 T[n+i*2]=FLOAT_CONV(cos((M_PI/n)*(4*i+2))*.5); 73 T[n+i*2+1]=FLOAT_CONV(-sin((M_PI/n)*(4*i+2))*.5); 74 } 75 76 /* bitreverse lookup... */ 77 78 { 79 int mask=(1<<(log2n-1))-1,i,j; 80 int msb=1<<(log2n-2); 81 for(i=0;i<n/8;i++){ 82 int acc=0; 83 for(j=0;msb>>j;j++) 84 if((msb>>j)&i)acc|=1<<j; 85 bitrev[i*2]=((~acc)&mask)-1; 86 bitrev[i*2+1]=acc; 87 88 } 89 } 90 lookup->scale=FLOAT_CONV(4.f/n); 91 } 92 93 /* 8 point butterfly (in place, 4 register) */ 94 STIN void mdct_butterfly_8(DATA_TYPE *x){ 95 REG_TYPE r0 = x[6] + x[2]; 96 REG_TYPE r1 = x[6] - x[2]; 97 REG_TYPE r2 = x[4] + x[0]; 98 REG_TYPE r3 = x[4] - x[0]; 99 100 x[6] = r0 + r2; 101 x[4] = r0 - r2; 102 103 r0 = x[5] - x[1]; 104 r2 = x[7] - x[3]; 105 x[0] = r1 + r0; 106 x[2] = r1 - r0; 107 108 r0 = x[5] + x[1]; 109 r1 = x[7] + x[3]; 110 x[3] = r2 + r3; 111 x[1] = r2 - r3; 112 x[7] = r1 + r0; 113 x[5] = r1 - r0; 114 115 } 116 117 /* 16 point butterfly (in place, 4 register) */ 118 STIN void mdct_butterfly_16(DATA_TYPE *x){ 119 REG_TYPE r0 = x[1] - x[9]; 120 REG_TYPE r1 = x[0] - x[8]; 121 122 x[8] += x[0]; 123 x[9] += x[1]; 124 x[0] = MULT_NORM((r0 + r1) * cPI2_8); 125 x[1] = MULT_NORM((r0 - r1) * cPI2_8); 126 127 r0 = x[3] - x[11]; 128 r1 = x[10] - x[2]; 129 x[10] += x[2]; 130 x[11] += x[3]; 131 x[2] = r0; 132 x[3] = r1; 133 134 r0 = x[12] - x[4]; 135 r1 = x[13] - x[5]; 136 x[12] += x[4]; 137 x[13] += x[5]; 138 x[4] = MULT_NORM((r0 - r1) * cPI2_8); 139 x[5] = MULT_NORM((r0 + r1) * cPI2_8); 140 141 r0 = x[14] - x[6]; 142 r1 = x[15] - x[7]; 143 x[14] += x[6]; 144 x[15] += x[7]; 145 x[6] = r0; 146 x[7] = r1; 147 148 mdct_butterfly_8(x); 149 mdct_butterfly_8(x+8); 150 } 151 152 /* 32 point butterfly (in place, 4 register) */ 153 STIN void mdct_butterfly_32(DATA_TYPE *x){ 154 REG_TYPE r0 = x[30] - x[14]; 155 REG_TYPE r1 = x[31] - x[15]; 156 157 x[30] += x[14]; 158 x[31] += x[15]; 159 x[14] = r0; 160 x[15] = r1; 161 162 r0 = x[28] - x[12]; 163 r1 = x[29] - x[13]; 164 x[28] += x[12]; 165 x[29] += x[13]; 166 x[12] = MULT_NORM( r0 * cPI1_8 - r1 * cPI3_8 ); 167 x[13] = MULT_NORM( r0 * cPI3_8 + r1 * cPI1_8 ); 168 169 r0 = x[26] - x[10]; 170 r1 = x[27] - x[11]; 171 x[26] += x[10]; 172 x[27] += x[11]; 173 x[10] = MULT_NORM(( r0 - r1 ) * cPI2_8); 174 x[11] = MULT_NORM(( r0 + r1 ) * cPI2_8); 175 176 r0 = x[24] - x[8]; 177 r1 = x[25] - x[9]; 178 x[24] += x[8]; 179 x[25] += x[9]; 180 x[8] = MULT_NORM( r0 * cPI3_8 - r1 * cPI1_8 ); 181 x[9] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 ); 182 183 r0 = x[22] - x[6]; 184 r1 = x[7] - x[23]; 185 x[22] += x[6]; 186 x[23] += x[7]; 187 x[6] = r1; 188 x[7] = r0; 189 190 r0 = x[4] - x[20]; 191 r1 = x[5] - x[21]; 192 x[20] += x[4]; 193 x[21] += x[5]; 194 x[4] = MULT_NORM( r1 * cPI1_8 + r0 * cPI3_8 ); 195 x[5] = MULT_NORM( r1 * cPI3_8 - r0 * cPI1_8 ); 196 197 r0 = x[2] - x[18]; 198 r1 = x[3] - x[19]; 199 x[18] += x[2]; 200 x[19] += x[3]; 201 x[2] = MULT_NORM(( r1 + r0 ) * cPI2_8); 202 x[3] = MULT_NORM(( r1 - r0 ) * cPI2_8); 203 204 r0 = x[0] - x[16]; 205 r1 = x[1] - x[17]; 206 x[16] += x[0]; 207 x[17] += x[1]; 208 x[0] = MULT_NORM( r1 * cPI3_8 + r0 * cPI1_8 ); 209 x[1] = MULT_NORM( r1 * cPI1_8 - r0 * cPI3_8 ); 210 211 mdct_butterfly_16(x); 212 mdct_butterfly_16(x+16); 213 214 } 215 216 /* N point first stage butterfly (in place, 2 register) */ 217 STIN void mdct_butterfly_first(DATA_TYPE *T, 218 DATA_TYPE *x, 219 int points){ 220 221 DATA_TYPE *x1 = x + points - 8; 222 DATA_TYPE *x2 = x + (points>>1) - 8; 223 REG_TYPE r0; 224 REG_TYPE r1; 225 226 do{ 227 228 r0 = x1[6] - x2[6]; 229 r1 = x1[7] - x2[7]; 230 x1[6] += x2[6]; 231 x1[7] += x2[7]; 232 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]); 233 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]); 234 235 r0 = x1[4] - x2[4]; 236 r1 = x1[5] - x2[5]; 237 x1[4] += x2[4]; 238 x1[5] += x2[5]; 239 x2[4] = MULT_NORM(r1 * T[5] + r0 * T[4]); 240 x2[5] = MULT_NORM(r1 * T[4] - r0 * T[5]); 241 242 r0 = x1[2] - x2[2]; 243 r1 = x1[3] - x2[3]; 244 x1[2] += x2[2]; 245 x1[3] += x2[3]; 246 x2[2] = MULT_NORM(r1 * T[9] + r0 * T[8]); 247 x2[3] = MULT_NORM(r1 * T[8] - r0 * T[9]); 248 249 r0 = x1[0] - x2[0]; 250 r1 = x1[1] - x2[1]; 251 x1[0] += x2[0]; 252 x1[1] += x2[1]; 253 x2[0] = MULT_NORM(r1 * T[13] + r0 * T[12]); 254 x2[1] = MULT_NORM(r1 * T[12] - r0 * T[13]); 255 256 x1-=8; 257 x2-=8; 258 T+=16; 259 260 }while(x2>=x); 261 } 262 263 /* N/stage point generic N stage butterfly (in place, 2 register) */ 264 STIN void mdct_butterfly_generic(DATA_TYPE *T, 265 DATA_TYPE *x, 266 int points, 267 int trigint){ 268 269 DATA_TYPE *x1 = x + points - 8; 270 DATA_TYPE *x2 = x + (points>>1) - 8; 271 REG_TYPE r0; 272 REG_TYPE r1; 273 274 do{ 275 276 r0 = x1[6] - x2[6]; 277 r1 = x1[7] - x2[7]; 278 x1[6] += x2[6]; 279 x1[7] += x2[7]; 280 x2[6] = MULT_NORM(r1 * T[1] + r0 * T[0]); 281 x2[7] = MULT_NORM(r1 * T[0] - r0 * T[1]); 282 283 T+=trigint; 284 285 r0 = x1[4] - x2[4]; 286 r1 = x1[5] - x2[5]; 287 x1[4] += x2[4]; 288 x1[5] += x2[5]; 289 x2[4] = MULT_NORM(r1 * T[1] + r0 * T[0]); 290 x2[5] = MULT_NORM(r1 * T[0] - r0 * T[1]); 291 292 T+=trigint; 293 294 r0 = x1[2] - x2[2]; 295 r1 = x1[3] - x2[3]; 296 x1[2] += x2[2]; 297 x1[3] += x2[3]; 298 x2[2] = MULT_NORM(r1 * T[1] + r0 * T[0]); 299 x2[3] = MULT_NORM(r1 * T[0] - r0 * T[1]); 300 301 T+=trigint; 302 303 r0 = x1[0] - x2[0]; 304 r1 = x1[1] - x2[1]; 305 x1[0] += x2[0]; 306 x1[1] += x2[1]; 307 x2[0] = MULT_NORM(r1 * T[1] + r0 * T[0]); 308 x2[1] = MULT_NORM(r1 * T[0] - r0 * T[1]); 309 310 T+=trigint; 311 x1-=8; 312 x2-=8; 313 314 }while(x2>=x); 315 } 316 317 STIN void mdct_butterflies(mdct_lookup *init, 318 DATA_TYPE *x, 319 int points){ 320 321 DATA_TYPE *T=init->trig; 322 int stages=init->log2n-5; 323 int i,j; 324 325 if(--stages>0){ 326 mdct_butterfly_first(T,x,points); 327 } 328 329 for(i=1;--stages>0;i++){ 330 for(j=0;j<(1<<i);j++) 331 mdct_butterfly_generic(T,x+(points>>i)*j,points>>i,4<<i); 332 } 333 334 for(j=0;j<points;j+=32) 335 mdct_butterfly_32(x+j); 336 337 } 338 339 void mdct_clear(mdct_lookup *l){ 340 if(l){ 341 if(l->trig)_ogg_free(l->trig); 342 if(l->bitrev)_ogg_free(l->bitrev); 343 memset(l,0,sizeof(*l)); 344 } 345 } 346 347 STIN void mdct_bitreverse(mdct_lookup *init, 348 DATA_TYPE *x){ 349 int n = init->n; 350 int *bit = init->bitrev; 351 DATA_TYPE *w0 = x; 352 DATA_TYPE *w1 = x = w0+(n>>1); 353 DATA_TYPE *T = init->trig+n; 354 355 do{ 356 DATA_TYPE *x0 = x+bit[0]; 357 DATA_TYPE *x1 = x+bit[1]; 358 359 REG_TYPE r0 = x0[1] - x1[1]; 360 REG_TYPE r1 = x0[0] + x1[0]; 361 REG_TYPE r2 = MULT_NORM(r1 * T[0] + r0 * T[1]); 362 REG_TYPE r3 = MULT_NORM(r1 * T[1] - r0 * T[0]); 363 364 w1 -= 4; 365 366 r0 = HALVE(x0[1] + x1[1]); 367 r1 = HALVE(x0[0] - x1[0]); 368 369 w0[0] = r0 + r2; 370 w1[2] = r0 - r2; 371 w0[1] = r1 + r3; 372 w1[3] = r3 - r1; 373 374 x0 = x+bit[2]; 375 x1 = x+bit[3]; 376 377 r0 = x0[1] - x1[1]; 378 r1 = x0[0] + x1[0]; 379 r2 = MULT_NORM(r1 * T[2] + r0 * T[3]); 380 r3 = MULT_NORM(r1 * T[3] - r0 * T[2]); 381 382 r0 = HALVE(x0[1] + x1[1]); 383 r1 = HALVE(x0[0] - x1[0]); 384 385 w0[2] = r0 + r2; 386 w1[0] = r0 - r2; 387 w0[3] = r1 + r3; 388 w1[1] = r3 - r1; 389 390 T += 4; 391 bit += 4; 392 w0 += 4; 393 394 }while(w0<w1); 395 } 396 397 void mdct_backward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){ 398 int n=init->n; 399 int n2=n>>1; 400 int n4=n>>2; 401 402 /* rotate */ 403 404 DATA_TYPE *iX = in+n2-7; 405 DATA_TYPE *oX = out+n2+n4; 406 DATA_TYPE *T = init->trig+n4; 407 408 do{ 409 oX -= 4; 410 oX[0] = MULT_NORM(-iX[2] * T[3] - iX[0] * T[2]); 411 oX[1] = MULT_NORM (iX[0] * T[3] - iX[2] * T[2]); 412 oX[2] = MULT_NORM(-iX[6] * T[1] - iX[4] * T[0]); 413 oX[3] = MULT_NORM (iX[4] * T[1] - iX[6] * T[0]); 414 iX -= 8; 415 T += 4; 416 }while(iX>=in); 417 418 iX = in+n2-8; 419 oX = out+n2+n4; 420 T = init->trig+n4; 421 422 do{ 423 T -= 4; 424 oX[0] = MULT_NORM (iX[4] * T[3] + iX[6] * T[2]); 425 oX[1] = MULT_NORM (iX[4] * T[2] - iX[6] * T[3]); 426 oX[2] = MULT_NORM (iX[0] * T[1] + iX[2] * T[0]); 427 oX[3] = MULT_NORM (iX[0] * T[0] - iX[2] * T[1]); 428 iX -= 8; 429 oX += 4; 430 }while(iX>=in); 431 432 mdct_butterflies(init,out+n2,n2); 433 mdct_bitreverse(init,out); 434 435 /* roatate + window */ 436 437 { 438 DATA_TYPE *oX1=out+n2+n4; 439 DATA_TYPE *oX2=out+n2+n4; 440 DATA_TYPE *iX =out; 441 T =init->trig+n2; 442 443 do{ 444 oX1-=4; 445 446 oX1[3] = MULT_NORM (iX[0] * T[1] - iX[1] * T[0]); 447 oX2[0] = -MULT_NORM (iX[0] * T[0] + iX[1] * T[1]); 448 449 oX1[2] = MULT_NORM (iX[2] * T[3] - iX[3] * T[2]); 450 oX2[1] = -MULT_NORM (iX[2] * T[2] + iX[3] * T[3]); 451 452 oX1[1] = MULT_NORM (iX[4] * T[5] - iX[5] * T[4]); 453 oX2[2] = -MULT_NORM (iX[4] * T[4] + iX[5] * T[5]); 454 455 oX1[0] = MULT_NORM (iX[6] * T[7] - iX[7] * T[6]); 456 oX2[3] = -MULT_NORM (iX[6] * T[6] + iX[7] * T[7]); 457 458 oX2+=4; 459 iX += 8; 460 T += 8; 461 }while(iX<oX1); 462 463 iX=out+n2+n4; 464 oX1=out+n4; 465 oX2=oX1; 466 467 do{ 468 oX1-=4; 469 iX-=4; 470 471 oX2[0] = -(oX1[3] = iX[3]); 472 oX2[1] = -(oX1[2] = iX[2]); 473 oX2[2] = -(oX1[1] = iX[1]); 474 oX2[3] = -(oX1[0] = iX[0]); 475 476 oX2+=4; 477 }while(oX2<iX); 478 479 iX=out+n2+n4; 480 oX1=out+n2+n4; 481 oX2=out+n2; 482 do{ 483 oX1-=4; 484 oX1[0]= iX[3]; 485 oX1[1]= iX[2]; 486 oX1[2]= iX[1]; 487 oX1[3]= iX[0]; 488 iX+=4; 489 }while(oX1>oX2); 490 } 491 } 492 493 void mdct_forward(mdct_lookup *init, DATA_TYPE *in, DATA_TYPE *out){ 494 int n=init->n; 495 int n2=n>>1; 496 int n4=n>>2; 497 int n8=n>>3; 498 DATA_TYPE *w=alloca(n*sizeof(*w)); /* forward needs working space */ 499 DATA_TYPE *w2=w+n2; 500 501 /* rotate */ 502 503 /* window + rotate + step 1 */ 504 505 REG_TYPE r0; 506 REG_TYPE r1; 507 DATA_TYPE *x0=in+n2+n4; 508 DATA_TYPE *x1=x0+1; 509 DATA_TYPE *T=init->trig+n2; 510 511 int i=0; 512 513 for(i=0;i<n8;i+=2){ 514 x0 -=4; 515 T-=2; 516 r0= x0[2] + x1[0]; 517 r1= x0[0] + x1[2]; 518 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]); 519 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]); 520 x1 +=4; 521 } 522 523 x1=in+1; 524 525 for(;i<n2-n8;i+=2){ 526 T-=2; 527 x0 -=4; 528 r0= x0[2] - x1[0]; 529 r1= x0[0] - x1[2]; 530 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]); 531 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]); 532 x1 +=4; 533 } 534 535 x0=in+n; 536 537 for(;i<n2;i+=2){ 538 T-=2; 539 x0 -=4; 540 r0= -x0[2] - x1[0]; 541 r1= -x0[0] - x1[2]; 542 w2[i]= MULT_NORM(r1*T[1] + r0*T[0]); 543 w2[i+1]= MULT_NORM(r1*T[0] - r0*T[1]); 544 x1 +=4; 545 } 546 547 548 mdct_butterflies(init,w+n2,n2); 549 mdct_bitreverse(init,w); 550 551 /* roatate + window */ 552 553 T=init->trig+n2; 554 x0=out+n2; 555 556 for(i=0;i<n4;i++){ 557 x0--; 558 out[i] =MULT_NORM((w[0]*T[0]+w[1]*T[1])*init->scale); 559 x0[0] =MULT_NORM((w[0]*T[1]-w[1]*T[0])*init->scale); 560 w+=2; 561 T+=2; 562 } 563 } 564