/*
 *    MP3 window subband -> subband filtering -> mdct routine
 *
 *      Copyright (c) 1999 Takehiro TOMINAGA
 *      Copyright (c) 2001,2002,2003 gogo-developer
 *
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 *         Special thanks to Patrick De Smet for his advice.
 */

#include "config.h"
#include "global.h"

#include "util.h"
#include "newmdct.h"
#include "vfta.h"

#ifdef CPU_I386
extern const float enwindow[288];	/* defined in sbandtbl.nas */
/*
 * NOTE(review): the original Japanese remark here is mis-encoded and cannot
 * be recovered exactly; it appears to say the table was moved (into the
 * assembly file) for alignment reasons -- unconfirmed.
 */
#else
/*
 * enwindow: 18 rows x 16 columns (288 entries) of analysis-window data,
 * stored row-major with a row stride of 16.
 *
 *  - Rows 0..15, columns 0..14: a window coefficient multiplied by the
 *    factor tagged by the trailing comment number k (15 down to 1, i.e.
 *    k = 15 - column) and divided by 2.384e-06.  Numerically the factors
 *    match cos(k*PI/64).
 *  - Column 15 of rows 0..14: extra coefficients that
 *    window_subband_sub2() reads one by one as enwindow[row*16+15];
 *    rows 0..7 carry an additional SQRT2 * 0.5 factor.  Column 15 of
 *    rows 15..17 is marked unused.
 *  - Row 16 (offset 16*16): per-column multipliers read as wp[16*16] by
 *    window_subband_sub1_C().  The values match tan(k*PI/64); the k == 8
 *    entry is annotated tan(PI/8).
 *  - Row 17 (offset 17*16): per-column multipliers read as wp[17*16] by
 *    window_subband_sub1_C() and as enwindow[column + 17*16] by
 *    window_subband_sub2().  The values match 2*cos(k*PI/32); the k == 8
 *    entry is annotated SQRT2.
 */
const FLOAT8 enwindow[288] =
{
	-4.77e-07 * 0.740951125354959 / 2.384e-06,	/* 15 */
	-4.77e-07 * 0.773010453362737 / 2.384e-06,	/* 14 */
	-4.77e-07 * 0.803207531480645 / 2.384e-06,	/* 13 */
	-4.77e-07 * 0.831469612302545 / 2.384e-06,	/* 12 */
	-4.77e-07 * 0.857728610000272 / 2.384e-06,	/* 11 */
	-4.77e-07 * 0.881921264348355 / 2.384e-06,	/* 10 */
	-9.54e-07 * 0.903989293123443 / 2.384e-06,	/* 9 */
	-9.54e-07 * 0.92387953251128675613 / 2.384e-06,		/* 8 */
	-9.54e-07 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-9.54e-07 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-1.431e-06 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-1.431e-06 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-1.907e-06 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-1.907e-06 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-2.384e-06 * 0.998795456205172 / 2.384e-06,	/* 1 */
	3.5780907e-02 * SQRT2 * 0.5 / 2.384e-06,

	1.03951e-04 * 0.740951125354959 / 2.384e-06,	/* 15 */
	1.05858e-04 * 0.773010453362737 / 2.384e-06,	/* 14 */
	1.07288e-04 * 0.803207531480645 / 2.384e-06,	/* 13 */
	1.08242e-04 * 0.831469612302545 / 2.384e-06,	/* 12 */
	1.08719e-04 * 0.857728610000272 / 2.384e-06,	/* 11 */
	1.08719e-04 * 0.881921264348355 / 2.384e-06,	/* 10 */
	1.08242e-04 * 0.903989293123443 / 2.384e-06,	/* 9 */
	1.06812e-04 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	1.05381e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	1.02520e-04 * 0.956940335732209 / 2.384e-06,	/* 6 */
	9.9182e-05 * 0.970031253194544 / 2.384e-06,	/* 5 */
	9.5367e-05 * 0.98078528040323 / 2.384e-06,	/* 4 */
	9.0122e-05 * 0.989176509964781 / 2.384e-06,	/* 3 */
	8.4400e-05 * 0.995184726672197 / 2.384e-06,	/* 2 */
	7.7724e-05 * 0.998795456205172 / 2.384e-06,	/* 1 */
	1.7876148e-02 * SQRT2 * 0.5 / 2.384e-06,

	9.53674e-04 * 0.740951125354959 / 2.384e-06,	/* 15 */
	9.30786e-04 * 0.773010453362737 / 2.384e-06,	/* 14 */
	9.02653e-04 * 0.803207531480645 / 2.384e-06,	/* 13 */
	8.68797e-04 * 0.831469612302545 / 2.384e-06,	/* 12 */
	8.29220e-04 * 0.857728610000272 / 2.384e-06,	/* 11 */
	7.8392e-04 * 0.881921264348355 / 2.384e-06,	/* 10 */
	7.31945e-04 * 0.903989293123443 / 2.384e-06,	/* 9 */
	6.74248e-04 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	6.10352e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	5.39303e-04 * 0.956940335732209 / 2.384e-06,	/* 6 */
	4.62532e-04 * 0.970031253194544 / 2.384e-06,	/* 5 */
	3.78609e-04 * 0.98078528040323 / 2.384e-06,	/* 4 */
	2.88486e-04 * 0.989176509964781 / 2.384e-06,	/* 3 */
	1.91689e-04 * 0.995184726672197 / 2.384e-06,	/* 2 */
	8.8215e-05 * 0.998795456205172 / 2.384e-06,	/* 1 */
	3.134727e-03 * SQRT2 * 0.5 / 2.384e-06,

	2.841473e-03 * 0.740951125354959 / 2.384e-06,	/* 15 */
	2.521515e-03 * 0.773010453362737 / 2.384e-06,	/* 14 */
	2.174854e-03 * 0.803207531480645 / 2.384e-06,	/* 13 */
	1.800537e-03 * 0.831469612302545 / 2.384e-06,	/* 12 */
	1.399517e-03 * 0.857728610000272 / 2.384e-06,	/* 11 */
	9.71317e-04 * 0.881921264348355 / 2.384e-06,	/* 10 */
	5.15938e-04 * 0.903989293123443 / 2.384e-06,	/* 9 */
	3.3379e-05 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	-4.75883e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-1.011848e-03 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-1.573563e-03 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-2.161503e-03 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-2.774239e-03 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-3.411293e-03 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-4.072189e-03 * 0.998795456205172 / 2.384e-06,	/* 1 */
	2.457142e-03 * SQRT2 * 0.5 / 2.384e-06,

	3.5758972e-02 * 0.740951125354959 / 2.384e-06,	/* 15 */
	3.5694122e-02 * 0.773010453362737 / 2.384e-06,	/* 14 */
	3.5586357e-02 * 0.803207531480645 / 2.384e-06,	/* 13 */
	3.5435200e-02 * 0.831469612302545 / 2.384e-06,	/* 12 */
	3.5242081e-02 * 0.857728610000272 / 2.384e-06,	/* 11 */
	3.5007000e-02 * 0.881921264348355 / 2.384e-06,	/* 10 */
	3.4730434e-02 * 0.903989293123443 / 2.384e-06,	/* 9 */
	3.4412861e-02 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	3.4055710e-02 * 0.941544065183021 / 2.384e-06,	/* 7 */
	3.3659935e-02 * 0.956940335732209 / 2.384e-06,	/* 6 */
	3.3225536e-02 * 0.970031253194544 / 2.384e-06,	/* 5 */
	3.2754898e-02 * 0.98078528040323 / 2.384e-06,	/* 4 */
	3.2248020e-02 * 0.989176509964781 / 2.384e-06,	/* 3 */
	3.1706810e-02 * 0.995184726672197 / 2.384e-06,	/* 2 */
	3.1132698e-02 * 0.998795456205172 / 2.384e-06,	/* 1 */
	9.71317e-04 * SQRT2 * 0.5 / 2.384e-06,

	3.401756e-03 * 0.740951125354959 / 2.384e-06,	/* 15 */
	3.643036e-03 * 0.773010453362737 / 2.384e-06,	/* 14 */
	3.858566e-03 * 0.803207531480645 / 2.384e-06,	/* 13 */
	4.049301e-03 * 0.831469612302545 / 2.384e-06,	/* 12 */
	4.215240e-03 * 0.857728610000272 / 2.384e-06,	/* 11 */
	4.357815e-03 * 0.881921264348355 / 2.384e-06,	/* 10 */
	4.477024e-03 * 0.903989293123443 / 2.384e-06,	/* 9 */
	4.573822e-03 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	4.649162e-03 * 0.941544065183021 / 2.384e-06,	/* 7 */
	4.703045e-03 * 0.956940335732209 / 2.384e-06,	/* 6 */
	4.737377e-03 * 0.970031253194544 / 2.384e-06,	/* 5 */
	4.752159e-03 * 0.98078528040323 / 2.384e-06,	/* 4 */
	4.748821e-03 * 0.989176509964781 / 2.384e-06,	/* 3 */
	4.728317e-03 * 0.995184726672197 / 2.384e-06,	/* 2 */
	4.691124e-03 * 0.998795456205172 / 2.384e-06,	/* 1 */
	2.18868e-04 * SQRT2 * 0.5 / 2.384e-06,

	9.83715e-04 * 0.740951125354959 / 2.384e-06,	/* 15 */
	9.91821e-04 * 0.773010453362737 / 2.384e-06,	/* 14 */
	9.95159e-04 * 0.803207531480645 / 2.384e-06,	/* 13 */
	9.94205e-04 * 0.831469612302545 / 2.384e-06,	/* 12 */
	9.89437e-04 * 0.857728610000272 / 2.384e-06,	/* 11 */
	9.80854e-04 * 0.881921264348355 / 2.384e-06,	/* 10 */
	9.68933e-04 * 0.903989293123443 / 2.384e-06,	/* 9 */
	9.54151e-04 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	9.35555e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	9.15051e-04 * 0.956940335732209 / 2.384e-06,	/* 6 */
	8.91685e-04 * 0.970031253194544 / 2.384e-06,	/* 5 */
	8.66413e-04 * 0.98078528040323 / 2.384e-06,	/* 4 */
	8.38757e-04 * 0.989176509964781 / 2.384e-06,	/* 3 */
	8.09669e-04 * 0.995184726672197 / 2.384e-06,	/* 2 */
	7.79152e-04 * 0.998795456205172 / 2.384e-06,	/* 1 */
	1.01566e-04 * SQRT2 * 0.5 / 2.384e-06,

	9.9182e-05 * 0.740951125354959 / 2.384e-06,	/* 15 */
	9.6321e-05 * 0.773010453362737 / 2.384e-06,	/* 14 */
	9.3460e-05 * 0.803207531480645 / 2.384e-06,	/* 13 */
	9.0599e-05 * 0.831469612302545 / 2.384e-06,	/* 12 */
	8.7261e-05 * 0.857728610000272 / 2.384e-06,	/* 11 */
	8.3923e-05 * 0.881921264348355 / 2.384e-06,	/* 10 */
	8.0585e-05 * 0.903989293123443 / 2.384e-06,	/* 9 */
	7.6771e-05 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	7.3433e-05 * 0.941544065183021 / 2.384e-06,	/* 7 */
	7.0095e-05 * 0.956940335732209 / 2.384e-06,	/* 6 */
	6.6280e-05 * 0.970031253194544 / 2.384e-06,	/* 5 */
	6.2943e-05 * 0.98078528040323 / 2.384e-06,	/* 4 */
	5.9605e-05 * 0.989176509964781 / 2.384e-06,	/* 3 */
	5.579e-05 * 0.995184726672197 / 2.384e-06,	/* 2 */
	5.2929e-05 * 0.998795456205172 / 2.384e-06,	/* 1 */
	1.3828e-05 * SQRT2 * 0.5 / 2.384e-06,

	1.2398e-05 * 0.740951125354959 / 2.384e-06,	/* 15 */
	1.1444e-05 * 0.773010453362737 / 2.384e-06,	/* 14 */
	1.0014e-05 * 0.803207531480645 / 2.384e-06,	/* 13 */
	9.060e-06 * 0.831469612302545 / 2.384e-06,	/* 12 */
	8.106e-06 * 0.857728610000272 / 2.384e-06,	/* 11 */
	7.629e-06 * 0.881921264348355 / 2.384e-06,	/* 10 */
	6.676e-06 * 0.903989293123443 / 2.384e-06,	/* 9 */
	6.199e-06 * 0.92387953251128675613 / 2.384e-06,		/* 8 */
	5.245e-06 * 0.941544065183021 / 2.384e-06,	/* 7 */
	4.768e-06 * 0.956940335732209 / 2.384e-06,	/* 6 */
	4.292e-06 * 0.970031253194544 / 2.384e-06,	/* 5 */
	3.815e-06 * 0.98078528040323 / 2.384e-06,	/* 4 */
	3.338e-06 * 0.989176509964781 / 2.384e-06,	/* 3 */
	3.338e-06 * 0.995184726672197 / 2.384e-06,	/* 2 */
	2.861e-06 * 0.998795456205172 / 2.384e-06,	/* 1 */
	3.0526638e-02 / 2.384e-06,

	1.91212e-04 * 0.740951125354959 / 2.384e-06,	/* 15 */
	1.65462e-04 * 0.773010453362737 / 2.384e-06,	/* 14 */
	1.40190e-04 * 0.803207531480645 / 2.384e-06,	/* 13 */
	1.16348e-04 * 0.831469612302545 / 2.384e-06,	/* 12 */
	9.3937e-05 * 0.857728610000272 / 2.384e-06,	/* 11 */
	7.2956e-05 * 0.881921264348355 / 2.384e-06,	/* 10 */
	5.2929e-05 * 0.903989293123443 / 2.384e-06,	/* 9 */
	3.4332e-05 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	1.7166e-05 * 0.941544065183021 / 2.384e-06,	/* 7 */
	9.54e-07 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-1.3828e-05 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-2.718e-05 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-3.9577e-05 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-5.0545e-05 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-6.0558e-05 * 0.998795456205172 / 2.384e-06,	/* 1 */
	4.638195e-03 / 2.384e-06,

	2.283096e-03 * 0.740951125354959 / 2.384e-06,	/* 15 */
	2.110004e-03 * 0.773010453362737 / 2.384e-06,	/* 14 */
	1.937389e-03 * 0.803207531480645 / 2.384e-06,	/* 13 */
	1.766682e-03 * 0.831469612302545 / 2.384e-06,	/* 12 */
	1.597881e-03 * 0.857728610000272 / 2.384e-06,	/* 11 */
	1.432419e-03 * 0.881921264348355 / 2.384e-06,	/* 10 */
	1.269817e-03 * 0.903989293123443 / 2.384e-06,	/* 9 */
	1.111031e-03 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	9.56535e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	8.06808e-04 * 0.956940335732209 / 2.384e-06,	/* 6 */
	6.61850e-04 * 0.970031253194544 / 2.384e-06,	/* 5 */
	5.22137e-04 * 0.98078528040323 / 2.384e-06,	/* 4 */
	3.88145e-04 * 0.989176509964781 / 2.384e-06,	/* 3 */
	2.59876e-04 * 0.995184726672197 / 2.384e-06,	/* 2 */
	1.37329e-04 * 0.998795456205172 / 2.384e-06,	/* 1 */
	7.47204e-04 / 2.384e-06,

	1.6994476e-02 * 0.740951125354959 / 2.384e-06,	/* 15 */
	1.6112804e-02 * 0.773010453362737 / 2.384e-06,	/* 14 */
	1.5233517e-02 * 0.803207531480645 / 2.384e-06,	/* 13 */
	1.4358521e-02 * 0.831469612302545 / 2.384e-06,	/* 12 */
	1.3489246e-02 * 0.857728610000272 / 2.384e-06,	/* 11 */
	1.2627602e-02 * 0.881921264348355 / 2.384e-06,	/* 10 */
	1.1775017e-02 * 0.903989293123443 / 2.384e-06,	/* 9 */
	1.0933399e-02 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	1.0103703e-02 * 0.941544065183021 / 2.384e-06,	/* 7 */
	9.287834e-03 * 0.956940335732209 / 2.384e-06,	/* 6 */
	8.487225e-03 * 0.970031253194544 / 2.384e-06,	/* 5 */
	7.703304e-03 * 0.98078528040323 / 2.384e-06,	/* 4 */
	6.937027e-03 * 0.989176509964781 / 2.384e-06,	/* 3 */
	6.189346e-03 * 0.995184726672197 / 2.384e-06,	/* 2 */
	5.462170e-03 * 0.998795456205172 / 2.384e-06,	/* 1 */
	4.9591e-05 / 2.384e-06,

	-1.8756866e-02 * 0.740951125354959 / 2.384e-06,	/* 15 */
	-1.9634247e-02 * 0.773010453362737 / 2.384e-06,	/* 14 */
	-2.0506859e-02 * 0.803207531480645 / 2.384e-06,	/* 13 */
	-2.1372318e-02 * 0.831469612302545 / 2.384e-06,	/* 12 */
	-2.2228718e-02 * 0.857728610000272 / 2.384e-06,	/* 11 */
	-2.3074150e-02 * 0.881921264348355 / 2.384e-06,	/* 10 */
	-2.3907185e-02 * 0.903989293123443 / 2.384e-06,	/* 9 */
	-2.4725437e-02 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	-2.5527000e-02 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-2.6310921e-02 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-2.7073860e-02 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-2.7815342e-02 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-2.8532982e-02 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-2.9224873e-02 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-2.9890060e-02 * 0.998795456205172 / 2.384e-06,	/* 1 */
	4.756451e-03 / 2.384e-06,

	-2.630711e-03 * 0.740951125354959 / 2.384e-06,	/* 15 */
	-2.803326e-03 * 0.773010453362737 / 2.384e-06,	/* 14 */
	-2.974033e-03 * 0.803207531480645 / 2.384e-06,	/* 13 */
	-3.14188e-03 * 0.831469612302545 / 2.384e-06,	/* 12 */
	-3.306866e-03 * 0.857728610000272 / 2.384e-06,	/* 11 */
	-3.467083e-03 * 0.881921264348355 / 2.384e-06,	/* 10 */
	-3.622532e-03 * 0.903989293123443 / 2.384e-06,	/* 9 */
	-3.771782e-03 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	-3.914356e-03 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-4.048824e-03 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-4.174709e-03 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-4.290581e-03 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-4.395962e-03 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-4.489899e-03 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-4.570484e-03 * 0.998795456205172 / 2.384e-06,	/* 1 */
	2.1458e-05 / 2.384e-06,

	-2.47478e-04 * 0.740951125354959 / 2.384e-06,	/* 15 */
	-2.77042e-04 * 0.773010453362737 / 2.384e-06,	/* 14 */
	-3.07560e-04 * 0.803207531480645 / 2.384e-06,	/* 13 */
	-3.39031e-04 * 0.831469612302545 / 2.384e-06,	/* 12 */
	-3.71456e-04 * 0.857728610000272 / 2.384e-06,	/* 11 */
	-4.04358e-04 * 0.881921264348355 / 2.384e-06,	/* 10 */
	-4.38213e-04 * 0.903989293123443 / 2.384e-06,	/* 9 */
	-4.72546e-04 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	-5.07355e-04 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-5.42164e-04 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-5.76973e-04 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-6.11782e-04 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-6.46591e-04 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-6.80923e-04 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-7.14302e-04 * 0.998795456205172 / 2.384e-06,	/* 1 */
	-6.9618e-05 / 2.384e-06,	/*    2.384e-06/2.384e-06 */

	-1.4782e-05 * 0.740951125354959 / 2.384e-06,	/* 15 */
	-1.6689e-05 * 0.773010453362737 / 2.384e-06,	/* 14 */
	-1.8120e-05 * 0.803207531480645 / 2.384e-06,	/* 13 */
	-1.9550e-05 * 0.831469612302545 / 2.384e-06,	/* 12 */
	-2.1458e-05 * 0.857728610000272 / 2.384e-06,	/* 11 */
	-2.3365e-05 * 0.881921264348355 / 2.384e-06,	/* 10 */
	-2.5272e-05 * 0.903989293123443 / 2.384e-06,	/* 9 */
	-2.7657e-05 * 0.92387953251128675613 / 2.384e-06,	/* 8 */
	-3.0041e-05 * 0.941544065183021 / 2.384e-06,	/* 7 */
	-3.2425e-05 * 0.956940335732209 / 2.384e-06,	/* 6 */
	-3.4809e-05 * 0.970031253194544 / 2.384e-06,	/* 5 */
	-3.7670e-05 * 0.98078528040323 / 2.384e-06,	/* 4 */
	-4.0531e-05 * 0.989176509964781 / 2.384e-06,	/* 3 */
	-4.3392e-05 * 0.995184726672197 / 2.384e-06,	/* 2 */
	-4.6253e-05 * 0.998795456205172 / 2.384e-06,	/* 1 */
	0.0,	/* unused */

	/* row 16: multipliers applied to the s and u sums (wp[16*16]) */
	9.063471690191471e-01,	/* 15 */
	8.206787908286602e-01,	/* 14 */
	7.416505462720353e-01,	/* 13 */
	6.681786379192989e-01,	/* 12 */
	5.993769336819237e-01,	/* 11 */
	5.345111359507916e-01,	/* 10 */
	4.729647758913199e-01,	/* 9 */
	4.1421356237309504879e-01,	/* tan(PI/8) */		/* 8 */
	3.578057213145241e-01,	/* 7 */
	3.033466836073424e-01,	/* 6 */
	2.504869601913055e-01,	/* 5 */
	1.989123673796580e-01,	/* 4 */
	1.483359875383474e-01,	/* 3 */
	9.849140335716425e-02,	/* 2 */
	4.912684976946725e-02,	/* 1 */
	0.0,	/* unused */

	/* row 17: multipliers applied to the difference terms (wp[17*16]) */
	1.960342806591213e-01,	/* 15 */
	3.901806440322567e-01,	/* 14 */
	5.805693545089249e-01,	/* 13 */
	7.653668647301797e-01,	/* 12 */
	9.427934736519954e-01,	/* 11 */
	1.111140466039205e+00,	/* 10 */
	1.268786568327291e+00,	/* 9 */
	1.414213562373095e+00,	/* SQRT2 */	/* 8 */
	1.546020906725474e+00,	/* 7 */
	1.662939224605090e+00,	/* 6 */
	1.763842528696710e+00,	/* 5 */
	1.847759065022573e+00,	/* 4 */
	1.913880671464418e+00,	/* 3 */
	1.961570560806461e+00,	/* 2 */
	1.990369453344394e+00,	/* 1 */
	0.0	/* unused */
};
#endif


/*
 * NS / NL: 12 and 36.  NL is the row length of mdct_longTbl; the values
 * equal the short- and long-block time-domain lengths quoted in this
 * file's MDCT description comment.
 */
#define NS 12
#define NL 36

/*
 * One packed pool of 36 float constants.  Five logical sub-tables live
 * back to back; all but the first are reached through the pointer macros
 * defined right after the array.
 */
static const float wins[] = {
	/* [0..2]   wins -- read directly as wins[0..2] by mdct_short() */
	1.316524975873958e-01, 4.142135623730950e-01, 7.673269879789602e-01,

	/* [3..11]  tantab_l -- nine entries */
	1.091308501069271e+00, 1.303225372841206e+00, 1.569685577117490e+00,
	1.920982126971166e+00, 2.414213562373094e+00, 3.171594802363212e+00,
	4.510708503662055e+00, 7.595754112725146e+00, 2.290376554843115e+01,

	/* [12..19] cx -- eight entries */
	0.98480775301220802032, 0.64278760968653936292,
	0.34202014332566882393, 0.93969262078590842791,
	-0.17364817766693030343, -0.76604444311897790243,
	0.86602540378443870761, 0.500000000000000e+00,

	/* [20..27] ca -- eight entries */
	-5.144957554275265e-01, -4.717319685649723e-01,
	-3.133774542039019e-01, -1.819131996109812e-01,
	-9.457419252642064e-02, -4.096558288530405e-02,
	-1.419856857247115e-02, -3.699974673760037e-03,

	/* [28..35] cs -- eight entries */
	8.574929257125442e-01, 8.817419973177052e-01,
	9.496286491027329e-01, 9.833145924917901e-01,
	9.955178160675857e-01, 9.991605581781475e-01,
	9.998991952444470e-01, 9.999931550702802e-01,
};

/* Start of each sub-table inside wins[]. */
#define tantab_l (wins+3)
#define cx (wins+12)
#define ca (wins+20)
#define cs (wins+28)

/************************************************************************
*
* window_subband()
*
* PURPOSE:  Overlapping window on PCM samples
*
* SEMANTICS:
* 32 16-bit pcm samples are scaled to fractional 2's complement and
* concatenated to the end of the window buffer #x#. The updated window
* buffer #x# is then windowed by the analysis window #c# to produce the
* windowed sample #z#
*
************************************************************************/

/*
 *      new IDCT routine written by Takehiro TOMINAGA
 */
/*
 * Fixed permutation of the indices 0..31 (each value occurs exactly once).
 * Entries come in consecutive even/odd pairs (2m, 2m+1).
 */
static const int order[] = {
	 0,  1,   16, 17,    8,  9,   24, 25,
	 4,  5,   20, 21,   12, 13,   28, 29,
	 2,  3,   18, 19,   10, 11,   26, 27,
	 6,  7,   22, 23,   14, 15,   30, 31
};

/*
 * window_subband_sub1_C -- portable C form of the first windowing pass.
 *
 * For every table column col = 0..14 four running sums (s, t, u, v) of 16
 * products each are built from the samples around x1 and the entries
 * enwindow[col + 16*row], row = 0..15.  s and u are then scaled by row 16,
 * and the sum/difference results are stored to
 *     a[2*col], a[2*col + 1], a[2*col + 32], a[2*col + 33].
 *
 * x1  sample pointer; elements x1[-286] .. x1[256] are read.
 * a   output; elements a[0..29] and a[32..61] are written.
 *
 * Table reads cover rows 0..17 of columns 0..14, i.e. up to enwindow[286].
 * The terms of each sum are added in a fixed order on purpose: float
 * addition is not associative, so reordering would change the rounding.
 *
 * NOTE(review): the comments originally attached here were mis-encoded
 * Japanese.  They mentioned "enwindow[0]...enwindow[269] (269=18*15-1)" and
 * cycle counts of 3000/2600/2000 clocks for three CPUs whose names are no
 * longer readable.
 */
static void window_subband_sub1_C(const float *x1, float *a)
{
	int col;

	for (col = 0; col < 15; col++) {
		const float *fwd = x1 - col;		/* walks backwards, one sample per column */
		const float *rev = x1 - 62 + col;	/* starts at &x1[238 - 14 - 286], walks forwards */
		const float *win = enwindow + col;	/* column col; rows are 16 entries apart */
		float s, t, u, v;
		float diff_ts, diff_vu;
		int j;

		/* Stage 0 seeds the four accumulators. */
		s = rev[-224] * win[ 0*16];
		v = fwd[ 256] * win[ 0*16];

		u = fwd[ 224] * win[15*16];
		t = fwd[ 224] * win[ 0*16];
		u += rev[-192] * win[ 0*16];
		t -= rev[-192] * win[15*16];

		/* Stages 1..7: samples move by 64 per stage, table rows by one. */
		for (j = 1; j < 8; j++) {
			const int shift = 64 * j;
			const float row_j = win[j * 16];
			const float row_16mj = win[(16 - j) * 16];
			const float row_15mj = win[(15 - j) * 16];

			s += rev[-224 + shift] * row_j;
			v -= rev[-224 + shift] * row_16mj;
			s += fwd[ 256 - shift] * row_16mj;
			v += fwd[ 256 - shift] * row_j;

			u += fwd[ 224 - shift] * row_15mj;
			t += fwd[ 224 - shift] * row_j;
			u += rev[-192 + shift] * row_j;
			t -= rev[-192 + shift] * row_15mj;
		}

		/* Closing terms against the middle row. */
		s += fwd[-256] * win[ 8*16];
		v -= rev[ 288] * win[ 8*16];

		/*
		 * Author's remark (tt 2000/Oct/25): this multiplier could be
		 * removed, but that would need 256 more FLOAT table entries;
		 * with data-cache behaviour in mind, such a huge table was
		 * judged not worth it.
		 */
		s *= win[16*16];
		u *= win[16*16];

		diff_ts = t - s;
		diff_vu = v - u;
		a[col * 2 + 0] = t + s;
		a[col * 2 + 32] = v + u;
		a[col * 2 + 1] = diff_ts * win[17*16];
		a[col * 2 + 33] = diff_vu * win[17*16];
	}
}
/* returns sum_j=0^31 a[j]*cos(PI*j*(k+1/2)/32), 0<=k<32 */
/* 1150clk@PIII700 */
/*
 * window_subband_sub2: fills in the remaining windowed values of two
 * consecutive 32-element blocks of a[] and then runs the same in-place
 * 32-point butterfly network on each block.
 *
 * x1  sample pointer; elements x1[-255] .. x1[241] are read.
 * a   work/output buffer.  Although declared a[SBLIMIT], elements
 *     a[0] .. a[63] are accessed, so at least 64 values must be supplied.
 *     a[0..29] and a[32..61] are read before being written here and must
 *     already hold data on entry -- presumably produced by
 *     window_subband_sub1_C(); TODO confirm against the caller.
 *
 * Table usage: column 15 of enwindow rows 0..14 for the windowing part,
 * and entries 1, 3, 5, 9, 11, 13 of row 17 (plus SQRT2) in the butterflies.
 */
static void
window_subband_sub2(const sample_t * x1, FLOAT8 a[SBLIMIT])
{
	FLOAT8 s, t, u, v, w, x, y, z, xr;
	int	i;

	/*
	 * t/s feed the first block, w/x the second; the second block uses
	 * the same coefficients with sample indices 32 higher.
	 */
	t =   x1[  -31]            * enwindow[ 0*16+15];
	w =   x1[    1]            * enwindow[ 0*16+15];
	w += (x1[ -31] - x1[  33]) * enwindow[ 1*16+15];
	t += (x1[ -63] - x1[   1]) * enwindow[ 1*16+15];
	t += (x1[ -95] + x1[  33]) * enwindow[ 2*16+15];
	w += (x1[ -63] + x1[  65]) * enwindow[ 2*16+15];
	w += (x1[ -95] - x1[  97]) * enwindow[ 3*16+15];
	t += (x1[-127] - x1[  65]) * enwindow[ 3*16+15];
	t += (x1[-159] + x1[  97]) * enwindow[ 4*16+15];
	w += (x1[-127] + x1[ 129]) * enwindow[ 4*16+15];
	w += (x1[-159] - x1[ 161]) * enwindow[ 5*16+15];
	t += (x1[-191] - x1[ 129]) * enwindow[ 5*16+15];
	t += (x1[-223] + x1[ 161]) * enwindow[ 6*16+15];
	w += (x1[-191] + x1[ 193]) * enwindow[ 6*16+15];
	w += (x1[-223] - x1[ 225]) * enwindow[ 7*16+15];
	t += (x1[-255] - x1[ 193]) * enwindow[ 7*16+15];

	s =   x1[-239]             * enwindow[11*16+15];
	x =   x1[-207]             * enwindow[11*16+15];
	s +=  x1[-175]             * enwindow[10*16+15];
	x +=  x1[-143]             * enwindow[10*16+15];
	s +=  x1[-111]             * enwindow[ 9*16+15];
	x +=  x1[ -79]             * enwindow[ 9*16+15];
	s +=  x1[ -47]             * enwindow[ 8*16+15];
	x +=  x1[ -15]             * enwindow[ 8*16+15];
	s -=  x1[  17]             * enwindow[12*16+15];
	x -=  x1[  49]             * enwindow[12*16+15];
	s -=  x1[  81]             * enwindow[13*16+15];
	x -=  x1[ 113]             * enwindow[13*16+15];
	s -=  x1[ 145]             * enwindow[14*16+15];
	x -=  x1[ 177]             * enwindow[14*16+15];
	/* last tap is subtracted unscaled (no table coefficient) */
	s -=  x1[ 209];
	x -=  x1[ 241];

	u = s - t;
	y = x - w;
	v = s + t;
	z = x + w;

	/* combine with the values already stored at a[14], a[15] / a[46], a[47] */
	t = a[14];
	w = a[46];
	s = a[15] - t;
	x = a[47] - w;

	a[14] = v - t;	// A3
	a[15] = u - s;	// A2
	a[30] = u + s;	// A1
	a[31] = v + t;	// A0

	a[46] = z - w;	// A3
	a[47] = y - x;	// A2
	a[62] = y + x;	// A1
	a[63] = z + w;	// A0

	/*
	 * NOTE(review): the original comment here was mis-encoded Japanese;
	 * it appears to ask how this section should be scheduled -- text
	 * not recoverable.
	 */
	/* Two passes: a[0..31] first, then (after a += 32) a[32..63]. */
	for(i=0; i<2; i++){
		xr = a[28] - a[0];
		a[0] += a[28];
		a[28] = xr * enwindow[13 + 17*16];
		xr = a[29] - a[1];
		a[1] += a[29];
		a[29] = xr * enwindow[13 + 17*16];

		xr = a[26] - a[2];
		a[2] += a[26];
		a[26] = xr * enwindow[11 + 17*16];
		xr = a[27] - a[3];
		a[3] += a[27];
		a[27] = xr * enwindow[11 + 17*16];

		xr = a[24] - a[4];
		a[4] += a[24];
		a[24] = xr * enwindow[9 + 17*16];
		xr = a[25] - a[5];
		a[5] += a[25];
		a[25] = xr * enwindow[9 + 17*16];

		/* this pair uses SQRT2 directly instead of a row-17 entry */
		xr = a[22] - a[6];
		a[6] += a[22];
		a[22] = xr * SQRT2;
		xr = a[23] - a[7];
		a[7] += a[23];
		a[23] = xr * SQRT2 - a[7];
		a[7] -= a[6];
		a[22] -= a[7];
		a[23] -= a[22];

		xr = a[6];
		a[6] = a[31] - xr;
		a[31] = a[31] + xr;
		xr = a[7];
		a[7] = a[30] - xr;
		a[30] = a[30] + xr;
		xr = a[22];
		a[22] = a[15] - xr;
		a[15] = a[15] + xr;
		xr = a[23];
		a[23] = a[14] - xr;
		a[14] = a[14] + xr;

		xr = a[20] - a[8];
		a[8] += a[20];
		a[20] = xr * enwindow[5 + 17*16];
		xr = a[21] - a[9];
		a[9] += a[21];
		a[21] = xr * enwindow[5 + 17*16];

		xr = a[18] - a[10];
		a[10] += a[18];
		a[18] = xr * enwindow[3 + 17*16];
		xr = a[19] - a[11];
		a[11] += a[19];
		a[19] = xr * enwindow[3 + 17*16];

		xr = a[16] - a[12];
		a[12] += a[16];
		a[16] = xr * enwindow[1 + 17*16];
		xr = a[17] - a[13];
		a[13] += a[17];
		a[17] = xr * enwindow[1 + 17*16];

		xr = -a[20] + a[24];
		a[20] += a[24];
		a[24] = xr * enwindow[3 + 17*16];
		xr = -a[21] + a[25];
		a[21] += a[25];
		a[25] = xr * enwindow[3 + 17*16];

		xr = a[4] - a[8];
		a[4] += a[8];
		a[8] = xr * enwindow[3 + 17*16];
		xr = a[5] - a[9];
		a[5] += a[9];
		a[9] = xr * enwindow[3 + 17*16];

		xr = a[0] - a[12];
		a[0] += a[12];
		a[12] = xr * enwindow[11 + 17*16];
		xr = a[1] - a[13];
		a[1] += a[13];
		a[13] = xr * enwindow[11 + 17*16];
		xr = a[16] - a[28];
		a[16] += a[28];
		a[28] = xr * enwindow[11 + 17*16];
		xr = -a[17] + a[29];
		a[17] += a[29];
		a[29] = xr * enwindow[11 + 17*16];

		xr = SQRT2 * (a[2] - a[10]);
		a[2] += a[10];
		a[10] = xr;
		xr = SQRT2 * (a[3] - a[11]);
		a[3] += a[11];
		a[11] = xr;
		xr = SQRT2 * (-a[18] + a[26]);
		a[18] += a[26];
		a[26] = xr - a[18];
		xr = SQRT2 * (-a[19] + a[27]);
		a[19] += a[27];
		a[27] = xr - a[19];

		xr = a[2];
		a[19] -= a[3];
		a[3] -= xr;
		a[2] = a[31] - xr;
		a[31] += xr;
		xr = a[3];
		a[11] -= a[19];
		a[18] -= xr;
		a[3] = a[30] - xr;
		a[30] += xr;
		xr = a[18];
		a[27] -= a[11];
		a[19] -= xr;
		a[18] = a[15] - xr;
		a[15] += xr;

		xr = a[19];
		a[10] -= xr;
		a[19] = a[14] - xr;
		a[14] += xr;
		xr = a[10];
		a[11] -= xr;
		a[10] = a[23] - xr;
		a[23] += xr;
		xr = a[11];
		a[26] -= xr;
		a[11] = a[22] - xr;
		a[22] += xr;
		xr = a[26];
		a[27] -= xr;
		a[26] = a[7] - xr;
		a[7] += xr;

		xr = a[27];
		a[27] = a[6] - xr;
		a[6] += xr;

		xr = SQRT2 * (a[0] - a[4]);
		a[0] += a[4];
		a[4] = xr;
		xr = SQRT2 * (a[1] - a[5]);
		a[1] += a[5];
		a[5] = xr;
		xr = SQRT2 * (a[16] - a[20]);
		a[16] += a[20];
		a[20] = xr;
		xr = SQRT2 * (a[17] - a[21]);
		a[17] += a[21];
		a[21] = xr;

		xr = -SQRT2 * (a[8] - a[12]);
		a[8] += a[12];
		a[12] = xr - a[8];
		xr = -SQRT2 * (a[9] - a[13]);
		a[9] += a[13];
		a[13] = xr - a[9];
		xr = -SQRT2 * (a[25] - a[29]);
		a[25] += a[29];
		a[29] = xr - a[25];
		xr = -SQRT2 * (a[24] + a[28]);
		a[24] -= a[28];
		a[28] = xr - a[24];

		/* running-difference chains: each result feeds the next line */
		xr = a[24] - a[16];
		a[24] = xr;
		xr = a[20] - xr;
		a[20] = xr;
		xr = a[28] - xr;
		a[28] = xr;

		xr = a[25] - a[17];
		a[25] = xr;
		xr = a[21] - xr;
		a[21] = xr;
		xr = a[29] - xr;
		a[29] = xr;

		xr = a[17] - a[1];
		a[17] = xr;
		xr = a[9] - xr;
		a[9] = xr;
		xr = a[25] - xr;
		a[25] = xr;
		xr = a[5] - xr;
		a[5] = xr;
		xr = a[21] - xr;
		a[21] = xr;
		xr = a[13] - xr;
		a[13] = xr;
		xr = a[29] - xr;
		a[29] = xr;

		xr = a[1] - a[0];
		a[1] = xr;
		xr = a[16] - xr;
		a[16] = xr;
		xr = a[17] - xr;
		a[17] = xr;
		xr = a[8] - xr;
		a[8] = xr;
		xr = a[9] - xr;
		a[9] = xr;
		xr = a[24] - xr;
		a[24] = xr;
		xr = a[25] - xr;
		a[25] = xr;
		xr = a[4] - xr;
		a[4] = xr;
		xr = a[5] - xr;
		a[5] = xr;
		xr = a[20] - xr;
		a[20] = xr;
		xr = a[21] - xr;
		a[21] = xr;
		xr = a[12] - xr;
		a[12] = xr;
		xr = a[13] - xr;
		a[13] = xr;
		xr = a[28] - xr;
		a[28] = xr;
		xr = a[29] - xr;
		a[29] = xr;

		/*
		 * Final stage: every index pair that sums to 31 is replaced by
		 * its sum (stored at the index loaded into s) and difference
		 * t - s (stored at the index loaded into t).
		 */
		s = a[ 0]; t = a[31];
		a[ 0] = s + t;
		a[31] = t - s;
		s = a[ 1]; t = a[30];
		a[ 1] = s + t;
		a[30] = t - s;
		s = a[29]; t = a[ 2];
		a[29] = s + t;
		a[ 2] = t - s;
		s = a[28]; t = a[ 3];
		a[28] = s + t;
		a[ 3] = t - s;
		s = a[ 4]; t = a[27];
		a[ 4] = s + t;
		a[27] = t - s;
		s = a[ 5]; t = a[26];
		a[ 5] = s + t;
		a[26] = t - s;
		s = a[25]; t = a[ 6];
		a[25] = s + t;
		a[ 6] = t - s;
		s = a[24]; t = a[ 7];
		a[24] = s + t;
		a[ 7] = t - s;
		s = a[ 8]; t = a[23];
		a[ 8] = s + t;
		a[23] = t - s;
		s = a[ 9]; t = a[22];
		a[ 9] = s + t;
		a[22] = t - s;
		s = a[21]; t = a[10];
		a[21] = s + t;
		a[10] = t - s;
		s = a[20]; t = a[11];
		a[20] = s + t;
		a[11] = t - s;
		s = a[12]; t = a[19];
		a[12] = s + t;
		a[19] = t - s;
		s = a[13]; t = a[18];
		a[13] = s + t;
		a[18] = t - s;
		s = a[17]; t = a[14];
		a[17] = s + t;
		a[14] = t - s;
		s = a[16]; t = a[15];
		a[16] = s + t;
		a[15] = t - s;

		/* advance to the second 32-element block */
		a += 32;
	}
	/*
	 * Compensate for inversion in the analysis filter
	 */
	/*
	 * a now points 64 elements past its entry value, so a[i - 32] with odd
	 * i addresses the odd-indexed elements of the second block only
	 * (entry-relative a[33], a[35], ..., a[63]); the first block is left
	 * as computed.
	 */
	for (i = 1; i < 32; i += 2) {
		a[i - 32] = -a[i - 32];
	}
}


/*-------------------------------------------------------------------*/
/*                                                                   */
/*   Function: Calculation of the MDCT                               */
/*   In the case of long blocks (type 0,1,3) there are               */
/*   36 coefficients in the time domain and 18 in the frequency      */
/*   domain.                                                         */
/*   In the case of short blocks (type 2) there are 3                */
/*   transformations with short length. This gives 12 coefficients   */
/*   in the time and 6 in the frequency domain. In this case the     */
/*   results are stored side by side in the vector out[].            */
/*                                                                   */
/*   New layer3                                                      */
/*                                                                   */
/*-------------------------------------------------------------------*/

/*
 * mdct_short: in-place transform of three interleaved short windows.
 *
 * inout holds 18 values; window w (w = 0, 1, 2) owns the six elements
 * inout[w], inout[w + 3], ..., inout[w + 15].  All six inputs of a window
 * are consumed before any of its outputs is stored.
 *
 * The scale constants were tagged tritab_s[...] by the original author.
 * Two spellings that differ only in the last digit (...280e-11 and
 * ...281e-11) occur and are kept exactly as found.
 */
static void
mdct_short(FLOAT8 * inout)
{
	int win;

	for (win = 0; win < 3; win++) {
		FLOAT8 *p = inout + win;
		const FLOAT8 x0 = p[0 * 3];
		const FLOAT8 x1 = p[1 * 3];
		const FLOAT8 x2 = p[2 * 3];
		const FLOAT8 x3 = p[3 * 3];
		const FLOAT8 x4 = p[4 * 3];
		const FLOAT8 x5 = p[5 * 3];
		FLOAT8 part_a, part_b;
		FLOAT8 c_sum, c_diff, s_sum, s_diff;
		FLOAT8 c_mid, s_mid;

		/* outer taps (0,3) and (2,5), "cosine" side */
		part_a = x2 * wins[0] - x5;
		part_b = x0 * wins[2] - x3;
		c_sum = part_a + part_b;
		c_diff = part_a - part_b;

		/* same taps, "sine" side */
		part_a = x5 * wins[0] + x2;
		part_b = x3 * wins[2] + x0;
		s_sum = part_a + part_b;
		s_diff = -part_a + part_b;

		/* middle taps (1,4), already scaled */
		c_mid = (x1 * wins[1] - x4) * 2.069978111953089e-11;
		s_mid = (x4 * wins[1] + x1) * 2.069978111953089e-11;

		p[3 * 0] = c_sum * 1.907525191737280e-11 + c_mid;
		p[3 * 5] = -s_sum * 1.907525191737280e-11 + s_mid;

		c_diff = c_diff * 0.86602540378443870761 * 1.907525191737281e-11;
		s_sum = s_sum * 0.5 * 1.907525191737281e-11 + s_mid;
		p[3 * 1] = c_diff - s_sum;
		p[3 * 2] = c_diff + s_sum;

		c_sum = c_sum * 0.5 * 1.907525191737281e-11 - c_mid;
		s_diff = s_diff * 0.86602540378443870761 * 1.907525191737281e-11;
		p[3 * 3] = c_sum + s_diff;
		p[3 * 4] = c_sum - s_diff;
	}
}

/*
 * prev_mdct_long_norm: builds the 18 work[] values from two band buffers
 * using the fixed window table wint0.
 *
 * For k = 0..8 the samples at rows k and 17-k (row stride 32 floats) of
 * band1 and band0 are combined with the coefficient pair
 * wint0[2k], wint0[2k+1], then rotated with tantab_l[k] into work[k] and
 * work[k + 9].
 *
 * work   output, 18 floats (work[0..17] are written)
 * type   not used by this function
 * band0  input; rows 0..17 at stride 32 are read
 * band1  input; rows 0..17 at stride 32 are read
 *
 * Author's timing note for the hand-unrolled form: 280clk@PIII.
 * NOTE(review): a second, mis-encoded Japanese remark about asm followed
 * that note; its meaning is not recoverable.
 */
static void prev_mdct_long_norm(float *work, const int type, const float *band0, const float *band1)
{
	/* nine (first, second) coefficient pairs, one pair per k */
	static const float wint0[] = {
		-5.456116028e-012, -2.382191666e-013,
		-4.878985237e-012, -6.423305898e-013,
		-4.240449138e-012, -9.400849240e-013,
		-3.559909095e-012, -1.122435044e-012,
		-2.858043446e-012, -1.183840352e-012,
		-2.156177580e-012, -1.122435044e-012,
		-1.475637754e-012, -9.400849240e-013,
		-8.371014927e-013, -6.423305898e-013,
		-2.599706203e-013, -2.382191666e-013,
	};
	int k;

	for (k = 0; k < 9; k++) {
		const int near_row = k * 32;
		const int far_row = (17 - k) * 32;
		float a, b;

		a = wint0[2 * k] * band1[near_row] + wint0[2 * k + 1] * band1[far_row];
		b =-wint0[2 * k + 1] * band0[near_row] + wint0[2 * k] * band0[far_row];
		work[k] = a - b * tantab_l[k];
		work[k + 9] = a * tantab_l[k] + b;
	}
}

/*
 * Window coefficients consumed by prev_mdct_long_edge().
 *
 * Row selection: prev_mdct_long_edge() indexes this table with
 * (type-1)>>1, so row 0 is the START_TYPE window and row 1 the
 * STOP_TYPE window.
 *
 * Row layout: each row is written as 9 groups of four values.  For
 * group n (n = 0..8) the reader uses
 *     [4n+0], [4n+1]  as weights for band1[n*32] and band1[(17-n)*32]
 *     [4n+2], [4n+3]  as weights for band0[n*32] and band0[(17-n)*32]
 */
static const float mdct_longTbl[][NL] = {
	{	/* START_TYPE */
		-5.461314127e-012, 0.000000000e+000,
		2.382191666e-013, 5.456116028e-012,
		-4.921085674e-012, 0.000000000e+000,
		6.423305898e-013, 4.878985237e-012,
		-4.343404976e-012, 0.000000000e+000,
		9.400849240e-013, 4.240449138e-012,
		-3.732668254e-012, 0.000000000e+000,
		1.122435044e-012, 3.559909095e-012,
		-3.093523918e-012, 0.000000000e+000,
		1.183840352e-012, 2.858043446e-012,
		-2.430835698e-012, 0.000000000e+000,
		1.122435044e-012, 2.156177580e-012,
		-1.734679024e-012, -2.283748277e-013,
		9.400849240e-013, 1.475637754e-012,
		-9.748253637e-013, -4.037858914e-013,
		6.423305898e-013, 8.371014927e-013,
		-2.797435135e-013, -2.146547507e-013,
		2.382191666e-013, 2.599706203e-013,
	},
	{	/* STOP_TYPE */
		-5.456116028e-012, -2.382191666e-013,
		0.000000000e+000, 5.461314127e-012,
		-4.878985237e-012, -6.423305898e-013,
		0.000000000e+000, 4.921085674e-012,
		-4.240449138e-012, -9.400849240e-013,
		0.000000000e+000, 4.343404976e-012,
		-3.559909095e-012, -1.122435044e-012,
		0.000000000e+000, 3.732668254e-012,
		-2.858043446e-012, -1.183840352e-012,
		0.000000000e+000, 3.093523918e-012,
		-2.156177580e-012, -1.122435044e-012,
		0.000000000e+000, 2.430835698e-012,
		-1.475637754e-012, -9.400849240e-013,
		2.283748277e-013, 1.734679024e-012,
		-8.371014927e-013, -6.423305898e-013,
		4.037858914e-013, 9.748253637e-013,
		-2.599706203e-013, -2.382191666e-013,
		2.146547507e-013, 2.797435135e-013,
	},
};

/*
 * Windowing / pre-rotation step that prepares the 18 inputs of the
 * long-block MDCT for the asymmetric window types (mdct() installs this
 * routine for every long block whose type is not NORM_TYPE).
 *
 *   work  : receives 18 floats, work[0..17]
 *   type  : block type; (type-1)>>1 selects the mdct_longTbl row
 *   band0 : read at band0[n*32] for n = 0..17
 *   band1 : read at band1[n*32] for n = 0..17
 */
static void prev_mdct_long_edge(float *work, const int type, const float *band0, const float *band1)
{
	const float *coef = mdct_longTbl[(type-1)>>1];
	int n;

	/*
	 * Sample n is paired with its mirror 17-n.  Every pair consumes four
	 * consecutive window values and one tantab_l[] entry.
	 */
	for (n = 0; n < 9; n++, coef += 4) {
		const int near = n * 32;
		const int far = (17 - n) * 32;
		const float a = coef[0] * band1[near] + coef[1] * band1[far];
		const float b = coef[2] * band0[near] - coef[3] * band0[far];

		work[n] = a - b * tantab_l[n];
		work[n + 9] = a * tantab_l[n] + b;
	}
}

/* 140clk@PIII */
static void next_mdct_long(float *mdct_enc)
{
#if 1
	float a, b;
//clkbegin();
	a = mdct_enc[7] * ca[7] + mdct_enc[-1 - 7] * cs[7];
	b = mdct_enc[7] * cs[7] - mdct_enc[-1 - 7] * ca[7];
	mdct_enc[-1 - 7] = a;
	mdct_enc[7] = b;
	a = mdct_enc[6] * ca[6] + mdct_enc[-1 - 6] * cs[6];
	b = mdct_enc[6] * cs[6] - mdct_enc[-1 - 6] * ca[6];
	mdct_enc[-1 - 6] = a;
	mdct_enc[6] = b;
	a = mdct_enc[5] * ca[5] + mdct_enc[-1 - 5] * cs[5];
	b = mdct_enc[5] * cs[5] - mdct_enc[-1 - 5] * ca[5];
	mdct_enc[-1 - 5] = a;
	mdct_enc[5] = b;
	a = mdct_enc[4] * ca[4] + mdct_enc[-1 - 4] * cs[4];
	b = mdct_enc[4] * cs[4] - mdct_enc[-1 - 4] * ca[4];
	mdct_enc[-1 - 4] = a;
	mdct_enc[4] = b;
	a = mdct_enc[3] * ca[3] + mdct_enc[-1 - 3] * cs[3];
	b = mdct_enc[3] * cs[3] - mdct_enc[-1 - 3] * ca[3];
	mdct_enc[-1 - 3] = a;
	mdct_enc[3] = b;
	a = mdct_enc[2] * ca[2] + mdct_enc[-1 - 2] * cs[2];
	b = mdct_enc[2] * cs[2] - mdct_enc[-1 - 2] * ca[2];
	mdct_enc[-1 - 2] = a;
	mdct_enc[2] = b;
	a = mdct_enc[1] * ca[1] + mdct_enc[-1 - 1] * cs[1];
	b = mdct_enc[1] * cs[1] - mdct_enc[-1 - 1] * ca[1];
	mdct_enc[-1 - 1] = a;
	mdct_enc[1] = b;
	a = mdct_enc[0] * ca[0] + mdct_enc[-1 - 0] * cs[0];
	b = mdct_enc[0] * cs[0] - mdct_enc[-1 - 0] * ca[0];
	mdct_enc[-1 - 0] = a;
	mdct_enc[0] = b;
//clkend();
#else
	int k;
	for (k = 7; k >= 0; --k) {
		float	bu, bd;
		bu = mdct_enc[k] * ca[k] + mdct_enc[-1 - k] * cs[k];
		bd = mdct_enc[k] * cs[k] - mdct_enc[-1 - k] * ca[k];
		
		mdct_enc[-1 - k] = bu;
		mdct_enc[k] = bd;
	}
#endif
}

/* NEED asm */
/* 286clk */
/* 202clk@K7-500 */

/*
 * Portable C version of the 18-point long-block transform kernel.
 *
 *   out : receives 18 values, out[0..17]
 *   in  : 18 input values, in[0..17] (mdct() passes the buffer filled by
 *         prev_mdct_long_norm / prev_mdct_long_edge)
 *
 * The routine first folds the inputs into sums and differences of
 * symmetric pairs, then combines them with the coefficient table cx[0..7]
 * (defined elsewhere).  Apart from out[17] and out[0], outputs are
 * produced in pairs as ct + st and ct - st.
 *
 * setup_mdct() installs this function as mdct_long when no CPU-specific
 * version is selected.
 */
void mdct_long_C(FLOAT8 * out, FLOAT8 * in)
{
	FLOAT8 ct, st;
	{
		FLOAT8 tc1, tc2, tc3, tc4, ts5, ts6, ts7, ts8;
		/* This half produces out[] indices 1,2, 5,6, 9,10, 13,14 and 17. */
		/* Differences from the upper half of in[] (9..17; in[13] is not used here) ... */
		tc1 = in[17] - in[9];
		tc3 = in[15] - in[11];
		tc4 = in[14] - in[12];
		/* ... and sums from the lower half (0..8, in[4] is the centre). */
		ts5 = in[0] + in[8];
		ts6 = in[1] + in[7];
		ts7 = in[2] + in[6];
		ts8 = in[3] + in[5];

		out[17] = (ts5 + ts7 - ts8) - (ts6 - in[4]);
		st = (ts5 + ts7 - ts8) * cx[7] + (ts6 - in[4]);
		ct = (tc1 - tc3 - tc4) * cx[6];
		out[5] = ct + st;
		out[6] = ct - st;

		/* tc2 and the rescaled ts6 are shared by the three pairs below. */
		tc2 = (in[16] - in[10]) * cx[6];
		ts6 = ts6 * cx[7] + in[4];
		ct = tc1 * cx[0] + tc2 + tc3 * cx[1] + tc4 * cx[2];
		st = -ts5 * cx[4] + ts6 - ts7 * cx[5] + ts8 * cx[3];
		out[1] = ct + st;
		out[2] = ct - st;

		ct = tc1 * cx[1] - tc2 - tc3 * cx[2] + tc4 * cx[0];
		st = -ts5 * cx[5] + ts6 - ts7 * cx[3] + ts8 * cx[4];
		out[9] = ct + st;
		out[10] = ct - st;

		ct = tc1 * cx[2] - tc2 + tc3 * cx[0] - tc4 * cx[1];
		st = ts5 * cx[3] - ts6 + ts7 * cx[4] - ts8 * cx[5];
		out[13] = ct + st;
		out[14] = ct - st;
	}

	{
		FLOAT8 ts1, ts2, ts3, ts4, tc5, tc6, tc7, tc8;

		/* This half produces out[] indices 0, 3,4, 7,8, 11,12 and 15,16. */
		/* Roles are swapped: differences from the lower half (in[4] is not used here) ... */
		ts1 = in[8] - in[0];
		ts3 = in[6] - in[2];
		ts4 = in[5] - in[3];
		/* ... and sums from the upper half (in[13] is the centre). */
		tc5 = in[17] + in[9];
		tc6 = in[16] + in[10];
		tc7 = in[15] + in[11];
		tc8 = in[14] + in[12];

		out[0] = (tc5 + tc7 + tc8) + (tc6 + in[13]);
		ct = (tc5 + tc7 + tc8) * cx[7] - (tc6 + in[13]);
		st = (ts1 - ts3 + ts4) * cx[6];
		out[11] = ct + st;
		out[12] = ct - st;

		/* ts2 and the rescaled tc6 are shared by the three pairs below. */
		ts2 = (in[7] - in[1]) * cx[6];
		tc6 = in[13] - tc6 * cx[7];
		ct = tc5 * cx[3] - tc6 + tc7 * cx[4] + tc8 * cx[5];
		st = ts1 * cx[2] + ts2 + ts3 * cx[0] + ts4 * cx[1];
		out[3] = ct + st;
		out[4] = ct - st;

		ct = -tc5 * cx[5] + tc6 - tc7 * cx[3] - tc8 * cx[4];
		st = ts1 * cx[1] + ts2 - ts3 * cx[2] - ts4 * cx[0];
		out[7] = ct + st;
		out[8] = ct - st;

		ct = -tc5 * cx[4] + tc6 - tc7 * cx[5] - tc8 * cx[3];
		st = ts1 * cx[0] - ts2 + ts3 * cx[1] - ts4 * cx[2];
		out[15] = ct + st;
		out[16] = ct - st;
	}
}

#ifdef CPU_I386
/* CPU-specific kernels; their definitions live outside this part of the file. */
void mdct_long_SSE2(FLOAT8 * out, FLOAT8 * in);
void mdct_long_E3DN(FLOAT8 * out, FLOAT8 * in);
void window_subband_sub1_FPU(const float *x1, float *a);
void window_subband_sub1_SSE(const float *x1, float *a);
void window_subband_sub1_SSE2(const float *x1, float *a);
void window_subband_sub1_E3DN(const float *x1, float *a);

/*
 * Point mdct_long and window_subband_sub1 at the implementations that
 * match the CPU capability bits in `unit` (MU_t* flags).
 *
 * Priority, highest first:
 *   1. E3DN + 3DN + MMX + SSE  -> E3DN kernels (Athlon XP)
 *   2. SSE2                    -> SSE2 mdct, SSE window
 *   3. SSE                     -> C mdct, SSE window
 *   4. E3DN + 3DN + MMX        -> E3DN kernels
 *   5. anything else           -> plain C kernels
 */
void setup_mdct(int unit)
{
	const int ext3dnow = (unit & MU_tE3DN) && (unit & MU_t3DN) && (unit & MU_tMMX);
	const int sse = (unit & MU_tSSE) != 0;
	const int sse2 = (unit & MU_tSSE2) != 0;

	if (ext3dnow && sse) {
		/* for Athlon XP */
		mdct_long = mdct_long_E3DN;
		window_subband_sub1 = window_subband_sub1_E3DN;
		return;
	}
	if (sse2) {
		mdct_long = mdct_long_SSE2;
		/*
		 * window_subband_sub1_SSE2 is deliberately not installed; the
		 * original author's note on it is garbled in this copy
		 * (presumably "slow!").  The SSE routine is used instead.
		 */
		window_subband_sub1 = window_subband_sub1_SSE;
		return;
	}
	if (sse) {
		mdct_long = mdct_long_C;
		window_subband_sub1 = window_subband_sub1_SSE;
		return;
	}
	if (ext3dnow) {
		mdct_long = mdct_long_E3DN;
		window_subband_sub1 = window_subband_sub1_E3DN;
		return;
	}
	mdct_long = mdct_long_C;
	window_subband_sub1 = window_subband_sub1_C;
}
#endif

/*
 * Run the two-stage window/subband routine over one block of samples for
 * every output channel (RO.channels_out).
 *
 *   subband : per-channel destination; written 64 floats per pass,
 *             starting at subband[ch][0]
 *   mfbuf   : per-channel source; the first pass starts 286 floats into
 *             mfbuf[ch][0] and each later pass starts 64 floats further on
 *
 * There are 18/2 = 9 passes per channel.  Each pass calls
 * window_subband_sub1() and then window_subband_sub2() on the same pair
 * of pointers.
 * NOTE(review): 64 floats per pass presumably means two rows of SBLIMIT
 * (= 32) subband samples -- confirm against the SBLIMIT definition.
 *
 * Per-band high-pass / low-pass amplitude scaling of the filterbank
 * output was already compiled out (#if 0) in the original and is not
 * performed here.
 */
void
pfb(float (*subband)[18][SBLIMIT], float (*mfbuf)[4][576])
{
	int	ch;

	for (ch = 0; ch < RO.channels_out; ch++) {
		float	*out = subband[ch][0];
		float	*in = mfbuf[ch][0] + 286;
		int	pass;

		for (pass = 0; pass < 18 / 2; pass++, out += 64, in += 64) {
			window_subband_sub1(in, out);
			window_subband_sub2(in, out);
		}
	}
}

/* reorder the three short blocks By Takehiro TOMINAGA */
/*
  Within each scalefactor band, data is given for successive
  time windows, beginning with window 0 and ending with window 2.
  Within each window, the quantized values are then arranged in
  order of increasing frequency...
*/
static
void freorder(int scalefac_band[],FLOAT8 ix_orig[576]) {
  /*
   * On entry the three short-block windows are interleaved: the value for
   * frequency line i of window w is stored at ix_orig[3*i + w].
   * For every scalefactor band [scalefac_band[sfb], scalefac_band[sfb+1])
   * the lines of window 0 are gathered first, then window 1, then
   * window 2, each in increasing line order.  The first RO.ixend
   * reordered values are finally copied back over ix_orig.
   */
  FLOAT8 reordered[576];
  int filled = 0;
  int sfb;

  for (sfb = 0; sfb < SBMAX_s; sfb++) {
    const int first = scalefac_band[sfb];
    const int limit = scalefac_band[sfb + 1];
    int win;

    for (win = 0; win < 3; win++) {
      const FLOAT8 *src = ix_orig + win;
      int line;

      for (line = first; line < limit; ++line) {
	reordered[filled++] = src[3 * line];
      }
    }
  }
  memcpy(ix_orig, reordered, RO.ixend * sizeof(FLOAT8));
}

/*
 * Compute the MDCT coefficients of one block for every output channel
 * (RO.channels_out).
 *
 *   xr         : per-channel output, 32 bands x 18 coefficients
 *   subband0   : per-channel subband samples; band `b` is read starting at
 *   subband1     subbandN[ch] + order[b] with a stride of 32 floats
 *                (NOTE(review): presumably two consecutive granules of
 *                polyphase output -- confirm with the caller)
 *   block_type : per-channel block type
 *
 * SHORT_TYPE blocks: each band is windowed with wins[] and transformed by
 * mdct_short(); bands at or above RO.lowpass_band, or at or below
 * RO.highpass_band, are zeroed.  The channel is then regrouped per
 * scalefactor band by freorder().
 *
 * All other block types: each band below RO.lowpass_band is windowed into
 * a scratch buffer (prev_mdct_long_norm for NORM_TYPE, prev_mdct_long_edge
 * otherwise), transformed by mdct_long(), and joined to the previous band
 * by the aliasing-reduction butterfly.  Everything from RO.lowpass_band up
 * to RO.ixend is zeroed.
 */
void
mdct(float (*xr)[576], float (*subband0)[576], float (*subband1)[576], int *block_type)
{
	int	k, ch, band;
	/*
	 * Scratch input for mdct_long(): 18 floats starting on a 16-byte
	 * boundary.  The storage is a float array (so it is accessed through
	 * its own type) with 4 spare elements; at most 3 are skipped to reach
	 * the boundary.  Only the low four address bits are inspected, and the
	 * pointer is converted to size_t rather than int so nothing is
	 * truncated on targets where pointers are wider than int.
	 */
	float	work_store[18 + 4];
	float	*work = work_store
		+ ((16 - ((size_t)work_store & 15)) & 15) / sizeof(float);

	for (ch = 0; ch < RO.channels_out; ch++) {
		float *mdct_enc = xr[ch];
		const int type = block_type[ch];
		if (type == SHORT_TYPE) {
			for (band = 0; band < 32; band++, mdct_enc += 18) {
				float	*band0 = subband0[ch] + order[band];
				float	*band1 = subband1[ch] + order[band];

				if (band >= RO.lowpass_band || band <= RO.highpass_band) {
					memset((char *) mdct_enc, 0, 18*sizeof(float));
				} else {
					/* k runs over -NS/4 .. -1; wins[k + 3] is the window weight. */
					for (k = -NS / 4; k < 0; k++) {
						float	w = wins[k + 3];
						mdct_enc[k*3+ 9] = band0[( 9+k)*32]*w - band0[( 8-k)*32];
						mdct_enc[k*3+18] = band0[(14-k)*32]*w + band0[(15+k)*32];
						mdct_enc[k*3+10] = band0[(15+k)*32]*w - band0[(14-k)*32];
						mdct_enc[k*3+19] = band1[( 2-k)*32]*w + band1[( 3+k)*32];
						mdct_enc[k*3+11] = band1[( 3+k)*32]*w - band1[( 2-k)*32];
						mdct_enc[k*3+20] = band1[( 8-k)*32]*w + band1[( 9+k)*32];
					}
					mdct_short(mdct_enc);
				}
			}
			freorder(RO.scalefac_band.s, xr[ch]);
		} else {
			void (*prev_mdct_long)(float *work, const int type, const float *band0, const float *band1);
			if (type == NORM_TYPE) {
				prev_mdct_long = prev_mdct_long_norm;
			} else {
				prev_mdct_long = prev_mdct_long_edge;
			}
			for (band = 0; band < RO.lowpass_band; band++, mdct_enc += 18) {
				float	*band0 = subband0[ch] + order[band];
				float	*band1 = subband1[ch] + order[band];

				if (band <= RO.highpass_band) {
					memset((char *) mdct_enc, 0, 18*sizeof(float));
				} else {
					prev_mdct_long(work, type, band0, band1);
					mdct_long(mdct_enc, work);
				}
				/*
				 * Perform aliasing reduction butterfly
				 * (needs a previous band, hence band > 0)
				 */
				if (band > 0)
					next_mdct_long(mdct_enc);
			}
			if (RO.lowpass_band < 32) {
				/* Zero everything from the first cut band up to RO.ixend. */
				memset((char *)mdct_enc, 0, (char *)(xr[ch]+RO.ixend)-(char *)mdct_enc);
				/*
				 * Butterfly between the last kept band and the first
				 * zeroed one.  Skipped when no band precedes mdct_enc,
				 * because next_mdct_long() touches mdct_enc[-8..-1].
				 */
				if (RO.lowpass_band > 0)
					next_mdct_long(mdct_enc);
			}
		}
	}
}
