/*
 * MDH WCET BENCHMARK SUITE. File version $Id: edn.c,v 1.1 2005/11/11
 * 10:14:10 ael01 Exp $
 */

/*
 * Changes: JG 2005/12/22: Inserted prototypes, changed type of main to int
 * etc. Added parenthesis in expressions in jpegdct. Removed unused variable
 * dx. Changed int to long to avoid problems when compiling to 16 bit target
 * Indented program.
 * JG 2006-01-27: Removed code in codebook
 */

/*
 * Each preprocessor directive must sit on its own line: a directive extends
 * to the end of the physical line it starts on.
 */
#define N 100
#define ORDER 50

void vec_mpy1(short y[], const short x[], short scaler);
long int mac(const short *a, const short *b, long int sqr, long int *sum);
void fir(const short array1[], const short coeff[], long int output[]);
void fir_no_red_ld(const short x[], const short h[], long int y[]);
long int latsynth(short b[], const short k[], long int n, long int f);
void iir1(const short *coefs, const short *input, long int *optr,
          long int *state);
long int codebook(long int mask, long int bitchanged, long int numbasis,
                  long int codeword, long int g, const short *d, short ddim,
                  short theta);
void jpegdct(short *d, short *r);

/************************************************************************
 *  Simple vector multiply
 *
 *  Adds (scaler * x[i]) >> 15 to y[i] for the first 150 elements.
 *  Both arrays must hold at least 150 elements; the count is hard-coded
 *  and is NOT derived from N.
 ************************************************************************/
void vec_mpy1(short y[], const short x[], short scaler)
{
    long int i;

    for (i = 0; i < 150; i++) {
        y[i] += ((scaler * x[i]) >> 15);
    }
}

/*****************************************************
 *  Dot Product
 *
 *  Over the first 150 elements, accumulates a[i] * b[i] on top of the
 *  value already stored in *sum (written back on exit) and accumulates
 *  b[i] * b[i] on top of sqr.
 *
 *  Returns the updated sqr.  *sum is read before it is written, so the
 *  caller must initialize it.
 *****************************************************/
long int mac(const short *a, const short *b, long int sqr, long int *sum)
{
    long int i;
    long int dotp = *sum;

    for (i = 0; i < 150; i++) {
        dotp += b[i] * a[i];
        sqr += b[i] * b[i];
    }

    *sum = dotp;
    return sqr;
}

/*****************************************************
 *  FIR Filter
 *
 *  Produces N - ORDER outputs.  Output i is the sum of
 *  array1[i + j] * coeff[j] for j in [0, ORDER), shifted right by 15.
 *  Reads array1[0 .. N - 2] and coeff[0 .. ORDER - 1].
 *****************************************************/
void fir(const short array1[], const short coeff[], long int output[])
{
    long int i, j, sum;

    for (i = 0; i < N - ORDER; i++) {
        sum = 0;
        for (j = 0; j < ORDER; j++) {
            sum += array1[i + j] * coeff[j];
        }
        output[i] = sum >> 15;
    }
}

/****************************************************
 *  FIR Filter with Redundant Load Elimination
 *
 *  By doing two outer loops simultaneously, you can potentially reuse
 *  data (depending on the DSP architecture).  x and h only need to be
 *  loaded once, therefore reducing redundant loads.  This reduces memory
 *  bandwidth and power.
 *
 *  Loop bounds are hard-coded: 100 outputs (y[0 .. 99]) are written, two
 *  per outer iteration, each from 32 coefficients (h[0 .. 31]).  The
 *  highest input index read is x[130].
 *****************************************************/
void fir_no_red_ld(const short x[], const short h[], long int y[])
{
    long int i, j;
    long int sum0, sum1;
    short x0, x1, h0, h1;

    for (j = 0; j < 100; j += 2) {
        sum0 = 0;
        sum1 = 0;
        x0 = x[j];

        for (i = 0; i < 32; i += 2) {
            /* x1 is shared by both accumulators, so it is loaded once. */
            x1 = x[j + i + 1];
            h0 = h[i];
            sum0 += x0 * h0;
            sum1 += x1 * h0;

            /* x0 is reloaded here and carried into the next iteration. */
            x0 = x[j + i + 2];
            h1 = h[i + 1];
            sum0 += x1 * h1;
            sum1 += x0 * h1;
        }

        y[j] = sum0 >> 15;
        y[j + 1] = sum1 >> 15;
    }
}

/*******************************************************
 *  Lattice Synthesis
 *
 *  This function doesn't follow the typical DSP multiply two vector
 *  operation, but it will point out the compiler's flexibility.
 *
 *  b is updated in place, k is read only, n is the number of elements
 *  used from both arrays and f is the running value, which is returned.
 *  n must be at least 1: b[n - 1] and k[n - 1] are read unconditionally.
 ********************************************************/
long int latsynth(short b[], const short k[], long int n, long int f)
{
    long int i;

    f -= b[n - 1] * k[n - 1];

    /* Walk downwards so b[i] is read before b[i + 1] is overwritten. */
    for (i = n - 2; i >= 0; i--) {
        f -= b[i] * k[i];
        b[i + 1] = b[i] + ((k[i] * (f >> 16)) >> 16);
    }

    b[0] = f >> 16;
    return f;
}

/*****************************************************
 *  IIR Filter
 *
 *  Runs input[0] through 50 cascaded sections.  Each section consumes
 *  four coefficients and two state words, so coefs must hold at least 200
 *  elements and state at least 100.  The state words are updated in place
 *  and the final value is stored in *optr.
 *****************************************************/
void iir1(const short *coefs, const short *input, long int *optr,
          long int *state)
{
    long int x;
    long int t;
    long int n;

    x = input[0];

    for (n = 0; n < 50; n++) {
        t = x + ((coefs[2] * state[0] + coefs[3] * state[1]) >> 15);
        x = t + ((coefs[0] * state[0] + coefs[1] * state[1]) >> 15);

        state[1] = state[0];
        state[0] = t;

        coefs += 4;             /* point to next filter coefs */
        state += 2;             /* point to next filter states */
    }

    *optr = x;
}

/*****************************************************
 *  Vocoder Codebook Search
 *
 *  Called as:
 *  dfm (mask=d bitchanged=1 numbasis=17 codeword=e[0] , g=d, d=a, ddim=c,
 *  theta =1
 *
 *  With the loop body removed (see below) the function iterates
 *  numbasis - bitchanged times without doing any work and returns g
 *  unchanged.  codeword, d, ddim and theta are currently unused.
 *****************************************************/
long int codebook(long int mask, long int bitchanged, long int numbasis,
                  long int codeword, long int g, const short *d, short ddim,
                  short theta)
{
    long int j;
    long int tmpMask;

    /*
     * NOTE(review): left-shifting a negative mask is undefined behavior in
     * C; tmpMask is only consumed by the disabled code below.
     */
    tmpMask = mask << 1;

    for (j = bitchanged + 1; j <= numbasis; j++) {
        /*
         * The following code is removed since it gave a memory access
         * exception.  It is OK since the return value does not control
         * the flow.  The loop always iterates a fixed number of times
         * independent of the loop body.
         *
         *   if (theta == !(!(codeword & tmpMask)))
         *       g += *(d + bitchanged * ddim + j);
         *   else
         *       g -= *(d + bitchanged * ddim + j);
         *   tmpMask <<= 1;
         */
    }

    return g;
}

/*****************************************************
 *  JPEG Discrete Cosine Transform
 *
 *  Transforms the 64 shorts at d in place using the 12 coefficients
 *  r[0 .. 11] (r is only read).  Two passes are made over the data:
 *
 *    pass 1: k = 1, m = 0, n = 13, p = 8
 *            (eight groups of 8 contiguous elements)
 *    pass 2: k = 8, m = 3, n = 16, p = 1
 *            (eight groups of 8 elements with stride 8)
 *
 *  k is the element stride inside a group, p the distance between the
 *  starts of consecutive groups, and m / n the right-shift amounts
 *  applied to the results.
 *****************************************************/
void jpegdct(short *d, short *r)
{
    long int t[12];
    short i, j, k, m, n, p;

    for (k = 1, m = 0, n = 13, p = 8; k <= 8;
         k += 7, m += 3, n += 3, p -= 7) {
        /*
         * Walk a per-pass cursor from the unchanged base pointer instead
         * of advancing d and rewinding it by 64 afterwards: the rewind
         * after the second pass would leave d pointing before the start
         * of the array, which is undefined pointer arithmetic.
         */
        short *v = d;

        for (i = 0; i < 8; i++, v += p) {
            /* Sums and differences of mirrored element pairs. */
            for (j = 0; j < 4; j++) {
                t[j] = v[k * j] + v[k * (7 - j)];
                t[7 - j] = v[k * j] - v[k * (7 - j)];
            }

            /* Even-indexed outputs, built from the sums t[0..3]. */
            t[8] = t[0] + t[3];
            t[9] = t[0] - t[3];
            t[10] = t[1] + t[2];
            t[11] = t[1] - t[2];
            v[0] = (t[8] + t[10]) >> m;
            v[4 * k] = (t[8] - t[10]) >> m;
            t[8] = (short) (t[11] + t[9]) * r[10];
            v[2 * k] = t[8] + (short) ((t[9] * r[9]) >> n);
            v[6 * k] = t[8] + (short) ((t[11] * r[11]) >> n);

            /* Odd-indexed outputs, built from the differences t[4..7]. */
            t[0] = (short) (t[4] + t[7]) * r[2];
            t[1] = (short) (t[5] + t[6]) * r[0];
            t[2] = t[4] + t[6];
            t[3] = t[5] + t[7];
            t[8] = (short) (t[2] + t[3]) * r[8];
            t[2] = (short) t[2] * r[1] + t[8];
            t[3] = (short) t[3] * r[3] + t[8];
            v[7 * k] = (short) (t[4] * r[4] + t[0] + t[2]) >> n;
            v[5 * k] = (short) (t[5] * r[6] + t[1] + t[3]) >> n;
            v[3 * k] = (short) (t[6] * r[5] + t[1] + t[2]) >> n;
            v[1 * k] = (short) (t[7] * r[7] + t[0] + t[3]) >> n;
        }
    }
}

/*
 * Benchmark driver: runs every kernel once, in a fixed order, on two
 * 200-element tables.  Each table is an 8-value pattern repeated 25 times.
 */
int main(void)
{
    short a[200] = {
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400,
        0x0000, 0x07ff, 0x0c00, 0x0800, 0x0200, 0xf800, 0xf300, 0x0400
    };
    short b[200] = {
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000,
        0x0c60, 0x0c40, 0x0c20, 0x0c00, 0xf600, 0xf400, 0xf200, 0xf000
    };
    short c = 0x3;

    /*
     * Zero-initialized because mac() reads output[0] as its starting
     * accumulator before any kernel has written to the array.
     */
    long int output[200] = {0};
    long int d = 0xAAAA;
    int e[1] = {0xEEEE};

    /*
     * Declared as memory variable so it doesn't get optimized out
     */

    vec_mpy1(a, b, c);
    c = mac(a, b, (long int) c, (long int *) output);
    fir(a, b, output);
    fir_no_red_ld(a, b, output);
    d = latsynth(a, b, N, d);
    /* output[0 .. 99] serves as filter state; the result goes to output[100]. */
    iir1(a, b, &output[100], output);
    e[0] = codebook(d, 1, 17, e[0], d, a, c, 1);
    jpegdct(a, b);

    return 0;
}