vdr/xine-lib-vdr/src/libffmpeg/libavcodec/ppc Makefile.am Makefile.in dsputil_altivec.c dsputil_altivec.h dsputil_ppc.c dsputil_ppc.h fdct_altivec.c fft_altivec.c gcc_fixes.h gmc_altivec.c

Mon, 04 Apr 2005 22:29:57 +0000

Update of /cvsroot/pkg-vdr-dvb/vdr/xine-lib-vdr/src/libffmpeg/libavcodec/ppc
In directory haydn:/tmp/cvs-serv2129/src/libffmpeg/libavcodec/ppc

Added Files:
	Makefile.am Makefile.in dsputil_altivec.c dsputil_altivec.h 
	dsputil_ppc.c dsputil_ppc.h fdct_altivec.c fft_altivec.c 
	gcc_fixes.h gmc_altivec.c 
Log Message:
Import of VDR-patched xine-lib.

--- NEW FILE: dsputil_ppc.c ---
/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "../dsputil.h"

#include "dsputil_ppc.h"

#ifdef HAVE_ALTIVEC
#include "dsputil_altivec.h"
#endif

extern void fdct_altivec(int16_t *block);
extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);

int mm_flags = 0;

/**
 * Report the multimedia instruction-set extensions usable on this CPU.
 *
 * @return MM_ALTIVEC when AltiVec support is compiled in (HAVE_ALTIVEC)
 *         and has_altivec() reports it at run time; 0 otherwise.
 */
int mm_support(void)
{
#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        return MM_ALTIVEC;
    }
#endif /* HAVE_ALTIVEC */
    return 0;
}

#ifdef POWERPC_PERFORMANCE_REPORT
/* Accumulated performance-counter statistics; the three dimensions are the
   PMC register, the instrumented function and the statistic kind. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/* Printable names of the instrumented functions.
   list below must match enum in dsputil_ppc.h
   Plain (const) char pointers: the entries are string literals and are only
   ever handed to a "%s" conversion. */
static const char *const perfname[] = {
  "ff_fft_calc_altivec",
  "gmc1_altivec",
  "dct_unquantize_h263_altivec",
  "fdct_altivec",
  "idct_add_altivec",
  "idct_put_altivec",
  "put_pixels16_altivec",
  "avg_pixels16_altivec",
  "avg_pixels8_altivec",
  "put_pixels8_xy2_altivec",
  "put_no_rnd_pixels8_xy2_altivec",
  "put_pixels16_xy2_altivec",
  "put_no_rnd_pixels16_xy2_altivec",
  "hadamard8_diff8x8_altivec",
  "hadamard8_diff16_altivec",
  "clear_blocks_dcbz32_ppc",
  "clear_blocks_dcbz128_ppc"
};
#include <stdio.h>
#endif

#ifdef POWERPC_PERFORMANCE_REPORT
/*
 * Log the collected performance-counter statistics.
 *
 * For every instrumented function and every enabled PMC register that has
 * recorded at least one sample, one entry is written through av_log() with
 * the minimum, maximum and average counter value and the sample count.
 */
void powerpc_display_perf_report(void)
{
  int func;

  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
  for (func = 0; func < powerpc_perf_total; func++) {
    int pmc;

    for (pmc = 0; pmc < POWERPC_NUM_PMC_ENABLED; pmc++) {
      const unsigned long long samples = perfdata[pmc][func][powerpc_data_num];
      double average;

      /* nothing was measured for this function/register pair */
      if (samples == (unsigned long long)0)
        continue;

      average = (double)perfdata[pmc][func][powerpc_data_sum] / (double)samples;
      av_log(NULL, AV_LOG_INFO,
             " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
             perfname[func],
             pmc + 1,
             perfdata[pmc][func][powerpc_data_min],
             perfdata[pmc][func][powerpc_data_max],
             average,
             samples);
    }
  }
}
#endif /* POWERPC_PERFORMANCE_REPORT */

/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
  clear_blocks_dcbz32_ppc will not work properly
  on PowerPC processors with a cache line size
  not equal to 32 bytes.
  Fortunately all processors used by Apple up to
  at least the 7450 (aka second generation G4)
  use 32-byte cache lines.
  This is due to the use of the 'dcbz' instruction.
  It simply clears a single cache line to zero,
  so you need to know the cache line size to use it!
  It's absurd, but it's fast...

  update 24/06/2003: Apple released the G5 yesterday,
  with a PPC970. Cache line size: 128 bytes. Oops.
  The semantics of dcbz were changed: it always clears
  32 bytes. So the function below will work, but will
  be slow. So I fixed check_dcbzl_effect to use dcbzl,
  which is defined to clear a cache line (as dcbz did before).
  So we can still distinguish, and use dcbz (32 bytes)
  or dcbzl (one cache line) as required.

  see <http://developer.apple.com/technotes/tn/tn2087.html>
  and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
/**
 * Zero six 8x8 coefficient blocks (6*64 DCTELEMs) using the 'dcbz'
 * instruction, which zeroes the whole 32-byte cache line containing the
 * addressed byte.
 *
 * Because dcbz always wipes a full line, it may only be issued for lines that
 * lie entirely inside the buffer. When the buffer starts in the middle of a
 * line (address bit 4 set), the first and the last 16 bytes are cleared with
 * ordinary stores and dcbz handles only the whole lines in between.
 *
 * @param blocks  start of the 6*64 DCTELEM buffer to clear. Only address
 *                bit 4 is inspected, so the buffer is assumed to be at least
 *                16-byte aligned.
 */
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
    const int total = sizeof(DCTELEM)*6*64;      /* buffer size in bytes   */
    const int edge  = 16 / (int)sizeof(DCTELEM); /* DCTELEMs per 16 bytes  */
    register int misal = ((unsigned long)blocks & 0x00000010);
    register int i = 0;
    int k;
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
    if (misal) {
      /* leading half line: element-wise stores so that exactly 16 bytes are
         written whatever the width of 'long' is */
      for (k = 0; k < edge; k++)
        blocks[k] = 0;
      i += 16;
    }
    /* Whole cache lines only. The bound must be "i + 32 <= total": with a
       misaligned buffer "i < total" would issue one more dcbz at offset
       total-16 and zero 16 bytes past the end of the buffer. */
    for ( ; i + 32 <= total ; i += 32) {
#ifndef __MWERKS__
      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
#else
      __dcbz( blocks, i );
#endif
    }
    if (misal) {
      /* trailing half line: the last 16 bytes of the buffer */
      for (k = 0; k < edge; k++)
        blocks[6*64 - edge + k] = 0;
    }
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
}

/* same as above, for when dcbzl clears a whole 128-byte cache line,
   i.e. on the PPC970 aka G5 */
#ifndef NO_DCBZL
/*
 * Zero six 8x8 coefficient blocks (6*64 DCTELEMs) with 'dcbzl', issuing one
 * instruction per 128 bytes. This is only done when the buffer address is a
 * multiple of 128; any other alignment falls back to memset().
 */
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
{
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
    register int unaligned = ((unsigned long)blocks & 0x0000007f);
    register int offset;
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
    if (!unaligned) {
        for (offset = 0; offset < sizeof(DCTELEM)*6*64; offset += 128) {
            asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (offset) : "memory");
        }
    } else {
        /* The unaligned case could probably be optimized as well, but
           there was not much point as the machines were not available
           yet (2003-06-26). */
        memset(blocks, 0, sizeof(DCTELEM)*6*64);
    }
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
}
#else
/* The assembler does not know dcbzl: plain memset() of the six blocks. */
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
}
#endif

#ifndef NO_DCBZL
/* check dcbz report how many bytes are set to 0 by dcbz */
/* update 24/06/2003 : replace dcbz by dcbzl to get
   the intended effect (Apple "fixed" dcbz)
   unfortunately this cannot be used unless the assembler
   knows about dcbzl ... */
/**
 * Measure how many bytes a single 'dcbzl' instruction zeroes.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on its
 * middle byte, and the zero bytes found afterwards are counted.
 *
 * @return the number of zero bytes found in the scratch buffer, or 0 when
 *         the buffer could not be allocated.
 */
long check_dcbzl_effect(void)
{
  register char *fakedata = (char*)av_malloc(1024);
  register char *fakedata_middle;
  register long zero = 0;
  register long i = 0;
  long count = 0;

  if (!fakedata)
  {
    return 0L;
  }

  fakedata_middle = (fakedata + 512);

  memset(fakedata, 0xFF, 1024);

  /* below the constraint "b" seems to mean "Address base register"
     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
  /* The "memory" clobber tells the compiler that the instruction modifies
     the buffer; without it the scan below could legally be evaluated
     against the 0xFF pattern written by memset(). */
  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

  for (i = 0; i < 1024 ; i ++)
  {
    if (fakedata[i] == (char)0)
      count++;
  }

  av_free(fakedata);

  return count;
}
#else
/* dcbzl is unavailable to the assembler: report that nothing is cleared. */
long check_dcbzl_effect(void)
{
  return 0;
}
#endif

/*
 * Install the PowerPC-specific routines into a DSPContext.
 *
 * c     - context whose function pointers are overwritten with the PowerPC
 *         versions selected below; entries not assigned here are left as
 *         they were on entry.
 * avctx - codec context; its dct_algo and idct_algo fields decide whether
 *         the AltiVec (I)DCT routines are installed.
 *
 * The clear_blocks routine is chosen from the result of
 * check_dcbzl_effect(), independently of AltiVec. Everything else is only
 * installed when the file is built with HAVE_ALTIVEC and has_altivec()
 * returns non-zero at run time.
 */
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
    // Common optimizations whether Altivec is available or not

  /* 32 or 128 bytes cleared by dcbzl selects the matching routine; any
     other result leaves c->clear_blocks untouched */
  switch (check_dcbzl_effect()) {
  case 32:
    c->clear_blocks = clear_blocks_dcbz32_ppc;
    break;
  case 128:
    c->clear_blocks = clear_blocks_dcbz128_ppc;
    break;
  default:
    break;
  }

#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        /* record the capability in the global flag word */
        mm_flags |= MM_ALTIVEC;

        // Altivec specific optimisations
        c->pix_abs[0][1] = sad16_x2_altivec;
        c->pix_abs[0][2] = sad16_y2_altivec;
        c->pix_abs[0][3] = sad16_xy2_altivec;
        c->pix_abs[0][0] = sad16_altivec;
        c->pix_abs[1][0] = sad8_altivec;
        c->sad[0]= sad16_altivec;
        c->sad[1]= sad8_altivec;
        c->pix_norm1 = pix_norm1_altivec;
        c->sse[1]= sse8_altivec;
        c->sse[0]= sse16_altivec;
        c->pix_sum = pix_sum_altivec;
        c->diff_pixels = diff_pixels_altivec;
        c->get_pixels = get_pixels_altivec;
// next one disabled as it's untested.
#if 0
        c->add_bytes= add_bytes_altivec;
#endif /* 0 */
        c->put_pixels_tab[0][0] = put_pixels16_altivec;
        /* the two functions do the same thing, so use the same code */
        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
        c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
// next one disabled as it's untested.
#if 0
        c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
#endif /* 0 */
        c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
        c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;

	c->gmc1 = gmc1_altivec;

        /* the hadamard routines are only installed when building with
           gcc 3.3 or newer */
#if (__GNUC__ * 100 + __GNUC_MINOR__ >= 330)
	c->hadamard8_diff[0] = hadamard8_diff16_altivec;
	c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
#endif

#ifdef CONFIG_ENCODERS
        /* forward DCT: only when the caller left the choice open or asked
           for AltiVec explicitly */
	if (avctx->dct_algo == FF_DCT_AUTO ||
	    avctx->dct_algo == FF_DCT_ALTIVEC)
	{
	    c->fdct = fdct_altivec;
	}
#endif //CONFIG_ENCODERS

        /* inverse DCT: same selection rule; the coefficient permutation
           type is set together with the routines (transposed for the
           AltiVec code, none for the reference C build) */
        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
                (avctx->idct_algo == FF_IDCT_ALTIVEC))
        {
            c->idct_put = idct_put_altivec;
            c->idct_add = idct_add_altivec;
#ifndef ALTIVEC_USE_REFERENCE_C_CODE
            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
            c->idct_permutation_type = FF_NO_IDCT_PERM;
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
        }

#ifdef POWERPC_PERFORMANCE_REPORT
        /* reset every statistic: min starts at the largest value, the
           others at zero.
           NOTE(review): this reset sits inside the has_altivec() branch, so
           it is skipped on CPUs without AltiVec even though the
           clear_blocks routines are instrumented too - confirm this is
           intended. */
        {
          int i, j;
          for (i = 0 ; i < powerpc_perf_total ; i++)
          {
	    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
	      {
		perfdata[j][i][powerpc_data_min] = (unsigned long long)0xFFFFFFFFFFFFFFFF;
		perfdata[j][i][powerpc_data_max] = (unsigned long long)0x0000000000000000;
		perfdata[j][i][powerpc_data_sum] = (unsigned long long)0x0000000000000000;
		perfdata[j][i][powerpc_data_num] = (unsigned long long)0x0000000000000000;
	      }
	  }
        }
#endif /* POWERPC_PERFORMANCE_REPORT */
    } else
#endif /* HAVE_ALTIVEC */
    {
        // Non-AltiVec PPC optimisations

        // ... pending ...
    }
}

--- NEW FILE: dsputil_altivec.c ---
/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
[...1610 lines suppressed...]
/* this is borrowed from the libmpeg2 library */
    {
      signal (SIGILL, sigill_handler);
      if (sigsetjmp (jmpbuf, 1)) {
        signal (SIGILL, SIG_DFL);
      } else {
        canjump = 1;

        asm volatile ("mtspr 256, %0\n\t"
                      "vand %%v0, %%v0, %%v0"
                      :
                      : "r" (-1));

        signal (SIGILL, SIG_DFL);
        return 1;
      }
    }
#endif /* CONFIG_DARWIN */
    return 0;
}

--- NEW FILE: fdct_altivec.c ---
/* ffmpeg/libavcodec/ppc/fdct_altivec.c, this file is part of the
 * AltiVec optimized library for the FFMPEG Multimedia System
 * Copyright (C) 2003  James Klicman <james@klicman.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "../common.h"
#include "../dsputil.h"
#include "dsputil_altivec.h"
#include "gcc_fixes.h"

#define vs16(v) ((vector signed short)(v))
#define vs32(v) ((vector signed int)(v))
#define vu8(v)  ((vector unsigned char)(v))
#define vu16(v) ((vector unsigned short)(v))
#define vu32(v) ((vector unsigned int)(v))

#define C1     0.98078525066375732421875000 /* cos(1*PI/16) */
#define C2     0.92387950420379638671875000 /* cos(2*PI/16) */
#define C3     0.83146959543228149414062500 /* cos(3*PI/16) */
#define C4     0.70710676908493041992187500 /* cos(4*PI/16) */
#define C5     0.55557024478912353515625000 /* cos(5*PI/16) */
#define C6     0.38268342614173889160156250 /* cos(6*PI/16) */
#define C7     0.19509032368659973144531250 /* cos(7*PI/16) */
#define SQRT_2 1.41421353816986083984375000 /* sqrt(2)      */

#define W0 -(2 * C2)
#define W1 (2 * C6)
#define W2 (SQRT_2 * C6)
#define W3 (SQRT_2 * C3)
#define W4 (SQRT_2 * (-C1 + C3 + C5 - C7))
#define W5 (SQRT_2 * ( C1 + C3 - C5 + C7))
#define W6 (SQRT_2 * ( C1 + C3 + C5 - C7))
#define W7 (SQRT_2 * ( C1 + C3 - C5 - C7))
#define W8 (SQRT_2 * ( C7 - C3))
#define W9 (SQRT_2 * (-C1 - C3))
#define WA (SQRT_2 * (-C3 - C5))
#define WB (SQRT_2 * ( C5 - C3))

static vector float fdctconsts[3] = {
    (vector float)AVV( W0, W1, W2, W3 ),
    (vector float)AVV( W4, W5, W6, W7 ),
    (vector float)AVV( W8, W9, WA, WB )
};

#define LD_W0 vec_splat(cnsts0, 0)
#define LD_W1 vec_splat(cnsts0, 1)
#define LD_W2 vec_splat(cnsts0, 2)
#define LD_W3 vec_splat(cnsts0, 3)
#define LD_W4 vec_splat(cnsts1, 0)
#define LD_W5 vec_splat(cnsts1, 1)
#define LD_W6 vec_splat(cnsts1, 2)
#define LD_W7 vec_splat(cnsts1, 3)
#define LD_W8 vec_splat(cnsts2, 0)
#define LD_W9 vec_splat(cnsts2, 1)
#define LD_WA vec_splat(cnsts2, 2)
#define LD_WB vec_splat(cnsts2, 3)

#define FDCTROW(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                  \
    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
                                                                    \
    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
                                                                    \
    b2 = vec_sub(x0, x3);               /* b2 = x0 - x3; */         \
    b6 = vec_sub(x1, x2);               /* b6 = x1 - x2; */         \
    b5 = vec_add(b6, b2);               /* b5 = b6 + b2; */         \
    cnst = LD_W2;                                                   \
    b5 = vec_madd(cnst, b5, mzero);     /* b5 = b5 * W2; */         \
    cnst = LD_W1;                                                   \
    b2 = vec_madd(cnst, b2, b5);        /* b2 = b5 + b2 * W1; */    \
    cnst = LD_W0;                                                   \
    b6 = vec_madd(cnst, b6, b5);        /* b6 = b5 + b6 * W0; */    \
                                                                    \
    x0 = vec_add(x4, x7);               /* x0 = x4 + x7; */         \
    x1 = vec_add(x5, x6);               /* x1 = x5 + x6; */         \
    x2 = vec_add(x4, x6);               /* x2 = x4 + x6; */         \
    x3 = vec_add(x5, x7);               /* x3 = x5 + x7; */         \
    x8 = vec_add(x2, x3);               /* x8 = x2 + x3; */         \
    cnst = LD_W3;                                                   \
    x8 = vec_madd(cnst, x8, mzero);     /* x8 = x8 * W3; */         \
                                                                    \
    cnst = LD_W8;                                                   \
    x0 = vec_madd(cnst, x0, mzero);     /* x0 *= W8; */             \
    cnst = LD_W9;                                                   \
    x1 = vec_madd(cnst, x1, mzero);     /* x1 *= W9; */             \
    cnst = LD_WA;                                                   \
    x2 = vec_madd(cnst, x2, x8);        /* x2 = x2 * WA + x8; */    \
    cnst = LD_WB;                                                   \
    x3 = vec_madd(cnst, x3, x8);        /* x3 = x3 * WB + x8; */    \
                                                                    \
    cnst = LD_W4;                                                   \
    b7 = vec_madd(cnst, x4, x0);        /* b7 = x4 * W4 + x0; */    \
    cnst = LD_W5;                                                   \
    b5 = vec_madd(cnst, x5, x1);        /* b5 = x5 * W5 + x1; */    \
    cnst = LD_W6;                                                   \
    b3 = vec_madd(cnst, x6, x1);        /* b3 = x6 * W6 + x1; */    \
    cnst = LD_W7;                                                   \
    b1 = vec_madd(cnst, x7, x0);        /* b1 = x7 * W7 + x0; */    \
                                                                    \
    b7 = vec_add(b7, x2);               /* b7 = b7 + x2; */         \
    b5 = vec_add(b5, x3);               /* b5 = b5 + x3; */         \
    b3 = vec_add(b3, x2);               /* b3 = b3 + x2; */         \
    b1 = vec_add(b1, x3);               /* b1 = b1 + x3; */         \
    /* }}} */

#define FDCTCOL(b0,b1,b2,b3,b4,b5,b6,b7) /* {{{ */                  \
    x0 = vec_add(b0, b7);               /* x0 = b0 + b7; */         \
    x7 = vec_sub(b0, b7);               /* x7 = b0 - b7; */         \
    x1 = vec_add(b1, b6);               /* x1 = b1 + b6; */         \
    x6 = vec_sub(b1, b6);               /* x6 = b1 - b6; */         \
    x2 = vec_add(b2, b5);               /* x2 = b2 + b5; */         \
    x5 = vec_sub(b2, b5);               /* x5 = b2 - b5; */         \
    x3 = vec_add(b3, b4);               /* x3 = b3 + b4; */         \
    x4 = vec_sub(b3, b4);               /* x4 = b3 - b4; */         \
                                                                    \
    b7 = vec_add(x0, x3);               /* b7 = x0 + x3; */         \
    b1 = vec_add(x1, x2);               /* b1 = x1 + x2; */         \
    b0 = vec_add(b7, b1);               /* b0 = b7 + b1; */         \
    b4 = vec_sub(b7, b1);               /* b4 = b7 - b1; */         \
                                                                    \
    b2 = vec_sub(x0, x3);               /* b2 = x0 - x3; */         \
    b6 = vec_sub(x1, x2);               /* b6 = x1 - x2; */         \
    b5 = vec_add(b6, b2);               /* b5 = b6 + b2; */         \
    cnst = LD_W2;                                                   \
    b5 = vec_madd(cnst, b5, mzero);     /* b5 = b5 * W2; */         \
    cnst = LD_W1;                                                   \
    b2 = vec_madd(cnst, b2, b5);        /* b2 = b5 + b2 * W1; */    \
    cnst = LD_W0;                                                   \
    b6 = vec_madd(cnst, b6, b5);        /* b6 = b5 + b6 * W0; */    \
                                                                    \
    x0 = vec_add(x4, x7);               /* x0 = x4 + x7; */         \
    x1 = vec_add(x5, x6);               /* x1 = x5 + x6; */         \
    x2 = vec_add(x4, x6);               /* x2 = x4 + x6; */         \
    x3 = vec_add(x5, x7);               /* x3 = x5 + x7; */         \
    x8 = vec_add(x2, x3);               /* x8 = x2 + x3; */         \
    cnst = LD_W3;                                                   \
    x8 = vec_madd(cnst, x8, mzero);     /* x8 = x8 * W3; */         \
                                                                    \
    cnst = LD_W8;                                                   \
    x0 = vec_madd(cnst, x0, mzero);     /* x0 *= W8; */             \
    cnst = LD_W9;                                                   \
    x1 = vec_madd(cnst, x1, mzero);     /* x1 *= W9; */             \
    cnst = LD_WA;                                                   \
    x2 = vec_madd(cnst, x2, x8);        /* x2 = x2 * WA + x8; */    \
    cnst = LD_WB;                                                   \
    x3 = vec_madd(cnst, x3, x8);        /* x3 = x3 * WB + x8; */    \
                                                                    \
    cnst = LD_W4;                                                   \
    b7 = vec_madd(cnst, x4, x0);        /* b7 = x4 * W4 + x0; */    \
    cnst = LD_W5;                                                   \
    b5 = vec_madd(cnst, x5, x1);        /* b5 = x5 * W5 + x1; */    \
    cnst = LD_W6;                                                   \
    b3 = vec_madd(cnst, x6, x1);        /* b3 = x6 * W6 + x1; */    \
    cnst = LD_W7;                                                   \
    b1 = vec_madd(cnst, x7, x0);        /* b1 = x7 * W7 + x0; */    \
                                                                    \
    b7 = vec_add(b7, x2);               /* b7 += x2; */             \
    b5 = vec_add(b5, x3);               /* b5 += x3; */             \
    b3 = vec_add(b3, x2);               /* b3 += x2; */             \
    b1 = vec_add(b1, x3);               /* b1 += x3; */             \
    /* }}} */

/* two dimensional discrete cosine transform */

/*
 * In-place forward DCT of one 8x8 block of 16-bit values.
 *
 * block - 64 int16_t values, read as eight vectors of eight shorts and
 *         overwritten with the result. The buffer is accessed with
 *         vec_ld/vec_st without any realignment code, so it is presumably
 *         expected to be 16-byte aligned - TODO confirm against callers.
 *
 * With ALTIVEC_USE_REFERENCE_C_CODE defined the work is delegated to
 * ff_jpeg_fdct_islow(). Otherwise the sequence is: transpose the shorts,
 * transform once (partly in 16-bit integer arithmetic, then in float),
 * transpose the floats, transform again with FDCTCOL, round, convert back
 * to shorts and store.
 *
 * Naming: row N is first held in bN0 viewed as eight shorts; after the
 * conversion to float it occupies the pair bN0 (from vec_unpackh) and
 * bN1 (from vec_unpackl).
 */
void fdct_altivec(int16_t *block)
{
POWERPC_PERF_DECLARE(altivec_fdct, 1);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
POWERPC_PERF_START_COUNT(altivec_fdct, 1);
    /* reference build: defer to the C implementation */
    void ff_jpeg_fdct_islow(int16_t *block);
    ff_jpeg_fdct_islow(block);
POWERPC_PERF_STOP_COUNT(altivec_fdct, 1);
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
    vector signed short *bp;
    vector float *cp;
    vector float b00, b10, b20, b30, b40, b50, b60, b70;
    vector float b01, b11, b21, b31, b41, b51, b61, b71;
    vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
    vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;

    POWERPC_PERF_START_COUNT(altivec_fdct, 1);

    /* setup constants {{{ */
    /* mzero = -0.0 */
    /* built without a memory load: every element is set to all ones and
       then shifted left by itself; the shift uses only the low-order bits
       of the count, which leaves just the sign bit set. mzero is the addend
       of every vec_madd below that is meant as a plain multiplication. */
    vu32(mzero) = vec_splat_u32(-1);
    vu32(mzero) = vec_sl(vu32(mzero), vu32(mzero));
    /* the three vectors of fdctconsts hold W0..W3, W4..W7 and W8..WB; the
       LD_Wx macros splat single elements out of them */
    cp = fdctconsts;
    cnsts0 = vec_ld(0, cp); cp++;
    cnsts1 = vec_ld(0, cp); cp++;
    cnsts2 = vec_ld(0, cp);
    /* }}} */

    /* 8x8 matrix transpose (vector short[8]) {{{ */
#define MERGE_S16(hl,a,b) vec_merge##hl(vs16(a), vs16(b))

    /* three rounds of merges; the first round is interleaved with the
       loads, each step pairing input row k with row k+4 (16*4 bytes on) */
    bp = (vector signed short*)block;
    vs16(b00) = vec_ld(0,    bp);
    vs16(b40) = vec_ld(16*4, bp);
    vs16(b01) = MERGE_S16(h, b00, b40);
    vs16(b11) = MERGE_S16(l, b00, b40);
    bp++;
    vs16(b10) = vec_ld(0,    bp);
    vs16(b50) = vec_ld(16*4, bp);
    vs16(b21) = MERGE_S16(h, b10, b50);
    vs16(b31) = MERGE_S16(l, b10, b50);
    bp++;
    vs16(b20) = vec_ld(0,    bp);
    vs16(b60) = vec_ld(16*4, bp);
    vs16(b41) = MERGE_S16(h, b20, b60);
    vs16(b51) = MERGE_S16(l, b20, b60);
    bp++;
    vs16(b30) = vec_ld(0,    bp);
    vs16(b70) = vec_ld(16*4, bp);
    vs16(b61) = MERGE_S16(h, b30, b70);
    vs16(b71) = MERGE_S16(l, b30, b70);

    vs16(x0) = MERGE_S16(h, b01, b41);
    vs16(x1) = MERGE_S16(l, b01, b41);
    vs16(x2) = MERGE_S16(h, b11, b51);
    vs16(x3) = MERGE_S16(l, b11, b51);
    vs16(x4) = MERGE_S16(h, b21, b61);
    vs16(x5) = MERGE_S16(l, b21, b61);
    vs16(x6) = MERGE_S16(h, b31, b71);
    vs16(x7) = MERGE_S16(l, b31, b71);

    vs16(b00) = MERGE_S16(h, x0, x4);
    vs16(b10) = MERGE_S16(l, x0, x4);
    vs16(b20) = MERGE_S16(h, x1, x5);
    vs16(b30) = MERGE_S16(l, x1, x5);
    vs16(b40) = MERGE_S16(h, x2, x6);
    vs16(b50) = MERGE_S16(l, x2, x6);
    vs16(b60) = MERGE_S16(h, x3, x7);
    vs16(b70) = MERGE_S16(l, x3, x7);

#undef MERGE_S16
    /* }}} */

/* Some of the initial calculations can be done as vector short before
 * conversion to vector float.  The following code section takes advantage
 * of this.
 */
#if 1
    /* fdct rows {{{ */
    /* same butterflies as the head of FDCTROW, but still on shorts */
    vs16(x0) = vec_add(vs16(b00), vs16(b70));
    vs16(x7) = vec_sub(vs16(b00), vs16(b70));
    vs16(x1) = vec_add(vs16(b10), vs16(b60));
    vs16(x6) = vec_sub(vs16(b10), vs16(b60));
    vs16(x2) = vec_add(vs16(b20), vs16(b50));
    vs16(x5) = vec_sub(vs16(b20), vs16(b50));
    vs16(x3) = vec_add(vs16(b30), vs16(b40));
    vs16(x4) = vec_sub(vs16(b30), vs16(b40));

    vs16(b70) = vec_add(vs16(x0), vs16(x3));
    vs16(b10) = vec_add(vs16(x1), vs16(x2));

    vs16(b00) = vec_add(vs16(b70), vs16(b10));
    vs16(b40) = vec_sub(vs16(b70), vs16(b10));

    /* CTF0(n): widen the eight shorts held in b<n>0 and convert them to
       float, high half into b<n>0 and low half into b<n>1 */
#define CTF0(n) \
    vs32(b##n##1) = vec_unpackl(vs16(b##n##0)); \
    vs32(b##n##0) = vec_unpackh(vs16(b##n##0)); \
    b##n##1 = vec_ctf(vs32(b##n##1), 0); \
    b##n##0 = vec_ctf(vs32(b##n##0), 0);

    CTF0(0);
    CTF0(4);

    vs16(b20) = vec_sub(vs16(x0), vs16(x3));
    vs16(b60) = vec_sub(vs16(x1), vs16(x2));

    CTF0(2);
    CTF0(6);

#undef CTF0

    /* outputs 2 and 6 in float, both halves at once (the W2/W1/W0 step
       of FDCTROW) */
    x0 = vec_add(b60, b20);
    x1 = vec_add(b61, b21);

    cnst = LD_W2;
    x0 = vec_madd(cnst, x0, mzero);
    x1 = vec_madd(cnst, x1, mzero);
    cnst = LD_W1;
    b20 = vec_madd(cnst, b20, x0);
    b21 = vec_madd(cnst, b21, x1);
    cnst = LD_W0;
    b60 = vec_madd(cnst, b60, x0);
    b61 = vec_madd(cnst, b61, x1);

    /* CTFX(x,b): convert the eight shorts held in x to float, high half
       into <b>0 and low half into <b>1 */
#define CTFX(x,b) \
    vs32(b##0) = vec_unpackh(vs16(x)); \
    vs32(b##1) = vec_unpackl(vs16(x)); \
    b##0 = vec_ctf(vs32(b##0), 0); \
    b##1 = vec_ctf(vs32(b##1), 0); \

    CTFX(x4, b7);
    CTFX(x5, b5);
    CTFX(x6, b3);
    CTFX(x7, b1);

#undef CTFX

    /* outputs 7, 5, 3 and 1 on the ...0 halves (the tail of FDCTROW with
       b70/b50/b30/b10 in the roles of x4/x5/x6/x7) */
    x0 = vec_add(b70, b10);
    x1 = vec_add(b50, b30);
    x2 = vec_add(b70, b30);
    x3 = vec_add(b50, b10);
    x8 = vec_add(x2, x3);
    cnst = LD_W3;
    x8 = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0 = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1 = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2 = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3 = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b70 = vec_madd(cnst, b70, x0);
    cnst = LD_W5;
    b50 = vec_madd(cnst, b50, x1);
    cnst = LD_W6;
    b30 = vec_madd(cnst, b30, x1);
    cnst = LD_W7;
    b10 = vec_madd(cnst, b10, x0);

    b70 = vec_add(b70, x2);
    b50 = vec_add(b50, x3);
    b30 = vec_add(b30, x2);
    b10 = vec_add(b10, x3);

    /* the same computation on the ...1 halves */
    x0 = vec_add(b71, b11);
    x1 = vec_add(b51, b31);
    x2 = vec_add(b71, b31);
    x3 = vec_add(b51, b11);
    x8 = vec_add(x2, x3);
    cnst = LD_W3;
    x8 = vec_madd(cnst, x8, mzero);

    cnst = LD_W8;
    x0 = vec_madd(cnst, x0, mzero);
    cnst = LD_W9;
    x1 = vec_madd(cnst, x1, mzero);
    cnst = LD_WA;
    x2 = vec_madd(cnst, x2, x8);
    cnst = LD_WB;
    x3 = vec_madd(cnst, x3, x8);

    cnst = LD_W4;
    b71 = vec_madd(cnst, b71, x0);
    cnst = LD_W5;
    b51 = vec_madd(cnst, b51, x1);
    cnst = LD_W6;
    b31 = vec_madd(cnst, b31, x1);
    cnst = LD_W7;
    b11 = vec_madd(cnst, b11, x0);

    b71 = vec_add(b71, x2);
    b51 = vec_add(b51, x3);
    b31 = vec_add(b31, x2);
    b11 = vec_add(b11, x3);
    /* }}} */
#else
    /* disabled alternative: convert everything to float first and run
       the generic FDCTROW macro on both halves */
    /* convert to float {{{ */
#define CTF(n) \
    vs32(b##n##1) = vec_unpackl(vs16(b##n##0)); \
    vs32(b##n##0) = vec_unpackh(vs16(b##n##0)); \
    b##n##1 = vec_ctf(vs32(b##n##1), 0); \
    b##n##0 = vec_ctf(vs32(b##n##0), 0); \

    CTF(0);
    CTF(1);
    CTF(2);
    CTF(3);
    CTF(4);
    CTF(5);
    CTF(6);
    CTF(7);

#undef CTF
    /* }}} */

    FDCTROW(b00, b10, b20, b30, b40, b50, b60, b70);
    FDCTROW(b01, b11, b21, b31, b41, b51, b61, b71);
#endif

    /* 8x8 matrix transpose (vector float[8][2]) {{{ */
    x0 = vec_mergel(b00, b20);
    x1 = vec_mergeh(b00, b20);
    x2 = vec_mergel(b10, b30);
    x3 = vec_mergeh(b10, b30);

    b00 = vec_mergeh(x1, x3);
    b10 = vec_mergel(x1, x3);
    b20 = vec_mergeh(x0, x2);
    b30 = vec_mergel(x0, x2);

    x4 = vec_mergel(b41, b61);
    x5 = vec_mergeh(b41, b61);
    x6 = vec_mergel(b51, b71);
    x7 = vec_mergeh(b51, b71);

    b41 = vec_mergeh(x5, x7);
    b51 = vec_mergel(x5, x7);
    b61 = vec_mergeh(x4, x6);
    b71 = vec_mergel(x4, x6);

    /* the two remaining groups (b01..b31 and b40..b70) are transposed and
       written into each other's registers, so both are read into
       temporaries before either is overwritten */
    x0 = vec_mergel(b01, b21);
    x1 = vec_mergeh(b01, b21);
    x2 = vec_mergel(b11, b31);
    x3 = vec_mergeh(b11, b31);

    x4 = vec_mergel(b40, b60);
    x5 = vec_mergeh(b40, b60);
    x6 = vec_mergel(b50, b70);
    x7 = vec_mergeh(b50, b70);

    b40 = vec_mergeh(x1, x3);
    b50 = vec_mergel(x1, x3);
    b60 = vec_mergeh(x0, x2);
    b70 = vec_mergel(x0, x2);

    b01 = vec_mergeh(x5, x7);
    b11 = vec_mergel(x5, x7);
    b21 = vec_mergeh(x4, x6);
    b31 = vec_mergel(x4, x6);
    /* }}} */

    /* second pass, entirely in float, once per half */
    FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
    FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);

    /* round, convert back to short {{{ */
    /* CTS(n): round both halves of row n, convert them to 32-bit integers,
       pack them into eight shorts and store the row at bp.
       NOTE(review): vec_pack is believed to be the non-saturating pack, so
       this presumably relies on every result fitting in 16 bits - confirm. */
#define CTS(n) \
    b##n##0 = vec_round(b##n##0); \
    b##n##1 = vec_round(b##n##1); \
    vs32(b##n##0) = vec_cts(b##n##0, 0); \
    vs32(b##n##1) = vec_cts(b##n##1, 0); \
    vs16(b##n##0) = vec_pack(vs32(b##n##0), vs32(b##n##1)); \
    vec_st(vs16(b##n##0), 0, bp);

    bp = (vector signed short*)block;
    CTS(0); bp++;
    CTS(1); bp++;
    CTS(2); bp++;
    CTS(3); bp++;
    CTS(4); bp++;
    CTS(5); bp++;
    CTS(6); bp++;
    CTS(7);

#undef CTS
    /* }}} */

POWERPC_PERF_STOP_COUNT(altivec_fdct, 1);
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}

/* vim:set foldmethod=marker foldlevel=0: */

--- NEW FILE: gmc_altivec.c ---
/*
 * GMC (Global Motion Compensation)
 * AltiVec-enabled
 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "../dsputil.h"

#include "gcc_fixes.h"

#include "dsputil_altivec.h"

/*
  altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
  to preserve proper dst alignment.
*/
/*
 * Condition handed to the POWERPC_PERF_* macros used in gmc1_altivec (those
 * macros are defined outside this file). It names the local parameter h, so
 * it is only meaningful inside that function.
 */
#define GMC1_PERF_COND (h==8)
/*
 * gmc1_altivec - bilinear interpolation of an 8-pixel-wide block, h rows high.
 *
 * Every output byte is
 *   (A*src[x] + B*src[x+1] + C*src[x+stride] + D*src[x+stride+1] + rounder) >> 8
 * with A=(16-x16)*(16-y16), B=x16*(16-y16), C=(16-x16)*y16, D=x16*y16,
 * so the four weights always add up to 256.
 *
 * dst      destination rows (annotated "align 8" in the signature)
 * src      source rows (annotated "align1"); bytes 0..8 of rows 0..h are used
 * stride   byte distance between consecutive rows, for both src and dst
 * h        number of rows to produce
 * x16, y16 horizontal / vertical weights (presumably 0..16 -- TODO confirm)
 * rounder  value added before the final shift by 8
 *
 * Two implementations are provided: a scalar one, selected with
 * ALTIVEC_USE_REFERENCE_C_CODE, and an AltiVec one.
 * NOTE(review): the AltiVec path stores the weights and the rounder as
 * unsigned short and accumulates in 16-bit lanes, so it matches the scalar
 * path only while every intermediate sum fits in 16 bits -- confirm that
 * callers guarantee this.
 */
void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
{
POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
    /* Bilinear weights for the four neighbouring source pixels. */
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

    /* One iteration per output row; the 8 columns are unrolled by hand. */
    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }

POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

#else /* ALTIVEC_USE_REFERENCE_C_CODE */
    /* The rounder replicated into all eight 16-bit lanes of one vector. */
    const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
      {rounder, rounder, rounder, rounder,
       rounder, rounder, rounder, rounder};
    /* The four weights, stored in memory so they can be loaded with vec_ld
       and then splatted across a vector each. */
    const unsigned short __attribute__ ((aligned(16))) ABCD[8] =
      {
        (16-x16)*(16-y16), /* A */
        (   x16)*(16-y16), /* B */
        (16-x16)*(   y16), /* C */
        (   x16)*(   y16), /* D */
        0, 0, 0, 0         /* padding */
      };
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    register const_vector unsigned short vcsr8 = (const_vector unsigned short)vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
    register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
    int i;
    /* Low four address bits: offset of dst / src inside their 16-byte vector. */
    unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long)src & 0x0000000F;

POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

    /* Broadcast each weight into all eight lanes of its own vector. */
    tempA = vec_ld(0, (unsigned short*)ABCD);
    Av = vec_splat(tempA, 0);
    Bv = vec_splat(tempA, 1);
    Cv = vec_splat(tempA, 2);
    Dv = vec_splat(tempA, 3);

    rounderV = vec_ld(0, (unsigned short*)rounder_a);

    // The 9 source bytes we need are contained in the 32 bytes loaded below.
    // The first row is loaded here, outside the loop, because inside the
    // loop the 'src+stride' row of one iteration is re-used as the 'src'
    // row of the next.
    src_0 = vec_ld(0, src);
    src_1 = vec_ld(16, src);
    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F)
    { // general case: realign the bytes starting at src+1
      srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    }
    else
    { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
      srcvB = src_1;
    }
    // Interleave zero bytes with the first 8 bytes of each vector; the
    // results are used below as vectors of eight unsigned shorts.
    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for(i=0; i<h; i++)
    {
      // Alignment of this row's destination and of the next source row.
      dst_odd = (unsigned long)dst & 0x0000000F;
      src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;

      // Current contents of the destination vector; the half that this row
      // does not produce is merged back in before the store below.
      dstv = vec_ld(0, dst);

      // The 9 bytes at src + stride are contained in these 32 bytes.
      // The two resulting vectors srcvC and srcvD are re-used
      // as srcvA and srcvB of the next iteration.
      src_0 = vec_ld(stride + 0, src);
      src_1 = vec_ld(stride + 16, src);
      srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

      if (src_really_odd != 0x0000000F)
      { // general case: realign the bytes starting at src+stride+1
        srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
      }
      else
      { // if (src+stride) & 0xF == 0xF, then (src+stride+1) is properly aligned on the second vector.
        srcvD = src_1;
      }

      srcvC = vec_mergeh(vczero, srcvC);
      srcvD = vec_mergeh(vczero, srcvD);

      // OK, now we (finally) do the math :-)
      // These four multiply-add instructions replace 32 int muls & 32 int adds:
      // tempD = A*srcA + B*srcB + C*srcC + D*srcD + rounder, per 16-bit lane.
      tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
      tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
      tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
      tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);

      // The lower source row of this iteration is the upper row of the next.
      srcvA = srcvC;
      srcvB = srcvD;

      // Same as the ">>8" of the scalar version.
      tempD = vec_sr(tempD, vcsr8);

      // Narrow the eight 16-bit results to bytes; they end up in the first
      // 8 bytes of dstv2, the remaining 8 bytes come from vczero.
      dstv2 = vec_pack(tempD, (vector unsigned short)vczero);

      // Combine the 8 new bytes with the 8 untouched bytes of dstv.
      // NOTE(review): vcprm() is defined outside this file; it presumably
      // builds a word-wise permute in which sN selects word N of the second
      // operand -- confirm in dsputil_altivec.h.
      if (dst_odd)
      {
        // dst is not 16-byte aligned: keep words 0-1, replace words 2-3
        dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
      }
      else
      {
        // dst is 16-byte aligned: replace words 0-1, keep words 2-3
        dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
      }

      vec_st(dstv2, 0, dst);

      dst += stride;
      src += stride;
    }

POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);

#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}

--- NEW FILE: Makefile.am ---
# Automake input for the PowerPC/AltiVec helper library of libavcodec.
# Shared rules and variables come from misc/Makefile.common.
include $(top_srcdir)/misc/Makefile.common

AM_CFLAGS = $(LIBFFMPEG_CFLAGS)
# CFLAGS is here to filter out -funroll-loops because it causes bad
# behavior of libavcodec
CFLAGS = `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'`

# Deliberately empty.
ASFLAGS =

# Convenience library: built, but never installed on its own.
noinst_LTLIBRARIES = libavcodec_ppc.la

# The real PowerPC sources, and a dummy source that is always compiled.
libavcodec_ppc_src =  dsputil_altivec.c \
		      dsputil_ppc.c \
		      fdct_altivec.c \
		      fft_altivec.c \
		      idct_altivec.c \
		      gmc_altivec.c \
		      mpegvideo_altivec.c \
		      mpegvideo_ppc.c
libavcodec_ppc_dummy = libavcodec_ppc_dummy.c

# Both source sets are always shipped in the distribution tarball.
EXTRA_DIST =  $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)

# NOTE: the PPC_ARCH conditional below is commented out, so ppc_modules is
# never set and libavcodec_ppc.la is built from the dummy source only.
#if PPC_ARCH
#ppc_modules = $(libavcodec_ppc_src)
#endif

libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy)

# Headers that are distributed but not installed.
noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h

--- NEW FILE: gcc_fixes.h ---
/*
 * gcc fixes for altivec.
 * Used to work around broken gcc (FSF gcc-3 before gcc-3.3)
 * and to stay somewhat compatible with Darwin.
 */

#ifndef _GCC_FIXES_
#define _GCC_FIXES_

#ifdef HAVE_ALTIVEC_H
#include <altivec.h>
#endif

/*
 * AVV(...) wraps a comma-separated list of values in the bracket style the
 * compiler in use expects:
 *   - CONFIG_DARWIN without __MWERKS__: parentheses, AVV(a,b) -> (a,b)
 *   - CONFIG_DARWIN with __MWERKS__: AVV expands to nothing, so the
 *     parentheses written at the use site remain
 *   - everything else: braces, AVV(a,b) -> {a,b}
 * Presumably intended for vector literals -- no use is visible in this file.
 */
#ifdef CONFIG_DARWIN
# ifndef __MWERKS__
#  define AVV(x...) (x)
# else
#  define AVV
# endif
#else
#define AVV(x...) {x}
#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)  

/* This code was provided to me by Bartosch Pixa
 * as a separate header file (broken_mergel.h).
 * thanks to lu_zero for the workaround.
 *
 * See this mail for more information:
 * http://gcc.gnu.org/ml/gcc/2003-04/msg00967.html
 */

/*
 * Byte-wise "merge low", built on vec_perm(): the result alternates
 * bytes 8..15 of A (selectors 0x08-0x0f) with bytes 8..15 of B
 * (selectors 0x18-0x1f), starting with A.
 */
static inline vector signed char ff_vmrglb (vector signed char const A,
                                            vector signed char const B)
{
    /* one (A byte, B byte) selector pair per output halfword */
    static const vector unsigned char merge_low_bytes = {
        0x08, 0x18,   0x09, 0x19,   0x0a, 0x1a,   0x0b, 0x1b,
        0x0c, 0x1c,   0x0d, 0x1d,   0x0e, 0x1e,   0x0f, 0x1f
    };

    return vec_perm (A, B, merge_low_bytes);
}

/*
 * Halfword-wise "merge low", built on vec_perm(): the result alternates
 * the 16-bit elements held in bytes 8..15 of A with those held in
 * bytes 8..15 of B, starting with A.
 */
static inline vector signed short ff_vmrglh (vector signed short const A,
                                             vector signed short const B)
{
    /* each row: two selector bytes from A, then two from B */
    static const vector unsigned char merge_low_halves = {
        0x08, 0x09,   0x18, 0x19,
        0x0a, 0x0b,   0x1a, 0x1b,
        0x0c, 0x0d,   0x1c, 0x1d,
        0x0e, 0x0f,   0x1e, 0x1f
    };

    return vec_perm (A, B, merge_low_halves);
}

/*
 * Word-wise "merge low", built on vec_perm(): the result alternates
 * the 32-bit elements held in bytes 8..15 of A with those held in
 * bytes 8..15 of B, starting with A.
 */
static inline vector signed int ff_vmrglw (vector signed int const A,
                                           vector signed int const B)
{
    /* each row selects one whole word: A, B, A, B */
    static const vector unsigned char merge_low_words = {
        0x08, 0x09, 0x0a, 0x0b,
        0x18, 0x19, 0x1a, 0x1b,
        0x0c, 0x0d, 0x0e, 0x0f,
        0x1c, 0x1d, 0x1e, 0x1f
    };

    return vec_perm (A, B, merge_low_words);
}
/*#define ff_vmrglb ff_vmrglb 
#define ff_vmrglh ff_vmrglh 
#define ff_vmrglw ff_vmrglw 
*/
/*
 * Replace the compiler-provided vec_mergel with a type-dispatching macro
 * that routes every supported pair of argument types to one of the
 * vec_perm()-based helpers ff_vmrglb / ff_vmrglh / ff_vmrglw defined in
 * this header. Arguments are cast to the signed element type the helper
 * takes, and the result is cast back to the callers' vector type.
 * Unsupported argument types end in __altivec_link_error_invalid_argument().
 * NOTE(review): __ch and __bin_args_eq are not defined in this file;
 * presumably they come from the <altivec.h> of the affected gcc versions.
 */
#undef vec_mergel

#define vec_mergel(a1, a2) \
__ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
      ((vector signed char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
      ((vector unsigned char) ff_vmrglb ((vector signed char) (a1), (vector signed char) (a2))), \
__ch (__bin_args_eq (vector signed short, (a1), vector signed short, (a2)), \
      ((vector signed short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector unsigned short, (a1), vector unsigned short, (a2)), \
      ((vector unsigned short) ff_vmrglh ((vector signed short) (a1), (vector signed short) (a2))), \
__ch (__bin_args_eq (vector float, (a1), vector float, (a2)), \
      ((vector float) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector signed int, (a1), vector signed int, (a2)), \
      ((vector signed int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
__ch (__bin_args_eq (vector unsigned int, (a1), vector unsigned int, (a2)), \
      ((vector unsigned int) ff_vmrglw ((vector signed int) (a1), (vector signed int) (a2))), \
    __altivec_link_error_invalid_argument ())))))))

#endif

#endif /* CONFIG_DARWIN */

/*
 * const_vector: spelling of a constant vector type qualifier.
 * It expands to "const vector" everywhere except under __MWERKS__,
 * where the const qualifier is dropped and it is plain "vector".
 */
#ifndef __MWERKS__
#define const_vector const vector
#else
#define const_vector vector
#endif

#endif /* _GCC_FIXES_ */

--- NEW FILE: Makefile.in ---
# Makefile.in generated by automake 1.9.3 from Makefile.am.
# @configure_input@

# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
# 2003, 2004  Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE.

@SET_MAKE@

SOURCES = $(libavcodec_ppc_la_SOURCES)

srcdir = @srcdir@
top_srcdir = @top_srcdir@
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
top_builddir = ../../../..
am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
INSTALL = @INSTALL@
install_sh_DATA = $(install_sh) -c -m 644
install_sh_PROGRAM = $(install_sh) -c
install_sh_SCRIPT = $(install_sh) -c
INSTALL_HEADER = $(INSTALL_DATA)
transform = $(program_transform_name)
NORMAL_INSTALL = :
PRE_INSTALL = :
POST_INSTALL = :
NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
	$(srcdir)/Makefile.in $(top_srcdir)/misc/Makefile.common
subdir = src/libffmpeg/libavcodec/ppc
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/_xine.m4 $(top_srcdir)/m4/aa.m4 \
	$(top_srcdir)/m4/alsa.m4 $(top_srcdir)/m4/arts.m4 \
	$(top_srcdir)/m4/as.m4 $(top_srcdir)/m4/caca.m4 \
	$(top_srcdir)/m4/codeset.m4 $(top_srcdir)/m4/directx.m4 \
	$(top_srcdir)/m4/dl.m4 $(top_srcdir)/m4/dvdnav.m4 \
	$(top_srcdir)/m4/esd.m4 $(top_srcdir)/m4/ffmpeg.m4 \
	$(top_srcdir)/m4/freetype2.m4 $(top_srcdir)/m4/gettext.m4 \
	$(top_srcdir)/m4/glibc21.m4 $(top_srcdir)/m4/iconv.m4 \
	$(top_srcdir)/m4/irixal.m4 $(top_srcdir)/m4/lcmessage.m4 \
	$(top_srcdir)/m4/libFLAC.m4 $(top_srcdir)/m4/libfame.m4 \
	$(top_srcdir)/m4/ogg.m4 $(top_srcdir)/m4/opengl.m4 \
	$(top_srcdir)/m4/pkg.m4 $(top_srcdir)/m4/progtest.m4 \
	$(top_srcdir)/m4/sdl.m4 $(top_srcdir)/m4/speex.m4 \
	$(top_srcdir)/m4/theora.m4 $(top_srcdir)/m4/vorbis.m4 \
	$(top_srcdir)/m4/xv.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
	$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libavcodec_ppc_la_LIBADD =
am__objects_1 = libavcodec_ppc_dummy.lo
am_libavcodec_ppc_la_OBJECTS = $(am__objects_1)
libavcodec_ppc_la_OBJECTS = $(am_libavcodec_ppc_la_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir) -I$(top_builddir)
depcomp = $(SHELL) $(top_srcdir)/depcomp
am__depfiles_maybe = depfiles
COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
	$(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
LTCOMPILE = $(LIBTOOL) --tag=CC --mode=compile $(CC) $(DEFS) \
	$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
	$(AM_CFLAGS) $(CFLAGS)
CCLD = $(CC)
LINK = $(LIBTOOL) --tag=CC --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
	$(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(libavcodec_ppc_la_SOURCES)
DIST_SOURCES = $(libavcodec_ppc_la_SOURCES)
HEADERS = $(noinst_HEADERS)
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
AAINFO = @AAINFO@
AALIB_CFLAGS = @AALIB_CFLAGS@
AALIB_CONFIG = @AALIB_CONFIG@
AALIB_LIBS = @AALIB_LIBS@
ACLOCAL = @ACLOCAL@
ACLOCAL_DIR = @ACLOCAL_DIR@
ALLOCA = @ALLOCA@
ALSA_CFLAGS = @ALSA_CFLAGS@
ALSA_LIBS = @ALSA_LIBS@
ALSA_STATIC_LIB = @ALSA_STATIC_LIB@
AMDEP_FALSE = @AMDEP_FALSE@
AMDEP_TRUE = @AMDEP_TRUE@
AMTAR = @AMTAR@
AR = @AR@
ARTS_CFLAGS = @ARTS_CFLAGS@
ARTS_CONFIG = @ARTS_CONFIG@
ARTS_LIBS = @ARTS_LIBS@
AS = @AS@
ASFLAGS = 
AUTOCONF = @AUTOCONF@
AUTOHEADER = @AUTOHEADER@
AUTOMAKE = @AUTOMAKE@
AWK = @AWK@
BUILD_ASF_FALSE = @BUILD_ASF_FALSE@
BUILD_ASF_TRUE = @BUILD_ASF_TRUE@
BUILD_DHA_KMOD_FALSE = @BUILD_DHA_KMOD_FALSE@
BUILD_DHA_KMOD_TRUE = @BUILD_DHA_KMOD_TRUE@
BUILD_FAAD_FALSE = @BUILD_FAAD_FALSE@
BUILD_FAAD_TRUE = @BUILD_FAAD_TRUE@
BUILD_INCLUDED_LIBINTL = @BUILD_INCLUDED_LIBINTL@
CACA_CFLAGS = @CACA_CFLAGS@
CACA_CONFIG = @CACA_CONFIG@
CACA_LIBS = @CACA_LIBS@
CATALOGS = @CATALOGS@
CATOBJEXT = @CATOBJEXT@
CC = @CC@
CCAS = @CCAS@
CCASCOMPILE = @CCASCOMPILE@
CCASFLAGS = @CCASFLAGS@
CCDEPMODE = @CCDEPMODE@
# CFLAGS is here to filter out -funroll-loops because it causes bad
# behavior of libavcodec
CFLAGS = `echo @CFLAGS@ | sed -e 's/-funroll-loops//g'`
CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CXX = @CXX@
CXXCPP = @CXXCPP@
CXXDEPMODE = @CXXDEPMODE@
CXXFLAGS = @CXXFLAGS@
CYGPATH_W = @CYGPATH_W@
DATADIRNAME = @DATADIRNAME@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
DEFS = @DEFS@
DEPCOMP = @DEPCOMP@
DEPDIR = @DEPDIR@
DEPMOD = @DEPMOD@
DIRECTFB_CFLAGS = @DIRECTFB_CFLAGS@
DIRECTFB_LIBS = @DIRECTFB_LIBS@
DIRECTX_AUDIO_LIBS = @DIRECTX_AUDIO_LIBS@
DIRECTX_CPPFLAGS = @DIRECTX_CPPFLAGS@
DIRECTX_VIDEO_LIBS = @DIRECTX_VIDEO_LIBS@
DLLTOOL = @DLLTOOL@
DVDNAV_CFLAGS = @DVDNAV_CFLAGS@
DVDNAV_CONFIG = @DVDNAV_CONFIG@
DVDNAV_LIBS = @DVDNAV_LIBS@
DYNAMIC_LD_LIBS = @DYNAMIC_LD_LIBS@
ECHO = @ECHO@
ECHO_C = @ECHO_C@
ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
ENABLE_VCD_FALSE = @ENABLE_VCD_FALSE@
ENABLE_VCD_TRUE = @ENABLE_VCD_TRUE@
ESD_CFLAGS = @ESD_CFLAGS@
ESD_CONFIG = @ESD_CONFIG@
ESD_LIBS = @ESD_LIBS@
EXEEXT = @EXEEXT@
EXTRA_X_CFLAGS = @EXTRA_X_CFLAGS@
EXTRA_X_LIBS = @EXTRA_X_LIBS@
F77 = @F77@
FFLAGS = @FFLAGS@
FFMPEG_CPPFLAGS = @FFMPEG_CPPFLAGS@
FFMPEG_LIBS = @FFMPEG_LIBS@
FIG2DEV = @FIG2DEV@
FREETYPE_CONFIG = @FREETYPE_CONFIG@
FT2_CFLAGS = @FT2_CFLAGS@
FT2_LIBS = @FT2_LIBS@
GENCAT = @GENCAT@
GLIBC21 = @GLIBC21@
GLUT_LIBS = @GLUT_LIBS@
GLU_LIBS = @GLU_LIBS@
GMOFILES = @GMOFILES@
GMSGFMT = @GMSGFMT@
GNOME_VFS_CFLAGS = @GNOME_VFS_CFLAGS@
GNOME_VFS_LIBS = @GNOME_VFS_LIBS@
GOOM_LIBS = @GOOM_LIBS@
HAVE_AA_FALSE = @HAVE_AA_FALSE@
HAVE_AA_TRUE = @HAVE_AA_TRUE@
HAVE_ALSA09_FALSE = @HAVE_ALSA09_FALSE@
HAVE_ALSA09_TRUE = @HAVE_ALSA09_TRUE@
HAVE_ALSA_FALSE = @HAVE_ALSA_FALSE@
HAVE_ALSA_TRUE = @HAVE_ALSA_TRUE@
HAVE_ARMV4L_FALSE = @HAVE_ARMV4L_FALSE@
HAVE_ARMV4L_TRUE = @HAVE_ARMV4L_TRUE@
HAVE_ARTS_FALSE = @HAVE_ARTS_FALSE@
HAVE_ARTS_TRUE = @HAVE_ARTS_TRUE@
HAVE_BSDI_CDROM = @HAVE_BSDI_CDROM@
HAVE_CACA_FALSE = @HAVE_CACA_FALSE@
HAVE_CACA_TRUE = @HAVE_CACA_TRUE@
HAVE_CDROM_IOCTLS_FALSE = @HAVE_CDROM_IOCTLS_FALSE@
HAVE_CDROM_IOCTLS_TRUE = @HAVE_CDROM_IOCTLS_TRUE@
HAVE_COREAUDIO_FALSE = @HAVE_COREAUDIO_FALSE@
HAVE_COREAUDIO_TRUE = @HAVE_COREAUDIO_TRUE@
HAVE_DARWIN_CDROM = @HAVE_DARWIN_CDROM@
HAVE_DIRECTFB_FALSE = @HAVE_DIRECTFB_FALSE@
HAVE_DIRECTFB_TRUE = @HAVE_DIRECTFB_TRUE@
HAVE_DIRECTX_FALSE = @HAVE_DIRECTX_FALSE@
HAVE_DIRECTX_TRUE = @HAVE_DIRECTX_TRUE@
HAVE_DVDNAV_FALSE = @HAVE_DVDNAV_FALSE@
HAVE_DVDNAV_TRUE = @HAVE_DVDNAV_TRUE@
HAVE_DXR3_FALSE = @HAVE_DXR3_FALSE@
HAVE_DXR3_TRUE = @HAVE_DXR3_TRUE@
HAVE_ESD_FALSE = @HAVE_ESD_FALSE@
HAVE_ESD_TRUE = @HAVE_ESD_TRUE@
HAVE_FB_FALSE = @HAVE_FB_FALSE@
HAVE_FB_TRUE = @HAVE_FB_TRUE@
HAVE_FFMMX_FALSE = @HAVE_FFMMX_FALSE@
HAVE_FFMMX_TRUE = @HAVE_FFMMX_TRUE@
HAVE_FFMPEG_FALSE = @HAVE_FFMPEG_FALSE@
HAVE_FFMPEG_TRUE = @HAVE_FFMPEG_TRUE@
HAVE_FIG2DEV_FALSE = @HAVE_FIG2DEV_FALSE@
HAVE_FIG2DEV_TRUE = @HAVE_FIG2DEV_TRUE@
HAVE_FLAC_FALSE = @HAVE_FLAC_FALSE@
HAVE_FLAC_TRUE = @HAVE_FLAC_TRUE@
HAVE_FREEBSD_CDROM = @HAVE_FREEBSD_CDROM@
HAVE_GNOME_VFS_FALSE = @HAVE_GNOME_VFS_FALSE@
HAVE_GNOME_VFS_TRUE = @HAVE_GNOME_VFS_TRUE@
HAVE_IRIXAL_FALSE = @HAVE_IRIXAL_FALSE@
HAVE_IRIXAL_TRUE = @HAVE_IRIXAL_TRUE@
HAVE_LIBFAME_FALSE = @HAVE_LIBFAME_FALSE@
HAVE_LIBFAME_TRUE = @HAVE_LIBFAME_TRUE@
HAVE_LIBMNG_FALSE = @HAVE_LIBMNG_FALSE@
HAVE_LIBMNG_TRUE = @HAVE_LIBMNG_TRUE@
HAVE_LIBPNG_FALSE = @HAVE_LIBPNG_FALSE@
HAVE_LIBPNG_TRUE = @HAVE_LIBPNG_TRUE@
HAVE_LIBRTE_FALSE = @HAVE_LIBRTE_FALSE@
HAVE_LIBRTE_TRUE = @HAVE_LIBRTE_TRUE@
HAVE_LIBSMBCLIENT_FALSE = @HAVE_LIBSMBCLIENT_FALSE@
HAVE_LIBSMBCLIENT_TRUE = @HAVE_LIBSMBCLIENT_TRUE@
HAVE_LINUX_CDROM = @HAVE_LINUX_CDROM@
HAVE_LINUX_FALSE = @HAVE_LINUX_FALSE@
HAVE_LINUX_TRUE = @HAVE_LINUX_TRUE@
HAVE_MACOSX_VIDEO_FALSE = @HAVE_MACOSX_VIDEO_FALSE@
HAVE_MACOSX_VIDEO_TRUE = @HAVE_MACOSX_VIDEO_TRUE@
HAVE_MLIB_FALSE = @HAVE_MLIB_FALSE@
HAVE_MLIB_TRUE = @HAVE_MLIB_TRUE@
HAVE_OPENGL_FALSE = @HAVE_OPENGL_FALSE@
HAVE_OPENGL_TRUE = @HAVE_OPENGL_TRUE@
HAVE_OSS_FALSE = @HAVE_OSS_FALSE@
HAVE_OSS_TRUE = @HAVE_OSS_TRUE@
HAVE_POLYPAUDIO_FALSE = @HAVE_POLYPAUDIO_FALSE@
HAVE_POLYPAUDIO_TRUE = @HAVE_POLYPAUDIO_TRUE@
HAVE_SDL_FALSE = @HAVE_SDL_FALSE@
HAVE_SDL_TRUE = @HAVE_SDL_TRUE@
HAVE_SGMLTOOLS_FALSE = @HAVE_SGMLTOOLS_FALSE@
HAVE_SGMLTOOLS_TRUE = @HAVE_SGMLTOOLS_TRUE@
HAVE_SOLARIS_CDROM = @HAVE_SOLARIS_CDROM@
HAVE_SPEEX_FALSE = @HAVE_SPEEX_FALSE@
HAVE_SPEEX_TRUE = @HAVE_SPEEX_TRUE@
HAVE_STK_FALSE = @HAVE_STK_FALSE@
HAVE_STK_TRUE = @HAVE_STK_TRUE@
HAVE_SUNAUDIO_FALSE = @HAVE_SUNAUDIO_FALSE@
HAVE_SUNAUDIO_TRUE = @HAVE_SUNAUDIO_TRUE@
HAVE_SUNDGA_FALSE = @HAVE_SUNDGA_FALSE@
HAVE_SUNDGA_TRUE = @HAVE_SUNDGA_TRUE@
HAVE_SUNFB_FALSE = @HAVE_SUNFB_FALSE@
HAVE_SUNFB_TRUE = @HAVE_SUNFB_TRUE@
HAVE_SYNCFB_FALSE = @HAVE_SYNCFB_FALSE@
HAVE_SYNCFB_TRUE = @HAVE_SYNCFB_TRUE@
HAVE_THEORA_FALSE = @HAVE_THEORA_FALSE@
HAVE_THEORA_TRUE = @HAVE_THEORA_TRUE@
HAVE_V4L_FALSE = @HAVE_V4L_FALSE@
HAVE_V4L_TRUE = @HAVE_V4L_TRUE@
HAVE_VCDNAV_FALSE = @HAVE_VCDNAV_FALSE@
HAVE_VCDNAV_TRUE = @HAVE_VCDNAV_TRUE@
HAVE_VIDIX_FALSE = @HAVE_VIDIX_FALSE@
HAVE_VIDIX_TRUE = @HAVE_VIDIX_TRUE@
HAVE_VLDXVMC_FALSE = @HAVE_VLDXVMC_FALSE@
HAVE_VLDXVMC_TRUE = @HAVE_VLDXVMC_TRUE@
HAVE_VORBIS_FALSE = @HAVE_VORBIS_FALSE@
HAVE_VORBIS_TRUE = @HAVE_VORBIS_TRUE@
HAVE_W32DLL_FALSE = @HAVE_W32DLL_FALSE@
HAVE_W32DLL_TRUE = @HAVE_W32DLL_TRUE@
HAVE_WIN32_CDROM = @HAVE_WIN32_CDROM@
HAVE_X11_FALSE = @HAVE_X11_FALSE@
HAVE_X11_TRUE = @HAVE_X11_TRUE@
HAVE_XVMC_FALSE = @HAVE_XVMC_FALSE@
HAVE_XVMC_TRUE = @HAVE_XVMC_TRUE@
HAVE_XV_FALSE = @HAVE_XV_FALSE@
HAVE_XV_TRUE = @HAVE_XV_TRUE@
HAVE_XXMC_FALSE = @HAVE_XXMC_FALSE@
HAVE_XXMC_TRUE = @HAVE_XXMC_TRUE@
HAVE_ZLIB_FALSE = @HAVE_ZLIB_FALSE@
HAVE_ZLIB_TRUE = @HAVE_ZLIB_TRUE@
HOST_OS_DARWIN_FALSE = @HOST_OS_DARWIN_FALSE@
HOST_OS_DARWIN_TRUE = @HOST_OS_DARWIN_TRUE@
INCLUDED_INTL_FALSE = @INCLUDED_INTL_FALSE@
INCLUDED_INTL_TRUE = @INCLUDED_INTL_TRUE@
INCLUDES = @INCLUDES@
INSTALL_DATA = @INSTALL_DATA@
INSTALL_M4_FALSE = @INSTALL_M4_FALSE@
INSTALL_M4_TRUE = @INSTALL_M4_TRUE@
INSTALL_PROGRAM = @INSTALL_PROGRAM@
INSTALL_SCRIPT = @INSTALL_SCRIPT@
INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
INSTOBJEXT = @INSTOBJEXT@
INTLBISON = @INTLBISON@
INTLDIR = @INTLDIR@
INTLLIBS = @INTLLIBS@
INTLOBJS = @INTLOBJS@
INTL_LIBTOOL_SUFFIX_PREFIX = @INTL_LIBTOOL_SUFFIX_PREFIX@
IRIXAL_CFLAGS = @IRIXAL_CFLAGS@
IRIXAL_LIBS = @IRIXAL_LIBS@
IRIXAL_STATIC_LIB = @IRIXAL_STATIC_LIB@
KSTAT_LIBS = @KSTAT_LIBS@
LDFLAGS = @LDFLAGS@
LIBCDIO_CFLAGS = @LIBCDIO_CFLAGS@
LIBCDIO_LIBS = @LIBCDIO_LIBS@
LIBFAME_CFLAGS = @LIBFAME_CFLAGS@
LIBFAME_CONFIG = @LIBFAME_CONFIG@
LIBFAME_LIBS = @LIBFAME_LIBS@
LIBFFMPEG_CFLAGS = @LIBFFMPEG_CFLAGS@
LIBFLAC_CFLAGS = @LIBFLAC_CFLAGS@
LIBFLAC_LIBS = @LIBFLAC_LIBS@
LIBICONV = @LIBICONV@
LIBISO9660_LIBS = @LIBISO9660_LIBS@
LIBMODPLUG_CFLAGS = @LIBMODPLUG_CFLAGS@
LIBMODPLUG_LIBS = @LIBMODPLUG_LIBS@
LIBMPEG2_CFLAGS = @LIBMPEG2_CFLAGS@
LIBNAME = @LIBNAME@
LIBOBJS = @LIBOBJS@
LIBPNG_CONFIG = @LIBPNG_CONFIG@
LIBS = @LIBS@
LIBSMBCLIENT_LIBS = @LIBSMBCLIENT_LIBS@
LIBSTK_CFLAGS = @LIBSTK_CFLAGS@
LIBSTK_LIBS = @LIBSTK_LIBS@
LIBTOOL = $(SHELL) $(top_builddir)/libtool-nofpic
LIBTOOL_DEPS = @LIBTOOL_DEPS@
LIBVCDINFO_LIBS = @LIBVCDINFO_LIBS@
LIBVCD_CFLAGS = @LIBVCD_CFLAGS@
LIBVCD_LIBS = @LIBVCD_LIBS@
LIBVCD_SYSDEP = @LIBVCD_SYSDEP@
LINUX_CDROM_TIMEOUT = @LINUX_CDROM_TIMEOUT@
LINUX_INCLUDE = @LINUX_INCLUDE@
LN_S = @LN_S@
LTLIBOBJS = @LTLIBOBJS@
LT_AGE = @LT_AGE@
LT_CURRENT = @LT_CURRENT@
LT_REVISION = @LT_REVISION@
MAKEINFO = @MAKEINFO@
MKINSTALLDIRS = @MKINSTALLDIRS@
MKNOD = @MKNOD@
MLIB_CFLAGS = @MLIB_CFLAGS@
MLIB_LIBS = @MLIB_LIBS@
MNG_LIBS = @MNG_LIBS@
MSGFMT = @MSGFMT@
NET_LIBS = @NET_LIBS@
OBJC = @OBJC@
OBJCDEPMODE = @OBJCDEPMODE@
OBJCFLAGS = @OBJCFLAGS@
OBJDUMP = @OBJDUMP@
OBJEXT = @OBJEXT@
OGG_CFLAGS = @OGG_CFLAGS@
OGG_LIBS = @OGG_LIBS@
OPENGL_CFLAGS = @OPENGL_CFLAGS@
OPENGL_LIBS = @OPENGL_LIBS@
PACKAGE = @PACKAGE@
PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
PACKAGE_NAME = @PACKAGE_NAME@
PACKAGE_STRING = @PACKAGE_STRING@
PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_VERSION = @PACKAGE_VERSION@
PASS1_CFLAGS = @PASS1_CFLAGS@
PASS2_CFLAGS = @PASS2_CFLAGS@
PATH_SEPARATOR = @PATH_SEPARATOR@
PKG_CONFIG = @PKG_CONFIG@
PNG_CFLAGS = @PNG_CFLAGS@
PNG_LIBS = @PNG_LIBS@
POFILES = @POFILES@
POLYPAUDIO_CFLAGS = @POLYPAUDIO_CFLAGS@
POLYPAUDIO_LIBS = @POLYPAUDIO_LIBS@
POSUB = @POSUB@
PPC_ARCH_FALSE = @PPC_ARCH_FALSE@
PPC_ARCH_TRUE = @PPC_ARCH_TRUE@
RANLIB = @RANLIB@
RT_LIBS = @RT_LIBS@
SDL_CFLAGS = @SDL_CFLAGS@
SDL_CONFIG = @SDL_CONFIG@
SDL_LIBS = @SDL_LIBS@
SET_MAKE = @SET_MAKE@
SGMLTOOLS = @SGMLTOOLS@
SHELL = @SHELL@
SPEC_VERSION = @SPEC_VERSION@
SPEEX_CFLAGS = @SPEEX_CFLAGS@
SPEEX_LIBS = @SPEEX_LIBS@
STATIC = @STATIC@
STRIP = @STRIP@
SUNDGA_CFLAGS = @SUNDGA_CFLAGS@
SUNDGA_LIBS = @SUNDGA_LIBS@
TAR_NAME = @TAR_NAME@
THEORAENC_LIBS = @THEORAENC_LIBS@
THEORAFILE_LIBS = @THEORAFILE_LIBS@
THEORA_CFLAGS = @THEORA_CFLAGS@
THEORA_LIBS = @THEORA_LIBS@
THREAD_CFLAGS = @THREAD_CFLAGS@
THREAD_CFLAGS_CONFIG = @THREAD_CFLAGS_CONFIG@
THREAD_INCLUDES = @THREAD_INCLUDES@
THREAD_LIBS = @THREAD_LIBS@
THREAD_LIBS_CONFIG = @THREAD_LIBS_CONFIG@
USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@
USE_NLS = @USE_NLS@
VERSION = @VERSION@
VORBISENC_LIBS = @VORBISENC_LIBS@
VORBISFILE_LIBS = @VORBISFILE_LIBS@
VORBIS_CFLAGS = @VORBIS_CFLAGS@
VORBIS_LIBS = @VORBIS_LIBS@
W32DLL_DEP = @W32DLL_DEP@
W32_NO_OPTIMIZE = @W32_NO_OPTIMIZE@
WIN32_CPPFLAGS = @WIN32_CPPFLAGS@
WIN32_FALSE = @WIN32_FALSE@
WIN32_TRUE = @WIN32_TRUE@
XGETTEXT = @XGETTEXT@
XINE_ACFLAGS = @XINE_ACFLAGS@
XINE_BIN_AGE = @XINE_BIN_AGE@
XINE_BUILD_CC = @XINE_BUILD_CC@
XINE_BUILD_DATE = @XINE_BUILD_DATE@
XINE_BUILD_OS = @XINE_BUILD_OS@
XINE_CONFIG_PREFIX = @XINE_CONFIG_PREFIX@
XINE_DATADIR = @XINE_DATADIR@
XINE_FONTDIR = @XINE_FONTDIR@
XINE_FONTPATH = @XINE_FONTPATH@
XINE_IFACE_AGE = @XINE_IFACE_AGE@
XINE_LOCALEDIR = @XINE_LOCALEDIR@
XINE_LOCALEPATH = @XINE_LOCALEPATH@
XINE_MAJOR = @XINE_MAJOR@
XINE_MINOR = @XINE_MINOR@
XINE_PLUGINDIR = @XINE_PLUGINDIR@
XINE_PLUGINPATH = @XINE_PLUGINPATH@
XINE_PLUGIN_MIN_SYMS = @XINE_PLUGIN_MIN_SYMS@
XINE_SCRIPTPATH = @XINE_SCRIPTPATH@
XINE_SUB = @XINE_SUB@
XVMC_LIB = @XVMC_LIB@
XV_LIB = @XV_LIB@
XXMC_LIB = @XXMC_LIB@
X_CFLAGS = @X_CFLAGS@
X_EXTRA_LIBS = @X_EXTRA_LIBS@
X_LIBS = @X_LIBS@
X_PRE_LIBS = @X_PRE_LIBS@
ZLIB_INCLUDES = @ZLIB_INCLUDES@
ZLIB_LIBS = @ZLIB_LIBS@
ZLIB_LIBS_CONFIG = @ZLIB_LIBS_CONFIG@
ac_ct_AR = @ac_ct_AR@
ac_ct_AS = @ac_ct_AS@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DLLTOOL = @ac_ct_DLLTOOL@
ac_ct_F77 = @ac_ct_F77@
ac_ct_OBJDUMP = @ac_ct_OBJDUMP@
ac_ct_RANLIB = @ac_ct_RANLIB@
ac_ct_STRIP = @ac_ct_STRIP@
am__fastdepCC_FALSE = @am__fastdepCC_FALSE@
am__fastdepCC_TRUE = @am__fastdepCC_TRUE@
am__fastdepCXX_FALSE = @am__fastdepCXX_FALSE@
am__fastdepCXX_TRUE = @am__fastdepCXX_TRUE@
am__fastdepOBJC_FALSE = @am__fastdepOBJC_FALSE@
am__fastdepOBJC_TRUE = @am__fastdepOBJC_TRUE@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
am__tar = @am__tar@
am__untar = @am__untar@
bindir = @bindir@
build = @build@
build_alias = @build_alias@
build_cpu = @build_cpu@
build_os = @build_os@
build_vendor = @build_vendor@
datadir = @datadir@
exec_prefix = @exec_prefix@
host = @host@
host_alias = @host_alias@
host_cpu = @host_cpu@
host_os = @host_os@
host_vendor = @host_vendor@
includedir = @includedir@
infodir = @infodir@
install_sh = @install_sh@
libdir = @libdir@
libexecdir = @libexecdir@
localstatedir = @localstatedir@
mandir = @mandir@
mkdir_p = @mkdir_p@
oldincludedir = @oldincludedir@
prefix = @prefix@
program_transform_name = @program_transform_name@
sbindir = @sbindir@
sharedstatedir = @sharedstatedir@
sysconfdir = @sysconfdir@
target = @target@
target_alias = @target_alias@
target_cpu = @target_cpu@
target_os = @target_os@
target_vendor = @target_vendor@
w32_path = @w32_path@
XINE_LIB = $(top_builddir)/src/xine-engine/libxine.la
AM_CFLAGS = $(LIBFFMPEG_CFLAGS)
noinst_LTLIBRARIES = libavcodec_ppc.la
libavcodec_ppc_src = dsputil_altivec.c \
		      dsputil_ppc.c \
		      fdct_altivec.c \
		      fft_altivec.c \
		      idct_altivec.c \
		      gmc_altivec.c \
		      mpegvideo_altivec.c \
		      mpegvideo_ppc.c

libavcodec_ppc_dummy = libavcodec_ppc_dummy.c
EXTRA_DIST = $(libavcodec_ppc_src) $(libavcodec_ppc_dummy)

#if PPC_ARCH
#ppc_modules = $(libavcodec_ppc_src)
#endif
libavcodec_ppc_la_SOURCES = $(ppc_modules) $(libavcodec_ppc_dummy)
noinst_HEADERS = dsputil_altivec.h dsputil_ppc.h gcc_fixes.h
all: all-am

.SUFFIXES:
.SUFFIXES: .c .lo .o .obj
$(srcdir)/Makefile.in:  $(srcdir)/Makefile.am $(top_srcdir)/misc/Makefile.common $(am__configure_deps)
	@for dep in $?; do \
	  case '$(am__configure_deps)' in \
	    *$$dep*) \
	      cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh \
		&& exit 0; \
	      exit 1;; \
	  esac; \
	done; \
	echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu  src/libffmpeg/libavcodec/ppc/Makefile'; \
	cd $(top_srcdir) && \
	  $(AUTOMAKE) --gnu  src/libffmpeg/libavcodec/ppc/Makefile
.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
	@case '$?' in \
	  *config.status*) \
	    cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
	  *) \
	    echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
	    cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
	esac;

$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh

$(top_srcdir)/configure:  $(am__configure_deps)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4):  $(am__aclocal_m4_deps)
	cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh

clean-noinstLTLIBRARIES:
	-test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
	@list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
	  dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
	  test "$$dir" != "$$p" || dir=.; \
	  echo "rm -f \"$${dir}/so_locations\""; \
	  rm -f "$${dir}/so_locations"; \
	done
libavcodec_ppc.la: $(libavcodec_ppc_la_OBJECTS) $(libavcodec_ppc_la_DEPENDENCIES) 
	$(LINK)  $(libavcodec_ppc_la_LDFLAGS) $(libavcodec_ppc_la_OBJECTS) $(libavcodec_ppc_la_LIBADD) $(LIBS)

mostlyclean-compile:
	-rm -f *.$(OBJEXT)

distclean-compile:
	-rm -f *.tab.c

@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libavcodec_ppc_dummy.Plo@am__quote@

.c.o:
@am__fastdepCC_TRUE@	if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(COMPILE) -c $<

.c.obj:
@am__fastdepCC_TRUE@	if $(COMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ `$(CYGPATH_W) '$<'`; \
@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Po"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(COMPILE) -c `$(CYGPATH_W) '$<'`

.c.lo:
@am__fastdepCC_TRUE@	if $(LTCOMPILE) -MT $@ -MD -MP -MF "$(DEPDIR)/$*.Tpo" -c -o $@ $<; \
@am__fastdepCC_TRUE@	then mv -f "$(DEPDIR)/$*.Tpo" "$(DEPDIR)/$*.Plo"; else rm -f "$(DEPDIR)/$*.Tpo"; exit 1; fi
@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@	$(LTCOMPILE) -c -o $@ $<

mostlyclean-libtool:
	-rm -f *.lo

clean-libtool:
	-rm -rf .libs _libs

distclean-libtool:
	-rm -f libtool
uninstall-info-am:

ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
	list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '    { files[$$0] = 1; } \
	       END { for (i in files) print i; }'`; \
	mkid -fID $$unique
tags: TAGS

TAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
		$(TAGS_FILES) $(LISP)
	tags=; \
	here=`pwd`; \
	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '    { files[$$0] = 1; } \
	       END { for (i in files) print i; }'`; \
	if test -z "$(ETAGS_ARGS)$$tags$$unique"; then :; else \
	  test -n "$$unique" || unique=$$empty_fix; \
	  $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
	    $$tags $$unique; \
	fi
ctags: CTAGS
CTAGS:  $(HEADERS) $(SOURCES)  $(TAGS_DEPENDENCIES) \
		$(TAGS_FILES) $(LISP)
	tags=; \
	here=`pwd`; \
	list='$(SOURCES) $(HEADERS)  $(LISP) $(TAGS_FILES)'; \
	unique=`for i in $$list; do \
	    if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
	  done | \
	  $(AWK) '    { files[$$0] = 1; } \
	       END { for (i in files) print i; }'`; \
	test -z "$(CTAGS_ARGS)$$tags$$unique" \
	  || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
	     $$tags $$unique

GTAGS:
	here=`$(am__cd) $(top_builddir) && pwd` \
	  && cd $(top_srcdir) \
	  && gtags -i $(GTAGS_ARGS) $$here

distclean-tags:
	-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags

distdir: $(DISTFILES)
	$(mkdir_p) $(distdir)/../../../../misc
	@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
	topsrcdirstrip=`echo "$(top_srcdir)" | sed 's|.|.|g'`; \
	list='$(DISTFILES)'; for file in $$list; do \
	  case $$file in \
	    $(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
	    $(top_srcdir)/*) file=`echo "$$file" | sed "s|^$$topsrcdirstrip/|$(top_builddir)/|"`;; \
	  esac; \
	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
	    dir="/$$dir"; \
	    $(mkdir_p) "$(distdir)$$dir"; \
	  else \
	    dir=''; \
	  fi; \
	  if test -d $$d/$$file; then \
	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
	    fi; \
	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
	  else \
	    test -f $(distdir)/$$file \
	    || cp -p $$d/$$file $(distdir)/$$file \
	    || exit 1; \
	  fi; \
	done
check-am: all-am
check: check-am
all-am: Makefile $(LTLIBRARIES) $(HEADERS)
installdirs:
install: install-am
install-exec: install-exec-am
install-data: install-data-am
uninstall: uninstall-am

install-am: all-am
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am

installcheck: installcheck-am
install-strip:
	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
	  install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
	  `test -z '$(STRIP)' || \
	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install

clean-generic:

distclean-generic:
	-test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
clean: clean-am

clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
	mostlyclean-am

distclean: distclean-am
	-rm -rf ./$(DEPDIR)
	-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
	distclean-libtool distclean-tags

dvi: dvi-am

dvi-am:

html: html-am

info: info-am

info-am:

install-data-am:
	@$(NORMAL_INSTALL)
	$(MAKE) $(AM_MAKEFLAGS) install-data-hook

install-exec-am:

install-info: install-info-am

install-man:

installcheck-am:

maintainer-clean: maintainer-clean-am
	-rm -rf ./$(DEPDIR)
	-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic

mostlyclean: mostlyclean-am

mostlyclean-am: mostlyclean-compile mostlyclean-generic \
	mostlyclean-libtool

pdf: pdf-am

pdf-am:

ps: ps-am

ps-am:

uninstall-am: uninstall-info-am
	@$(NORMAL_INSTALL)
	$(MAKE) $(AM_MAKEFLAGS) uninstall-hook

.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
	clean-libtool clean-noinstLTLIBRARIES ctags distclean \
	distclean-compile distclean-generic distclean-libtool \
	distclean-tags distdir dvi dvi-am html html-am info info-am \
	install install-am install-data install-data-am \
	install-data-hook install-exec install-exec-am install-info \
	install-info-am install-man install-strip installcheck \
	installcheck-am installdirs maintainer-clean \
	maintainer-clean-generic mostlyclean mostlyclean-compile \
	mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
	tags uninstall uninstall-am uninstall-hook uninstall-info-am

$(XINE_LIB):
	@cd $(top_srcdir)/src/xine-engine && $(MAKE)

# Runs after the data install step: executes $(top_srcdir)/post-install.sh
# if that script exists and is executable, but only while the make
# recursion depth ($$MAKELEVEL) is at most 4.
install-data-hook:
	@if test $$MAKELEVEL -le 4 ; then \
	  if test -x "$(top_srcdir)/post-install.sh" ; then \
	    $(top_srcdir)/post-install.sh ; \
	  fi \
	fi

# Convenience targets that re-invoke make with a different flag set:
#   pass1 / pass2  pass PASS1_CFLAGS / PASS2_CFLAGS as MULTIPASS_CFLAGS
#   debug          overrides CFLAGS with DEBUG_CFLAGS
pass1:
	@$(MAKE) MULTIPASS_CFLAGS="$(PASS1_CFLAGS)"

pass2:
	@$(MAKE) MULTIPASS_CFLAGS="$(PASS2_CFLAGS)"

debug:
	@$(MAKE) CFLAGS="$(DEBUG_CFLAGS)"

install-debug: debug
	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
	@list='$(SUBDIRS)'; for subdir in $$list; do \
	  (cd $$subdir && $(MAKE) $@) || exit; \
	done;
	$(MAKE) $(AM_MAKEFLAGS) install-data-hook

install-includeHEADERS: $(include_HEADERS)
	@$(NORMAL_INSTALL)
	$(install_sh) -d $(DESTDIR)$(includedir)/xine
	@list='$(include_HEADERS)'; for p in $$list; do \
	  if test -f "$$p"; then d= ; else d="$(srcdir)/"; fi; \
	  echo " $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/xine/$$p"; \
	  $(INSTALL_DATA) $$d$$p $(DESTDIR)$(includedir)/xine/$$p; \
	done

uninstall-includeHEADERS:
	@$(NORMAL_UNINSTALL)
	list='$(include_HEADERS)'; for p in $$list; do \
	  rm -f $(DESTDIR)$(includedir)/xine/$$p; \
	done

# Runs after uninstall: when $(libdir) starts with $(XINE_PLUGINDIR), each
# entry of lib_LTLIBRARIES has its directory part stripped and its ".la"
# suffix replaced by ".so", and that file is removed from
# $(DESTDIR)$(libdir). lib_LTLIBRARIES is not set anywhere in this
# Makefile.in, so unless it is supplied from outside the loop has nothing
# to remove here.
uninstall-hook:
	@if echo '$(libdir)' | egrep ^'$(XINE_PLUGINDIR)' >/dev/null; then \
	  list='$(lib_LTLIBRARIES)'; for p in $$list; do \
	    p="`echo $$p | sed -e 's/\.la$$/\.so/g;s|^.*/||'`"; \
	    echo " rm -f $(DESTDIR)$(libdir)/$$p"; \
	    rm -f $(DESTDIR)$(libdir)/$$p; \
	  done; \
	fi

mostlyclean-generic:
	-rm -f *~ \#* .*~ .\#*

maintainer-clean-generic:
	-@echo "This command is intended for maintainers to use;"
	-@echo "it deletes files that may require special tools to rebuild."
	-rm -f Makefile.in
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:

--- NEW FILE: dsputil_ppc.h ---
/*
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef _DSPUTIL_PPC_
#define _DSPUTIL_PPC_

/*
 * NO_DCBZL is defined whenever the assembler is not expected to know the
 * DCBZL mnemonic:
 *  - on every non-Darwin build (I don't think any non-Apple assembler
 *    knows about DCBZL), and
 *  - on Darwin when the compiler is older than gcc 3.3 (the Apple
 *    assembler shipped w/ gcc-3.3 knows about DCBZL, previous ones don't;
 *    we assume here that the Darwin GCC is from Apple).
 */
#if !defined(CONFIG_DARWIN) || (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
#define NO_DCBZL
#endif

#ifdef POWERPC_PERFORMANCE_REPORT
/* Declared here for users of the performance report; the definition is
   not part of this header. */
void powerpc_display_perf_report(void);
/* Number of performance monitor counters sampled per measurement: it
   sizes the pmc_start/pmc_stop arrays and the first dimension of
   perfdata, and decides which POWERPC_GET_PMCn macros really read a
   counter.
   the 604* have 2, the G3* have 4, the G4s have 6 */
#define POWERPC_NUM_PMC_ENABLED 4
/*
 * One identifier per instrumented routine; the value is used as the
 * middle index of perfdata[][][].
 *
 * If you add an entry here, also add it to the perfname array in
 * dsputil_ppc.c.  powerpc_perf_total has to remain the last enumerator:
 * it is the number of entries and sizes perfdata.
 */
enum powerpc_perf_index {
    /* FFT */
    altivec_fft_num = 0,
    /* global motion compensation */
    altivec_gmc1_num,
    /* (de)quantisation and DCT */
    altivec_dct_unquantize_h263_num,
    altivec_fdct,
    altivec_idct_add_num,
    altivec_idct_put_num,
    /* pixel copy / average */
    altivec_put_pixels16_num,
    altivec_avg_pixels16_num,
    altivec_avg_pixels8_num,
    altivec_put_pixels8_xy2_num,
    altivec_put_no_rnd_pixels8_xy2_num,
    altivec_put_pixels16_xy2_num,
    altivec_put_no_rnd_pixels16_xy2_num,
    /* Hadamard */
    altivec_hadamard8_diff8x8_num,
    altivec_hadamard8_diff16_num,
    /* block clearing */
    powerpc_clear_blocks_dcbz32,
    powerpc_clear_blocks_dcbz128,
    /* number of entries above -- keep last */
    powerpc_perf_total
};
/*
 * Statistics kept for every (counter, routine) pair; the value is the
 * last index of perfdata[][][].  powerpc_data_total is the number of
 * statistics and must remain last.
 */
enum powerpc_data_index {
    powerpc_data_min   = 0,  /* smallest counter delta seen    */
    powerpc_data_max   = 1,  /* largest counter delta seen     */
    powerpc_data_sum   = 2,  /* sum of all counter deltas      */
    powerpc_data_num   = 3,  /* number of deltas accumulated   */
    powerpc_data_total = 4
};
/* Accumulated statistics, indexed as
   perfdata[counter][enum powerpc_perf_index][enum powerpc_data_index].
   Updated by POWERPC_PERF_STOP_COUNT; defined outside this header. */
extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];

/*
 * Counter access macros.
 *
 * POWERP_PMC_DATATYPE (sic -- the name is spelled this way throughout) is
 * the integer type a counter value is stored in.
 * POWERPC_GET_PMCn(a) stores counter n into the lvalue 'a' with a single
 * mfspr instruction; the SPR number depends on POWERPC_MODE_64BITS.
 *
 * Counters beyond POWERPC_NUM_PMC_ENABLED are not read: the matching
 * macros expand to an empty statement and never evaluate their argument,
 * so passing pmc_start[4] / pmc_start[5] to them is harmless even when
 * the arrays are shorter.
 */
#ifndef POWERPC_MODE_64BITS
/* 32-bit mode: SPRs 937, 938, 941, 942, 929, 930 */
#define POWERP_PMC_DATATYPE unsigned long
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 937" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 938" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2)
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 941" : "=r" (a))
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 942" : "=r" (a))
#else
/* counters 3 and 4 disabled: no-ops */
#define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0)
#endif
#if (POWERPC_NUM_PMC_ENABLED > 4)
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 929" : "=r" (a))
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 930" : "=r" (a))
#else
/* counters 5 and 6 disabled: no-ops */
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#else /* POWERPC_MODE_64BITS */
/* 64-bit mode: SPRs 771 .. 776, wider storage type */
#define POWERP_PMC_DATATYPE unsigned long long
#define POWERPC_GET_PMC1(a) asm volatile("mfspr %0, 771" : "=r" (a))
#define POWERPC_GET_PMC2(a) asm volatile("mfspr %0, 772" : "=r" (a))
#if (POWERPC_NUM_PMC_ENABLED > 2)
#define POWERPC_GET_PMC3(a) asm volatile("mfspr %0, 773" : "=r" (a))
#define POWERPC_GET_PMC4(a) asm volatile("mfspr %0, 774" : "=r" (a))
#else
/* counters 3 and 4 disabled: no-ops */
#define POWERPC_GET_PMC3(a) do {} while (0)
#define POWERPC_GET_PMC4(a) do {} while (0)
#endif
#if (POWERPC_NUM_PMC_ENABLED > 4)
#define POWERPC_GET_PMC5(a) asm volatile("mfspr %0, 775" : "=r" (a))
#define POWERPC_GET_PMC6(a) asm volatile("mfspr %0, 776" : "=r" (a))
#else
/* counters 5 and 6 disabled: no-ops */
#define POWERPC_GET_PMC5(a) do {} while (0)
#define POWERPC_GET_PMC6(a) do {} while (0)
#endif
#endif /* POWERPC_MODE_64BITS */
#define POWERPC_PERF_DECLARE(a, cond)				\
  POWERP_PMC_DATATYPE						\
    pmc_start[POWERPC_NUM_PMC_ENABLED],				\
    pmc_stop[POWERPC_NUM_PMC_ENABLED],				\
    pmc_loop_index;
/*
 * Snapshot all counters into pmc_start[] (declared by
 * POWERPC_PERF_DECLARE).  'a' and 'cond' are not used here.
 * The counters are read from PMC6 down to PMC1 while
 * POWERPC_PERF_STOP_COUNT reads them from PMC1 up to PMC6, so PMC1 is
 * the last one started and the first one stopped.
 * Reads of disabled counters expand to nothing, so pmc_start[4]/[5] are
 * never touched unless POWERPC_NUM_PMC_ENABLED allows it.
 */
#define POWERPC_PERF_START_COUNT(a, cond) do { \
  POWERPC_GET_PMC6(pmc_start[5]); \
  POWERPC_GET_PMC5(pmc_start[4]); \
  POWERPC_GET_PMC4(pmc_start[3]); \
  POWERPC_GET_PMC3(pmc_start[2]); \
  POWERPC_GET_PMC2(pmc_start[1]); \
  POWERPC_GET_PMC1(pmc_start[0]); \
  } while (0)
/*
 * Snapshot all counters into pmc_stop[] (PMC1 first) and, when 'cond' is
 * true, fold the elapsed count of every enabled counter into
 * perfdata[counter][a][]: min, max, running sum and sample count.
 * A counter whose stop value is below its start value is skipped for
 * this sample.
 */
#define POWERPC_PERF_STOP_COUNT(a, cond) do {                              \
  POWERPC_GET_PMC1(pmc_stop[0]);                                           \
  POWERPC_GET_PMC2(pmc_stop[1]);                                           \
  POWERPC_GET_PMC3(pmc_stop[2]);                                           \
  POWERPC_GET_PMC4(pmc_stop[3]);                                           \
  POWERPC_GET_PMC5(pmc_stop[4]);                                           \
  POWERPC_GET_PMC6(pmc_stop[5]);                                           \
  if (cond) {                                                              \
    for (pmc_loop_index = 0;                                               \
         pmc_loop_index < POWERPC_NUM_PMC_ENABLED;                         \
         pmc_loop_index++) {                                               \
      POWERP_PMC_DATATYPE pmc_delta;                                       \
      /* stop below start: drop this sample for this counter */            \
      if (pmc_stop[pmc_loop_index] < pmc_start[pmc_loop_index])            \
        continue;                                                          \
      pmc_delta = pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];    \
      if (pmc_delta < perfdata[pmc_loop_index][a][powerpc_data_min])       \
        perfdata[pmc_loop_index][a][powerpc_data_min] = pmc_delta;         \
      if (pmc_delta > perfdata[pmc_loop_index][a][powerpc_data_max])       \
        perfdata[pmc_loop_index][a][powerpc_data_max] = pmc_delta;         \
      perfdata[pmc_loop_index][a][powerpc_data_sum] += pmc_delta;          \
      perfdata[pmc_loop_index][a][powerpc_data_num] ++;                    \
    }                                                                      \
  }                                                                        \
} while (0)
#else /* POWERPC_PERFORMANCE_REPORT */
/* Performance reporting disabled: the three macros still expand to
   something (an unused dummy declaration, or an empty do/while) so that
   call sites written as "MACRO(...);" never leave an empty statement
   behind. */
#define POWERPC_PERF_DECLARE(a, cond)        int altivec_placeholder __attribute__ ((unused))
#define POWERPC_PERF_START_COUNT(a, cond)    do {} while (0)
#define POWERPC_PERF_STOP_COUNT(a, cond)     do {} while (0)
#endif /* POWERPC_PERFORMANCE_REPORT */

#endif /*  _DSPUTIL_PPC_ */

--- NEW FILE: dsputil_altivec.h ---
/*
 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#ifndef _DSPUTIL_ALTIVEC_
#define _DSPUTIL_ALTIVEC_

#include "dsputil_ppc.h"

#ifdef HAVE_ALTIVEC

/*
 * Prototypes of the AltiVec routines; none of them is defined in this
 * header.  NOTE(review): the groupings below are inferred from the
 * function names only -- confirm against the implementations.
 */

/* block comparison / pixel statistics (sad*, sse*, pix_norm1, pix_sum)
   and pixel-to-DCTELEM loaders */
extern int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int pix_norm1_altivec(uint8_t *pix, int line_size);
extern int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
extern int pix_sum_altivec(uint8_t * pix, int line_size);
extern void diff_pixels_altivec(DCTELEM* block, const uint8_t* s1, const uint8_t* s2, int stride);
extern void get_pixels_altivec(DCTELEM* block, const uint8_t * pixels, int line_size);

/* pixel store / copy / average routines and Hadamard difference;
   the first parameter of the hadamard8 functions is passed as void *
   (annotated as MpegEncContext in the original comment) */
extern void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w);
extern void put_pixels_clamped_altivec(const DCTELEM *block, uint8_t *restrict pixels, int line_size);
extern void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
extern void avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
extern void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
extern void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
extern void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
extern void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);

/* global motion compensation entry point (see altivec_gmc1_num) */
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);

/* Run-time AltiVec detection: a non-zero result makes mm_support() in
   dsputil_ppc.c report MM_ALTIVEC. */
extern int has_altivec(void);

/*
 * Byte-index groups used by vcprm() to build permutation vectors.
 * Each WORD_x names the four consecutive byte indices of one 32-bit
 * word: WORD_0..WORD_3 cover indices 0x00-0x0f, WORD_s0..WORD_s3 cover
 * indices 0x10-0x1f (the 's' stands for the _s_econd vector).
 */
#define WORD_0   0x00, 0x01, 0x02, 0x03
#define WORD_1   0x04, 0x05, 0x06, 0x07
#define WORD_2   0x08, 0x09, 0x0a, 0x0b
#define WORD_3   0x0c, 0x0d, 0x0e, 0x0f

#define WORD_s0  0x10, 0x11, 0x12, 0x13
#define WORD_s1  0x14, 0x15, 0x16, 0x17
#define WORD_s2  0x18, 0x19, 0x1a, 0x1b
#define WORD_s3  0x1c, 0x1d, 0x1e, 0x1f

/* vcprm(a,b,c,d): constant permutation vector made of the four byte
   groups WORD_a, WORD_b, WORD_c, WORD_d (token-pasted), i.e. 16 byte
   indices.  Only the literal syntax differs: parentheses when
   CONFIG_DARWIN is defined, braces otherwise. */
#ifdef CONFIG_DARWIN
#define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
#else
#define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
#endif

// vcprmle is used to keep the same index as in the SSE version.
// it's the same as vcprm, with the index order reversed:
// vcprmle(a,b,c,d) == vcprm(d,c,b,a)
// ('le' is Little Endian)
#define vcprmle(a,b,c,d) vcprm(d,c,b,a)

// used to build inverse/identity vectors (vcii)
// n is _n_egative (-1.0), p is _p_ositive (+1.0)
#define FLOAT_n -1.
#define FLOAT_p 1.

/* vcii(a,b,c,d): constant float vector whose four lanes are FLOAT_a,
   FLOAT_b, FLOAT_c, FLOAT_d, so each argument must be 'n' or 'p'.
   Only the literal syntax differs: parentheses when CONFIG_DARWIN is
   defined, braces otherwise. */
#ifdef CONFIG_DARWIN
#define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
#else
#define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
#endif

#else /* HAVE_ALTIVEC */
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
#error "I can't use ALTIVEC_USE_REFERENCE_C_CODE if I don't use HAVE_ALTIVEC"
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
#endif /* HAVE_ALTIVEC */

#endif /* _DSPUTIL_ALTIVEC_ */

--- NEW FILE: fft_altivec.c ---
/*
 * FFT/IFFT transforms
 * AltiVec-enabled
 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
 * Based on code Copyright (c) 2002 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "../dsputil.h"

#include "gcc_fixes.h"

#include "dsputil_altivec.h"

/*
  those three macros are from libavcodec/fft.c
  and are required for the reference C code
*/
/*
 * Butterfly op:
 *   (pre, pim) = (pre1, pim1) + (qre1, qim1)
 *   (qre, qim) = (pre1, pim1) - (qre1, qim1)
 * The four inputs are copied into temporaries before any output is
 * written, so the outputs may alias the inputs.
 * The body is wrapped in do { } while (0) so that "BF(...);" is exactly
 * one statement (safe in an unbraced if/else); the arguments are
 * parenthesised so that expressions can be passed.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
do {\
  FFTSample ax, ay, bx, by;\
  bx = (pre1);\
  by = (pim1);\
  ax = (qre1);\
  ay = (qim1);\
  (pre) = (bx + ax);\
  (pim) = (by + ay);\
  (qre) = (bx - ax);\
  (qim) = (by - ay);\
} while (0)
/* Plain product; kept as a macro to mirror libavcodec/fft.c. */
#define MUL16(a,b) ((a) * (b))
/*
 * Complex multiply: (pre + i*pim) = (are + i*aim) * (bre + i*bim).
 * 'pre' is written before 'pim' is computed, so 'pre' must not alias any
 * of the four inputs.
 * Wrapped in do { } while (0) so that "CMUL(...);" is exactly one
 * statement; the outputs are parenthesised.
 */
#define CMUL(pre, pim, are, aim, bre, bim) \
do {\
   (pre) = (MUL16(are, bre) - MUL16(aim, bim));\
   (pim) = (MUL16(are, bim) + MUL16(bre, aim));\
} while (0)

/**
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
 * input data must be permuted before with s->revtab table. No
 * 1.0/sqrt(n) normalization is done.
 * AltiVec-enabled
 * This code assumes that the 'z' pointer is 16 bytes-aligned
 * It also assumes all FFTComplex are 8 bytes-aligned pair of float
 * The code is exactly the same as the SSE version, except
 * that successive MUL + ADD/SUB have been merged into
 * fused multiply-add ('vec_madd' in altivec)
 *
 * The transform is done in place on the 1 << s->nbits elements of z.
 * s->nbits must be at least 2: passes 0 and 1 always process groups of
 * four complex values. For nbits == 2 there are no further passes, so
 * the "pass 2 .. ln-1" loop is entered only while nblocks is non-zero.
 *
 * @param s FFT context; nbits, inverse and exptab1 (exptab for the
 *          reference C code) are read
 * @param z input/output buffer of 1 << s->nbits complex values
 */
void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
{
POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
    int ln = s->nbits;
    int j, np, np2;
    int nblocks, nloops;
    register FFTComplex *p, *q;
    FFTComplex *exptab = s->exptab;
    int l;
    FFTSample tmp_re, tmp_im;

POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);

    np = 1 << ln;

    /* pass 0 */

    p=&z[0];
    j=(np >> 1);
    do {
        BF(p[0].re, p[0].im, p[1].re, p[1].im,
           p[0].re, p[0].im, p[1].re, p[1].im);
        p+=2;
    } while (--j != 0);

    /* pass 1 */

    p=&z[0];
    j=np >> 2;
    if (s->inverse) {
        do {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, -p[3].im, p[3].re);
            p+=4;
        } while (--j != 0);
    } else {
        do {
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
               p[0].re, p[0].im, p[2].re, p[2].im);
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
               p[1].re, p[1].im, p[3].im, -p[3].re);
            p+=4;
        } while (--j != 0);
    }
    /* pass 2 .. ln-1 */

    nblocks = np >> 3;
    nloops = 1 << 2;
    np2 = np >> 1;
    /* nblocks is 0 when ln == 2: nothing left to do in that case */
    while (nblocks != 0) {
        p = z;
        q = z + nloops;
        for (j = 0; j < nblocks; ++j) {
            BF(p->re, p->im, q->re, q->im,
               p->re, p->im, q->re, q->im);

            p++;
            q++;
            for(l = nblocks; l < np2; l += nblocks) {
                CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
                BF(p->re, p->im, q->re, q->im,
                   p->re, p->im, tmp_re, tmp_im);
                p++;
                q++;
            }

            p += nloops;
            q += nloops;
        }
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    }

POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);

#else /* ALTIVEC_USE_REFERENCE_C_CODE */
#ifdef CONFIG_DARWIN
    register const vector float vczero = (const vector float)(0.);
#else
    register const vector float vczero = (const vector float){0.,0.,0.,0.};
#endif

    int ln = s->nbits;
    int j, np;
    int nblocks, nloops;
    register FFTComplex *p, *q;
    FFTComplex *cptr, *cptr1;
    int k;

POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);

    np = 1 << ln;

    /* passes 0 and 1, four complex values (two vectors) per iteration */
    {
        vector float *r, a, b, a1, c1, c2;

        r = (vector float *)&z[0];

        /* sign pattern of the pass 0 butterfly */
        c1 = vcii(p,p,n,n);

        /* sign pattern of the pass 1 butterfly; differs between the
           forward and the inverse transform */
        if (s->inverse)
            {
                c2 = vcii(p,p,n,p);
            }
        else
            {
                c2 = vcii(p,p,p,n);
            }

        j = (np >> 2);
        do {
            a = vec_ld(0, r);
            a1 = vec_ld(sizeof(vector float), r);

            b = vec_perm(a,a,vcprmle(1,0,3,2));
            a = vec_madd(a,c1,b);
            /* do the pass 0 butterfly */

            b = vec_perm(a1,a1,vcprmle(1,0,3,2));
            b = vec_madd(a1,c1,b);
            /* do the pass 0 butterfly */

            /* multiply third by -i */
            b = vec_perm(b,b,vcprmle(2,3,1,0));

            /* do the pass 1 butterfly */
            vec_st(vec_madd(b,c2,a), 0, r);
            vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);

            r += 2;
        } while (--j != 0);
    }
    /* pass 2 .. ln-1 */

    nblocks = np >> 3;
    nloops = 1 << 2;

    cptr1 = s->exptab1;
    /*
     * nblocks is 0 when ln == 2.  The inner loops are do/while loops
     * counting j down from nblocks, so they must not be entered in that
     * case: --j would go negative and the loop would run far past the
     * end of z.
     */
    while (nblocks != 0) {
        p = z;
        q = z + nloops;
        j = nblocks;
        do {
            cptr = cptr1;
            k = nloops >> 1;
            do {
                vector float a,b,c,t1;

                a = vec_ld(0, (float*)p);
                b = vec_ld(0, (float*)q);

                /* complex mul */
                c = vec_ld(0, (float*)cptr);
                /*  cre*re cim*re */
                t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero);
                c = vec_ld(sizeof(vector float), (float*)cptr);
                /*  -cim*im cre*im */
                b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);

                /* butterfly */
                vec_st(vec_add(a,b), 0, (float*)p);
                vec_st(vec_sub(a,b), 0, (float*)q);

                p += 2;
                q += 2;
                cptr += 4;
            } while (--k);

            p += nloops;
            q += nloops;
        } while (--j);
        cptr1 += nloops * 2;
        nblocks = nblocks >> 1;
        nloops = nloops << 1;
    }

POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);

#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
}