/*****************************************************************************
 * $Id: vop-deint-greedy.c,v 1.1 2004/09/18 16:46:55 alainjj Exp $
 * Program under GNU General Public License (see ../COPYING)
 * Deinterlace routines for xine by Miguel Freitas
 * based on the DScaler project sources (deinterlace.sourceforge.net)
 *****************************************************************************/

#include <stdio.h>
#include "config.h"

#ifdef ARCH_X86

#include "colorspace.h"
#include "vop.h"
#include "memcpy.h"
#include "cpu_accel.h"
extern int debug;

/*
 * Greedy deinterlacer (MMX implementation).
 *
 * For every output line that belongs to the field being rebuilt, each byte
 * is taken either from the current frame (L2, the "weave" line) or from the
 * same line of the previous buffer (LP2), whichever is closer to the average
 * of the two neighbouring lines L1 and L3.  The chosen value is then clipped
 * to [Min(L1,L3) - MaxComb, Max(L1,L3) + MaxComb].  Lines of the other field
 * are copied through unchanged.
 *
 * v      - filter state; only v->bufs, v->buf_cur and v->nbufs are read, to
 *          locate the buffer that precedes the current one in the ring
 *          (presumably the previous frame - confirm against vop.h).
 * dest   - output frame, written with a stride of 2*width bytes per line.
 * src    - input frame, read with a stride of 2*width bytes per line
 *          (assumes packed 2-bytes-per-pixel data such as YUYV - TODO confirm).
 * width  - frame width; a line is width*2 bytes long.
 * height - frame height in lines.
 *
 * Always returns 1.
 *
 * NOTE(review): IsOdd is hard-coded to 1 below, so every "even field" branch
 * in this function is currently dead code.
 */
static int deinterlace_greedy_yuv_mmx(vop2 *v,  unsigned char *dest, unsigned char *src, 
				   int width, int height) {
  int Line;
  int	LoopCtr;
  uint64_t *L1;					// ptr to Line1, of 3
  uint64_t *L2;					// ptr to Line2, the weave line
  uint64_t *L3;					// ptr to Line3
  uint64_t *LP2;					// ptr to prev Line2
  uint64_t *Dest;
  uint8_t* pEvenLines = src;            // first line of the frame
  uint8_t* pOddLines = src+2*width;     // second line of the frame (one LineLength further)
  uint8_t* pPrevLines;

  // 0xfe in every byte: clears bit 0 of each byte so that the 16-bit shift
  // used for halving cannot move a bit into the neighbouring byte.
  static mmx_t ShiftMask = {ub:{0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe,0xfe}};

  int LineLength = width * 2;           // bytes in one line
  int SourcePitch = width * 4;          // bytes between consecutive lines of the same field
  int IsOdd = 1;                        // hard-coded: always rebuild the odd lines
  long GreedyMaxComb = 15;
  static mmx_t MaxComb;                 // GreedyMaxComb replicated in all 8 bytes (refilled on every call)
  int i;

  // Select the ring-buffer slot just before buf_cur and point at the lines of
  // the field being rebuilt (second line for odd, first line for even).
  if (IsOdd)
    pPrevLines = v->bufs[(v->buf_cur+v->nbufs-1)%v->nbufs] + 2*width;
  else
    pPrevLines = v->bufs[(v->buf_cur+v->nbufs-1)%v->nbufs];


  for( i = 0; i < 8; i++ )
    MaxComb.ub[i] = GreedyMaxComb; // How badly do we let it weave? 0-255


  // copy first even line no matter what, and the first odd line if we're
  // processing an EVEN field. (note diff from other deint rtns.)
  fast_memcpy(dest, pEvenLines, LineLength); //DL0
  if (!IsOdd)
    fast_memcpy(dest + LineLength, pOddLines, LineLength); //DL1

  // From here on "height" counts lines per field, not lines per frame.
  height = height / 2;
  for (Line = 0; Line < height - 1; ++Line)
  {
    // There are LineLength / 8 qwords per line.  The division truncates, so
    // trailing bytes of a line whose length is not a multiple of 8 are not
    // written by the MMX loop below.
    LoopCtr = LineLength / 8;

    if (IsOdd)
    {
      L1 = (uint64_t *)(pEvenLines + Line * SourcePitch);
      L2 = (uint64_t *)(pOddLines + Line * SourcePitch);
      L3 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
      LP2 = (uint64_t *)(pPrevLines + Line * SourcePitch); // prev Odd lines
      Dest = (uint64_t *)(dest + (Line * 2 + 1) * LineLength);
    }
    else
    {
      L1 = (uint64_t *)(pOddLines + Line * SourcePitch);
      L2 = (uint64_t *)(pEvenLines + (Line + 1) * SourcePitch);
      L3 = (uint64_t *)(pOddLines + (Line + 1) * SourcePitch);
      LP2 = (uint64_t *)(pPrevLines + (Line + 1) * SourcePitch); //prev even lines
      Dest = (uint64_t *)(dest + (Line * 2 + 2) * LineLength);
    }

    // L3 belongs to the field that is kept as-is: copy it straight to the
    // output line that follows the one being computed.
    fast_memcpy((char *)Dest + LineLength, L3, LineLength);

// For ease of reading, the comments below assume that we're operating on an odd
// field (i.e., that IsOdd is true).  Assume the obvious for even lines..

    while( LoopCtr-- )
    {
      // Load 8 bytes from each of the four input lines.
      movq_m2r ( *L1++, mm1 );
      movq_m2r ( *L2++, mm2 );
      movq_m2r ( *L3++, mm3 );
      movq_m2r ( *LP2++, mm0 );

      // average L1 and L3 leave result in mm4
      // (each byte is halved separately and the halves are added)
      movq_r2r ( mm1, mm4 );	// L1

      pand_m2r ( ShiftMask, mm4 );
      psrlw_i2r ( 01, mm4 );
      movq_r2r ( mm3, mm5 );  // L3
      pand_m2r ( ShiftMask, mm5 );
      psrlw_i2r ( 01, mm5 );
      paddb_r2r ( mm5, mm4 );  // the average, for computing comb

      // get abs value of possible L2 comb
      // (two saturating subtractions OR-ed together give the absolute difference)
      movq_r2r	( mm2, mm7 );				// L2
      psubusb_r2r ( mm4, mm7 );				// L2 - avg
      movq_r2r ( mm4, mm5 );				// avg
      psubusb_r2r ( mm2, mm5 );				// avg - L2
      por_r2r ( mm7, mm5 );				// abs(avg-L2)
      movq_r2r ( mm4, mm6 );     // copy of avg for later (mm6 is not read again in this loop)

      // get abs value of possible LP2 comb
      movq_r2r ( mm0, mm7 );				// LP2
      psubusb_r2r ( mm4, mm7 );				// LP2 - avg
      psubusb_r2r ( mm0, mm4 );				// avg - LP2
      por_r2r ( mm7, mm4 );				// abs(avg-LP2)

      // use L2 or LP2 depending upon which makes smaller comb
      psubusb_r2r ( mm5, mm4 );				// see if it goes to zero
      psubusb_r2r ( mm5, mm5 );				// 0
      pcmpeqb_r2r ( mm5, mm4 );				// if (mm4=0) then FF else 0
      pcmpeqb_r2r ( mm4, mm5 );				// opposite of mm4

      // if Comb(LP2) <= Comb(L2) then mm4=ff, mm5=0 else mm4=0, mm5=ff
      pand_r2r ( mm2, mm5 );				// use L2 if mm5 == ff, else 0
      pand_r2r ( mm0, mm4 );				// use LP2 if mm4 = ff, else 0
      por_r2r ( mm5, mm4 );				// may the best win

      // Now lets clip our chosen value to be not outside of the range
      // of the high/low range L1-L3 by more than MaxComb.
      // This allows some comb but limits the damages and also allows more
      // detail than a boring oversmoothed clip.

      movq_r2r ( mm1, mm2 );				// copy L1
      psubusb_r2r ( mm3, mm2 );				// - L3, with saturation
      paddusb_r2r ( mm3, mm2 );                // now = Max(L1,L3)

      pcmpeqb_r2r ( mm7, mm7 );				// all ffffffff
      psubusb_r2r ( mm1, mm7 );				// ff - L1
      paddusb_r2r ( mm7, mm3 );				// add, may sat at fff..
      psubusb_r2r ( mm7, mm3 );				// now = Min(L1,L3)

      // allow the value to be above the high or below the low by amt of MaxComb
      paddusb_m2r ( MaxComb, mm2 );			// raise max by MaxComb (saturating)
      psubusb_m2r ( MaxComb, mm3 );			// lower min by MaxComb (saturating)

      psubusb_r2r ( mm3, mm4 );				// best - lowered Min
      paddusb_r2r ( mm3, mm4 );				// now = Max(best, lowered Min)

      pcmpeqb_r2r ( mm7, mm7 );				// all ffffffff
      psubusb_r2r ( mm4, mm7 );				// ff - Max(best, lowered Min)
      paddusb_r2r ( mm7, mm2 );				// add may sat at FFF..
      psubusb_r2r ( mm7, mm2 );				// now = Min(Max(best, lowered Min), raised Max) = best, clipped

      movq_r2m ( mm2, *Dest++ );        // move in our clipped best

    }
  }

  /* Copy last odd line if we're processing an Odd field. */
  /* NOTE(review): height was halved above; if it is now 0 the destination
   * offset (height * 2 - 1) * LineLength is negative - confirm that callers
   * never pass a frame height below 2. */
  if (IsOdd)
  {
    fast_memcpy(dest + (height * 2 - 1) * LineLength,
                      pOddLines + (height - 1) * SourcePitch,
                      LineLength);
  }

  /* clear out the MMX registers ready for doing floating point again */
  emms();
  return 1;
}

/* Filter descriptor that exposes the greedy MMX deinterlacer under the name
 * "deintgreedy".  Fields are initialised positionally; the per-field notes
 * below follow the original annotations (field names live in vop.h). */
vop vop_deint_greedy = {
  "deintgreedy",  /* name */
  2,           /* 2 images are needed (the routine also reads the preceding buffer) */
  VIDEO_YUYV,  /* input format  */
  VIDEO_YUYV,  /* output format */
  deinterlace_greedy_yuv_mmx,  /* The TREATMENT function */
  NULL,        /* No reinitialization function */
  0,           /* the width of the input is equal to the output width */
  0,           /* idem for the height */
  -1,          /* the destination height is 480 for NTSC, 576 for PAL
                  (presumably -1 selects that TV-standard height; confirm in vop.h) */
  1            /* preferably the last treatment */
};

#endif

