/*
 * Addons for extra webcam support in the pwcbsd driver
 *
 *  Copyright (C) 2007 Luigi Rizzo
 *
 * BSD License as this is totally rewritten.
 *
 * Pixart Decompressor algorithm by Bertrik.Sikken. Thomas Kaiser (C) 2005
 */

#include "pwc.h"

/*
 * Staging area for raw PAC207 payload that has been received but not yet
 * decoded into the frame being filled (see pac207_consume()).
 */
struct decompress_buf {
	int used;	/* number of valid bytes currently stored in data[] */
	unsigned char data[2*ISO_MAX_FRAME_SIZE];	/* bytes waiting to be decoded row by row */
};

/*
 * Forward declaration (defined further down): decodes one compressed row
 * from inp into outp and returns the number of input bytes consumed.
 */
static int pixart_decompress_row(unsigned char *inp, unsigned char *outp, int width);

/*
 * Feed one chunk of isochronous payload into the frame being filled.
 * This is public as it is called in spca5xx.c
 *
 * sc       driver state: sc->fill_frame is the destination frame,
 *          sc->decompress_data the staging buffer for partial rows,
 *          sc->vsync the frame-synchronisation state.
 * iso_buf  payload bytes of the packet.
 * flen     number of bytes available at iso_buf.
 *
 * While hunting for sync the data is scanned for the frame marker; once
 * synched, data is accumulated in the staging buffer and decoded one row
 * at a time into the frame buffer.
 *
 * Returns 1 if a completed frame was handed over through
 * pwc_next_fill_frame() (the caller must be awakened), 0 otherwise.
 */
int pac207_consume(struct pwc_softc *sc, unsigned char *iso_buf, int flen)
{
    enum { PAC207_HEADER_LEN = 16 };	/* marker plus rest of frame header */
    int awake = 0;	/* need to awake the caller at the end */
    struct pwc_frame_buf *fbuf = sc->fill_frame;
    struct decompress_buf *buf = sc->decompress_data;

    while (flen > 0) {
	unsigned char *fillptr = fbuf->data + fbuf->filled;
	int i;

	if (sc->vsync != VSYNC_SYNCHED) { /* sync hunting */
		/*
		 * This code can support different camera types with similar
		 * decoding/decompress requirement, so we use a switch().
		 */
		switch (sc->pwc_info.bridge) {
		default:
			printf("unsupported bridge %d, cannot find sync\n",
				sc->pwc_info.bridge);
			break;

		case BRIDGE_PAC207:
			if (flen < 6)	/* not enough to find the marker */
				break;
			for (i = 0; i < flen - 5; i++) {
				/* raw bytes: must be unsigned to hold 0xff */
				static const unsigned char marker[] =
				    { 0xff, 0xff, 0x00, 0xff, 0x96 };

				if (memcmp(iso_buf + i, marker, sizeof(marker)) != 0)
					continue;
				Trace(TRACE_READ, "marker found at %d\n", i);
				if (flen - i < PAC207_HEADER_LEN) {
					/*
					 * The header runs past the end of
					 * this packet. Skipping it would make
					 * flen negative (and later be used as
					 * a copy size), so give up on this
					 * frame and keep hunting.
					 */
					i = flen;
					break;
				}
				sc->vsync = VSYNC_SYNCHED;
				i += PAC207_HEADER_LEN;	/* header size */
				flen -= i;
				iso_buf += i;
				break;
			}
			sc->bytes_skipped += i;
		}
		if (sc->vsync != VSYNC_SYNCHED)
			break;
		buf->used = 0;
	}

	/* Never write past the end of the staging buffer. */
	if (flen > (int)sizeof(buf->data) - buf->used) {
		printf("staging buffer overflow (%d + %d), skipping\n",
			buf->used, flen);
		sc->vsync = VSYNC_SYNCHUNT;
		sc->bytes_skipped += fbuf->filled + buf->used + flen;
		fbuf->filled = 0;
		buf->used = 0;
		break;
	}
	/*
	 * Copy iso_buf into decompress_data. From the second round on,
	 * iso_buf points inside buf->data itself (see below), so source
	 * and destination may overlap: memmove(), not memcpy().
	 */
	memmove(buf->data + buf->used, iso_buf, flen);
	buf->used += flen;

	if (buf->used < sc->image.x + 2)	/* less than one row, return */
		break;
	/* Read the row-type and possibly decode.
	 * After the block, i is the number of bytes used.
	 */
	i = buf->data[0] * 256 + buf->data[1];	/* row-type */
	if (i == 0x1ee1) {	/* compressed */
		i = pixart_decompress_row(buf->data, fillptr, sc->image.x);
	} else if (i == 0x0ff0) {	/* uncompressed, header + samples */
		memcpy(fillptr, buf->data + 2, sc->image.x);
		i = sc->image.x + 2;	/* bytes used */
	} else {	/* lost sync. Drop entire packet for simplicity */
		printf("Invalid marker 0x%x at ofs %d, skipping\n", i, fbuf->filled);
		sc->vsync = VSYNC_SYNCHUNT;
		sc->bytes_skipped += fbuf->filled + buf->used;
		fbuf->filled = 0;
		buf->used = 0;
		break;
	}
	fbuf->filled += sc->image.x;
	/*
	 * Make a fake buffer with the leftover data, if any, which will
	 * be copied back into buf->data[0] at the next round.
	 * If the decoder consumed more than what was buffered, flen
	 * becomes <= 0 and the loop simply terminates.
	 */
	iso_buf = buf->data + i;
	flen = buf->used - i;
	buf->used = 0;

	if (fbuf->filled >= sc->image.x * sc->image.y) { /* frame ready */
		if (sc->drop_frames > 0) {
			sc->drop_frames--;
		} else {
			/* Send only once per EOF */
			awake = 1; /* delay wake_ups */
			pwc_next_fill_frame(sc);
			fbuf = sc->fill_frame;
		}
		fbuf->filled = 0;
		sc->vsync = VSYNC_SYNCHUNT;
	}
    }
    return awake;
}

/*
 * Attach callback: initialize variables depending on type and decompressor.
 *
 * Configures the spca50x layer, copies its size limits and endpoint into
 * the softc, and builds sc->image_mask from the list of camera modes.
 *
 * Returns 0 on success, -ENXIO if spca50x_configure() fails.
 */
static int spca_construct(struct pwc_softc *sc)
{
	int i;
	struct usb_spca50x *spca50x = &sc->spca50x;

	printf("pwc_construct for spca driver bridge %d\n",
		sc->pwc_info.bridge);
	if (spca50x_configure(spca50x)) {
		printf("error configuring\n");
		return -ENXIO;
	}
	sc->view_min.x = spca50x->minwidth;
	/* was assigned to view_min.x again, leaving view_min.y unset */
	sc->view_min.y = spca50x->minheight;
	sc->view_max.x = spca50x->maxwidth;
	sc->view_max.y = spca50x->maxheight;
	sc->abs_max.x  = spca50x->maxwidth;
	sc->abs_max.y  = spca50x->maxheight;

	sc->vcinterface = 2;	/* XXX correct ? */
	sc->vendpoint = spca50x->vendpoint;	/* XXX where is it stored ? */
	sc->frame_header_size = 0;
	sc->frame_trailer_size = 0;
	/* XXX todo map formats into image_mask */
	sc->image_mask = 0;
	for (i = QCIF; i < TOTMODE; i++) {
		int w = spca50x->mode_cam[i].width;
		int h = spca50x->mode_cam[i].height;

		/* set one bit per recognised resolution */
		if (w == 128 && h == 96)
			sc->image_mask |= 1 << PSZ_SQCIF;
		else if (w == 160 && h == 120)
			sc->image_mask |= 1 << PSZ_QSIF;
		else if (w == 176 && h == 144)
			sc->image_mask |= 1 << PSZ_QCIF;
		else if (w == 192 && h == 144)
			sc->image_mask |= 1 << PSZ_QCIF;	/* XXX QPAL */
		else if (w == 320 && h == 240)
			sc->image_mask |= 1 << PSZ_SIF;
		else if (w == 352 && h == 288)
			sc->image_mask |= 1 << PSZ_CIF;

		if (0) {	/* debugging aid, disabled */
			printf("mode %d %d WxH %d %d pipe %d method %d palette %x\n",
				i,
				spca50x->mode_cam[i].mode,
				spca50x->mode_cam[i].width,
				spca50x->mode_cam[i].height,
				spca50x->mode_cam[i].pipe,
				spca50x->mode_cam[i].method,
				spca50x->mode_cam[i].t_palette);
		}
	}
	printf("---> image_mask 0x%x\n", sc->image_mask);
	sc->vpalette = VIDEO_PALETTE_YUV420P; /* default */
	sc->view_min.size = sc->view_min.x * sc->view_min.y;
	sc->view_max.size = sc->view_max.x * sc->view_max.y;
	/* length of image, in YUV format; always allocate enough memory. */
	sc->len_per_image = (sc->abs_max.x * sc->abs_max.y * 3) / 2;
	return 0;
}

/* Forward declaration (defined further down): fills the pixart code table. */
static void pixart_init_decoder(void);

/*
 * Open callback for spca.
 *
 * Allocates a single zeroed area holding the row staging buffer
 * (struct decompress_buf) followed by PWC_FRAME_SIZE bytes used as
 * temporary buffer by the jpeg decoder, prepares the pixart decode table
 * and selects a default view size.
 *
 * Returns 0 on success, ENOMEM if the allocation fails.
 */
static int spca_open_cb(struct pwc_softc *sc)
{
	char *area;

	area = malloc(sizeof(struct decompress_buf) + PWC_FRAME_SIZE,
	    M_USBDEV, M_WAITOK|M_ZERO);
	sc->decompress_data = (void *)area;
	if (area == NULL)
		return ENOMEM;

	/* XXX this is for the jpeg decompression: it lives right after the staging buffer */
	sc->spca50x.tmpBuffer = area + sizeof(struct decompress_buf);

	pixart_init_decoder();

	/* just set a default video mode for the time being */
	sc->view.x = 176;
	sc->view.y = 144;

	return 0;
}

/*
 * Close routine for spca: stops the isochronous transfers and switches
 * the interface back to alternate setting 0. Always returns 0.
 */
static int spca_close_cb(struct pwc_softc *sc)
{
	struct usb_spca50x *cam = &sc->spca50x;

	spca50x_stop_isoc(cam);
	set_alt_interface(sc->udev, sc->sc_iface, 0);

	return 0;
}

/*
 * Callback table for the spca-based cameras: it points at the attach,
 * open, close and decompress routines implemented in this file.
 */
struct camera_callbacks spca_callbacks = {
        .cb_attach = spca_construct,
        .cb_open = spca_open_cb,
        .cb_close = spca_close_cb,
	.cb_decompress = spca_decompress,
};

/*
 * One entry of the pixart decode table, describing the variable-length
 * code found at the start of an 8-bit window of the compressed stream.
 */
struct pixart_decode_table_t {
	int is_abs;	/* nonzero: code is followed by 6 bits holding an absolute value */
	int len;	/* number of bits taken by the code itself */
	int val;	/* delta added to the sample two positions to the left */
};
 
/*
 * Saturate an integer to the 0..255 range of an 8-bit sample.
 * (imported from spca-decoder.c)
 */
static inline unsigned char CLIP(int color)
{
	if (color < 0)
		return 0;
	if (color > 0xFF)
		return 0xff;
	return (unsigned char)color;
}

/*
 * Luma from RGB: y = 0.656 g + 0.125 b + 0.226 r, computed with integer
 * weights scaled by 1000 and saturated to 0..255.
 */
static inline unsigned char RGB24_TO_Y(int r, int g, int b)
{
	const int scaled = g * 656 + b * 125 + r * 226;

	return CLIP(scaled / 1000);
}

/*
 * V chroma from red and luma: v = 128 + 0.656 (r - y), computed with an
 * integer weight scaled by 1000 and saturated to 0..255.
 */
static inline unsigned char YR_TO_V(int r, int y)
{
	const int diff = r - y;

	return CLIP(128 + (diff * 656) / 1000);
}

/*
 * U chroma from blue and luma: u = 128 + 0.5 (b - y), saturated to 0..255.
 */
static inline unsigned char YB_TO_U(int b, int y)
{
	const int diff = b - y;

	return CLIP(128 + diff / 2);
}

/*
 * XXX hooks for gamma correction.
 * Currently identity mappings. The argument is parenthesized so that an
 * expression passed to the hook is not regrouped by the operators that
 * surround the macro invocation.
 */
#define Red(x)		(x)
#define Green(x)	(x)
#define Blue(x)		(x)

/*
 * Decode table indexed by the next 8 bits of the compressed stream;
 * filled by pixart_init_decoder() and read by pixart_decompress_row().
 */
static struct pixart_decode_table_t pixart_decode_table[256];

/* pixart compressed format handler */
static void pixart_init_decoder(void)
{
    int i;
    int is_abs, val, len;
    struct pixart_decode_table_t *table = pixart_decode_table;

    if (table[0].len != 0)	/* already initialized */
	return;
    for (i = 0; i < 256; i++) {
        is_abs = 0;
        val = 0;
        len = 0;
        if ((i & 0xC0) == 0) {			/* code 00 */
            val = 0;
            len = 2;
        } else if ((i & 0xC0) == 0x40) {	/* code 01 */
            val = -5;
            len = 2;
        } else if ((i & 0xC0) == 0x80) {	/* code 10 */
            val = +5;
            len = 2;
        } else if ((i & 0xF0) == 0xC0) {	/* code 1100 */
            val = -10;
            len = 4;
        } else if ((i & 0xF0) == 0xD0) {	/* code 1101 */
            val = +10;
            len = 4;
        } else if ((i & 0xF8) == 0xE0) {	/* code 11100 */
            val = -15;
            len = 5;
        } else if ((i & 0xF8) == 0xE8) {	/* code 11101 */
            val = +15;
            len = 5;
        } else if ((i & 0xFC) == 0xF0) {	/* code 111100 */
            val = -20;
            len = 6;
        } else if ((i & 0xFC) == 0xF4) {	/* code 111101 */
            val = +20;
            len = 6;
        } else if ((i & 0xF8) == 0xF8) {	/* code 11111xxxxxx */
            is_abs = 1;
            val = 0;
            len = 5;
        }
        table[i].is_abs = is_abs;
        table[i].val = val;
        table[i].len = len;
    }
}

/*
 * Support routine for pixart decoding.
 * Returns the 8 bits found at bit offset 'bitpos' (counted from the most
 * significant bit of base[0]). The byte following the addressed one is
 * always read, even when 'bitpos' is a multiple of 8.
 */
static inline unsigned char getByte(unsigned char *base, unsigned int bitpos)
{
    const unsigned int shift = bitpos % 8;
    const unsigned char *p = &base[bitpos / 8];
    /* 16-bit window made of the addressed byte and the next one */
    const unsigned int window = ((unsigned int)p[0] << 8) | p[1];

    return (unsigned char)(window >> (8 - shift));
}

/*
 * Decompress routine.
 * First two bytes are the identifier (0x1e, 0xe1)
 * followed bu two pixels as raw 8 bit, followed by compressed samples
 * (stored as huffman-compressed deltas).
 * Because the format is bayer, the first two pixels are RY or BY,
 * and all the following pixels are deltas with respect to the
 * corresponding R or Y sample.
 */
static int
pixart_decompress_row(unsigned char *inp, unsigned char *outp, int width)
{
    int col;
    int val;
    int bitpos;
    unsigned char code;
    struct pixart_decode_table_t *table = pixart_decode_table;

    /* first two pixels are stored as raw 8-bit */
    *outp++ = inp[2];
    *outp++ = inp[3];
    bitpos = 32;	/* skip the 1e e1 marker and the first 2 pixels */

    /* main decoding loop */
    for (col = 2; col < width; col++) {
        /* get bitcode */

        code = getByte(inp, bitpos);
        bitpos += table[code].len;

        /* calculate pixel value */
        if (table[code].is_abs) {
            /* absolute value: get 6 more bits */
            code = getByte(inp, bitpos);
            bitpos += 6;   
            *outp++ = code & 0xFC;
        } else {
            /* relative to left pixel */
            val = outp[-2] + table[code].val;
            *outp++ = CLIP(val);
        }
    }

    /* return line length, rounded up to next 16-bit word */
    val = 2 * ((bitpos + 15) / 16);
#if 0
    printf("decompress skipped %d bytes\n", val);
    for (col = 0; col < 2*val; col++) {
	if (inp[col] == 0x1e && inp[col+1] == 0xe1)
		printf("0x1ee1 at offset %d\n", col);
    }
#endif
    return val;
}

/*
 * Convert a bayer image into planar YUV 4:2:0, cropping the borders.
 *
 * dst     output: (width - 2*padx) * (height - 2*pady) Y samples, followed
 *         by the U plane and then the V plane (a quarter of that each).
 * buf     input image, one byte per sample, 'width' samples per row.
 * width   input width in samples.
 * height  input height in rows.
 * padx    number of columns dropped on each side of the input.
 * pady    number of rows dropped at the top and at the bottom of the input.
 * ofs     meant as the number of bytes to skip at the beginning of each
 *         line; currently ignored.
 */
static void bayer_to_yuv(unsigned char *dst, unsigned char *buf, int width, int height, int padx, int pady, int ofs)
{
	int mx, my;	/* loop on input */
	int outwidth = width - 2*padx;
	int outheight = height - 2*pady;
	int framesize = outwidth * outheight;	/* output frame size */
	unsigned char *U = dst + framesize, *V = U + framesize/4;
	unsigned char *pic = dst;		/* output line 0 of the pair */
	unsigned char *pic1 = pic + outwidth;	/* output line 1 of the pair */

	(void)ofs;	/* XXX offset by 1 ? mess on the last pixel ? */

	/*
	 * We have the following pixel layout, which we scan two rows at a time.
	 *
	 *	r00 g10 r20 g30
	 *	g01 b11 g21 b31
	 *	r02 g12 r22 g32
	 *	g03 b13 g23 b33
	 *
	 * We focus on the center square, while the borders are not available
	 * so we have to come up with fake values for them.
	 * If a sample is not available we use the average of the
	 * surrounding samples for the same color component.
	 * The algorithm below is called Bilinear Interpolation and is one with
	 * the best cost/performance ratio.
	 */
	for (my = 0; my < height; my += 2) {
		int inl, inl1;	/* input offsets for line 0 and 1 */

		if (my < pady || my >= pady + outheight)
			continue;
		/*
		 * Derive the input offsets from the row number: they must
		 * follow 'my' also across the rows skipped above, otherwise
		 * a cropped image would start from input row 0 and the
		 * neighbour accesses below would fall before the buffer.
		 */
		inl = my * width;
		inl1 = inl + width;

		for (mx = 0; mx < width; mx += 2) {
			unsigned char	r00, g10, r20;
			unsigned char	g01, b11, g21, b31;
			unsigned char	r02, g12, r22, g32;
			unsigned char	     b13, g23, b33;

			/* completion of the central matrix */
			unsigned char y11, y21, y12, y22;
			unsigned char g11,           g22;
			unsigned char r11, r21, r12;
			unsigned char      b21, b12, b22;
			unsigned char y, r, b;

			if (mx < padx || mx >= padx + outwidth)
				continue;
			b11 = buf[inl  + mx + 0];
			g21 = buf[inl  + mx + 1];
			g12 = buf[inl1 + mx + 0];
			r22 = buf[inl1 + mx + 1];
			if (mx == 0 || my == 0 || mx == width - 2 || my == height - 2) {
				/* assume the whole border is not available */
				/* XXX could be done better because some
				 * of the samples do exist, but do it later.
				 */
				g11 = g22 = (g21 + g12) >> 1;
				b21 = b12 = b22 = b11;
				r11 = r21 = r12 = r22;
			} else {
				/* row above the square */
				r00 = buf[inl  + mx - width - 1];
				g10 = buf[inl  + mx - width    ];
				r20 = buf[inl  + mx - width + 1];

				/* columns left and right of the square */
				g01 = buf[inl  + mx         - 1];
				b31 = buf[inl  + mx         + 2];
				r02 = buf[inl1 + mx         - 1];
				g32 = buf[inl1 + mx         + 2];

				/* row below the square */
				b13 = buf[inl1 + mx + width    ];
				g23 = buf[inl1 + mx + width + 1];
				b33 = buf[inl1 + mx + width + 2];

				g11 = (g10 + g01 + g21 + g12) >> 2;
				g22 = (g32 + g23 + g21 + g12) >> 2;
				b21 = (b11 + b31) >> 1;
				b12 = (b11 + b13) >> 1;
				b22 = (b11 + b31 + b13 + b33) >> 2;
				r11 = (r00 + r20 + r02 + r22) >> 2;
				r12 = (r02 + r22) >> 1;
				r21 = (r20 + r22) >> 1;
			}
			/* one chroma pair per 2x2 square, from the averages */
			r = (r11 + r12 + r21 + r22) >> 2;
			b = (b11 + b12 + b21 + b22) >> 2;

			*pic++  = y11 = RGB24_TO_Y(r11, g11, b11);
			*pic++  = y21 = RGB24_TO_Y(r21, g21, b21);
			*pic1++ = y12 = RGB24_TO_Y(r12, g12, b12);
			*pic1++ = y22 = RGB24_TO_Y(r22, g22, b22);

			y = (y11 + y21 + y12 + y22) >> 2;

			*U++ = YB_TO_U(b, y);
			*V++ = YR_TO_V(r, y);
		} /* end mx loop */
		/* skip the output line already written through the other pointer */
		pic += outwidth;
		pic1 += outwidth;
	} /* end my loop */
}

int spca_decompress(struct pwc_softc *pdev, unsigned char *src, unsigned char *image, int srclen)
{

	int i;
	char *dst = src;
	char *base = src;
	struct spca50x_frame myframe;

	switch (pdev->pwc_info.dataformat) {
	default:
		printf("unrecognised dataformat %d, returning raw data\n",
			pdev->pwc_info.bridge);
		memcpy(image, src, pdev->frame_size);
		return 0;

	case JPGH: {	/* Logitech ? */
		int w = (src[10] << 8 ) + src[11];
		int h = (src[12] << 8 ) + src[13];
		printf("found jpgh %d x %d bytes %d\n", w, h, srclen);
		/* XXX check width */
#if 1
		myframe.decoder = &pdev->spca50x.maindecode;
		myframe.tmpbuffer = pdev->spca50x.tmpBuffer;	/* XXX malloc ? */
		if (myframe.tmpbuffer == NULL) {
			printf("%s: buffer not allocated\n", __FUNCTION__);
			return -ENOMEM;
		}
		/* the decoder decodes in-place... */
		memcpy(image, src, pdev->frame_size);
		myframe.data = image;

		myframe.x = myframe.width = myframe.hdrwidth = pdev->image.x;
		myframe.y = myframe.height = myframe.hdrheight = pdev->image.y;
		myframe.cameratype = pdev->spca50x.cameratype;
		myframe.pictsetting = pdev->spca50x.pictsetting;
		myframe.format = pdev->spca50x.format;

		myframe.method = 0;	/* no crop, no pad... */
		myframe.cropx1 = myframe.cropx2 = 0; myframe.cropy1 = myframe.cropy2 = 0;
		myframe.scanlength =  pdev->image.x * pdev->image.y * 3/2; /* assume 420 data */
		spca50x_outpicture(&myframe);
#endif
		
		break;
	    }

	case S561:	/* Creative */
#if 0
	        if (src[1] & 0x10) {
			decode_spca561(myframe->data, myframe->tmpbuffer,
                           myframe->width, myframe->height);
		} else
#endif
			memcpy(src, src + 10,  pdev->image.x * pdev->image.y);
		bayer_to_yuv(image, src, pdev->image.x, pdev->image.y,
			(pdev->image.x - pdev->view.x)/2,
			(pdev->image.y - pdev->view.y)/2, 1);
		break;

	case NONE:	/* Dexxa camera */
		/* XXX copy in place */
		printf("bytes %d size %d x %d\n", srclen, pdev->image.x, pdev->image.y);
		for (i=0; i < pdev->image.y; i++) {
			memcpy(dst, src, pdev->image.x);
			src += pdev->image.x + 16;
			dst += pdev->image.x;
		}
		/* XXX not really */
		bayer_to_yuv(image, base, pdev->image.x, pdev->image.y,
			(pdev->image.x - pdev->view.x)/2,
			(pdev->image.y - pdev->view.y)/2, 0);
		break;

	case PGBRG:	/* pixart bayer */
		bayer_to_yuv(image, base, pdev->image.x, pdev->image.y,
			(pdev->image.x - pdev->view.x)/2,
			(pdev->image.y - pdev->view.y)/2, 1);
		break;
	}
	return 0;
}
