/* 
 *	Copyright (C) Chia-chen Kuo - April 2001
 *
 *  This file is part of DVD2AVI, a free MPEG-2 decoder
 *	
 *  DVD2AVI is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  DVD2AVI is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 */

#define GLOBAL
#include "global.h"
#include "vfapi.h"
#include "getbit.h"

/* Number of slots in the GOPList and FrameList pointer tables. */
#define MAX_FRAME_NUMBER	1000000
/* Number of slots in the GOPBuffer table of cached decoded frames. */
#define MAX_GOP_SIZE		1024

/*
 * Lookup table indexed by chroma_format; Open_D2VFAPI stores the selected
 * entry in block_count.
 * NOTE(review): the values presumably are the blocks per macroblock for
 * 4:2:0 (6), 4:2:2 (8) and 4:4:4 (12) -- confirm against the decoder core.
 */
static int ChromaFormat[4] = {
	0, 6, 8, 12
};

/*
 * One entry per I-frame record (type 7) found in the project file.
 * Entries are allocated by Open_D2VFAPI and used by Decode_D2VFAPI to seek.
 */
typedef struct {
	DWORD		number;		// running frame counter value at this I frame
	int			file;		// index into Infile[] of the source file to read
	__int64		position;	// seek offset in that file (project value * BUFFER_SIZE)
}	GOPLIST;
/* Table of GOP entries, filled from index 0 upwards. */
GOPLIST *GOPList[MAX_FRAME_NUMBER];

/*
 * One entry per output frame: which coded frame numbers feed it.
 * top and bottom differ when the two halves of an output frame come from
 * different coded frames.
 */
typedef struct {
	DWORD			top;		// coded frame number recorded for the "top" half
	DWORD			bottom;		// coded frame number recorded for the "bottom" half
	char			forward;	// set to 1 when the next entry shares a coded frame with this one
	char			backward;	// set to 1 when the previous entry shares a coded frame with this one
}	FRAMELIST;
/* Table of output-frame entries, filled from index 0 upwards. */
FRAMELIST *FrameList[MAX_FRAME_NUMBER];

/* Cache of decoded frames of the current GOP; slots are allocated and freed by Decode_D2VFAPI. */
static unsigned char *GOPBuffer[MAX_GOP_SIZE];
/*
 * Field_Order: taken from the tff flag of the first frame record (inverted for FO_SWAP).
 * Full_Frame:  1 when every used FrameList entry has top == bottom.
 */
static BOOL Field_Order, Full_Frame;
/* Handle of the optional OpenDVD.dll helper library, or NULL. */
static HINSTANCE hLibrary;

/* Fills in the cpu.* capability flags using CPUID. */
static void CheckCPU(void);
/* Row-interleaved copies between a pitch-strided image and a packed (TWIDTH per row) image. */
__forceinline static void RGBCopyodd(unsigned char *src, unsigned char *dst, int pitch, int forward);
__forceinline static void RGBCopyeven(unsigned char *src, unsigned char *dst, int pitch, int forward);

/*
 * Open_D2VFAPI
 *
 * Opens the DVD2AVI project file `path`, checks its signature, loads the
 * optional OpenDVD.dll helper, opens every source file listed in the
 * project, reads the decoding options, allocates the decoder work buffers
 * and builds the GOP index (GOPList) and the output-frame map (FrameList).
 *
 * path : name of the project file.
 * out  : descriptor to fill in; it is zeroed first.
 *
 * Returns 1 on success, 0 on failure.  Resources acquired before a failure
 * is detected are not released here; File_Limit is kept equal to the number
 * of source files that were actually opened so that they can be closed later.
 */
int Open_D2VFAPI(char *path, D2VFAPI *out)
{
	char ID[19], PASS[19] = "DVD2AVIProjectFile";
	DWORD i, j, size, code, type, tff, rff, film, ntsc, gop, top, bottom, mapping;
	int repeat_on, repeat_off, repeat_init;
	int Clip_Top, Clip_Bottom, Clip_Left, Clip_Right, Squeeze_Width, Squeeze_Height;

	char *ext, buffer[256];
	// leave room for the terminator that is appended after the registry read
	HKEY key; DWORD value = REG_SZ; DWORD length = sizeof(buffer) - 1;

	if (path==NULL || out==NULL)
		return 0;

	ZeroMemory(out, sizeof(D2VFAPI));

	out->VF_File = fopen(path, "r");
	if (out->VF_File==NULL)
		return 0;

	// the project file must start with the signature string
	if (fgets(ID, 19, out->VF_File)==NULL)
		return 0;
	if (strcmp(ID, PASS))
		return 0;

	// load DLL: the registry value holds a path whose file name is replaced by OpenDVD.dll
	buffer[0] = '\0';
	if (RegOpenKeyEx(HKEY_CURRENT_USER, "Software\\VFPlugin", 0, KEY_ALL_ACCESS, &key)==ERROR_SUCCESS)
	{
		if (RegQueryValueEx(key, "DVD2AVI", NULL, &value, (unsigned char*)buffer, &length)==ERROR_SUCCESS)
		{
			// registry strings are not guaranteed to be NUL terminated
			buffer[length] = '\0';

			ext = strrchr(buffer, '\\');
			if (ext!=NULL && (size_t)(ext-buffer) + 1 + sizeof("OpenDVD.dll") <= sizeof(buffer))
				sprintf(ext + 1, "OpenDVD.dll");
			else
				buffer[0] = '\0';
		}
		else
			buffer[0] = '\0';

		RegCloseKey(key);
	}

	// without a usable path there is no helper library to load
	hLibrary = buffer[0] ? LoadLibrary(buffer) : NULL;
	if (hLibrary != NULL)
		BufferOp = (PBufferOp) GetProcAddress(hLibrary, "BufferOp");

	for (i=0; i<MAX_FILE_NUMBER; i++)
	{
		Infilename[i] = (char*)malloc(_MAX_PATH);
		if (Infilename[i]==NULL)
			return 0;
	}

	// number of source files; must fit the Infile/Infilename tables
	if (fscanf(out->VF_File, "%d", &File_Limit)!=1 || File_Limit<1 || File_Limit>MAX_FILE_NUMBER)
	{
		File_Limit = 0;
		return 0;
	}

	// each source file is stored as "<length> <name>"
	for (i=0; i<(DWORD)File_Limit; i++)
	{
		if (fscanf(out->VF_File, "%d ", &j)!=1 || j>=_MAX_PATH ||
			fgets(Infilename[i], j+1, out->VF_File)==NULL ||
			(Infile[i] = _open(Infilename[i], _O_RDONLY | _O_BINARY))==-1)
		{
			// only the first i files were opened
			File_Limit = i;
			return 0;
		}
	}

	CheckCPU();

	fscanf(out->VF_File, "\nStream_Type=%d,%X,%X\n", &SystemStream_Flag, &lfsr0, &lfsr1);
	if (lfsr0 || lfsr1)
		KeyOp_Flag = 1;
	else
		KeyOp_Flag = 0;

	fscanf(out->VF_File, "iDCT_Algorithm=%d\n", &IDCT_Flag);

	switch (IDCT_Flag)
	{
		case IDCT_SSEMMX:
			// fall back to the MMX iDCT when the CPU flag is not set
			if (!cpu.ssemmx)
				IDCT_Flag = IDCT_MMX;
			break;

		case IDCT_FPU:
			Initialize_FPU_IDCT();
			break;

		case IDCT_REF:
			Initialize_REF_IDCT();
			break;
	}

	// scan the first source file for the first sequence header
	File_Flag = 0;
	_lseeki64(Infile[0], 0, SEEK_SET);
	Initialize_Buffer();

	do
	{
		next_start_code();
		code = Get_Bits(32);
	}
	while (code!=SEQUENCE_HEADER_CODE);

	sequence_header();

	// picture geometry in macroblocks and in pixels
	mb_width = (horizontal_size+15)/16;
	mb_height = progressive_sequence ? (vertical_size+15)/16 : 2*((vertical_size+31)/32);

	Coded_Picture_Width = 16 * mb_width;
	Coded_Picture_Height = 16 * mb_height;

	Chroma_Width = (chroma_format==CHROMA444) ? Coded_Picture_Width : Coded_Picture_Width>>1;
	Chroma_Height = (chroma_format!=CHROMA420) ? Coded_Picture_Height : Coded_Picture_Height>>1;

	block_count = ChromaFormat[chroma_format];

	// work buffers are over-allocated by 64 bytes and the usable pointer is
	// moved up to the next 64-byte boundary; the p_* pointers keep the
	// original addresses for free()
	for (i=0; i<8; i++)
	{
		p_block[i] = (short *)malloc(sizeof(short)*64 + 64);
		block[i]   = (short *)((long)p_block[i] + 64 - (long)p_block[i]%64);
	}

	p_fTempArray = (void *)malloc(sizeof(float)*128 + 64);
	fTempArray = (void *)((long)p_fTempArray + 64 - (long)p_fTempArray%64);

	// plane 0 uses the luma size, planes 1 and 2 the chroma size
	for (i=0; i<3; i++)
	{
		if (i==0)
			size = Coded_Picture_Width * Coded_Picture_Height;
		else
			size = Chroma_Width * Chroma_Height;

		backward_reference_frame[i] = (unsigned char*)malloc(size);
		forward_reference_frame[i] = (unsigned char*)malloc(size);
		auxframe[i] = (unsigned char*)malloc(size);
	}

	fscanf(out->VF_File, "YUVRGB_Scale=%d\n", &i);

	// two alternative sets of packed YUV->RGB conversion constants
	if (i)
	{
		RGB_Scale = 0x1000254310002543;
		RGB_Offset = 0x0010001000100010;
		RGB_CBU = 0x0000408D0000408D;
		RGB_CGX = 0xF377E5FCF377E5FC;
		RGB_CRV = 0x0000331300003313;
	}
	else
	{
		RGB_Scale = 0x1000200010002000;
		RGB_Offset = 0x0000000000000000;
		RGB_CBU = 0x000038B4000038B4;
		RGB_CGX = 0xF4FDE926F4FDE926;
		RGB_CRV = 0x00002CDD00002CDD;
	}

	fscanf(out->VF_File, "Luminance=%d,%d\n", &i, &j);

	// gain 128 with offset 0 disables the luminance filter
	if (i==128 && j==0)
		Luminance_Flag = 0;
	else
	{
		Luminance_Flag = 1;
		// replicate gain and offset into all four 16-bit lanes
		LumGainMask = ((__int64)i<<48) + ((__int64)i<<32) + ((__int64)i<<16) + (__int64)i;
		LumOffsetMask = ((__int64)j<<48) + ((__int64)j<<32) + ((__int64)j<<16) + (__int64)j;

		lum = (unsigned char*)malloc(Coded_Picture_Width * Coded_Picture_Height);
	}

	fscanf(out->VF_File, "Picture_Size=%d,%d,%d,%d,%d,%d\n", &Clip_Top, &Clip_Bottom, 
		&Clip_Left, &Clip_Right, &Squeeze_Width, &Squeeze_Height);

	// start from the full coded picture, then apply clipping and squeezing
	Resize_Flag = 0;
	Resize_Width = Clip_Width = Coded_Picture_Width;
	Resize_Height = Clip_Height = Coded_Picture_Height;
	CLIP_AREA = HALF_CLIP_AREA = CLIP_STEP = 0;

	if (Clip_Top || Clip_Bottom || Clip_Left || Clip_Right)
	{
		Clip_Width -= Clip_Left+Clip_Right;
		Clip_Height -= Clip_Top+Clip_Bottom;
		Resize_Width = Clip_Width;
		Resize_Height = Clip_Height;

		CLIP_AREA = Coded_Picture_Width * Clip_Top;
		HALF_CLIP_AREA = (Coded_Picture_Width>>1) * Clip_Top;
		CLIP_STEP = Coded_Picture_Width * Clip_Top + Clip_Left;
	}

	if (Squeeze_Width || Squeeze_Height)
	{
		Resize_Flag = 1;
		Resize_Width -= Squeeze_Width;
		Resize_Height -= Squeeze_Height;
	}

	// derived strides and sizes: TWIDTH is one RGB24 row of the clipped
	// picture, SWIDTH is two such rows
	TWIDTH = Clip_Width * 3;
	SWIDTH = Clip_Width * 6;
	LUM_AREA = Coded_Picture_Width * Clip_Height;
	PROGRESSIVE_HEIGHT = (Coded_Picture_Height>>1) - 2;
	INTERLACED_HEIGHT = (Coded_Picture_Height>>2) - 2;
	HALF_WIDTH = Coded_Picture_Width>>1;
	HALF_WIDTH_D8 = (Coded_Picture_Width>>1) - 8;
	DOUBLE_WIDTH = Coded_Picture_Width<<1;

	u422 = (unsigned char*)malloc(Coded_Picture_Width * Coded_Picture_Height / 2);
	v422 = (unsigned char*)malloc(Coded_Picture_Width * Coded_Picture_Height / 2);
	u444 = (unsigned char*)malloc(Coded_Picture_Width * Coded_Picture_Height);
	v444 = (unsigned char*)malloc(Coded_Picture_Width * Coded_Picture_Height);
	rgb24 = (unsigned char*)malloc(Clip_Width * Clip_Height * 3);

	fscanf(out->VF_File, "Field_Operation=%d\n", &FO_Flag);
	fscanf(out->VF_File, "Frame_Rate=%d\n", &(out->VF_FrameRate));
	// the Location values are read only to skip over them
	fscanf(out->VF_File, "Location=%d,%X,%d,%X\n", &i, &j, &i, &j);

	ntsc = film = top = bottom = gop = mapping = repeat_on = repeat_off = repeat_init = 0;

	// Frame records: 7 starts an I-frame record (file, position, flags);
	// any other value below 9 is a flags value for a P or B frame.
	// Reading stops at the first value >= 9, at end of file or at a token
	// that is not a number.
	while (fscanf(out->VF_File, "%d", &type)==1 && type<9)
	{
		// one pass touches at most GOPList[gop] and FrameList[index+1];
		// stop before any of them would run past the fixed-size tables
		if (gop>=MAX_FRAME_NUMBER || mapping+1>=MAX_FRAME_NUMBER || ntsc+1>=MAX_FRAME_NUMBER)
			break;

		if (type==7)	// I frame
		{
			GOPList[gop] = calloc(1, sizeof(GOPLIST));
			GOPList[gop]->number = film;
			fscanf(out->VF_File, "%d %X", &(GOPList[gop]->file), &j);

			GOPList[gop]->position = (__int64)j*BUFFER_SIZE;
			gop ++;

			fscanf(out->VF_File, "%d", &j);
			tff = j>>1;
			rff = j & 1;
		}
		else	// P, B frame
		{
			tff = type>>1;
			rff = type & 1;
		}

		// the first frame decides the field order
		if (!film)
		{
			if (tff)
				Field_Order = 1;
			else
				Field_Order = 0;
		}

		if (FO_Flag==FO_FILM)
		{
			// track the balance of frames with and without the repeat flag;
			// a frame is left out of the map whenever the "off" count leads
			// by the threshold, and mapped twice when the "on" count does
			if (rff)
				repeat_on++;
			else
				repeat_off++;

			if (repeat_init)
			{
				if (repeat_off-repeat_on == 5)
				{
					repeat_on = repeat_off = 0;
				}
				else
				{
					FrameList[mapping] = calloc(1, sizeof(FRAMELIST));
					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
					mapping ++;
				}

				if (repeat_on-repeat_off == 5)
				{
					repeat_on = repeat_off = 0;
					FrameList[mapping] = calloc(1, sizeof(FRAMELIST));
					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
					mapping ++;
				}
			}
			else
			{
				// until the first correction the threshold is 3 instead of 5
				if (repeat_off-repeat_on == 3)
				{
					repeat_on = repeat_off = 0;
					repeat_init = 1;
				}
				else
				{
					FrameList[mapping] = calloc(1, sizeof(FRAMELIST));
					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
					mapping ++;
				}

				if (repeat_on-repeat_off == 3)
				{
					repeat_on = repeat_off = 0;
					repeat_init = 1;

					FrameList[mapping] = calloc(1, sizeof(FRAMELIST));
					FrameList[mapping]->top = FrameList[mapping]->bottom = film;
					mapping ++;
				}
			}
		}
		else
		{
			// top/bottom mean: FrameList[ntsc] already exists and holds one
			// half taken from an earlier frame with the repeat flag set
			if (top)
			{
				FrameList[ntsc]->bottom = film;
				ntsc ++;
				FrameList[ntsc] = calloc(1, sizeof(FRAMELIST));
				FrameList[ntsc]->top = film;
			}
			else if (bottom)
			{
				FrameList[ntsc]->top = film;
				ntsc ++;
				FrameList[ntsc] = calloc(1, sizeof(FRAMELIST));
				FrameList[ntsc]->bottom = film;
			}
			else
			{
				FrameList[ntsc] = calloc(1, sizeof(FRAMELIST));
				FrameList[ntsc]->top = film;
				FrameList[ntsc]->bottom = film;
				ntsc ++;
			}

			if (rff)
			{
				// the repeated half opens a new entry unless one is pending
				if (!top && !bottom)
					FrameList[ntsc] = calloc(1, sizeof(FRAMELIST));

				if (tff)
				{
					FrameList[ntsc]->top = film;
					top = 1;
				}
				else
				{
					FrameList[ntsc]->bottom = film;
					bottom = 1;
				}

				// both halves present: the pending entry is complete
				if (top && bottom)
				{
					top = bottom = 0;
					ntsc ++;
				}
			}
		}

		film ++;
	}

	// nothing can be decoded without at least one frame and one GOP entry
	if (film==0 || gop==0)
		return 0;

	out->VF_FrameBound = film;
	film -= 2;

	// drop trailing output frames that refer to the last two coded frames
	if (FO_Flag==FO_FILM)
	{
		while (mapping>0 && FrameList[mapping-1]->top >= film)
			mapping --;

		if (mapping==0)
			return 0;

		out->VF_FrameLimit = mapping;
	}
	else
	{
		if (FO_Flag==FO_SWAP)
		{
			Field_Order = !Field_Order;

			// shift one half of every entry forward by one output frame
			if (Field_Order)
				for (i=0; i<ntsc-1; i++)
					FrameList[i]->bottom = FrameList[i+1]->bottom;
			else
				for (i=0; i<ntsc-1; i++)
					FrameList[i]->top = FrameList[i+1]->top;
		}

		while (ntsc>0 && ((FrameList[ntsc-1]->top >= film) || (FrameList[ntsc-1]->bottom >= film)))
			ntsc --;

		if (ntsc==0)
			return 0;

		out->VF_FrameLimit = ntsc;

		// mark neighbouring output frames that share a coded frame
		for (i=0; i<out->VF_FrameLimit-1; i++)
			if (FrameList[i]->top==FrameList[i+1]->top || FrameList[i]->top==FrameList[i+1]->bottom ||
				FrameList[i]->bottom==FrameList[i+1]->top || FrameList[i]->bottom==FrameList[i+1]->bottom)
			{
				FrameList[i]->forward = 1;
				FrameList[i+1]->backward = 1;
			}
	}

	// Full_Frame stays 1 only if no output frame mixes two coded frames
	Full_Frame = 1;
	for (i=0; i<out->VF_FrameLimit; i++)
		if (FrameList[i]->top!=FrameList[i]->bottom)
		{
			Full_Frame = 0;
			break;
		}

	// VF_GOPNow == VF_GOPLimit matches no GOP index, so the first decode seeks
	out->VF_GOPNow = out->VF_GOPLimit = gop;
	out->VF_OldFrame = out->VF_FrameLimit;
	out->VF_FrameSize = Clip_Width * Clip_Height * 3;

	return 1;
}

/*
 * Decode_D2VFAPI
 *
 * Produces output frame `frame` as an RGB image in `dst`.
 *
 * in    : descriptor filled in by Open_D2VFAPI; its VF_GOPNow, VF_GOPSize,
 *         VF_OldRef and VF_OldFrame members are updated here.
 * dst   : destination image; rows are `pitch` bytes apart.
 * frame : output frame index, used to index FrameList.
 * pitch : distance in bytes between destination rows.
 *
 * Decoded frames of the current GOP are cached in GOPBuffer so that requests
 * inside the same GOP only decode the frames not yet reached.
 * NOTE(review): the meaning of the first argument of Decode_Picture (0 or 1)
 * is defined outside this file -- confirm before changing any call.
 */
void Decode_D2VFAPI(D2VFAPI *in, unsigned char *dst, DWORD frame, int pitch)
{
	DWORD i, now, size, origin, ref, fo;
	int remain;

	// FILM mode: every output frame maps to a single coded frame
	if (FO_Flag==FO_FILM)
	{
		fo = 0;
		frame = FrameList[frame]->top;
	}

	// note: in FILM mode origin is already the mapped coded frame number
	origin = frame;

	// other modes: fo tells how the two halves are combined
	//   0 - both halves come from the same coded frame
	//   1 - the top entry is the earlier coded frame
	//   2 - the bottom entry is the earlier coded frame
	// frame becomes the earlier of the two coded frame numbers
	if (FO_Flag!=FO_FILM)
	{
		if (FrameList[frame]->top == FrameList[frame]->bottom)
		{
			fo = 0;
			frame = FrameList[frame]->top;
		}
		else if (FrameList[frame]->top < FrameList[frame]->bottom)
		{
			fo = 1;
			frame = FrameList[frame]->top;
		}
		else
		{
			fo = 2;
			frame = FrameList[frame]->bottom;
		}
	}

	ref = frame;

	// find the GOP (now) that contains the coded frame; ref becomes the
	// offset inside that GOP and size the GOP's frame count plus one
	if (frame >= GOPList[in->VF_GOPLimit-1]->number)
	{
		now = in->VF_GOPLimit-1;
		ref -= GOPList[in->VF_GOPLimit-1]->number;
		size = in->VF_FrameBound - GOPList[in->VF_GOPLimit-1]->number + 1;
	}
	else
		for (now = 0; now < (in->VF_GOPLimit-1); now++)
		{
			if (frame>=GOPList[now]->number && frame<GOPList[now+1]->number)
			{
				ref -= GOPList[now]->number;
				size = GOPList[now+1]->number - GOPList[now]->number + 1;
				break;
			}
		}

	// with mixed halves, ref points at the later of the two coded frames
	if (fo)
		ref ++;

	if (now != in->VF_GOPNow)
	{
		// the frame right after the previous request: keep decoding from the
		// current stream position instead of seeking to the GOP start
		if ((in->VF_OldFrame + 1)==origin)
		{
			if (Full_Frame)
			{
				Get_Hdr();
				Decode_Picture(1, dst, pitch);

				// a non-frame picture needs a second header/picture pair
				if (picture_structure!=FRAME_PICTURE)
				{
					Get_Hdr();
					Decode_Picture(1, dst, pitch);
				}
			}
			else
				switch (fo)
				{
					case 0:
						if (!FrameList[origin]->backward)
						{
							Get_Hdr();
							Decode_Picture(1, dst, pitch);

							if (picture_structure!=FRAME_PICTURE)
							{
								Get_Hdr();
								Decode_Picture(1, dst, pitch);
							}

							// the next output frame reuses this picture:
							// keep one set of its rows in rgb24
							if (FrameList[origin]->forward)
							{
								if (Field_Order)
									RGBCopyodd(dst, rgb24, pitch, 1);
								else
									RGBCopyeven(dst, rgb24, pitch, 1);
							}
						}
						else
						{
							// shares a coded frame with the previous output
							// frame: take the whole image from rgb24
							RGBCopyodd(rgb24, dst, pitch, 0);
							RGBCopyeven(rgb24, dst, pitch, 0);
						}
						break;

					case 1:
						// one row set from the picture held in rgb24, then
						// decode the next picture into rgb24 for the other
						RGBCopyodd(rgb24, dst, pitch, 0);

						Get_Hdr();
						Decode_Picture(1, rgb24, TWIDTH);

						if (picture_structure!=FRAME_PICTURE)
						{
							Get_Hdr();
							Decode_Picture(1, rgb24, TWIDTH);
						}

						RGBCopyeven(rgb24, dst, pitch, 0);
						break;

					case 2:	
						// same as case 1 with the two row sets exchanged
						RGBCopyeven(rgb24, dst, pitch, 0);

						Get_Hdr();
						Decode_Picture(1, rgb24, TWIDTH);

						if (picture_structure!=FRAME_PICTURE)
						{
							Get_Hdr();
							Decode_Picture(1, rgb24, TWIDTH);
						}

						RGBCopyodd(rgb24, dst, pitch, 0);
						break;
				}

			// the cached GOP no longer matches the stream position
			if (in->VF_GOPSize)
			{
				for (i=0; i < in->VF_GOPSize; i++)
					free(GOPBuffer[i]);

				in->VF_GOPSize = 0;
			}

			// VF_GOPLimit matches no GOP index, so this path is taken again
			// for the next sequential request
			in->VF_GOPNow = in->VF_GOPLimit;
			in->VF_OldFrame = origin;
			return;
		}

		// random access: restart decoding at the beginning of GOP `now`
		remain = ref;
		in->VF_OldRef = ref;
		in->VF_GOPNow = now;
		Second_Field = 0;

		// grow or shrink the frame cache to `size` slots
		if (size < in->VF_GOPSize)
			for (i=0; i < (in->VF_GOPSize - size); i++)
				free(GOPBuffer[size+i]);
		else if (size > in->VF_GOPSize)
			for (i=0; i < (size - in->VF_GOPSize); i++)
				GOPBuffer[in->VF_GOPSize+i] = malloc(in->VF_FrameSize);

		in->VF_GOPSize = size;

		File_Flag = GOPList[now]->file;
		_lseeki64(Infile[GOPList[now]->file], GOPList[now]->position, SEEK_SET);
		Initialize_Buffer();

		// skip headers until the I picture is reached
		while (Get_Hdr() && picture_coding_type!=I_TYPE);

		Decode_Picture(0, dst, pitch);

		// skip the B pictures that follow the I picture
		while (Get_Hdr() && picture_coding_type==B_TYPE);

		if (picture_structure!=FRAME_PICTURE)
		{
			Decode_Picture(0, dst, pitch);
			Get_Hdr();
		}

		Decode_Picture(1, dst, pitch);

		if (picture_structure!=FRAME_PICTURE)
		{
			Get_Hdr();
			Decode_Picture(1, dst, pitch);
		}

		// cache slot 0 receives the first displayed frame of the GOP
		RGBCopyodd(dst, GOPBuffer[0], pitch, 1);
		RGBCopyeven(dst, GOPBuffer[0], pitch, 1);

		// decode forward up to ref, caching every frame on the way
		while (remain && Get_Hdr())
		{
			Decode_Picture(1, dst, pitch);

			if (picture_structure!=FRAME_PICTURE)
			{
				Get_Hdr();
				Decode_Picture(1, dst, pitch);
			}

			RGBCopyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
			RGBCopyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);

			remain--;
		}

		// near the end of the GOP keep the last decoded frame in rgb24,
		// where the sequential path above reads it from
		if (!Full_Frame && ref>=(size-2))
		{
			RGBCopyodd(dst, rgb24, pitch, 1);
			RGBCopyeven(dst, rgb24, pitch, 1);
		}
	}
	else
	{
		// same GOP as the previous request: decode only the frames beyond
		// the furthest one reached so far (VF_OldRef)
		remain = ref - in->VF_OldRef;

		if (remain > 0)
		{
			in->VF_OldRef = ref;

			while (remain && Get_Hdr())
			{
				Decode_Picture(1, dst, pitch);

				if (picture_structure!=FRAME_PICTURE)
				{
					Get_Hdr();
					Decode_Picture(1, dst, pitch);
				}

				RGBCopyodd(dst, GOPBuffer[ref - remain + 1], pitch, 1);
				RGBCopyeven(dst, GOPBuffer[ref - remain + 1], pitch, 1);

				remain--;
			}

			if (!Full_Frame && ref>=(size-2))
			{
				RGBCopyodd(dst, rgb24, pitch, 1);
				RGBCopyeven(dst, rgb24, pitch, 1);
			}
		}
	}

	// assemble the output image from the cache: both row sets from one
	// cached frame (fo 0) or one row set each from two neighbouring frames
	switch (fo)
	{
		case 0:
			RGBCopyodd(GOPBuffer[ref], dst, pitch, 0);
			RGBCopyeven(GOPBuffer[ref], dst, pitch, 0);
			break;

		case 1:
			RGBCopyodd(GOPBuffer[ref-1], dst, pitch, 0);
			RGBCopyeven(GOPBuffer[ref], dst, pitch, 0);
			break;

		case 2:
			RGBCopyodd(GOPBuffer[ref], dst, pitch, 0);
			RGBCopyeven(GOPBuffer[ref-1], dst, pitch, 0);
			break;
	}

	in->VF_OldFrame = origin;
}

/*
 * Close_D2VFAPI
 *
 * Releases everything acquired by Open_D2VFAPI and Decode_D2VFAPI: the
 * project file, the cached GOP frames, the source file handles, the GOP and
 * frame tables, the decoder work buffers and the helper library.
 *
 * in : descriptor to close.  May be NULL, in which case only the global
 *      resources are released.
 *
 * Every released pointer or handle is reset, so calling this function a
 * second time does not free anything twice.
 */
void Close_D2VFAPI(D2VFAPI *in)
{
	int i;

	if (in != NULL)
	{
		// VF_File is NULL when the project file could not be opened
		if (in->VF_File != NULL)
		{
			fclose(in->VF_File);
			in->VF_File = NULL;
		}

		// VF_GOPSize is the number of allocated cache slots
		while (in->VF_GOPSize)
		{
			in->VF_GOPSize--;
			free(GOPBuffer[in->VF_GOPSize]);
			GOPBuffer[in->VF_GOPSize] = NULL;
		}
	}

	while (File_Limit > 0)
	{
		File_Limit--;
		_close(Infile[File_Limit]);
	}

	for (i=0; i<MAX_FILE_NUMBER; i++)
	{
		free(Infilename[i]);
		Infilename[i] = NULL;
	}

	// the GOP and frame tables are sparse pointer arrays; unused slots are
	// NULL, which free() accepts
	for (i=0; i<MAX_FRAME_NUMBER; i++)
	{
		free(GOPList[i]);
		GOPList[i] = NULL;
		free(FrameList[i]);
		FrameList[i] = NULL;
	}

	for (i=0; i<3; i++)
	{
		free(backward_reference_frame[i]);
		backward_reference_frame[i] = NULL;
		free(forward_reference_frame[i]);
		forward_reference_frame[i] = NULL;
		free(auxframe[i]);
		auxframe[i] = NULL;
	}

	free(u422);
	u422 = NULL;
	free(v422);
	v422 = NULL;
	free(u444);
	u444 = NULL;
	free(v444);
	v444 = NULL;
	free(rgb24);
	rgb24 = NULL;

	// lum is only allocated when the luminance filter is enabled
	if (Luminance_Flag)
	{
		free(lum);
		lum = NULL;
	}

	// free the original allocations, not the aligned block[] pointers
	for (i=0; i<8; i++)
	{
		free(p_block[i]);
		p_block[i] = NULL;
	}

	free(p_fTempArray);
	p_fTempArray = NULL;

	if (hLibrary)
	{
		FreeLibrary(hLibrary);
		hLibrary = NULL;
	}
}

/*
 * CheckCPU
 *
 * Runs CPUID and sets the matching cpu.* flags to 1.  Flags whose feature
 * bit is clear are left untouched, not reset to 0.
 *
 * NOTE(review): leaf 0x80000001 is queried without first checking that the
 * processor reports extended leaves -- confirm this is safe on all targets.
 */
static void CheckCPU()
{
	__asm
	{
		// leaf 1: feature bits are returned in edx
		mov			eax, 1
		cpuid
		test		edx, 0x00800000		// STD MMX
		jz			TEST_SSE
		mov			[cpu.mmx], 1
TEST_SSE:
		test		edx, 0x02000000		// STD SSE
		jz			TEST_3DNOW
		// SSE sets both the ssemmx and the ssefpu flag
		mov			[cpu.ssemmx], 1
		mov			[cpu.ssefpu], 1
TEST_3DNOW:
		// leaf 0x80000001: a second set of feature bits in edx
		mov			eax, 0x80000001
		cpuid
		test		edx, 0x80000000		// 3D NOW
		jz			TEST_SSEMMX
		mov			[cpu._3dnow], 1
TEST_SSEMMX:
		// a second way for ssemmx to be set, independent of the SSE bit above
		test		edx, 0x00400000		// SSE MMX
		jz			TEST_END
		mov			[cpu.ssemmx], 1
TEST_END:
	}
}

static void RGBCopyodd(unsigned char *src, unsigned char *dst, int pitch, int forward)
{
	int i;
	int PWIDTH = forward ? (pitch<<1) : SWIDTH;
	int QWIDTH = forward ? SWIDTH : (pitch<<1);

	for (i=0; i<(Clip_Height>>1); i++)
	{
		memcpy (dst, src, TWIDTH);
		src += PWIDTH;
		dst += QWIDTH;
	}
}

static void RGBCopyeven(unsigned char *src, unsigned char *dst, int pitch, int forward)
{
	int i;
	int PWIDTH = forward ? (pitch<<1) : SWIDTH;
	int QWIDTH = forward ? SWIDTH : (pitch<<1);
	src += forward ? pitch : TWIDTH;
	dst += forward ? TWIDTH : pitch;

	for (i=0; i<(Clip_Height>>1); i++)
	{
		memcpy (dst, src, TWIDTH);
		src += PWIDTH;
		dst += QWIDTH;
	}
}
