// Generator for the YouTube video Quaternion HQ Tech Demo: https://www.youtube.com/watch?v=iCDseNgfPsw
// Written by Nils Liaaen Corneliusen 2019.
// https://www.ignorantus.com
// License: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license

// Image licenses:

// Pictures from commons.wikimedia.org
// License: Creative Commons Attribution-Share Alike 3.0 Unported

// Title:  Atlass
// Artist: Ano8675309
// Link:   https://commons.wikimedia.org/wiki/File:Atlass.jpg

// Title:  Ankerbrot / OBEY
// Artist: Harald Schilly
// Link:   https://commons.wikimedia.org/wiki/File:Ankerbrot_-_OBEY_-_panoramio.jpg

// Pictures from pixabay.com
// Licence: Pixabay License

// Title:  Men Individual Group
// Artist: Clker-Free-Vector-Images
// Link:   https://pixabay.com/vectors/men-individual-group-community-311308/

// Title:  Paper Wall Structure
// Artist: Gerd Altmann
// Link:   https://pixabay.com/photos/paper-wall-structure-ban-font-109277/

// Modified versions of the images should be in the archive containing this source file.
// Otherwise, get it here: https://www.ignorantus.com/source/


// ffmpeg -framerate 60 -i out-%05d.jpg -i Hot_Pursuit.mp3 -framerate 60 -c:v libx264 -preset slow -crf 18 -pix_fmt yuv420p -s 1920x1080 quat2hq.avi

#include <stdio.h>
#include <Windows.h>
#include <synchapi.h>
#include <concurrent_queue.h>
#include <assert.h>
#include <conio.h>

#include "quat.h"
#include "timer.h"
#include "font.h"
#include "spiral.h"

// Shared state between main() and the worker threads (threadFunc).
// All arrays are allocated in main() with one entry per worker thread.

// Thread handles returned by CreateThread(); main() waits on them at shutdown.
HANDLE *threadHandles = NULL;
// Auto-reset events set by main() once per frame to wake each worker.
HANDLE *startHandles = NULL;
// Auto-reset events set by each worker when it has no more tiles to render for the frame.
HANDLE *doneHandles = NULL;
// Auto-reset events set by main() to tell each worker to exit.
HANDLE *quitHandles = NULL;
// One queue per worker; main() pushes the frame's QuatInfo before signalling start.
Concurrency::concurrent_queue<QuatInfo> **threadQueues = NULL;
// Next tile index to render; workers claim tiles with fetch_add, main() resets it to 0 after each frame.
std::atomic<int> jobId;

// Worker thread entry point.
//
// lpParameter carries the worker index (0..threads-1) cast to a pointer.
// The worker sleeps until either its start event or its quit event is
// signalled:
//   - quit:  prints the number of tiles rendered and returns.
//   - start: pops the frame's QuatInfo from its queue, then repeatedly claims
//            tile indices from the shared jobId counter and renders them with
//            quatBlock() until all TOTAL_BLOCKS tiles are taken, and finally
//            signals its done event.
//
// Always returns 0.
DWORD WINAPI threadFunc( LPVOID lpParameter )
{
	SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_BELOW_NORMAL );

	int tid = (int)(uint64_t)lpParameter;
	printf( "%d: Startup thread\n", tid );

	// Index 0 = start, index 1 = quit; the wait result below relies on this order.
	HANDLE h[2];
	h[0] = startHandles[tid];
	h[1] = quitHandles[tid];

	int tilecnt = 0;

	while( true ) {

		DWORD rc = WaitForMultipleObjects( 2, h, FALSE, INFINITE );

		if( rc == WAIT_FAILED ) {
			printf( "%d: screw this crap\n", tid );
			return 0;
		}

		int index = rc - WAIT_OBJECT_0;

		if( index == 1 ) {
			printf( "%d: Goodbye. %d tiles rendered.\n", tid, tilecnt );
			return 0;
		}

		QuatInfo tqi;
		if( !threadQueues[tid]->try_pop( tqi ) ) {
			printf( "%d: No message. Doh\n", tid );
			// main() waits for ALL done events with an infinite timeout, so a
			// worker that was woken must always report completion. Without this
			// the whole program would hang if the queue were ever empty.
			SetEvent( doneHandles[tid] );
			continue;
		}

		// Tiles are handed out dynamically: every worker pulls the next free
		// tile index until the frame's tiles are exhausted.
		while( true ) {

			int id = jobId.fetch_add( 1 );
			if( id >= TOTAL_BLOCKS ) break;

			// Convert the linear tile index to the tile's top-left pixel.
			int bx = id % XBLOCKS;
			int by = id / XBLOCKS;

			int cx = bx * BLOCKW;
			int cy = by * BLOCKH;

			quatBlock( tqi, cx, cy, BLOCKW, BLOCKH );

			tilecnt++;

		}

		SetEvent( doneHandles[tid] );
	}

	return 0;
}

// Shrink src to half its width and height, writing the result to dst.
//
// Every destination pixel is the average of the matching 2x2 source pixels.
// Pixels are treated as 32-bit words holding four 8-bit channels; each channel
// is averaged on its own and the result is truncated (sum >> 2).
//
// src must have even dimensions and be exactly twice the size of dst
// (checked with assert). Strides are in bytes.
void scale_down_2( bmp_argb *src, bmp_argb *dst )
{
	assert( (src->h&1) == 0 );
	assert( (src->w&1) == 0 );
	assert( src->w == dst->w*2 );
	assert( src->h == dst->h*2 );

	const uint32_t *srcBase = (uint32_t *)src->argb;
	uint32_t *dstBase = (uint32_t *)dst->argb;

	// Row pitch in 32-bit pixels rather than bytes.
	const int srcPitch = src->stride/4;
	const int dstPitch = dst->stride/4;

	for( int row = 0; row < dst->h; row++ ) {

		const uint32_t *upper = srcBase + row*2*srcPitch;
		const uint32_t *lower = upper + srcPitch;
		uint32_t *out = dstBase + row*dstPitch;

		for( int col = 0; col < dst->w; col++ ) {

			const uint32_t quad[4] = {
				upper[2*col], upper[2*col+1],
				lower[2*col], lower[2*col+1]
			};

			// Walk the four byte lanes (bits 0-7, 8-15, 16-23, 24-31).
			uint32_t packed = 0;
			for( int shift = 0; shift < 32; shift += 8 ) {
				uint32_t sum = 0;
				for( int k = 0; k < 4; k++ ) {
					sum += (quad[k]>>shift)&0xff;
				}
				packed |= (sum>>2)<<shift;
			}

			out[col] = packed;

		}

	}
}


// Enlarge src to twice its width and height, writing the result to dst.
//
// Plain pixel doubling: each source pixel is copied unchanged into a 2x2
// block of the destination. No filtering is applied.
//
// src must have even dimensions and dst must be twice the size of src
// (checked with assert). Strides are in bytes.
void scale_up_2( bmp_argb *src, bmp_argb *dst )
{
	assert( (src->h&1) == 0 );
	assert( (src->w&1) == 0 );
	assert( src->w == dst->w/2 );
	assert( src->h == dst->h/2 );

	const uint32_t *srcBase = (uint32_t *)src->argb;
	uint32_t *dstBase = (uint32_t *)dst->argb;

	// Row pitch in 32-bit pixels rather than bytes.
	const int srcPitch = src->stride/4;
	const int dstPitch = dst->stride/4;

	for( int row = 0; row < src->h; row++ ) {

		const uint32_t *in = srcBase + row*srcPitch;
		uint32_t *outUpper = dstBase + row*2*dstPitch;
		uint32_t *outLower = outUpper + dstPitch;

		for( int col = 0; col < src->w; col++ ) {

			const uint32_t px = in[col];

			outUpper[2*col]   = px;
			outUpper[2*col+1] = px;
			outLower[2*col]   = px;
			outLower[2*col+1] = px;

		}

	}
}

// 8 start           480
// 55 shift -> 1:10  3300->4200
// 1:42              6120
// 1:57 guitar       7020
// 2:46 end, credits 

// Keyframes for the fractal's mu parameter (QuatInfo::mu_pos).
// main() steps through the table one entry per MU_TIME frames: for the first
// MIX_TIME frames of a step it interpolates (v4_mix) from entry [n] to entry
// [n+1], then holds entry [n+1] for the remaining HOLD_TIME frames.
// The time stamps on each row are the video times at which the morph into
// that entry has finished.
// NOTE(review): main() reads entry [n+1], so the table needs one entry more
// than the number of steps; the last row looks like spare padding - confirm
// before shrinking the table.
v4 postable[13] = {
	{  0.0,    0.0,     0.0,     0.0    }, // sphere 00:08 !start morph 5s
	{ -0.291, -0.399,   0.339,   0.437  }, //        00:13 times when morph done
	{ -0.213, -0.0410, -0.563,  -0.560  }, //        00:28
	{ -0.162,  0.163,   0.560,  -0.599  }, //        00:43
	{ -0.2,    0.8,     0.0,     0.0    }, //        00:58
	{ -0.445,  0.339,  -0.0889, -0.562  }, //        01:13
	{ -0.450, -0.447,   0.181,   0.306  }, //        01:28
	{ -0.137, -0.630,  -0.475,  -0.046  }, //        01:43
	{ -0.2,    0.4,    -0.4,    -0.4    }, //        01:58
	{ -1.0,    0.2,     0.0,     0.0    }, // stick  02:13
	{ -0.125, -0.256,   0.847,   0.0895 }, // doh    02:28
	{  0.0,    0.0,     0.0,     0.0    }, // sphere 02:43
	{  0.0,    0.0,     0.0,     0.0    }
};

// 7^11=1,977,326,743

// Draw the end-credits text block onto dst.
//
// The block is centred vertically around the middle of the bitmap, one row of
// FONT_HEIGHT pixels per entry, and every line is centred horizontally using
// its width from text_getlen(). Empty entries still occupy a row. All lines
// are drawn with colour 0x00ffff00.
void draw_outro_text( bmp_argb *dst )
{
	static const char * const credits[] = {
		"Music: \"Hot Pursuit\" by Kevin MacLeod (incompetech.com)",
		"Licensed under Creative Commons: By Attribution 3.0 License",
		"",
		"Quaternion based on shader code by Keenan Crane",
		"https://www.cs.cmu.edu/~kmcrane/Projects/QuaternionJulia/",
		"",
		"Spiral based on \"Look me in the eyes\" by Fabrice Neyret",
		"Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License",
		"https://www.shadertoy.com/view/ldBGDc",
		"",
		"Pictures from commons.wikimedia.org:",
		"\"Atlass\" by Ano8675309",
		"\"Ankerbrot / OBEY\" by Harald Schilly",
		"Creative Commons Attribution-Share Alike 3.0 Unported License",
		"",
		"Pictures from pixabay.com:",
		"\"Men Individual Group\" by Clker-Free-Vector-Images",
		"\"Paper Wall Structure\" by Gerd Altmann",

	};

	const int lineCount = sizeof(credits)/sizeof(credits[0]);

	// Top edge of the text block so that it is vertically centred.
	const int top = dst->h/2 - (lineCount*FONT_HEIGHT)/2;

	int row = 0;
	for( const char *line : credits ) {
		const int width = text_getlen( line );
		const int left = dst->w/2 - width/2;
		const int y = top + row*FONT_HEIGHT;

		text_draw( (uint32_t *)dst->argb, left, y, dst->stride/4, line, 0x00ffff00 );

		row++;
	}

}

// Draw the title-screen text block onto dst.
//
// The block is centred vertically around the middle of the bitmap, one row of
// FONT_HEIGHT pixels per entry, and every line is centred horizontally using
// its width from text_getlen(). Empty entries still occupy a row. The first
// line (the title) uses colour 0x00ffff00, all other lines 0x00ffffff.
void draw_intro_text( bmp_argb *dst )
{
	static const char * const titles[] = {
		"Raytraced Quaternion Julia Fractals HQ Tech Demo",
		"",
		"Nils L. Corneliusen 2019",
		"www.ignorantus.com",
		"",
		"Rendered in 4k on an Intel i7-7820X CPU",
	};

	const int lineCount = sizeof(titles)/sizeof(titles[0]);

	// Top edge of the text block so that it is vertically centred.
	const int top = dst->h/2 - (lineCount*FONT_HEIGHT)/2;

	int row = 0;
	for( const char *line : titles ) {
		const int width = text_getlen( line );
		const int left = dst->w/2 - width/2;
		const int y = top + row*FONT_HEIGHT;

		uint32_t colour = 0x00ffffff;
		if( row == 0 ) {
			colour = 0x00ffff00;
		}

		text_draw( (uint32_t *)dst->argb, left, y, dst->stride/4, line, colour );

		row++;
	}

}


// Program entry point: renders the demo frame by frame (60 frames per second
// of video) and writes one JPEG per frame.
//
// Flow:
//   1. Fill in the QuatInfo render settings and start THREADS worker threads
//      (threadFunc), each with its own start/quit/done event and queue.
//   2. Load the background pictures and allocate the working bitmaps.
//   3. For each frame in [STARTFRAME, ENDFRAME):
//        - intro frames (< INTRO_LEN): spiral background (after 3 seconds)
//          plus the title text;
//        - outro frames (END_START..END_END): spiral background plus credits;
//        - all other frames: the workers render the quaternion into quat.dst,
//          the result is rescaled to 1920 pixels wide if needed and combined
//          with the spiral / faded picture background and overlay text.
//   4. Signal every worker to quit and wait for the threads to finish.
//
// Keyboard: space pauses until another key is pressed, 'q' stops rendering
// and goes through the normal worker shutdown.
//
// Always returns 0.
int main()
{
	StartCounter();

	int threads = THREADS;
	QuatInfo quat;

	quat.width = WIDTH;
	quat.height = HEIGHT;

	quat.ax  = 10.0f * WIDTH / (float)HEIGHT;
	quat.ay  = 10.0f;

	quat.start_pos = v3_set( quat.ax  * -0.5f - 0.0f,
							 quat.ay  * -0.5f - 0.0f,
							 12.0f ); // 8

	quat.rot_xz = 0.0f;
	quat.epsilon = EPSILON;

	quat.dst = bmp_alloc( WIDTH, HEIGHT );

	quat.mu_pos = postable[7];

	quat.obj_col = v3_set( 1.0f, 0.0f, 0.0f );

	jobId.store( 0 );

	if( threads ) {
		startHandles  = (HANDLE *)malloc( sizeof(HANDLE)*threads );
		quitHandles   = (HANDLE *)malloc( sizeof(HANDLE)*threads );
		doneHandles   = (HANDLE *)malloc( sizeof(HANDLE)*threads );
		threadHandles = (HANDLE *)malloc( sizeof(HANDLE)*threads );
		threadQueues  = (Concurrency::concurrent_queue<QuatInfo> **)malloc( sizeof(Concurrency::concurrent_queue<QuatInfo> *)*threads );
	}

	// All events are auto-reset and start out non-signalled.
	for( int i = 0; i < threads; i++ ) {
		startHandles[i] = CreateEvent( NULL, FALSE, FALSE, NULL );
		quitHandles[i]  = CreateEvent( NULL, FALSE, FALSE, NULL );
		doneHandles[i]  = CreateEvent( NULL, FALSE, FALSE, NULL );
		threadQueues[i] = new Concurrency::concurrent_queue<QuatInfo>;
	}

	// Workers are started only after every event and queue exists, because
	// threadFunc reads them immediately.
	for( int i = 0; i < threads; i++ ) {
		DWORD threadId;
		threadHandles[i] = CreateThread( NULL, 0, threadFunc, (LPVOID)(uint64_t)i, 0, &threadId );
	}

	// NOTE(review): the results of bmp_load are used without any check -
	// confirm how bmp_load reports a missing file.
	bmp_argb *buf_atlas  = bmp_load( "c:\\quat_assets\\atlass4.bmp", true );
	bmp_argb *buf_obey   = bmp_load( "c:\\quat_assets\\obey4.bmp", true );
	bmp_argb *buf_unique = bmp_load( "c:\\quat_assets\\unique_red.bmp", true );
	bmp_argb *buf_unacc  = bmp_load( "c:\\quat_assets\\unacceptable.bmp", true );

	bmp_argb *buf_spiral = bmp_alloc( 1920, 1080 );

	bmp_argb *buf_3840 = bmp_alloc( 1920*2, 1080*2 );
	bmp_argb *buf_1920 = bmp_alloc( 1920, 1080 );
	bmp_argb *buf_960  = bmp_alloc(  960,  540 );

	bmp_argb *buf_out = bmp_alloc( 1920, 1080 );

// All durations below are in frames (60 frames per second).
#define PIC_TIME (15*60)

#define INTRO_LEN (8*60)

#define END_START (2*60*60+45*60)
#define END_END   (2*60*60+50*60)

//#define STARTFRAME (INTRO_LEN)
//#define ENDFRAME   (INTRO_LEN+30*4)

#define MIX_TIME (5*60)
#define HOLD_TIME (10*60)
#define MU_TIME (HOLD_TIME+MIX_TIME)

//#define STARTFRAME (480)
//#define ENDFRAME   (INTRO_LEN+4*MU_TIME)

//#define STARTFRAME (6119)
//#define ENDFRAME   (6120+10*60)

//#define STARTFRAME 0
//#define ENDFRAME (END_END)

//#define STARTFRAME (END_START-PIC_TIME-2)
#define STARTFRAME (0)
//#define ENDFRAME (END_END)
#define ENDFRAME (INTRO_LEN)

	printf( "Start: %d End: %d\n", STARTFRAME, ENDFRAME );

	for( int frame = STARTFRAME; frame < ENDFRAME; frame++ ) {
		bool quit = false;

		double t0 = 0.0, t1 = 0.0;
		char fname[80];
		snprintf( fname, sizeof(fname), "C:\\Users\\ncorn\\tmp\\out-%05d.jpg", frame );

		int rc = _kbhit();
		if( rc != 0 ) {
			int key = _getch();
			switch( key ) {
			case ' ':
				printf( "**** Pause\n" );
				_getch();
				printf( "**** Resume\n" );
				break;

			case 'q':
				printf( "**** quit\n" );
				// Leave the switch only; the check below leaves the frame
				// loop so the workers are shut down and joined before exit.
				quit = true;
				break;
			default: 
				break;
			}
		}

		if( quit ) break;

		quat.frame = (float)frame;

		// One full rotation every 300 frames.
		float fr = quat.frame / 300.0f;
		quat.rot_xz = (fr-floor(fr)) * 2.0f * PI;


		if( frame < INTRO_LEN ) {

			printf( "%d\n", frame );

			// Black background for the first 3 seconds, spiral afterwards.
			if( frame >= 3*60 ) {
				spiral_draw( buf_1920, v3_set( 0.0f, 0.5f, 0.0f ), frame, false );
			} else {
				memset( buf_1920->argb, 0x00, buf_1920->stride*buf_1920->h );
			}

			draw_intro_text( buf_1920 );

			bmp_save_jpg( buf_1920, fname, false, 95 );

			continue;

		}

		int quatframe = frame-INTRO_LEN;

		// Index of the current keyframe step in postable.
		int smu = quatframe/MU_TIME;

		// Morph from postable[smu] to postable[smu+1] during the first
		// MIX_TIME frames of the step, then hold postable[smu+1].
		int qfr = quatframe%MU_TIME;
		if( qfr < MIX_TIME ) {
			float wt = (qfr%MIX_TIME)/(float)(MIX_TIME);
			quat.mu_pos = v4_mix( postable[smu], postable[smu+1], wt );
		} else {
			quat.mu_pos = postable[smu+1];
		}

		if( frame >= END_START && frame <= END_END ) {

			printf( "%d\n", frame );
			spiral_draw( buf_1920, v3_set( 0.0f, 0.0f, 0.5f ), frame, false );

			draw_outro_text( buf_1920 );
			bmp_save_jpg( buf_1920, fname, false, 95 );
			continue;

		}


		// Hand every worker its own copy of this frame's settings before
		// waking it, so the queue is never empty when the worker pops.
		for( int i = 0; i < threads; i++ ) {
			threadQueues[i]->push( quat );
		}

		t0 = GetCounter();
		for( int i = 0; i < threads; i++ ) {
			SetEvent( startHandles[i] );
		}

		// The frame is complete once every worker has signalled done.
		WaitForMultipleObjects( threads, doneHandles, TRUE, INFINITE );
		t1 = GetCounter();

		printf( "Frame %d, time: %fms\n", frame, (t1-t0) );

		// Reset the shared tile counter for the next frame.
		jobId.store( 0 );

		bmp_argb *quatsc = NULL;

		// Bring the rendered image to 1920 pixels wide; only exact factors of
		// 2 and 4 (up or down) are supported.
		if( quat.dst->w != 1920 ) {

			if( quat.dst->w == 1920*2 ) {
				scale_down_2( quat.dst, buf_1920 );
				quatsc = buf_1920;
			} else if( quat.dst->w == 1920*4 ) {
				scale_down_2( quat.dst, buf_3840 );
				scale_down_2( buf_3840, buf_1920 );
				quatsc = buf_1920;
			} else if( quat.dst->w == 1920/2 ) {
				scale_up_2( quat.dst, buf_1920 );
				quatsc = buf_1920;
			} else if( quat.dst->w == 1920/4 ) {
				scale_up_2( quat.dst, buf_960 );
				scale_up_2( buf_960, buf_1920 );
				quatsc = buf_1920;
			} else {
				printf( "doh! fix your res!\n" );
				assert( 0 );
			}

		} else {
			quatsc = quat.dst;
		}

		bool blend = false;

		// Composite this crap

		// Each picture below is shown for PIC_TIME frames with a weight that
		// follows half a sine period (0 at both ends, 1 in the middle).

		// 0:30
		if( frame >= 30*60 && frame <= 30*60+PIC_TIME ) {

			int lfr = frame - (30*60);
			float wt = sin((lfr/(float)PIC_TIME)*PI);

			bmp_fade( buf_unique, buf_out, wt );

			blend = true;
		}


		// 0:55 bg picture
		if( frame >= 55*60 && frame <= 55*60+PIC_TIME ) {

			int lfr = frame - 55*60;
			float wt = sin((lfr/(float)PIC_TIME)*PI);

			bmp_fade( buf_atlas, buf_out, wt );

			blend = true;
		}

		// 1:42
		if( frame >= 1*60*60+42*60 && frame <= 1*60*60+42*60+PIC_TIME ) {

			int lfr = frame - (1*60*60+42*60);
			float wt = sin((lfr/(float)PIC_TIME)*PI);

			bmp_fade( buf_obey, buf_out, wt );

			blend = true;
		}

		// last
		if( frame >= END_START-PIC_TIME-1 && frame < END_START ) {

			int lfr = frame - (END_START-PIC_TIME-1);
			float wt = sin((lfr/(float)PIC_TIME)*PI);

			bmp_fade( buf_unacc, buf_out, wt );

			blend = true;
		}


		spiral_draw( buf_out, v3_set( 0.0f, 0.5f, 0.0f ), frame, blend );

		bmp_blend_2( quatsc, buf_out, 1.0f );

		// 1:57
		if( frame >= 1*60*60+57*60 && frame < 1*60*60+57*60+10*60 ) {
			//1,977,326,743
			const char *secret = "1,977,326,743";
			int len = text_getlen( secret );
			// Bottom-right corner of the output frame.
			text_draw( (uint32_t *)buf_out->argb, buf_out->w - len, buf_out->h - FONT_HEIGHT, buf_out->stride/4, secret, 0x00ffff00 );
		}

		// Status overlay in the top-left corner.
		// NOTE(review): the thread count in this text is hard-coded and is not
		// derived from THREADS.
		char buf[80];
		snprintf( buf, sizeof(buf), "Quaternion HQ Tech Demo. i7-7820X using 16 rendering threads.\n" );
		text_draw( (uint32_t *)buf_out->argb, 0, 0*FONT_HEIGHT, buf_out->stride/4, buf, 0x00ffffff );

		snprintf( buf, sizeof(buf), "mu_pos: %.4f,%.4f,%.4f,%.4f", quat.mu_pos.x, quat.mu_pos.y, quat.mu_pos.z, quat.mu_pos.w );
		text_draw( (uint32_t *)buf_out->argb, 0, 1*FONT_HEIGHT, buf_out->stride/4, buf, 0x00ffffff );

//		spiral_draw( quatsc, v3_set( 1.0f, 0.5f, 0.5f ), frame, true );

//		Sleep( 100 );

		bmp_save_jpg( buf_out, fname, false, 95 );


	}

	// Shut the workers down and wait until every thread has returned.
	for( int i = 0; i < threads; i++ ) {
		SetEvent( quitHandles[i] );
	}

	WaitForMultipleObjects( threads, threadHandles, TRUE, INFINITE );

	printf( "Goodbye, Mr Bond\n" );


#if 0
	int bx = BLOCKW;
	int by = BLOCKH;

	for( int y = 0; y < qi->height; y += BLOCKW ) {

		if( y + BLOCKH > qi->height ) by = qi->height - y;

		for( int x = 0; x < qi->width; x += BLOCKH ) {

			quatBlock( x, y, bx, by );

		}

	}
#endif

	return 0;
}
