// Raytraced Quaternion Julia Sets.
// Written by Nils Liaaen Corneliusen 2019.
// https://www.ignorantus.com
// License: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license

#include <stdio.h>
#include <math.h>
#include <iostream>
#include <atomic>

#include <Windows.h>

#include "bmp.h"
#include "vec.h"
#include "timer.h"
#include "quat.h"
#include "font.h"



#if 0
const v3 cols[7] = {
    { 1.0f, 0.0f, 0.0f },
    { 1.0f, 1.0f, 0.0f },
    { 0.0f, 1.0f, 0.0f },
    { 0.0f, 1.0f, 1.0f },
    { 0.0f, 0.0f, 1.0f },
    { 1.0f, 0.0f, 1.0f },
    { 1.0f, 0.0f, 0.0f }
};
#else
const v3 cols[7] = {
    { 1.0f, 0.0f, 0.0f },
    { 1.0f, 0.5f, 0.0f },
    { 1.0f, 0.0f, 0.0f },
    { 1.0f, 0.5f, 0.0f },
    { 1.0f, 0.0f, 0.0f },
    { 1.0f, 0.5f, 0.0f },
    { 1.0f, 0.0f, 0.0f }
};
#endif

static v3 h2rgb( float h )
{
    h = h - floor( h );
    float Slice     = 5.0f * h;
    float SliceInt  = floor( Slice );
    float SliceFrac = Slice - SliceInt;

    int i = (int)SliceInt;
    return v3_mix( cols[i], cols[i+1], SliceFrac );
}


static v4 quatMult( v4 q1, v4 q2 )
{
    v4 r;

    r.x   = q1.x * q2.x - v3_dot( v3_set( q1.y, q1.z, q1.w ), v3_set( q2.y, q2.z, q2.w ) );

    v3 m0 = v3_mul1( v3_set(q2.y,q2.z,q2.w), q1.x );
    v3 m1 = v3_mul1( v3_set(q1.y,q1.z,q1.w), q2.x );
    v3 m2 = v3_mul(  v3_set(q1.z,q1.w,q1.y), v3_set(q2.w,q2.y,q2.z) );
    v3 m3 = v3_mul(  v3_set(q1.w,q1.y,q1.z), v3_set(q2.z,q2.w,q2.y) );

    v3 r0 = v3_sub( v3_add( v3_add( m0, m1 ), m2 ), m3 );

    r = v4_set( r.x, r0.x, r0.y, r0.z );

    return r;
}

static v4 quatSq( v4 q )
{
    v4 r;
    v3 qyzw = v3_set( q.y, q.z, q.w );
    r.x   = q.x * q.x - v3_dots( qyzw );
    v3 r3 = v3_mul1( qyzw, q.x * 2.0f );
    r = v4_set( r.x, r3.x, r3.y, r3.z );
    return r;
}

static v4 iterateIntersect( v4 *q1, v4 c )
{
    bool r0,  r1;
    v4 q0;
    v4 qp0, qp1;

    // reduce for qp1=vec4( 1.0f, 0.0f, 0.0f, 0.0f )
    qp0 = v4_mul1(          *q1,        2.0f );  q0 = v4_add( quatSq( *q1 ), c );
    qp1 = v4_mul1( quatMult( q0, qp0 ), 2.0f ); *q1 = v4_add( quatSq(  q0 ), c );
    r0  = v4_dots(  q0 ) > ESCAPE_THRESHOLD;
    r1  = v4_dots( *q1 ) > ESCAPE_THRESHOLD;
    if( r0 || r1 ) {
        *q1  = r0 ?  q0 : *q1;
        return r0 ? qp0 : qp1;
    }

    // unroll 2 to reduce stalls
    for( int i = 0; i < ITERATIONS_INTER-2; i += 2 ) {
        qp0 = v4_mul1( quatMult( *q1, qp1 ), 2.0f );  q0 = v4_add( quatSq( *q1 ), c );
        qp1 = v4_mul1( quatMult(  q0, qp0 ), 2.0f ); *q1 = v4_add( quatSq(  q0 ), c );

        r0  = v4_dots(  q0 ) > ESCAPE_THRESHOLD;
        r1  = v4_dots( *q1 ) > ESCAPE_THRESHOLD;
        if( r0 || r1 ) break;
    }

    *q1   = r0 ? q0  : *q1;
    return r0 ? qp0 : qp1;
}

static float intersectQJulia( v3 *rO, v3 rD, v4 c, float epsilon )
{
    float dist;

    while( true ) {
        v4 z  = v4_set( rO->x, rO->y, rO->z, 0.0f );
        v4 zp = iterateIntersect( &z, c );

        float normZ = v4_length( z );
        dist = 0.5f * normZ * log( normZ ) / v4_length( zp );

        *rO = v3_add( *rO, v3_mul1( rD, dist ) );

        if( dist < epsilon || v3_dots( *rO ) > BOUNDING_RADIUS_2 ) break;
    }

    return dist;
}


static v3 normEstimate( v3 qP, v4 c )
{
    v4 gx1 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gx1.x -= DEL;
    v4 gx2 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gx2.x += DEL;
    v4 gy1 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gy1.y -= DEL;
    v4 gy2 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gy2.y += DEL;
    v4 gz1 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gz1.z -= DEL;
    v4 gz2 = v4_set( qP.x, qP.y, qP.z, 0.0f ); gz2.z += DEL;

    // reduce iteration 0 where z=0
    float dyz = v2_dots( v2_set( qP.y, qP.z ) );
    float qx  = qP.x * qP.x;
    float qx2 = 2.0f * qP.x;

    gx1 = v4_add( v4_set( gx1.x * gx1.x - dyz,                 2.0f * gx1.x * qP.y, 2.0f * gx1.x * qP.z,  0.0f ), c );
    gx2 = v4_add( v4_set( gx2.x * gx2.x - dyz,                 2.0f * gx2.x * qP.y, 2.0f * gx2.x * qP.z,  0.0f ), c );
    gy1 = v4_add( v4_set( qx - v2_dots( v2_set(gy1.y,gy1.z) ), qx2 * gy1.y,         qx2 * gy1.z,          0.0f ), c );
    gy2 = v4_add( v4_set( qx - v2_dots( v2_set(gy2.y,gy2.z) ), qx2 * gy2.y,         qx2 * gy2.z,          0.0f ), c );
    gz1 = v4_add( v4_set( qx - v2_dots( v2_set(gz1.y,gz1.z) ), qx2 * gz1.y,         qx2 * gz1.z,          0.0f ), c );
    gz2 = v4_add( v4_set( qx - v2_dots( v2_set(gz2.y,gz2.z) ), qx2 * gz2.y,         qx2 * gz2.z,          0.0f ), c );

    for( int i = 0; i < ITERATIONS_NORM-1; i++ ) {
        gx1 = v4_add( quatSq( gx1 ), c );
        gx2 = v4_add( quatSq( gx2 ), c );
        gy1 = v4_add( quatSq( gy1 ), c );
        gy2 = v4_add( quatSq( gy2 ), c );
        gz1 = v4_add( quatSq( gz1 ), c );
        gz2 = v4_add( quatSq( gz2 ), c );
    }

    return v3_normalize( v3_set( v4_length( gx2 ) - v4_length( gx1 ),
                                 v4_length( gy2 ) - v4_length( gy1 ),
                                 v4_length( gz2 ) - v4_length( gz1 ) ) );
}

static v3 Phong( v3 light, v3 eye, v3 pt, v3 N, v3 obj_col, float frame )
{
    v3  L  = v3_normalize( v3_sub( light, pt ) );
    v3  E  = v3_normalize( v3_sub( eye  , pt ) );
    float NL = v3_dot( N, L );
    
    v3 R = v3_sub( L, v3_mul1( N, NL * 2.0f ) );

    float diffuse = max( NL, 0.0f );

//	v3 d = v3_mul1( obj_col, diffuse );

//	if( d.x < 4.0f/255.0f ) return d;

    return v3_add1( v3_mul1( obj_col, diffuse ), pow( max( v3_dot( E, R ), 0.0f ), 8.0f ) );

//    return v3_add1( v3_mul1( obj_col, diffuse ), pow( max( v3_dot( E, R ), 0.0f ), 8.0f ) ); // orig

//	if( N.z < 0.0f ) N.z = 0.0f;
//	return v3_add1( v3_mul1( h2rgb( (N.z/2.0f)+(frame/30.0f) ), diffuse ), pow( max( v3_dot( E, R ), 0.0f ), 8.0f ) );
//    return v3_mul1( obj_col, diffuse );
}

static v3 rotate_xz( float frac, float radius )
{
    return v3_set( radius*sin( frac ), 0.0f, -radius*cos( frac ) );
}

static v4 quatPixel( QuatInfo &tqi, float fx, float fy )
{
    v4 outColor = v4_set( 0.0f, 0.0f, 0.0f, 0.0f );

    v3 rD = v3_normalize( v3_set( (tqi.start_pos.x + tqi.ax * fx),
                                  (tqi.start_pos.y + tqi.ay * fy),
                                  (tqi.start_pos.z                    ) ) );

    // was probably not a good idea to hardcode this
    float angle = atan( rD.x ) * (2.0f/16.0f);

    v3 rO = rotate_xz( tqi.rot_xz,         22.0f ); v3 eye_pos = rO;
    v3 xz = rotate_xz( tqi.rot_xz + angle,  8.0f );

    rD = v3_normalize( v3_set( -xz.x, rD.y, -xz.z ) ); v3 eye_dir = rD;

    v3 light = rO;

    // inline intersect
    float b0 = v3_dot( rO, rD );
    float d0 = b0 * b0 - v3_dots( rO ) + BOUNDING_RADIUS_2;

    // redundant at correct zoom level
    if( d0 <= 0.0f ) {
//        outColor = getBG();
        outColor = v4_set( 0.0f, 0.0f, 1.0f, 0.0f );
        return outColor;
    }

    float t0 = -b0 - sqrtf( d0 );

    rO = v3_add( rO, v3_mul1( rD, t0 ) );

	v3 sphere_pos = rO;

    float dist = intersectQJulia( &rO, rD, tqi.mu_pos, tqi.epsilon );

	if( dist > tqi.epsilon ) return outColor;

//	dist *= 1.0f/EPSILON;
//	return v3_set( dist, dist, dist );

    v3 N = normEstimate( rO, tqi.mu_pos );

	v3 Ph = Phong( light, rD, rO, N, tqi.obj_col, (float)tqi.frame );

	outColor = v4_set( Ph.x, Ph.y, Ph.z, 1.0f );

    return outColor;

}

float clampf( float v )
{
    return v < 0.0f ? 0.0f : v > 1.0f ? 1.0f : v;
}

void quatBlock( QuatInfo &tqi, int ulx, int uly, int w, int h )
{
    uint32_t *dst32 = (uint32_t *)tqi.dst->argb;
    int stride32 = tqi.dst->stride/4;

//    bool hackit = true;

	int y;
    for( y = uly; y < uly+h && y < tqi.height; y++ ) {

        uint32_t *dstrow = dst32 + y*stride32 + ulx;

        for( int x = ulx; x < ulx+w; x++ ) {

			uint32_t srcpixel8 = *dstrow;
			float srcr = ((srcpixel8>>16)&0xff)/255.0f;
			float srcg = ((srcpixel8>> 8)&0xff)/255.0f;
			float srcb = ((srcpixel8>> 0)&0xff)/255.0f;
			v3 srcpixel = v3_set( srcr, srcg, srcb );

            v4 dstpixel = quatPixel( tqi, x/(float)tqi.width, y/(float)tqi.height );

			v3 d = v3_set( clampf( dstpixel.x ), clampf( dstpixel.y ), clampf( dstpixel.z ) );

			v3 pixel = v3_mix( srcpixel, d, dstpixel.w );

            int r = (int)(pixel.x * 255.0f);
            int g = (int)(pixel.y * 255.0f);
            int b = (int)(pixel.z * 255.0f);
            int a = (int)(dstpixel.w > 0.0f ? 255 : 0);
            uint32_t pix = (a<<24)|(r<<16)|(g<<8)|(b<<0);

//			if( pix != 0 ) hackit = false;

            *dstrow++ = pix;

        }
    }
/*
	if( hackit ) {
        uint32_t randcol = ((rand()&0xff)<<16)|((rand()&0xff)<<8)|((rand()&0xff)<<0);
        for( int yy = uly; yy < uly+h && yy < tqi.height; yy++ ) {
            uint32_t *dstrow = dst32 + yy*stride32 + ulx;
            for( int xx = ulx; xx < ulx+w; xx++ ) {
                *dstrow++ = randcol;
            }
        }
    }
*/
}

