// Integer raytracer for the Mellanox TILE-Gx.
// Written by Nils Liaaen Corneliusen 2017.
// License: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license
// Please refer to the article at http://www.ignorantus.com/pages/tilegx_integer_raytracer/
// for more information.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
#include <limits.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/time.h>

#include <arch/cycle.h>
#include <tmc/cpus.h>
#include <tmc/task.h>
#include <tmc/udn.h>

#include <jpeglib.h>

#include "par.h"
#include "vec3.h"
#include "ivec3.h"

// Classic min/max macros. Each argument may be evaluated twice, so do not
// pass expressions with side effects.
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))


// Upper bound of the per-pixel trace loop in render(): the primary ray plus
// the reflected rays that follow it.
#define MAXREF  3
// Number of spheres in the world. world_init() asserts that it is a multiple
// of 4 because render() tests the spheres four at a time.
#define OBJNUM 40

// Initial "nearest hit" distance used by render(); only hits closer than this
// are accepted. FRACBITS is not defined in this file (presumably ivec3.h).
#define FARDIST (255<<FRACBITS)
// Sphere index meaning "the ray hit nothing".
#define NOHIT   -1


// Returns the current time of day, as reported by gettimeofday(), expressed
// in microseconds.
static uint64_t get_usec( void )
{
    struct timeval now;
    gettimeofday( &now, NULL );

    uint64_t whole_seconds = (uint64_t)now.tv_sec;
    uint64_t micro_part    = (uint64_t)now.tv_usec;

    return whole_seconds * 1000000 + micro_part;
}

// Single-precision pi, so the float math below does not get promoted to double.
#define PI ((float)M_PI)

// Index of the sun in the spheres[] array.
#define SUNPOS 0

// Output image size in pixels (the defaults main() passes to render_controller()).
#define WIDTH    1920
#define HEIGHT   1080
// Size in pixels of the block handled by one render() call. render() packs
// eight pixels into each 64-bit store, so BLOCKX is expected to be a multiple of 8.
#define BLOCKX     16
#define BLOCKY     16


// Floating-point description of one sphere, including the orbit parameters
// that world_timer() uses to animate it.
// This should be roughly the same as in GPURay
typedef struct {
    vec3  obj_pos;      // current centre, recomputed by world_timer()
    vec3  obj_col;      // colour
    vec3  poseye;       // eye position minus obj_pos (set by world_timer())
    float radsq;        // radius squared (set by sphere_update())
    float rv;           // radsq - norm2(poseye), precomputed for primary rays

    // movement stuff
    int   orbit_center; // id of object to rotate around, -1 to orbit the origin
    float orbit_radius; // distance from the orbit centre
    float orbit_tilt;   // xz..yz plane tilt, -PI..0..PI
    int   orbit_speed;  // frames for one full rotation
    int   orbit_offset; // added to the frame number before the orbit phase is computed
} sphere;

// Complete floating-point world state: scene objects, light, eye and the
// geometry of the output image.
typedef struct {
    int largecnt;   // number of planets orbiting the sun (set in world_init())
    int smallcnt;   // number of moons orbiting planets (set in world_init())

    vec3 light_pos; // light position, animated by world_timer()
    vec3 eye;       // eye position

    sphere spheres[OBJNUM];

    int frame;      // animation frame counter, incremented by world_timer()

    float ax;       // swidth  / width:  horizontal world units per pixel
    float ay;       // sheight / height: vertical world units per pixel

    int width, height;      // image size in pixels
    int stride;             // bytes per colour plane row in the output buffer
    float swidth, sheight;  // size of the view window in world units
} worldinfo;


// Fixed-point copy of the sphere fields that render() reads; filled in from a
// `sphere` by world_to_iworld().
// And this is the minimum needed for integer rendering
typedef struct {
    ivec3 obj_pos;  // sphere centre
    ivec3 obj_col;  // colour
    ivec3 poseye;   // eye minus centre, used for primary rays
    fixed radsq;    // radius squared, used for reflected rays
    fixed rv;       // precomputed constant term of the primary-ray hit test
} isphere;

// Fixed-point world state used by render(). The spheres and light are
// refreshed every frame by world_to_iworld(); the remaining fields are copied
// once by render_controller().
typedef struct {
    isphere spheres[OBJNUM];
    ivec3   light_pos;
    ivec3   eye;
    fixed   ax;         // horizontal step between neighbouring pixel rays
    fixed   ay;         // vertical step between neighbouring pixel rays
    fixed   swidth;     // view window width in world units
    fixed   sheight;    // view window height in world units
    int     width;      // image width in pixels
    int     height;     // image height in pixels
    int     stride;     // bytes per colour plane row in the output buffer
} iworldinfo;

// The single floating-point world and the pointer through which it is
// accessed; main() points w0 at world0.
worldinfo world0;
worldinfo *w0;

// The single fixed-point world read by render(); main() points iw0 at iworld0.
iworldinfo iworld0;
iworldinfo *iw0;

// World control is done in floating point for simplicity

// Palette handed out in order by getcol() when the spheres are created.
vec3 coltable[] = {
    { 1.0f, 1.0f, 0.0f },
    { 0.0f, 0.0f, 1.0f },
    { 0.0f, 1.0f, 0.0f },
    { 0.0f, 1.0f, 1.0f },
    { 1.0f, 0.0f, 1.0f },
    { 1.0f, 1.0f, 1.0f },
    { 0.0f, 0.0f, 0.0f },
    { 1.0f, 0.0f, 0.5f },
};

// Number of entries in coltable.
#define COLCNT (sizeof(coltable)/sizeof(coltable[0]))

// Index of the coltable entry the next getcol() call will hand out.
static unsigned int colctr = 0;

// Returns the next palette colour and advances the cursor, wrapping back to
// the first entry once the end of coltable has been reached.
static vec3 getcol( void )
{
    vec3 picked = coltable[colctr];

    colctr = ( colctr + 1 < COLCNT ) ? colctr + 1 : 0;

    return picked;
}


// Stores the appearance and orbit parameters of sphere `spid` in world `w`.
// Only the radius squared is stored, not the radius. The position-dependent
// fields (obj_pos, poseye, rv) are left untouched.
static void sphere_update( worldinfo *w, int spid, int orbit_center, float orbit_radius, float orbit_tilt, int orbit_speed, float obj_radius, vec3 obj_col, int orbit_offset )
{
    sphere *target = w->spheres + spid;

    // appearance
    target->obj_col = obj_col;
    target->radsq   = obj_radius * obj_radius;

    // orbit
    target->orbit_center = orbit_center;
    target->orbit_radius = orbit_radius;
    target->orbit_tilt   = orbit_tilt;
    target->orbit_speed  = orbit_speed;
    target->orbit_offset = orbit_offset;
}

// Sphere radii in world units.
//#define RAD_SUN    2.8f
#define RAD_SUN 1.7f
#define RAD_PLANET 0.5f
#define RAD_MOON   0.2f

// Orbit radii: inner and outer planet rings around the sun, and moons around
// their planet.
#define DIST_INNER (RAD_SUN+1.5f*RAD_PLANET+2.0f*RAD_MOON + 0.4f)
#define DIST_OUTER (RAD_SUN+4.0f*RAD_PLANET+2.0f*RAD_MOON + 0.5f)
#define DIST_MOON  (1.0f*RAD_PLANET+1.0f*RAD_MOON + 0.3f)

// Orbit periods in frames per full rotation.
#define SPEED_INNER 415
#define SPEED_OUTER 617
#define SPEED_MOON   80

// Declared early because world_init() calls it before its definition.
static void world_timer( worldinfo *w );

// Sets up the floating-point world: eye, light, view window geometry and all
// OBJNUM spheres (one sun plus planets and moons), then runs world_timer()
// once so every sphere has a valid position before the first frame.
//
// w       world to initialise
// width   image width in pixels
// height  image height in pixels
// stride  bytes per colour plane row in the output buffer
static void world_init( worldinfo *w, int width, int height, int stride )
{
    w->eye       = vec3_set(  0.0f,  0.0f, -7.0f );
    w->light_pos = vec3_set( -4.0f, -6.0f, -9.5f );

    w->width  = width;
    w->height = height;
    w->stride = stride;

    // The view window is 10 world units high; its width follows the aspect ratio.
    w->swidth  = 10.0f * (float)w->width / (float)w->height;
    w->sheight = 10.0f;

    // world units per pixel
    w->ax = w->swidth  / (float)width;
    w->ay = w->sheight / (float)height;

    w->largecnt = OBJNUM-1;
    w->smallcnt = 0;

    printf( "Huge: 1 Large: %d Small: %d\n", w->largecnt, w->smallcnt );

    // every slot of spheres[] must be in use, and render() walks them 4 at a time
    assert( w->largecnt + w->smallcnt + 1 == OBJNUM );
    assert( OBJNUM % 4 == 0 );

    float tilt       = -PI;
    // fixed tilt step; an even spread would be (PI*2.0f) / largecnt instead
    float tilt_add   = (PI*2.0f) / 64.0f;
    int moonsp_adj   =  0;   // per-moon speed increment (tuning knob)
    int planetsp_adj =  0;   // per-planet speed increment (tuning knob)
    int planetoffset = 27;   // per-planet start offset in frames
    int moonoffset   =  9;   // per-moon start offset in frames


    // sun in the center
    sphere_update( w, SUNPOS, -1, 0.0f, 0.0f, 360, RAD_SUN, getcol(), 0 );

    w->spheres[SUNPOS].obj_pos = vec3_set( 0.0f, 0.0f, 0.0f );

    // planets orbiting the sun: the first half on the inner ring, the rest on
    // the outer ring
    for( int i = 0; i < w->largecnt; i++ ) {

        bool outer = i >= w->largecnt/2;

        // The distance must stay a float: storing it in an int would silently
        // truncate the fractional part of DIST_INNER / DIST_OUTER.
        float dist  = outer ? DIST_OUTER  : DIST_INNER;
        int   speed = outer ? SPEED_OUTER : SPEED_INNER;

        sphere_update( w, i+1, SUNPOS, dist, tilt, speed + planetsp_adj*i, RAD_PLANET, getcol(), i*planetoffset );
        tilt += tilt_add;

    }

    // moons orbiting the planets
    tilt = -PI;

    for( int i = 0; i < w->smallcnt; i++ ) {

        sphere_update( w, w->largecnt + i + 1, i + 1, DIST_MOON, tilt, SPEED_MOON + moonsp_adj*i, RAD_MOON, getcol(), i*moonoffset );
        tilt += tilt_add;

    }

    // Compute the initial positions for frame 0, then rewind the frame counter
    // that world_timer() has just advanced.
    w->frame = 0;
    world_timer( w );
    w->frame = 0;
}


// Returns the offset from the orbit centre for an object that has completed
// `frac` (0..1) of a rotation on a circle of the given radius. The circle lies
// in a plane through the z axis; `tilt` rotates that plane from xz towards yz.
static vec3 rotate( float frac, float tilt, float radius )
{
    const float angle = frac * (PI*2.0f);

    const float sideways = radius * sinf( angle );   // in-plane part that gets tilted
    const float depth    = -radius * cosf( angle );  // z component

    const float tilt_x = cosf( tilt );
    const float tilt_y = sinf( tilt );

    return vec3_set( sideways * tilt_x, sideways * tilt_y, depth );
}


// Hue wheel used by h2rgb(): six colours, one per hue slice, followed by a
// copy of the first so that cols[i+1] is valid for the last slice.
const vec3 cols[7] = {
    { 1.0f, 0.0f, 0.0f },
    { 1.0f, 1.0f, 0.0f },
    { 0.0f, 1.0f, 0.0f },
    { 0.0f, 1.0f, 1.0f },
    { 0.0f, 0.0f, 1.0f },
    { 1.0f, 0.0f, 1.0f },

    { 1.0f, 0.0f, 0.0f }
};

// Converts a hue to a colour by blending between neighbouring entries of the
// cols[] hue wheel. Only the fractional part of `h` matters, so the hue wraps
// around at every integer.
static vec3 h2rgb( float h )
{
    h = h - floorf( h );

    // For a tiny negative input the subtraction above rounds to exactly 1.0f.
    // That would select slice 6 and read cols[7], one past the end of the
    // table; wrap it to 0, which is the same colour.
    if( h >= 1.0f ) h = 0.0f;

    float Slice     = 6.0f * h;
    float SliceInt  = floorf( Slice );
    float SliceFrac = Slice - SliceInt;

    int i = (int)SliceInt;
    return vec3_mix( cols[i], cols[i+1], SliceFrac );
}


// Advances the world by one animation frame: moves every sphere along its
// orbit, recolours the sun and planets, refreshes the per-sphere values that
// depend on the eye position, moves the light and increments w->frame.
static void world_timer( worldinfo *w )
{
    for( int i = 0; i < OBJNUM; i++ ) {
        sphere *sp = &w->spheres[i];

        // fraction (0..1) of the current rotation this sphere has completed
        float frac = ((w->frame+sp->orbit_offset)%sp->orbit_speed)/(float)(sp->orbit_speed);

        vec3 rotv = rotate( frac, sp->orbit_tilt, sp->orbit_radius );

        vec3 rot_pos;

        // Spheres are processed in index order, so an orbit centre with a
        // lower index has already been moved for this frame.
        if( sp->orbit_center == -1 )
            rot_pos = vec3_set( 0.0f, 0.0f, 0.0f );
        else
            rot_pos = w->spheres[sp->orbit_center].obj_pos;

        sp->obj_pos = vec3_add( rot_pos, rotv );

        // Sphere largecnt is excluded here; it is forced to black after the loop.
        // Note: the hue term evaluates as ((PI + tilt) / 10) * PI.
        if( i == SUNPOS )
            sp->obj_col = h2rgb( w->frame/720.0f );
        else if( i < w->largecnt )
            sp->obj_col = h2rgb( (w->frame / 300.f  + (PI + sp->orbit_tilt)/10.0f*PI) );

        // Use the eye of the world being updated rather than the global w0,
        // so the function works for whichever world it is given.
        sp->poseye = vec3_sub( w->eye, sp->obj_pos );
        sp->rv     = -norm2( sp->poseye ) + sp->radsq;

    }

    // Large planet without moon always black
    w->spheres[w->largecnt].obj_col = vec3_set( 0.0f, 0.0f, 0.0f );

    // light movement
    float frac = (w->frame)/180.0f;
    w->light_pos = vec3_set( -4.0f + sinf( frac )      * 8.0f,
                           -(-6.0f + cosf( frac )      * 4.0f),
                             -9.5f + sinf( frac*2.0f ) * 3.0f );

    w->frame++;

}

// Converts the per-frame parts of the floating-point world (all spheres and
// the light position) to the fixed-point world that render() reads.
static void world_to_iworld( worldinfo *w, iworldinfo *iw )
{
    sphere  *from = w->spheres;
    isphere *to   = iw->spheres;

    for( int left = OBJNUM; left > 0; left--, from++, to++ ) {
        to->obj_pos = v2iv( from->obj_pos );
        to->poseye  = v2iv( from->poseye );
        to->rv      = f2i( from->rv );
        to->obj_col = v2iv( from->obj_col );
        to->radsq   = f2i( from->radsq );
    }

    iw->light_pos = v2iv( w->light_pos );
}


// Background colour, one 64-bit word (eight pixels of one byte each) per
// colour plane. render() stores these when it decides a block is empty.
#define BG_R64 0x0000000000000000
#define BG_G64 0x0000000000000000
#define BG_B64 0x0000000000000000

// Renders one BLOCKX*BLOCKY pixel block of the fixed-point world iw0.
//
// rgb     start of the output buffer. Every image row is stored as three
//         consecutive planes (R, G, B) of iw0->stride bytes each, one byte
//         per pixel.
// xstart  x coordinate of the block's left column
// ystart  y coordinate of the block's top row
//
// Pixels are written eight at a time as 64-bit words, so the block's start
// address must be suitably aligned and BLOCKX must be a multiple of 8. Rows at
// or beyond iw0->height are not touched.
static __attribute__ ((noinline)) void render( uint8_t * restrict rgb, int xstart, int ystart )
{
    // distance between two image rows, counted in 64-bit words
    int rowstride = (iw0->stride*3)/8;
    uint64_t * restrict outr   = (uint64_t *)(rgb + ystart*3*iw0->stride                 + xstart);
    uint64_t * restrict outg   = (uint64_t *)(rgb + ystart*3*iw0->stride +   iw0->stride + xstart);
    uint64_t * restrict outb   = (uint64_t *)(rgb + ystart*3*iw0->stride + 2*iw0->stride + xstart);

    // Vector from the eye to the block's upper left pixel on the view window
    // (window centred on the origin at z = 0; 1<<(FRACBITS-1) is used as 0.5).
    ivec3 startpos = ivec3_set( imul(-iw0->swidth,  1<<(FRACBITS-1) ) - iw0->eye.x + imul(xstart<<FRACBITS, iw0->ax ),
                                imul( iw0->sheight, 1<<(FRACBITS-1) ) - iw0->eye.y - imul(ystart<<FRACBITS, iw0->ay ),
                                                                      - iw0->eye.z );

    // just clear block if no hits on corners
    // (ul/bl/br/ur = normalized rays through the four corner pixels)
    ivec3 spos;
    spos       = startpos;                           ivec3 ul = inormalize( spos );
    spos.y    -= imul(iw0->ay,(BLOCKY-1)<<FRACBITS); ivec3 bl = inormalize( spos );
    spos.x    += imul(iw0->ax,(BLOCKX-1)<<FRACBITS); ivec3 br = inormalize( spos );
    spos.y    += imul(iw0->ay,(BLOCKY-1)<<FRACBITS); ivec3 ur = inormalize( spos );

    // Stop at the first sphere that any corner ray hits; i == OBJNUM afterwards
    // means no corner ray hit anything.
    int i;
    for( i = 0; i < OBJNUM; i++ ) {
        isphere *sp = &iw0->spheres[i];
        ivec3 v     = sp->poseye;
        fixed rv    = sp->rv;

        if( idotsq( v, ul ) + rv > 0 || idotsq( v, bl ) + rv > 0 ||
            idotsq( v, br ) + rv > 0 || idotsq( v, ur ) + rv > 0    ) break;
    }

    // the last block row may be cut off by the bottom of the image
    int lastrow = MIN( iw0->height, ystart+BLOCKY );

    if( i == OBJNUM ) {

        for( int y = ystart; y < lastrow; y++ ) {

            // Each plane gets its own background word, BLOCKX/8 words per row.
            for( int k = 0; k < BLOCKX/8; k++ ) {
                outr[k] = BG_R64;
                outg[k] = BG_G64;
                outb[k] = BG_B64;
            }

            outr += rowstride;
            outg += rowstride;
            outb += rowstride;
        }

        return;
    }

    spos.y = startpos.y;

    // eight pixels of each plane are collected here before being stored
    uint64_t r0 = 0, g0 = 0, b0 = 0;

    for( int y = ystart; y < lastrow; y++, spos.y -= iw0->ay ) {

        spos.x = startpos.x;

        for( int x = 0; x < BLOCKX; x++, spos.x += iw0->ax ) {

            ivec3 ray_pos = iw0->eye;
            ivec3 ray_dir = inormalize( spos );

            ivec3 col = { 0, 0, 0 };

            // j == 0 is the primary ray, later iterations follow reflections
            for( int j = 0; j < MAXREF; j++ ) {

                int   rt_hit  = NOHIT;
                fixed rt_dist = FARDIST;

                if( j == 0 ) {

                    // Primary ray: it starts at the eye, so the precomputed
                    // poseye and rv of each sphere can be used directly.
                    for( int i = 0; i < OBJNUM; i += 4 ) {

                        fixed b0 = idot( iw0->spheres[i+0].poseye, ray_dir );
                        fixed b1 = idot( iw0->spheres[i+1].poseye, ray_dir );
                        fixed b2 = idot( iw0->spheres[i+2].poseye, ray_dir );
                        fixed b3 = idot( iw0->spheres[i+3].poseye, ray_dir );
                        fixed d0 = imul( b0, b0 ) + iw0->spheres[i+0].rv;
                        fixed d1 = imul( b1, b1 ) + iw0->spheres[i+1].rv;
                        fixed d2 = imul( b2, b2 ) + iw0->spheres[i+2].rv;
                        fixed d3 = imul( b3, b3 ) + iw0->spheres[i+3].rv;

                        // none of the four spheres is hit
                        if( d0 <= 0 && d1 <= 0 && d2 <= 0 && d3 <= 0 ) continue;

                        if( d0 > 0 ) {
                            fixed t0 = -b0 - isqrt( d0 );
                            if( t0 > 0 && t0 < rt_dist ) {
                                rt_dist = t0;
                                rt_hit  = i + 0;
                            }
                        }

                        if( d1 > 0 ) {
                            fixed t1 = -b1 - isqrt( d1 );
                            if( t1 > 0 && t1 < rt_dist ) {
                                rt_dist = t1;
                                rt_hit  = i + 1;
                            }

                        }

                        if( d2 > 0 ) {
                            fixed t2 = -b2 - isqrt( d2 );
                            if( t2 > 0 && t2 < rt_dist ) {
                                rt_dist = t2;
                                rt_hit  = i + 2;
                            }
                        }

                        if( d3 > 0 ) {
                            fixed t3 = -b3 - isqrt( d3 );
                            if( t3 > 0 && t3 < rt_dist ) {
                                rt_dist = t3;
                                rt_hit  = i + 3;
                            }

                        }

                    }

                } else {

                    // Reflected ray: the origin is the previous hit point, so
                    // the vector to each sphere has to be recomputed.
                    for( int i = 0; i < OBJNUM; i += 4 ) {

                        ivec3 v0 = ivec3_sub( ray_pos, iw0->spheres[i+0].obj_pos );
                        ivec3 v1 = ivec3_sub( ray_pos, iw0->spheres[i+1].obj_pos );
                        ivec3 v2 = ivec3_sub( ray_pos, iw0->spheres[i+2].obj_pos );
                        ivec3 v3 = ivec3_sub( ray_pos, iw0->spheres[i+3].obj_pos );
                        fixed b0 = idot( v0, ray_dir );
                        fixed b1 = idot( v1, ray_dir );
                        fixed b2 = idot( v2, ray_dir );
                        fixed b3 = idot( v3, ray_dir );
                        fixed d0 = imul( b0, b0 ) - inorm2( v0 ) + iw0->spheres[i+0].radsq;
                        fixed d1 = imul( b1, b1 ) - inorm2( v1 ) + iw0->spheres[i+1].radsq;
                        fixed d2 = imul( b2, b2 ) - inorm2( v2 ) + iw0->spheres[i+2].radsq;
                        fixed d3 = imul( b3, b3 ) - inorm2( v3 ) + iw0->spheres[i+3].radsq;

                        if( d0 <= 0 && d1 <= 0 && d2 <= 0 && d3 <= 0 ) continue;

                        if( d0 > 0 ) {
                            fixed t0 = -b0 - isqrt( d0 );
                            if( t0 > 0 && t0 < rt_dist ) {
                                rt_dist = t0;
                                rt_hit  = i + 0;
                            }
                        }

                        if( d1 > 0 ) {
                            fixed t1 = -b1 - isqrt( d1 );
                            if( t1 > 0 && t1 < rt_dist ) {
                                rt_dist = t1;
                                rt_hit  = i + 1;
                            }

                        }

                        if( d2 > 0 ) {
                            fixed t2 = -b2 - isqrt( d2 );
                            if( t2 > 0 && t2 < rt_dist ) {
                                rt_dist = t2;
                                rt_hit  = i + 2;
                            }
                        }

                        if( d3 > 0 ) {
                            fixed t3 = -b3 - isqrt( d3 );
                            if( t3 > 0 && t3 < rt_dist ) {
                                rt_dist = t3;
                                rt_hit  = i + 3;
                            }

                        }
                    }

                }

                if( rt_hit == NOHIT ) break;

                // move to the hit point; n = surface normal, l = direction to the light
                ray_pos  = ivec3_add( ray_pos, ivec3_scale( ray_dir, rt_dist ) );
                ivec3 n  = inormalize( ivec3_sub( ray_pos, iw0->spheres[rt_hit].obj_pos ) );
                ivec3 l  = inormalize( ivec3_sub( iw0->light_pos, ray_pos ) );

                fixed diffuse  = imax( idot( n, l ), 0 );
                fixed specular = idot( ray_dir, ivec3_sub( l, ivec3_scale( n, imul( diffuse, 2<<FRACBITS ) ) ) );
                      specular = ipower_spec( imax( specular, 0 ) );

                col = ivec3_add( col, ivec3_add1( ivec3_scale( iw0->spheres[rt_hit].obj_col, diffuse  ), specular ) );

                // mirror the ray direction about the normal for the next bounce
                ray_dir = ivec3_sub( ray_dir, ivec3_scale( n, imul( idot( ray_dir, n ), 2<<FRACBITS ) ) );
            } // j refl

            // grab enough bits to catch overflow
            // Each pixel is clamped to 255, OR-ed into the low byte and the
            // word is rotated left by 56 bits; after eight pixels the word has
            // made a full turn and the first pixel is back in the lowest byte.
            r0 |= imin( __insn_bfextu( col.x, FRACBITS-8, FRACBITS+8 ), 255 ); r0 = __insn_rotli( r0, 56 );
            g0 |= imin( __insn_bfextu( col.y, FRACBITS-8, FRACBITS+8 ), 255 ); g0 = __insn_rotli( g0, 56 );
            b0 |= imin( __insn_bfextu( col.z, FRACBITS-8, FRACBITS+8 ), 255 ); b0 = __insn_rotli( b0, 56 );

            // store a finished group of eight pixels
            if( (x&7) == 7 ) {
                outr[x/8] = r0; r0 = 0;
                outg[x/8] = g0; g0 = 0;
                outb[x/8] = b0; b0 = 0;
            }

        } // x

        outr += rowstride;
        outg += rowstride;
        outb += rowstride;

    } // y

}


// Writes a planar RGB image to a JPEG file using libjpeg.
//
// fname    output file name
// src      image data; every row holds three consecutive planes (R, G, B) of
//          `stride` bytes each, one byte per pixel
// width    image width in pixels
// height   image height in pixels
// stride   bytes per colour plane row in `src`
// flip     when true the rows are written bottom-up
// quality  JPEG quality passed to jpeg_set_quality()
//
// Returns true on success, false if the file could not be created or closed
// or if the row buffer could not be allocated.
static bool writejpg( char *fname, uint8_t *src, int width, int height, int stride, bool flip, int quality )
{
    struct jpeg_compress_struct cinfo;
    struct jpeg_error_mgr jerr;
    JSAMPROW row_pointer[1];

    printf( "Saving jpg %s %d*%d\n", fname, width, height );

    // Acquire the plain C resources before creating the compressor, so the
    // early exits below have no libjpeg state to release.
    FILE *outfile = fopen( fname, "wb" );
    if( outfile == NULL ) {
        printf( "Error: Could not create ouput file %s\n", fname );
        return false;
    }

    // one interleaved RGB scanline
    uint8_t *row = malloc( (size_t)width * 3 );
    if( row == NULL ) {
        printf( "Error: Out of memory while saving %s\n", fname );
        fclose( outfile );
        return false;
    }

    cinfo.err = jpeg_std_error( &jerr );

    jpeg_create_compress( &cinfo );

    jpeg_stdio_dest(&cinfo, outfile);

    cinfo.image_width      = width;
    cinfo.image_height     = height;
    cinfo.input_components = 3;
    cinfo.in_color_space   = JCS_RGB;

    jpeg_set_defaults( &cinfo );

    jpeg_set_quality( &cinfo, quality, TRUE );

    // use yuv444 for better quality, much slower
    cinfo.comp_info[0].v_samp_factor = 1;
    cinfo.comp_info[0].h_samp_factor = 1;

    jpeg_start_compress( &cinfo, TRUE );

    while( cinfo.next_scanline < cinfo.image_height ) {

        int line = (int)cinfo.next_scanline;
        int dy   = flip ? height-1-line : line;

        // start of the three planes of source row dy
        uint8_t *srcr = src + stride * 3 * dy;
        uint8_t *srcg = src + stride * 3 * dy + stride;
        uint8_t *srcb = src + stride * 3 * dy + stride + stride;

        // interleave the planes into RGBRGB... order
        for( int i = 0; i < width*3; i += 3 ) {
            row[i+0] = *srcr++;
            row[i+1] = *srcg++;
            row[i+2] = *srcb++;
        }

        row_pointer[0] = row;
        jpeg_write_scanlines( &cinfo, row_pointer, 1 );
    }

    jpeg_finish_compress( &cinfo );

    free( row );

    // fclose() flushes buffered data, so a failure here means the file is incomplete
    bool closed_ok = ( fclose( outfile ) == 0 );

    jpeg_destroy_compress( &cinfo );

    return closed_ok;
}


// Fills one plane row of the load bar: the first `count` bytes at dst are set
// to val1 and the bytes from there up to WIDTH are set to val2.
static void draw1bar( uint8_t *dst, int count, uint8_t val1, uint8_t val2 )
{
    int pos = 0;

    // filled part of the bar
    while( pos < count ) {
        dst[pos] = val1;
        pos++;
    }

    // remainder of the row
    while( pos < WIDTH ) {
        dst[pos] = val2;
        pos++;
    }
}
// Draws the load indicator over the top eight rows of the planar frame buffer
// at dst: the first `load` pixels of each row become red and the rest green.
static void drawbar( uint8_t *dst, int load )
{
    uint8_t *line = dst;

    for( int row = 0; row < 8; row++ ) {
        const int plane = w0->stride;   // bytes per colour plane row

        draw1bar( line,           load, 0xff, 0x00 );   // red plane
        draw1bar( line +   plane, load, 0x00, 0xff );   // green plane
        draw1bar( line + 2*plane, load, 0x00, 0x00 );   // blue plane

        line += 3*plane;
    }
}

// Parallel raytracer
// Job description for one render() call, passed to the workers through
// par_sendjob().
typedef struct {
    int x;          // left column of the block to render
    int y;          // top row of the block to render
    uint8_t *dst;   // start of the output frame buffer
} raytracerdata;

// Job entry point handed to par_sendjob(): unpacks a raytracerdata record and
// renders the block it describes. The length argument is not used.
static void raytracer_func( void *data, unsigned int len )
{
    const raytracerdata *job = data;

    (void)len;

    render( job->dst, job->x, job->y );
}

// Frame number to render as a still image (set by the -g option), or -1 to
// animate normally.
int staticframe = -1;

// Number of recent frame loads averaged for the on-screen load bar.
#define LOADS 16

// Main render loop: initialises the world, then for each frame converts the
// world to fixed point, dispatches one job per BLOCKX*BLOCKY block, advances
// the animation while the jobs run, waits for completion, draws the load bar
// and optionally saves the frame as a JPEG.
//
// rw, rh    output size in pixels
// savepics  when true every frame is written to picNNNN.jpg
// frames    number of frames to render
// timeskip  animation steps to advance per rendered frame
static void render_controller( int rw, int rh, bool savepics, int frames, int timeskip )
{
    uint64_t total = 0;

    float loadlist[LOADS];
    int loadpos = 0;

    for( int i = 0; i < LOADS; i++ ) {
        loadlist[i] = 0.0f;
    }

    int time_now  = 0;
    int time_quit = frames;

    // Each colour plane row is padded to a multiple of 64 bytes. The buffer
    // must be sized with this padded stride, because render(), drawbar() and
    // writejpg() all address rows as 3*stride bytes.
    int stride = (rw+63)&~63;

    uint8_t *dst = malloc( (size_t)stride * (size_t)rh * 3 );
    if( dst == NULL ) {
        printf( "Error: Could not allocate %d*%d frame buffer\n", rw, rh );
        return;
    }

    world_init( w0, rw, rh, stride );

    // Copy fixed stuff to integer world
    iw0->eye     = v2iv( w0->eye );
    iw0->swidth  = f2i( w0->swidth );
    iw0->sheight = f2i( w0->sheight );
    iw0->ax      = f2i( w0->ax );
    iw0->ay      = f2i( w0->ay );
    iw0->width   = w0->width;
    iw0->height  = w0->height;
    iw0->stride  = w0->stride;


    // For a still image, advance the animation to the requested frame once.
    if( staticframe != -1 ) {
        for( int i = 0; i < staticframe; i++ )
            world_timer( w0 );
    }


    while( time_now < time_quit ) {

        uint64_t t0 = get_usec();

        // convert world data to fixed point
        world_to_iworld( w0, iw0 );

        // start jobs
        // NOTE(review): rd is reused for every block, so par_sendjob()
        // presumably copies the job data before returning - confirm in par.h.
        raytracerdata rd;

        rd.dst = dst;

        for( int y = 0; y < rh; y += BLOCKY ) {

            rd.y = y;

            for( int x = 0; x < rw; x += BLOCKX ) {

                rd.x = x;

                par_sendjob( raytracer_func, &rd, sizeof(rd) );

            }

        }

        // do timer while last jobs are wrapping up
        // (this only touches the float world; the jobs read the integer copy)
        if( staticframe == -1 ) {
            for( int i = 0; i < timeskip; i++ ) {
                world_timer( w0 );
            }
        }

        // wait for all done
        par_wait();

        uint64_t t1 = get_usec();

        uint64_t frametime = t1-t0;
        total += frametime;

        if( time_now%30 == 0 ) printf( "%llu\n", (unsigned long long)frametime );

        // load in percent of a 60 fps frame budget, averaged over LOADS frames
        loadlist[loadpos++] =  (frametime*100.0f)/(1000000.0f/60.0f);
        if( loadpos >= LOADS ) loadpos = 0;

        float loadsum = 0.0f;
        for( int i = 0; i < LOADS; i++ ) {
            loadsum += loadlist[i];
        }

        float load = loadsum / LOADS;
        if( load > 100.0f ) load = 100.0f;

        drawbar( dst, (int)(load*((float)WIDTH/100.0f)) );

        if( savepics ) {

            char fname[80];
            snprintf( fname, sizeof fname, "pic%04d.jpg", time_now );
            // pass the padded stride, not the width, so rows line up with render()
            writejpg( fname, dst, w0->width, w0->height, w0->stride, false, 90 );

        }

        time_now++;
    }

    printf( "total ms: %f\n", total/1000.0f );

    // avoid dividing by zero when no frame was rendered
    if( time_now > 0 )
        printf( "average frame time: %f\n", ((float)(total)/(float)(time_now))/1000.0f );

    free( dst );

}



// Prints the command line help text to stdout. exec_name is the program name
// shown on the usage line.
static void print_usage( const char *exec_name )
{
    static const char *const option_help[] = {
        "-m          Use predefined measurement config\n",
        "-s          Save output\n",
        "-g frame    Render static frame\n",
        "-f frames   Number of frames\n",
        "-t skip     Time to skip between frames\n",
        "-w cores    Number of cores to run on\n",
    };

    printf( "Usage: %s [OPTIONS]\n\n", exec_name );
    fputs( "Options:\n", stdout );

    for( size_t n = 0; n < sizeof option_help / sizeof option_help[0]; n++ ) {
        fputs( option_help[n], stdout );
    }
}


// Program entry point: parses the command line, prints the effective
// configuration, starts the worker pool, runs the render loop and shuts the
// pool down again.
//
// Returns 0 on success (and for -h), EXIT_FAILURE for an unknown option, or
// the non-zero result of par_init() if the workers could not be started.
int main( int argc, char *argv[] )
{
    int frames      = INT_MAX;
    int cpus        = 35;
    int timeskip    = 1;
    bool savepics   = false;
    bool measurecfg = false;

    int w = WIDTH;
    int h = HEIGHT;
    int rc;

    int opt;

    w0  = &world0;
    iw0 = &iworld0;

    while( (opt = getopt( argc, argv, "hmsg:f:t:w:" ) ) != -1 ) {
        switch( opt ) {
        case 'h':
            print_usage(argv[0]);
            return 0;
        case 's': savepics   = true;           break;
        case 'm': measurecfg = true;           break;
        case 'g': staticframe= atoi( optarg ); break;
        case 'f': frames     = atoi( optarg ); break;
        case 't': timeskip   = atoi( optarg ); break;
        case 'w': cpus       = atoi( optarg ); break;
        default:
            // An unknown option or missing argument is an error, so report
            // it through the exit status as well as the usage text.
            print_usage(argv[0]);
            return EXIT_FAILURE;
        }
    }

    // -m overrides anything given on the command line
    if( measurecfg ) {
        printf( "Measurement config being used\n" );
        frames   = 900;
        cpus     = 35;
        timeskip = 1;
        savepics = false;
    }

    printf( "Frames:    %d\n",    frames );
    printf( "Cores:     %d\n",    cpus );
    printf( "Blocksize: %d*%d\n", BLOCKX, BLOCKY );
    printf( "Timeskip:  %d\n",    timeskip );
    printf( "Output:    %d*%d\n", w, h );

    rc = par_init( cpus );
    if( rc != 0 ) return rc;

//    par_measure_start();
    
    render_controller( w, h, savepics, frames, timeskip );

//    par_measure_stop();
//    par_measure_report();

    par_shutdown();

    return 0;
}
