﻿#include "quadexport.h"
#include "scenefile.h"
#include "platform.h"
#include "sys_log.h"
#include "sys_vr.h"
#include "sys_quadstore.h"
#include "sys_render.h"
#include "imgload.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <xmmintrin.h>
#include <emmintrin.h>

//State of one background whiteboard-export job.
//Filled in by yrQuadExport_init, updated by the worker thread (export_threadfunc)
//and read back through yrQuadExport_status.
struct yr_quad_exporter
{
	yrSceneFile*	sf;			//scene file opened by yrQuadExport_init, closed by yrQuadExport_destroy
	char*			folder;		//heap copy of the output folder path, freed by yrQuadExport_destroy

	yrThread		t;			//worker thread running export_threadfunc
	yrEvent			t_finished;	//set by the worker just before it returns
	yrEvent			t_cancel;	//set by yrQuadExport_cancel/yrQuadExport_destroy to stop the worker
	enum QEStatus	t_status;	//result written by the worker; reported once t_finished is set

	float			range;		//export radius passed to rangecheck; <= 0 disables the check
	enum QEColor	bgcolor;	//background color override applied to each exported quad
	int				bgimage;	//nonzero: look up and composite the quad's background image

	size_t			total;		//quad count reported by the iterator (starts at 1 to avoid divide by zero)
	size_t			processed;	//quads handled so far (exported + ignored + failed)
	size_t			exported;	//quads written to disk successfully
	size_t			ignored;	//quads skipped by the range check
	size_t			failed;		//quads whose render, encode or write failed
};

static void export_threadfunc(void* p);

/* Create an export job and start its worker thread.
 *  file     scene file to read the quads from
 *  folder   output directory (copied, the caller keeps ownership of its string)
 *  range    export radius handed to the range check (<= 0 exports everything)
 *  bgcolor  background color override for the exported quads
 *  bgimage  nonzero to composite each quad's background image
 * Returns the new exporter, or NULL when any allocation or setup step fails.
 */
yrQuadExporter* yrQuadExport_init(const char* file, const char* folder, float range, enum QEColor bgcolor, int bgimage)
{
	yrQuadExporter* job = calloc(1, sizeof *job);
	if(job == NULL) {
		yrLog(0, "Out of memory");
		return NULL;
	}

	job->folder = _strdup(folder);
	if(job->folder == NULL) {
		yrLog(0, "Out of memory");
		goto fail;
	}

	//plain settings
	job->bgimage = bgimage;
	job->bgcolor = bgcolor;
	job->range = range;
	job->total = 1; //keeps progress ratios from dividing by zero

	//resources; the thread is created last so it only ever sees a complete job
	job->sf = yrSceneFile_open(file, 0);
	if(!job->sf) goto fail;

	job->t_finished = yrEvent_create();
	if(!job->t_finished) goto fail;

	job->t_cancel = yrEvent_create();
	if(!job->t_cancel) goto fail;

	job->t = yrThread_create(export_threadfunc, job);
	if(!job->t) goto fail;

	return job;

fail:
	//destroy copes with a partially initialised job
	yrQuadExport_destroy(job);
	return NULL;
}

/* Stop the worker (if one was started) and release everything owned by the exporter.
 * Accepts NULL and partially initialised exporters (as produced by a failed
 * yrQuadExport_init), in which case only the members that exist are released.
 */
void yrQuadExport_destroy(yrQuadExporter* e)
{
	if(!e) return; //nothing to do, mirrors free(NULL)

	if(e->t) {
		//ask the worker to stop and give it time to leave its loop
		yrEvent_set(e->t_cancel, 1);
		yrThread_join(e->t, 500000);
	}
	if(e->t_cancel)		yrEvent_destroy(e->t_cancel);
	if(e->t_finished)	yrEvent_destroy(e->t_finished);
	if(e->sf)			yrSceneFile_close(e->sf);
	free(e->folder);
	free(e);
}

/* Report the progress counters and the state of the export job.
 * All five output pointers must be valid; they receive a snapshot of the counters.
 * Returns qesBusy while the worker is still running, otherwise the status it stored.
 */
enum QEStatus yrQuadExport_status(yrQuadExporter* e, size_t* total, size_t* processed, size_t* exported, size_t* ignored, size_t* failed)
{
	//snapshot the counters first
	*total     = e->total;
	*processed = e->processed;
	*exported  = e->exported;
	*ignored   = e->ignored;
	*failed    = e->failed;

	//a zero-timeout wait returning 0 means the worker has signalled completion
	const int finished = (yrEvent_wait(e->t_finished, 0) == 0);
	return finished ? e->t_status : qesBusy;
}

//Request cancellation of a running export. Only sets the cancel event and returns
//immediately; the worker checks the event between quads.
void yrQuadExport_cancel(yrQuadExporter* e)
{
	//signal thread to stop
	yrEvent_set(e->t_cancel, 1);
}

#define BG_MAX_TEX 125
/* Decide whether a quad should be exported.
 * Returns 1 when range is disabled (<= 0) or when at least one of the quad's four
 * corners lies closer than `range` to `pos`; returns 0 otherwise.
 * The fourth corner is not stored and is derived as v1 + v2 - v0.
 */
static int rangecheck(yrQuad* q, vec4i pos, float range)
{
	if(range <= 0.0f) return 1;

	vec4i corner[4];
	corner[0] = q->v[0];
	corner[1] = q->v[1];
	corner[2] = q->v[2];
	corner[3] = vec4i_sub(vec4i_add(q->v[1], q->v[2]), q->v[0]);

	for(int i = 0; i < 4; i += 1) {
		//distance from pos to this corner, scaled by 1/YR_ACCURACY
		vec4f rel = vec4f_mul(1.0f/YR_ACCURACY, vec4f_from_vec4i(vec4i_sub(corner[i], pos)));
		if(vec3f_length(rel) < range) return 1;
	}
	return 0;
}
static int quad2image(unsigned* out_w, unsigned* out_h, unsigned char** out_data, yrQuad* q, uint32_t tiles[64], yrSceneFile* sf, size_t bglen, unsigned char* bgpng);
static void export_threadfunc(void* p)
{
	yrQuadExporter* e = (yrQuadExporter*) p;
	int nocancel = 1;
	int err;
	e->t_status = qesBusy;

	//load backgrounds
	unsigned bg_count;
	size_t* bg_size;
	unsigned char* bg_blob;
	uint32_t bg_last_id;
	uint32_t bg_id_map[BG_MAX_TEX];
	err = yrSceneFile_backgrounds_get(e->sf, &bg_count, &bg_size, &bg_blob, &bg_last_id, bg_id_map);
	if(err) {
		e->t_status = qesInitFailed;
		yrEvent_set(e->t_finished, 1);
		return;
	}

	//cold read last position
	vec4i vr_last_pos = yrVR_cold_get_coord_offset(e->sf);
	vr_last_pos.z += 165*YR_ACCURACY/100; //add 1.65m to approximate head position

	//init cold iterate of quads
	yrQuad_cold_iter* qi = yrQuad_cold_iter_init(e->sf, &e->total);
	if(!qi) {
		free(bg_size);
		free(bg_blob);
		e->t_status = qesInitFailed;
		yrEvent_set(e->t_finished, 1);
		return;
	}

	//filename
	const char fnamepart[] = "/VBoard_4294967296.png";
	size_t folderlen = strlen(e->folder);
	char* fnamebuf = malloc(folderlen + sizeof(fnamepart));
	if(!fnamebuf) {
		yrLog(0, "Out of memory");
		free(bg_size);
		free(bg_blob);
		e->t_status = qesInitFailed;
		yrEvent_set(e->t_finished, 1);
		return;
	}
	strcpy(fnamebuf, e->folder);
	strcat(fnamebuf, fnamepart);
	size_t fnameoff = folderlen + 8;

	//iterate quads && not cancel
	yrQuad q;
	uint32_t q_bg;
	uint32_t q_tiles[64];
	while((0 == yrQuad_cold_iter_next(qi, &q, &q_bg, q_tiles)) &&
		  (nocancel = yrEvent_wait(e->t_cancel, 0)))
	{
		if(rangecheck(&q, vr_last_pos, e->range)) {
			//color
			if(e->bgcolor == qecTransparent)	q.color = 0x00000000ul;
			if(e->bgcolor == qecWhite)			q.color = 0xFFFFFFFFul;

			//background image
			size_t qbglen = 0;
			unsigned char* qbgdata = bg_blob;
			if(e->bgimage) {
				for(unsigned b = 0; b < bg_count; ++b) {
					if(bg_id_map[b] == q_bg) {
						qbglen = bg_size[b];
						break;
					}
					qbgdata += bg_size[b];
				}
			}
			if(!qbglen) qbgdata = NULL;

			//actual export
			unsigned w,h;
			unsigned char* img;
			err = quad2image(&w, &h, &img, &q, q_tiles, e->sf, qbglen, qbgdata);
			if(err) {e->failed += 1; e->processed += 1; continue;}

			//encode to png
			unsigned char* png;
			size_t pnglen;
			const char* errtxt = yrImgLoad_encode(&pnglen, &png, w, h, img);
			free(img);
			if(errtxt) {
				yrLog(0, "Whiteboard export failed: %s", errtxt);
				e->failed += 1;
				e->processed += 1;
				continue;
			}

			//write to file
			sprintf(fnamebuf + fnameoff, "%u.png", q.id);
			yrFile* f = yrFile_open(fnamebuf, yrF_create | yrF_write);
			size_t rw = yrFile_write(f, (int64_t) pnglen, png);
			yrFile_close(f);
			free(png);
			if(rw != pnglen) {e->failed += 1; e->processed += 1; continue;}
			
			//done
			e->exported += 1;
		} else {
			e->ignored += 1;
		}
		e->processed += 1;
	}
	//done
	free(fnamebuf);
	free(bg_size);
	free(bg_blob);
	e->t_status = nocancel ? (e->failed ? qesFailed : qesFinished) : qesCancelled;
	yrEvent_set(e->t_finished, 1);
}


//sRGB to Linear
/*	Condition			Value
	0 ≤ S ≤ 0.04045		L = S/12.92
	0.04045 < S ≤ 1		L = ((S+0.055)/1.055)^2.4
*/
//Linear to sRGB
/*	Condition			Value
	0 ≤ L ≤ 0.0031308	S = L*12.92
	0.0031308 < L ≤ 1	S = 1.055*L^(1/2.4) − 0.055
*/

//One pixel as four floats (r, g, b, a). `v` aliases the same four floats as a single
//SSE register so whole-pixel arithmetic can use _mm_* intrinsics; r is the lowest lane.
typedef union {
	__m128 v;
	struct {
		float r;
		float g;
		float b;
		float a;
	};
} px_linear;

//One pixel as four bytes (r, g, b, a), the layout of decoded image data.
//`v` aliases the same four bytes as one 32-bit value.
typedef union {
	uint32_t v;
	struct {
		uint8_t r;
		uint8_t g;
		uint8_t b;
		uint8_t a;
	};
} px_srgb;

static float srgb2linear(uint8_t in)
{
	if(in > 10) {
		return powf(in*(1/(255.0f*1.055f)) + 0.055f/1.055f, 2.4f);
	} else {
		return in/(255.0f * 12.92f);
	}
}

static uint8_t linear2srgb(float in)
{
	float out;
	if(in > 0.0031308f) {
		out = powf(in, 1.0f/2.4f)*1.055f - 0.055f;
	} else {
		out = in * 12.92f;
	}
	out *= 255.0f;
	out = (out > 255.0f) ? 255.0f : out;
	out = (out < 0.0f) ? 0.0f : out;
	return (uint8_t) roundf(out);
}

//Convert a byte pixel to floats: r/g/b go through the sRGB decode curve,
//alpha is scaled linearly to [0,1].
static px_linear ipx2fpx(px_srgb px)
{
	px_linear lin;
	lin.a = (float) px.a * (1.0f/255.0f);
	lin.b = srgb2linear(px.b);
	lin.g = srgb2linear(px.g);
	lin.r = srgb2linear(px.r);
	return lin;
}

//Composite `overlay` on top of `img`: every channel (alpha included) becomes
//img * (1 - overlay.a) + overlay, i.e. the "over" operator for premultiplied pixels.
static px_linear premul_combine(px_linear img, px_linear overlay)
{
	const __m128 keep = _mm_set_ps1(1.0f - overlay.a);
	px_linear blended;
	blended.v = _mm_add_ps(_mm_mul_ps(img.v, keep), overlay.v);
	return blended;
}

/* Convert a float pixel to a byte pixel.
 *  px   float pixel to convert
 *  err  receives the quantisation error for error diffusion; currently always zero
 *       because the error calculation is not implemented yet
 *  u_p  nonzero: px is premultiplied and its color is divided by alpha first
 *       (pixels with alpha below 1/255 become fully transparent black);
 *       zero: px is converted as-is
 * Color channels are sRGB encoded, alpha is scaled to 0..255. Alpha is rounded to
 * nearest like the color channels: float compositing routinely yields values such as
 * 0.99999994, which truncation would turn into 254 instead of 255.
 */
static px_srgb fpx2ipx(px_linear px, px_linear* err, int u_p)
{
	px_linear foo;
	//undo premultiply
	if(!u_p) {
		foo.v = px.v;
	}
	else if(px.a >= 1.0f/255.0f) {
		foo.v = _mm_div_ps(px.v, _mm_set_ps1(px.a));
		foo.a = px.a; //the division turned alpha into 1, restore it
	} else {
		foo.v = _mm_setzero_ps();
	}
	//convert to srgb
	px_srgb out;
	out.r = linear2srgb(foo.r);
	out.g = linear2srgb(foo.g);
	out.b = linear2srgb(foo.b);
	float tmpa = foo.a * 255.0f;
	tmpa = (tmpa > 255.0f) ? 255.0f : tmpa;
	tmpa = (tmpa < 0.0f) ? 0.0f : tmpa;
	out.a = (uint8_t) roundf(tmpa);
	//calculate error
	//TODO (involves converting back to linear and diffing)
	err->v = _mm_setzero_ps();
	//done
	return out;
}

//A decoded (and possibly downscaled) image used as a sampling source.
typedef struct {
	unsigned char* data;	//decoded pixels, 4 bytes per pixel, w*h pixels; NULL when no image is loaded
	unsigned w;				//width in pixels
	unsigned h;				//height in pixels
	int repeat;				//nonzero: coordinates outside the image wrap around instead of reading as empty
}
bgdata;
//decode a PNG and downscale it towards the requested output size; data is NULL in the result on failure
static bgdata bg_setup(size_t pnglen, void* pngdata, float bg_lox, float bg_hix, float bg_loy, float bg_hiy, unsigned output_w, unsigned output_h, int repeat);
//free the pixel data of a bgdata
static void bg_cleanup(bgdata* bg);
//bicubic sample at normalised image coordinates (x, y)
static px_linear bg_getpixel(bgdata* bg, float x, float y);

/* Render one quad to a byte image.
 *  out_w, out_h  receive the image size in pixels (each clamped to 2048)
 *  out_data      receives a calloc'ed buffer of out_w*out_h*4 bytes owned by the caller
 *  q             the quad (color, geometry, draw/background UVs, flags)
 *  tiles         up to 8x8 tile ids, indexed row-major by the tile grid of this quad;
 *                0xFFFFFFFF marks a slot without a stored tile
 *  sf            scene file the tile PNGs are read from
 *  bglen, bgpng  optional background PNG (bgpng == NULL for none)
 * Each pixel is built as background color, then background image, then tile, and
 * converted to bytes with fpx2ipx. Output rows are written bottom-up.
 * Returns 0 on success and -1 on failure. After a failure past the allocation the
 * buffer has already been freed and *out_data must not be used.
 * A background that fails to load is only logged; the quad is exported without it.
 */
static int quad2image(unsigned* out_w, unsigned* out_h, unsigned char** out_data, yrQuad* q, uint32_t tiles[64], yrSceneFile* sf, size_t bglen, unsigned char* bgpng)
{
	//two row buffers with one guard pixel on each side, used alternately as
	//"row being built" (scratch) and "error for the next row" (error_row)
	px_linear editrow[2][258] = {0};
	px_linear* scratch = editrow[0]+1;
	px_linear* error_row = editrow[1]+1;
	//error carried from the right edge of one tile to the tile next to it, one entry per row
	px_linear error_col[257] = {0};
	px_srgb* tile = NULL;
	size_t tilelen = 0;
	void* tilepng = NULL;
	bgdata bg = {NULL};

	//background color
	px_linear bgpx;
	bgpx.a = yrColor_alpha(q->color);
	bgpx.b = yrColor_blue(q->color);
	bgpx.g = yrColor_green(q->color);
	bgpx.r = yrColor_red(q->color);

	//calc final size
	float dim_w = vec3f_length(vec4f_mul(1.0f/YR_ACCURACY, vec4f_from_vec4i(vec4i_sub(q->v[1], q->v[0]))));
	float dim_h = vec3f_length(vec4f_mul(1.0f/YR_ACCURACY, vec4f_from_vec4i(vec4i_sub(q->v[2], q->v[0]))));
	*out_w = (unsigned)(dim_w * UV_PER_METER * 256); //1 UV = 1 tile = 256 pixels (which in turn is 0.256 meters because I wanted 1mm=1px but that might change later)
	*out_h = (unsigned)(dim_h * UV_PER_METER * 256);
	if(*out_w == 0) { yrLog(0, "Invalid quad, width is 0"); return -1; }
	if(*out_h == 0) { yrLog(0, "Invalid quad, height is 0"); return -1; }
	if(*out_w > 2048) *out_w = 2048;
	if(*out_h > 2048) *out_h = 2048;

	//alloc final image
	*out_data = calloc(1, (*out_w) * (*out_h) * 4);
	if(!*out_data) { yrLog(0, "Out of memory"); return -1; }

	//prepare background
	if(bgpng) {
		bg = bg_setup(bglen, bgpng, q->uv_back[0][0],q->uv_back[1][0],q->uv_back[0][1],q->uv_back[1][1],*out_w,*out_h, q->flags & QUAD_BG_TILE);
		if(!bg.data) { yrLog(0, "Exporting whiteboard without background."); }
	}

	//iterate over tiles
	//number of tile columns/rows touched by the quad, at most 8 each to match tiles[64]
	int tile_xhi = (int)(ceilf(q->uv_draw[0] + dim_w * UV_PER_METER) - floorf(q->uv_draw[0]));
	int tile_yhi = (int)(ceilf(q->uv_draw[1] + dim_h * UV_PER_METER) - floorf(q->uv_draw[1]));
	if(tile_xhi > 8) tile_xhi = 8;
	if(tile_yhi > 8) tile_yhi = 8;

	for(int ty = 0; ty < tile_yhi; ty += 1)
	{
		for(int tx = 0; tx < tile_xhi; tx += 1)
		{
			//get tile png
			unsigned tidx = ty * tile_xhi + tx;
			int err;
			if(tiles[tidx] != 0xFFFFFFFFul) {
				err = yrSFTile_read(sf, tiles[tidx], &tilelen, &tilepng);
				if(err) {tilepng = NULL; goto onerror;}
			}
			//decode
			if(tilepng) {
				unsigned w,h;
				const char* errtxt = yrImgLoad_decode(tilelen, tilepng, &w, &h, &tile);
				free(tilepng); tilepng = NULL;
				if(errtxt) { yrLog(0, "Tile %u decode failed: %s", tiles[tidx], errtxt); goto onerror; }
				if(w != 256 || h != 256) { yrLog(0, "Tile %u has bad dimensions", tiles[tidx]); goto onerror; }
			}
			//determine output rect
			//pixel offset of the quad's origin inside the first tile
			int xuvshift = (size_t)(q->uv_draw[0] * 256);
			int yuvshift = (size_t)(q->uv_draw[1] * 256);
			size_t out_xlo = (xuvshift > tx*256) ? 0 : (tx*256 - xuvshift);
			size_t out_ylo = (yuvshift > ty*256) ? 0 : (ty*256 - yuvshift);
			size_t out_xhi = (tx+1)*256 - xuvshift;
			size_t out_yhi = (ty+1)*256 - yuvshift;
			if(out_xhi > *out_w) out_xhi = *out_w;
			if(out_yhi > *out_h) out_yhi = *out_h;
		
			//iterate over rect
			//only the first tile row starts part-way into the tile
			size_t row_off = ty ? 0 : yuvshift;
			int row_lo = (int) row_off;
			int row_hi = row_lo + (int)(out_yhi - out_ylo);
			px_linear next_colerror = error_col[row_lo];
			error_col[row_lo].v = _mm_setzero_ps();
			for(int r = row_lo; r < row_hi; r += 1)
			{
				//fill with error
				//swap the buffers: last row's accumulated error becomes the start of this row
				px_linear* swap = scratch;
				scratch = error_row; //TODO: carries over the top error row to the bot of the tile next to it?
				error_row = swap;
				scratch[0].v = _mm_add_ps(scratch[0].v, next_colerror.v);
				memset(error_row-1, 0, 258 * sizeof(px_linear));
				//add bg color
				for(int c = 0; c < 256; c += 1)
					scratch[c].v = _mm_add_ps(scratch[c].v, bgpx.v);
				//overlay with bg image
				if(bg.data) {
					for(int c = 0; c < 256; c += 1) {
						//pixel center mapped into the quad's background UV window
						float bgx = (((float) (out_xlo + c           ) + 0.5f) / *out_w) * (q->uv_back[1][0] - q->uv_back[0][0]) + q->uv_back[0][0];
						float bgy = (((float) (out_ylo + (r - row_lo)) + 0.5f) / *out_h) * (q->uv_back[1][1] - q->uv_back[0][1]) + q->uv_back[0][1];
						//note: this local shadows the background color declared at function scope
						px_linear bgpx = bg_getpixel(&bg, bgx, bgy);
						scratch[c] = premul_combine(scratch[c], bgpx);
					}
				}
				//overlay with tile
				if(tile) {
					for(int c = 0; c < 256; c += 1) {
						//tile rows are read in reverse order (row 255 first)
						int pxidx = (255 - r)*256 + c;
						px_linear fpx = ipx2fpx(tile[pxidx]);
						scratch[c] = premul_combine(scratch[c], fpx);
					}
				}
				//output to final
				for(size_t c = out_xlo; c < out_xhi; c += 1) {
					//calc position in output image
					size_t outrow = (*out_h - 1) - (out_ylo + (r - row_lo));
					size_t pxoff = (outrow * (*out_w) + c)*4;
					size_t tileoff = (xuvshift + c) % 256;
					//convert to integer values and write to output
					px_linear pxerr;
					px_srgb ipx = fpx2ipx(scratch[tileoff], &pxerr, 1);
					*(px_srgb*)(*out_data + pxoff) = ipx;
					//diffuse error
					//7/16 right, 3/16 below-left, 5/16 below, 1/16 below-right
					scratch  [tileoff+1].v = _mm_add_ps(scratch  [tileoff+1].v, _mm_mul_ps(pxerr.v, _mm_set_ps1(7.0f/16.0f)));
					error_row[tileoff-1].v = _mm_add_ps(error_row[tileoff-1].v, _mm_mul_ps(pxerr.v, _mm_set_ps1(3.0f/16.0f))); //this error is lost between tiles
					error_row[tileoff+0].v = _mm_add_ps(error_row[tileoff+0].v, _mm_mul_ps(pxerr.v, _mm_set_ps1(5.0f/16.0f)));
					error_row[tileoff+1].v = _mm_add_ps(error_row[tileoff+1].v, _mm_mul_ps(pxerr.v, _mm_set_ps1(1.0f/16.0f)));
				}
				//carry over to next tile
				//whatever spilled into guard slot 256 is stored for the tile to the right
				next_colerror = error_col[r+1];
				error_col[r].v   = _mm_add_ps(error_col[r].v, scratch[256].v);
				error_col[r+1].v = error_row[256].v;
			}
			//cleanup tile
			free(tile);
			tile = NULL;
		}
		//prep error for next row
		memset(error_col, 0, 257 * sizeof(px_linear)); //the error in error_col[256] is lost here
	}
	bg_cleanup(&bg);
	return 0;
onerror:
	//release everything, including the output buffer handed out through out_data
	bg_cleanup(&bg);
	free(tile);
	free(tilepng);
	free(*out_data);
	return -1;
}

/* Shrink the image horizontally by averaging runs of 2^level source pixels.
 *  bg     source image (bg->w x bg->h, 4 bytes per pixel); bg->w is not updated here
 *  level  log2 of the run length
 *  mip_w  resulting width; the last column averages whatever pixels remain
 *  dst    destination with rows packed at mip_w pixels; may be bg->data itself
 *         because every write lands at or before the data still to be read
 */
static void calc_horizontal_mip(bgdata* bg, unsigned level, unsigned mip_w, unsigned char* dst)
{
	const unsigned span = (1<<level);
	const unsigned last = mip_w - 1;
	const unsigned tail = bg->w - last * span;	//source pixels left for the last column

	for(unsigned row = 0; row < bg->h; row += 1)
	{
		for(unsigned col = 0; col <= last; col += 1)
		{
			const unsigned count = (col == last) ? tail : span;
			const unsigned first = row * bg->w + col*span;
			for(unsigned ch = 0; ch < 4; ch += 1)
			{
				unsigned sum = 0;
				for(unsigned k = 0; k < count; ++k)
				{
					sum += bg->data[(first+k)*4+ch];
				}
				dst[(row*mip_w + col)*4+ch] = (unsigned char) (sum / count);
			}
		}
	}
}

/* Shrink the image vertically by averaging groups of 2^level source rows.
 *  bg     source image (bg->w x bg->h, 4 bytes per pixel); bg->h is not updated here
 *  level  log2 of the group size
 *  mip_h  resulting height; the last row averages whatever rows remain
 *  dst    destination rows of bg->w pixels; may be bg->data itself
 * If the temporary accumulator cannot be allocated the image is left untouched and
 * only a message is logged.
 */
static void calc_vertical_mip(bgdata* bg, unsigned level, unsigned mip_h, unsigned char* dst)
{
	const unsigned span = (1<<level);
	const unsigned stride = bg->w*4;	//bytes (and accumulator slots) per row
	unsigned* sum = malloc(stride*sizeof(unsigned));
	if(!sum) {yrLog(0, "Out of memory, exported image will have an incorrect background"); return;}

	const unsigned last = mip_h - 1;
	const unsigned tail = bg->h - last * span;	//source rows left for the last output row

	for(unsigned y = 0; y <= last; y += 1)
	{
		const unsigned count = (y == last) ? tail : span;
		//accumulate the source rows of this group
		memset(sum, 0, stride*sizeof(unsigned));
		for(unsigned k = 0; k < count; ++k)
		{
			const unsigned src = (y*span+k) * bg->w * 4;
			for(unsigned x = 0; x < stride; x += 1)
			{
				sum[x] += bg->data[src+x];
			}
		}
		//store the averages
		for(unsigned x = 0; x < stride; x += 1)
		{
			sum[x] /= count;
			dst[y*stride + x] = (unsigned char) sum[x];
		}
	}
	free(sum);
}

/* Decode a PNG and reduce it towards the resolution at which it will be sampled.
 *  pnglen, pngdata         encoded image
 *  bg_lox..bg_hiy          UV window of the image that is mapped onto the output
 *  output_w, output_h      size in pixels of the area the window is mapped onto
 *  repeat                  stored in the result; nonzero makes sampling wrap around
 * The image is downscaled by whole powers of two per axis (never upscaled) so that the
 * sampled window is not much larger than the output.
 * Returns the image; `data` is NULL when decoding failed or the image is empty.
 */
static bgdata bg_setup(size_t pnglen, void* pngdata, float bg_lox, float bg_hix, float bg_loy, float bg_hiy, unsigned output_w, unsigned output_h, int repeat)
{
	bgdata out = {NULL};
	out.repeat = repeat;

	//decode
	const char* errtxt = yrImgLoad_decode(pnglen, pngdata, &out.w, &out.h, &out.data);
	if(errtxt) { yrLog(0, "Background decode failed: %s", errtxt); return out; }

	//up or downscale?
	float covered_w = (bg_hix - bg_lox) * out.w;
	float covered_h = (bg_hiy - bg_loy) * out.h;
	float ratio_w = covered_w / output_w;
	float ratio_h = covered_h / output_h;

	//calc mip levels and sizes
	//The level is capped at 30 before the conversion: an infinite or huge ratio
	//(output size 0, very large UV window) would otherwise make both the
	//float->unsigned conversion and the shift below undefined. NaN and negative
	//ratios fail the >= 0 test and select level 0.
	const float max_level = 30.0f;
	float lrw = log2f(ratio_w);
	float lrh = log2f(ratio_h);
	unsigned level_w = (unsigned) ((lrw >= 0.0f)? ((lrw < max_level)? lrw : max_level) : 0.0f);
	unsigned level_h = (unsigned) ((lrh >= 0.0f)? ((lrh < max_level)? lrh : max_level) : 0.0f);
	unsigned mip_w = (out.w + ((1u << level_w) - 1)) / (1u << level_w);
	unsigned mip_h = (out.h + ((1u << level_h) - 1)) / (1u << level_h);

	//calculate mip horizontal if needed
	if(level_w) {
		calc_horizontal_mip(&out, level_w, mip_w, out.data);
		out.w = mip_w;
	}
	
	//calculate mip vertical if needed
	if(level_h) {
		calc_vertical_mip(&out, level_h, mip_h, out.data);
		out.h = mip_h;
	}

	//resize data
	size_t final_size = (size_t) out.w * out.h * 4;
	if(final_size == 0) {
		//empty image: report it as "no image" instead of calling realloc with size 0
		free(out.data);
		out.data = NULL;
		return out;
	}
	//shrinking should not fail, but if it does the original (larger) block is still
	//valid, so keep using it rather than losing the image
	void* shrunk = realloc(out.data, final_size);
	if(shrunk) out.data = shrunk;
	YR_ASSERT(out.data);

	return out;
}

//Release the pixel data of a bgdata and mark it as empty.
//Safe to call on an image whose data is already NULL.
static void bg_cleanup(bgdata* bg)
{
	free(bg->data);
	bg->data = NULL;
}

/* Cubic interpolation between four consecutive samples, evaluated per channel.
 *  t               position between p0 and p1
 *  n1, p0, p1, p2  the sample before p0, the two samples around t, the sample after p1
 * The coefficient rows (0,-1,2,-1), (2,0,-5,3), (0,1,4,-3), (0,0,-1,1) combined with the
 * weights (1, t, t^2, t^3)/2 look like the Catmull-Rom basis -- NOTE(review): confirm
 * against the row/column convention of mat4f_apply.
 */
static px_linear sample_cubic(float t, px_linear n1, px_linear p0, px_linear p1, px_linear p2)
{
	const mat4f mat = {{{0.0f,-1.0f,2.0f,-1.0f},{2.0f,0.0f,-5.0f,3.0f},{0.0f,1.0f,4.0f,-3.0f},{0.0f,0.0f,-1.0f,1.0f}}};
	vec4f w = {0.5f, t/2, t*t/2, t*t*t/2};
	mat4f samples = {{n1.v,p0.v,p1.v,p2.v}};

	//regroup from one register per sample to one register per color channel
	_MM_TRANSPOSE4_PS(samples.col[0].m, samples.col[1].m, samples.col[2].m, samples.col[3].m);
	//per channel: apply the coefficient matrix, then weight each term by the powers of t
	samples.col[0].m = _mm_mul_ps(w.m, mat4f_apply(mat, samples.col[0]).m);
	samples.col[1].m = _mm_mul_ps(w.m, mat4f_apply(mat, samples.col[1]).m);
	samples.col[2].m = _mm_mul_ps(w.m, mat4f_apply(mat, samples.col[2]).m);
	samples.col[3].m = _mm_mul_ps(w.m, mat4f_apply(mat, samples.col[3]).m);
	//transpose back so that summing the four registers adds up the terms of each channel
	_MM_TRANSPOSE4_PS(samples.col[0].m, samples.col[1].m, samples.col[2].m, samples.col[3].m);

	px_linear out;
	out.v = _mm_add_ps(_mm_add_ps(samples.col[0].m, samples.col[1].m),
					   _mm_add_ps(samples.col[2].m, samples.col[3].m));
	return out;
}

/* Bicubic interpolation over a 4x4 neighbourhood of byte pixels.
 *  t, u  horizontal and vertical position inside the central cell
 * The sixteen pixel pointers are given row by row; within a row the order is
 * n1, p0, p1, p2 (left to right), and the rows follow the same order.
 * Each row is interpolated horizontally with t, then the four row results are
 * interpolated vertically with u. Pixels are converted to float with ipx2fpx first.
 */
static px_linear sample_bicubic(float t, float u,
							 px_srgb* n1n1, px_srgb* p0n1, px_srgb* p1n1, px_srgb* p2n1,
							 px_srgb* n1p0, px_srgb* p0p0, px_srgb* p1p0, px_srgb* p2p0,
							 px_srgb* n1p1, px_srgb* p0p1, px_srgb* p1p1, px_srgb* p2p1,
							 px_srgb* n1p2, px_srgb* p0p2, px_srgb* p1p2, px_srgb* p2p2)
{
	px_srgb* const taps[4][4] = {
		{n1n1, p0n1, p1n1, p2n1},
		{n1p0, p0p0, p1p0, p2p0},
		{n1p1, p0p1, p1p1, p2p1},
		{n1p2, p0p2, p1p2, p2p2},
	};

	//horizontal pass, one result per row
	px_linear rows[4];
	for(int r = 0; r < 4; r += 1) {
		px_linear s0 = ipx2fpx(*taps[r][0]);
		px_linear s1 = ipx2fpx(*taps[r][1]);
		px_linear s2 = ipx2fpx(*taps[r][2]);
		px_linear s3 = ipx2fpx(*taps[r][3]);
		rows[r] = sample_cubic(t, s0, s1, s2, s3);
	}

	//vertical pass
	return sample_cubic(u, rows[0], rows[1], rows[2], rows[3]);
}

/* Map a pixel index onto the valid range [0, dim).
 *  repeat  nonzero: indices outside the range wrap around (true modulo, any distance)
 *          zero:    indices outside the range are rejected
 *  dim     size of the axis
 *  x       index to map
 * Returns the mapped index, or -1 when the index is rejected (or dim is not positive).
 * Wrapping uses a real modulo: adding or subtracting dim only once is not enough for
 * small images, e.g. dim == 1 with x == 2 would otherwise yield an index past the end.
 */
static int correctidx(int repeat, int dim, int x)
{
	if(x >= 0 && x < dim) return x;
	if(!repeat || dim <= 0) return -1;
	x %= dim;
	if(x < 0) x += dim; //C's % keeps the sign of the dividend
	return x;
}

//all-zero pixels returned for coordinates outside a non-repeating image
static px_srgb izeropixel = {0};
static px_linear fzeropixel = {0.0f};

//Return a pointer to pixel (x, y) of a w x h image, after mapping both coordinates
//with correctidx. Coordinates that are rejected yield a pointer to the shared
//all-zero pixel instead.
static px_srgb* getpixelref(px_srgb* img, unsigned w, unsigned h, int repeat, int x, int y)
{
	const int col = correctidx(repeat, (int) w, x);
	const int row = correctidx(repeat, (int) h, y);
	if(col < 0 || row < 0) {
		return &izeropixel;
	}
	return img + row * w + col;
}

/* Sample the image at normalised coordinates (x, y), where [0,1] spans the image.
 * Without repeat, coordinates outside [0,1] return the all-zero pixel. The sample
 * is a bicubic interpolation of the 4x4 pixels around the sample point, using
 * pixel centers at half-integer positions.
 */
static px_linear bg_getpixel(bgdata* bg, float x, float y)
{
	if(!bg->repeat) {
		if(x < 0.0f || y < 0.0f || x > 1.0f || y > 1.0f) return fzeropixel;
	}

	//fractional part of the coordinate, in pixel units relative to pixel centers
	const float fx = (x - floorf(x)) * bg->w - 0.5f;
	const float fy = (y - floorf(y)) * bg->h - 0.5f;
	const int basex = (int) floorf(fx);
	const int basey = (int) floorf(fy);
	const float t = fx - basex;
	const float u = fy - basey;

	//gather the 4x4 neighbourhood, row by row, starting one pixel before the base pixel
	px_srgb* tap[16];
	for(int i = 0; i < 16; i += 1) {
		const int r = i / 4;
		const int c = i % 4;
		tap[i] = getpixelref((px_srgb*)bg->data, bg->w, bg->h, bg->repeat, basex + c - 1, basey + r - 1);
	}

	return sample_bicubic(t, u,
						  tap[0],  tap[1],  tap[2],  tap[3],
						  tap[4],  tap[5],  tap[6],  tap[7],
						  tap[8],  tap[9],  tap[10], tap[11],
						  tap[12], tap[13], tap[14], tap[15]);
}



/************
* Quad Import
*************/

static int import_make_tiles(bgdata* img, yrSceneFile* sf, unsigned target_w, unsigned target_h, float wnd_x0, float wnd_y0, float wnd_x1, float wnd_y1, uint32_t tiles[64]);
int yrQuadImport_single(const char* scenefile, const char* image, unsigned target_w, unsigned target_h)
{
	YR_ASSERT(target_w);
	YR_ASSERT(target_h);
	int err = ERRIMP_GENERIC;
	yrFile* f = NULL;
	size_t pnglen, rw;
	void* png = NULL;
	bgdata bg;
	uint32_t tiles[64];
	yrSceneFile* sf = NULL;
	vec4i offset = {0};
	yrQuad q = {0};
	//open and read image file
	f = yrFile_open(image, yrF_read);														if(!f)			{ err = ERRIMP_FILE; goto onerror;}
	pnglen = (size_t) yrFile_seek(f, 0, yrF_seekend);										if(!pnglen)		{ err = ERRIMP_FILE; goto onerror;}
	yrFile_seek(f, 0, yrF_seekset);
	png = malloc(pnglen);																	if(!png)		{ err = ERRIMP_MEM; goto onerror;}
	rw = yrFile_read(f, (int64_t) pnglen, png);												if(rw != pnglen){ err = ERRIMP_FILE; goto onerror;}
	yrFile_close(f);
	f = NULL;
	//scale with same algo as the background in export if required
	bg = bg_setup(pnglen, png, 0.0f, 1.0f, 0.0f, 1.0f, target_w, target_h, 0);				if(!bg.data)	{ err = ERRIMP_GENERIC; goto onerror;}
	free(png);
	png = NULL;
	//open scenefile
	sf = yrSceneFile_open(scenefile, 0);													if(!sf)			{ err = ERRIMP_GENERIC; goto onerror;}
	//get last position
	offset = yrVR_cold_get_coord_offset(sf);
	offset.y += YR_ACCURACY;
	//make tiles
	err = import_make_tiles(&bg, sf, target_w, target_h, 0.0f, 0.0f, 1.0f, 1.0f, tiles);	if(err < 0) goto onerror;
	uint32_t ins_ntiles = (uint32_t) err;
	err = 0;
	//make quad
	offset.z += YR_ACCURACY;
	q.v[0] = offset;
	q.v[1] = offset;
	q.v[2] = offset;
	q.v[1].x += (int32_t)(((target_w > 50)?target_w:50) * YR_ACCURACY / 1000.0f);
	q.v[2].z += (int32_t)(((target_h > 50)?target_h:50) * YR_ACCURACY / 1000.0f);
	q.uv_back[0][0] = 0.0f;
	q.uv_back[0][1] = 0.0f;
	q.uv_back[1][0] = 1.0f;
	q.uv_back[1][1] = 1.0f;
	q.flags = QUAD_VALID;
	q.color = 0xFFFFFFFF;
	//insert quad
	uint32_t ins_bg = 0;
	uint32_t* tptr = tiles;
	err = yrQuadStore_cold_insert(sf, 1, &q, &ins_bg, &ins_ntiles, &tptr, 1);				if(err) goto onerror;
	//done
	yrSceneFile_close(sf);
	bg_cleanup(&bg);
	return 0;
onerror:
	if(sf) yrSceneFile_close(sf);
	if(bg.data) bg_cleanup(&bg);
	if(png) free(png);
	if(f) yrFile_close(f);
	return err;
}

/* Resample an image into 256x256 tiles and store them in the scene file.
 *  img                 source image, sampled with bg_getpixel
 *  sf                  scene file receiving the tiles
 *  target_w, target_h  size in pixels of the area to cover
 *  wnd_x0..wnd_y1      image-space window that is mapped onto that area
 *  tiles               receives the ids of the written tiles in creation order
 *                      (tile rows outer, columns inner); unused slots are 0xFFFFFFFF
 * Returns the number of tiles written (>= 0), or an ERRIMP_* code on failure. On
 * failure every tile written so far is deleted again and `tiles` is reset.
 * Areas needing more than 64 tiles are rejected because `tiles` has 64 slots.
 */
static int import_make_tiles(bgdata* img, yrSceneFile* sf, unsigned target_w, unsigned target_h, float wnd_x0, float wnd_y0, float wnd_x1, float wnd_y1, uint32_t tiles[64])
{
	memset(tiles, -1, 64*sizeof(uint32_t));
	int err = ERRIMP_GENERIC;
	uint32_t tid = 0;
	int inserted = 0;
	unsigned char* png = NULL;
	size_t pngsize;
	uint32_t tile_w, tile_h;
	px_srgb* tile = NULL;
	px_linear e_side = {0};
	px_linear e_hold = {0};
	px_linear* e_row_base = NULL;	//allocation backing e_row; this is the pointer to check and free
	px_linear* e_row = NULL;		//e_row_base + 1, so that indices -1 and 256 are valid guard slots
	px_linear* e_superrow = NULL;
	px_linear* e_col = NULL;
	//calc dim
	tile_w = (target_w + 255)/256;
	tile_h = (target_h + 255)/256;
	int yoff = (tile_h*256) - target_h;
	if(tile_w > 64 || tile_h > 64 || tile_w * tile_h > 64) {
		yrLog(0, "Image needs more than 64 tiles");
		err = ERRIMP_GENERIC;
		goto onerror;
	}
	//alloc memory
	tile = malloc(256*256*sizeof(px_srgb));							if(!tile) {err = ERRIMP_MEM; goto onerror;}
	e_row_base = (px_linear*) malloc(258*sizeof(px_linear));		if(!e_row_base) {err = ERRIMP_MEM; goto onerror;}
	e_row = e_row_base + 1;
	e_superrow = (px_linear*) malloc(256*tile_w*sizeof(px_linear));	if(!e_superrow) {err = ERRIMP_MEM; goto onerror;}
	e_col = (px_linear*) malloc(257*sizeof(px_linear));				if(!e_col) {err = ERRIMP_MEM; goto onerror;}
	memset(e_row_base, 0, 258*sizeof(px_linear));
	memset(e_superrow, 0, 256*tile_w*sizeof(px_linear));
	//for each tile
	for(uint32_t ty = 0; ty < tile_h; ty += 1)
	{
		memset(e_col, 0, 257*sizeof(px_linear));
		for(uint32_t tx = 0; tx < tile_w; tx += 1)
		{
			//load error row
			e_row[-1].v = _mm_setzero_ps();
			e_row[256].v = _mm_setzero_ps();
			memcpy(e_row, e_superrow + 256*tx, 256 * sizeof(px_linear));
			//make tile
			for(int y = 0; y < 256; y += 1) {
				e_side = e_col[y];
				e_hold = e_row[0];
				for(int x = 0; x < 256; x += 1)
				{
					px_linear px, qerr;
					//pixel center in target space, then mapped into the image window
					float imgx = (x + tx * 256 + 0.5f) / target_w;
					float imgy = (y + (tile_h - ty - 1) * 256 - yoff + 0.5f) / target_h;
					imgx = imgx * (wnd_x1 - wnd_x0) + wnd_x0;
					imgy = imgy * (wnd_y1 - wnd_y0) + wnd_y0;
					px = bg_getpixel(img, imgx, imgy);
					px.v = _mm_add_ps(px.v, e_side.v);
					px.v = _mm_add_ps(px.v, e_hold.v);
					px.r *= px.a; //premultiply alpha
					px.g *= px.a;
					px.b *= px.a;
					tile[y*256 + x] = fpx2ipx(px, &qerr, 0);
					//hand the quantisation error to the neighbours
					e_hold = e_row[x+1];
					e_side.v =		_mm_mul_ps(qerr.v, _mm_set_ps1(7.0f/16));
					e_row[x-1].v = 	_mm_mul_ps(qerr.v, _mm_set_ps1(3.0f/16));
					e_row[x+0].v = 	_mm_mul_ps(qerr.v, _mm_set_ps1(5.0f/16));
					e_row[x+1].v = 	_mm_mul_ps(qerr.v, _mm_set_ps1(1.0f/16));
				}
				e_col[y] = e_row[256];
			}
			//encode tile
			const char* errtxt = yrImgLoad_encode(&pngsize, &png, 256, 256, tile);
			if(errtxt) {
				yrLog(0, "Tile encode failed: %s", errtxt);
				err = ERRIMP_GENERIC;
				goto onerror;
			}
			//save tile under the first unused id
			while(yrSFTile_exists(sf, tid)) tid += 1;
			err = yrSFTile_write(sf, tid, pngsize, png);		if(err) {err = ERRIMP_GENERIC; goto onerror;}
			free(png);
			png = NULL;
			tiles[inserted] = tid;
			inserted += 1;
			//save error row to superrow
			memcpy(e_superrow + 256*tx, e_row, 256 * sizeof(px_linear)); //little lost pixel but good enough
		}
	}
	//done
	free(tile);
	free(e_superrow);
	free(e_row_base);
	free(e_col);
	return inserted;
onerror:
	//free memory
	free(png);
	free(tile);
	free(e_superrow);
	free(e_row_base);
	free(e_col);
	//delete inserted tiles
	for(int i = 0; i < inserted; i += 1)
		yrSFTile_delete(sf, tiles[i]);
	memset(tiles, -1, 64*sizeof(uint32_t));
	return err;
}

/*************
* Multi Import
**************/
/* Parse a decimal number ("12", "-3.25", ".5") into fixed point with YR_ACCURACY
 * units per 1.0.
 *  line  in: start of the number; out: first character after the number
 *        (always a valid pointer into the same string)
 * At most nine fractional digits contribute to the value; further digits are
 * consumed but ignored. The fraction is truncated towards zero. Results that do
 * not fit in 32 bits are clamped to INT32_MIN/INT32_MAX.
 */
static int32_t strtofixed(const char** line)
{
	const char* s = *line;
	const int negative = (*s == '-');
	if(negative) s += 1;

	//whole part
	char* end = NULL;
	long whole = strtol(s, &end, 10);
	s = end;

	//fraction, parsed digit by digit so that leading zeros count and nothing overflows
	int64_t frac = 0;
	if(*s == '.') {
		int64_t digits = 0;
		int64_t scale = 1;
		s += 1;
		while(*s >= '0' && *s <= '9') {
			if(scale < 1000000000) {
				digits = digits * 10 + (*s - '0');
				scale *= 10;
			}
			s += 1;
		}
		frac = digits * (int64_t) YR_ACCURACY / scale;
	}
	*line = s;

	//combine in 64 bit, then saturate to the 32 bit result
	int64_t value = (int64_t) whole * (int64_t) YR_ACCURACY + ((whole < 0) ? -frac : frac);
	if(negative) value = -value;
	if(value > INT32_MAX) value = INT32_MAX;
	if(value < INT32_MIN) value = INT32_MIN;
	return (int32_t) value;
}

/* Extract the value of the "img=" field from an import line.
 * A value that starts with a double quote runs to the next double quote, otherwise
 * it runs to the next space; in both cases it runs to the end of the line when the
 * terminator is missing.
 * Returns a malloc'ed copy the caller must free, or NULL when the field is absent,
 * empty, or memory is exhausted.
 */
static char* mipl_image(const char* line)
{
	const char* start = strstr(line, "img=");
	if(!start) return NULL;
	start += 4;

	//pick the terminator and skip the opening quote if there is one
	char stop = ' ';
	if(*start == '\"') {
		stop = '\"';
		start += 1;
	}
	const char* end = strchr(start, stop);
	if(!end) end = start + strlen(start);

	const size_t len = (size_t)(end - start);
	if(len == 0) return NULL;

	char* copy = malloc(len + 1);
	if(!copy) { yrLog(0, "Out of memory"); return NULL;}
	memcpy(copy, start, len);
	copy[len] = 0;
	return copy;
}

/* Read the four image margins from an import line.
 * The fields "il=", "ir=", "it=" and "ib=" are looked up anywhere in the line and
 * parsed with strtof into *il, *ir, *it and *ib; a margin whose field is missing
 * is set to 0.
 */
static void mipl_imargin(const char* line, float* il, float* ir, float* it, float* ib)
{
	const char* const keys[4] = {"il=", "ir=", "it=", "ib="};
	float* const dest[4] = {il, ir, it, ib};

	for(int i = 0; i < 4; i += 1) {
		const char* hit = strstr(line, keys[i]);
		*dest[i] = hit ? strtof(hit + 3, NULL) : 0.0f;
	}
}

/* Read the quad position from an import line.
 * The fields "px=", "py=" and "pz=" are parsed with strtofixed; a missing field
 * yields 0 for that axis. w is set to YR_ACCURACY. Every axis is clamped to
 * +-(INT32_MAX/2 - 2*YR_ACCURACY).
 * The limit is derived from INT32_MAX because the components are 32 bit; LONG_MAX
 * is only the same value on platforms where long is 32 bit.
 */
static vec4i mipl_pos(const char* line)
{
	vec4i vec;
	const char* foo = NULL;

	foo = strstr(line, "px=");
	if(foo) {
		foo += 3;
		vec.x = strtofixed(&foo);
	} else {
		vec.x = 0;
	}
	foo = strstr(line, "py=");
	if(foo) {
		foo += 3;
		vec.y = strtofixed(&foo);
	} else {
		vec.y = 0;
	}
	foo = strstr(line, "pz=");
	if(foo) {
		foo += 3;
		vec.z = strtofixed(&foo);
	} else {
		vec.z = 0;
	}
	vec.w = YR_ACCURACY;

	//keep the position away from the ends of the 32 bit range
	const int32_t maxpos = (INT32_MAX / 2 - YR_ACCURACY * 2);
	if(vec.x > maxpos) vec.x = maxpos;
	if(vec.y > maxpos) vec.y = maxpos;
	if(vec.z > maxpos) vec.z = maxpos;
	if(vec.x < -maxpos) vec.x = -maxpos;
	if(vec.y < -maxpos) vec.y = -maxpos;
	if(vec.z < -maxpos) vec.z = -maxpos;
	return vec;
}

/* Read one edge vector from an import line.
 *  line  the import line
 *  sig   writable 4-character field name such as "e0x="; its third character is
 *        overwritten with 'x', 'y' and 'z' in turn (and is left at 'z')
 * Each component is parsed with strtofixed; a missing component defaults to
 * YR_ACCURACY. The result is returned as floats with w = 0, scaled down to a length
 * of 2*YR_ACCURACY when it is longer than that.
 */
static vec4f mipl_edge(const char* line, char* sig)
{
	static const char axis[3] = {'x', 'y', 'z'};
	int32_t comp[3];

	for(int i = 0; i < 3; i += 1) {
		sig[2] = axis[i];
		const char* hit = strstr(line, sig);
		if(hit) {
			hit += 4; //skip the field name
			comp[i] = strtofixed(&hit);
		} else {
			comp[i] = YR_ACCURACY;
		}
	}

	vec4i vec;
	vec.x = comp[0];
	vec.y = comp[1];
	vec.z = comp[2];
	vec.w = 0;

	//limit the edge length
	vec4f edge = vec4f_from_vec4i(vec);
	const float len = vec3f_length(edge);
	if(len > YR_ACCURACY * 2) {
		edge = vec4f_mul(2 * YR_ACCURACY / len, edge);
	}
	return edge;
}

//Value (0..15) of one hexadecimal digit character, upper or lower case.
//Any other character logs a warning and counts as 0.
static unsigned hexchartonum(int c)
{
	//a character below the start of a range wraps to a huge unsigned value and fails the test
	const unsigned dec = (unsigned)(c - '0');
	if(dec <= 9) return dec;

	const unsigned upper = (unsigned)(c - 'A');
	if(upper <= 5) return upper + 10;

	const unsigned lower = (unsigned)(c - 'a');
	if(lower <= 5) return lower + 10;

	yrLog(1, "Invalid color value detected");
	return 0;
}

/* Read the quad color from the "clr=" field of an import line.
 * Accepted forms, with any number of leading '#': RGB, RGBA, RRGGBB and RRGGBBAA
 * (hexadecimal). Red ends up in the lowest byte, alpha in the highest.
 * Forms without an alpha component are fully opaque. A missing field yields opaque
 * white; a value of any other length logs a warning and yields 0.
 */
static yrColor mipl_color(const char* line)
{
	const char* s = NULL;
	s = strstr(line, "clr=");
	if(!s) return 0xFFFFFFFFul;
	s += 4;
	while(*s == '#') s += 1;
	uint32_t out = 0;
	size_t len = strspn(s, "0123456789abcdefABCDEF");
	if(len == 3 || len == 4) {
		//one digit per channel, each digit is doubled (0xF -> 0xFF)
		out |= (hexchartonum(s[0]) * 0x11) << 0;
		out |= (hexchartonum(s[1]) * 0x11) << 8;
		out |= (hexchartonum(s[2]) * 0x11) << 16;
		if(len == 4) {
			out |= (hexchartonum(s[3]) * 0x11) << 24;
		} else {
			out |= 0xFFu << 24;
		}
	}
	else if(len == 6 || len == 8) {
		//two digits per channel, high nibble first
		out |= hexchartonum(s[0]) << 4;
		out |= hexchartonum(s[1]) << 0;
		out |= hexchartonum(s[2]) << 12;
		out |= hexchartonum(s[3]) << 8;
		out |= hexchartonum(s[4]) << 20;
		out |= hexchartonum(s[5]) << 16;
		if(len == 8) {
			out |= hexchartonum(s[6]) << 28;
			out |= hexchartonum(s[7]) << 24;
		} else {
			//no alpha given: opaque, same as the three digit form
			out |= 0xFFu << 24;
		}
	}
	else yrLog(1, "Invalid color value detected");
	return out;
}

/* Load an image file and prepare it for resampling to w x h pixels (rounded up).
 *  img   path of the image file
 *  w, h  size in pixels the image will be mapped onto
 * Returns the prepared image; its `data` is NULL when the file could not be opened,
 * is empty, could not be read completely, or could not be decoded.
 */
static bgdata mipl_prepimg(const char* img, float w, float h)
{
	bgdata bg = {0};

	yrFile* f = yrFile_open(img, yrF_read);
	if(!f) return bg;

	//read the whole file into memory
	void* png = NULL;
	size_t pnglen = (size_t) yrFile_seek(f, 0, yrF_seekend);
	if(pnglen) {
		yrFile_seek(f, 0, yrF_seekset);
		png = malloc(pnglen);
		if(png) {
			size_t got = yrFile_read(f, (int64_t) pnglen, png);
			if(got != pnglen) {
				free(png);
				png = NULL;
			}
		}
	}
	yrFile_close(f);
	if(!png) return bg;

	//scale with same algo as the background in export if required
	bg = bg_setup(pnglen, png, 0.0f, 1.0f, 0.0f, 1.0f, (unsigned) ceilf(w), (unsigned) ceilf(h), 0);
	free(png);
	return bg;
}

/*
 * Build one quad from a single line of a multi-import description file.
 *
 * line       NUL-terminated description line (attributes such as "e0x=", "clr=")
 * sf         scene file the generated image tiles are added to
 * tilecount  receives the number of 256px tiles covering the quad (at most 8x8)
 * tiles      receives the tile ids written by import_make_tiles(); only
 *            touched when the line references an image
 *
 * The returned quad has QUAD_VALID set in `flags` only on success. When the
 * referenced image cannot be loaded or tiled, the quad is returned without
 * that flag and the caller is expected to discard it.
 */
static yrQuad multiimport_parseline(const char* line, yrSceneFile* sf, uint32_t* tilecount, uint32_t tiles[64])
{
	yrQuad out = {0};
	out.uv_back[1][0] = 1.0f;
	out.uv_back[1][1] = 1.0f;
	//position
	char sig[5] = "e0x=";
	vec4f e0 = mipl_edge(line, sig);
	sig[1] = '1';	//same lookup for the second edge ("e1x=")
	vec4f e1 = mipl_edge(line, sig);
	vec4f normal = vec3f_cross(e0, e1); //calculate normal to force a 90 degree angle between e0 and e1
	if(vec3f_dot(normal, normal) < 0.00001f) {
		//edges are (nearly) parallel: perturb e0's direction to get a usable normal
		normal = vec3f_normalized(e0);
		if(fabs(normal.x) > fabs(normal.y)) normal.y = 1.0f;
		else normal.x = 1.0f;
		normal = vec3f_cross(e0, normal);
	}
	normal = vec3f_normalized(normal);
	//keep e1's length but make it perpendicular to e0 within the quad's plane
	e1 = vec4f_mul(vec3f_length(e1), vec3f_normalized(vec3f_cross(normal, e0)));
	out.v[0] = mipl_pos(line);
	out.v[1] = vec4i_add(out.v[0], vec4i_from_vec4f(e0));
	out.v[2] = vec4i_add(out.v[0], vec4i_from_vec4f(e1));
	//color
	out.color = mipl_color(line);
	//imagereposition
	float il, ir, it, ib;
	mipl_imargin(line, &il, &ir, &it, &ib);
	float iw = vec3f_length(e0)/YR_ACCURACY;
	float ih = vec3f_length(e1)/YR_ACCURACY;
	unsigned quad_pxw = (unsigned)ceilf(iw * 256.0f*UV_PER_METER);
	unsigned quad_pxh = (unsigned)ceilf(ih * 256.0f*UV_PER_METER);
	unsigned tile_w = (quad_pxw + 255)/256;
	unsigned tile_h = (quad_pxh + 255)/256;
	if(tile_w > 8) tile_w = 8;
	if(tile_h > 8) tile_h = 8;
	*tilecount = tile_w * tile_h;
	//margins that would swallow the whole quad are dropped
	if(il >= iw) il = 0.0f;
	if(ir >= iw) ir = 0.0f;
	if(it >= ih) it = 0.0f;
	if(ib >= ih) ib = 0.0f;
	if(il + ir >= iw) il = 0.0f;
	if(it + ib >= ih) it = 0.0f;
	//image
	char* img = mipl_image(line);
	if(img) {
		//prepare image for rescaling
		float bgw = iw - (il + ir);
		float bgh = ih - (it + ib);
		bgdata bg = mipl_prepimg(img, bgw * 256.0f*UV_PER_METER, bgh * 256.0f*UV_PER_METER);
		free(img);	//the path is not needed anymore; release it before any early return
		if(!bg.data) return out;

		//make tiles and add to file
		float wnd_x0 = -il / bgw;
		float wnd_y0 = -ib / bgh;
		float wnd_x1 = 1.0f + ir / bgw;
		float wnd_y1 = 1.0f + it / bgh;
		int err = import_make_tiles(&bg, sf, quad_pxw, quad_pxh, wnd_x0, wnd_y0, wnd_x1, wnd_y1, tiles);
		bg_cleanup(&bg);
		if(err <= 0) return out;
	}
	//done
	out.flags |= QUAD_VALID;
	return out;
}

//Growable collection of parsed quads plus the tile ids belonging to each of
//them. The per-quad arrays (q, bg, tcount, tiles) share one capacity and one
//fill position; the tile ids of all quads live in a single shared buffer.
struct quadlist
{
	size_t cap;			//allocated number of entries in q, bg, tcount and tiles
	size_t pos;			//number of entries currently in use
	yrQuad* q;			//the quads themselves
	uint32_t* bg;		//per-quad value, set to 0 by ql_add (meaning defined by yrQuadStore_cold_insert -- confirm there)
	uint32_t* tcount;	//per-quad number of tile ids
	uint32_t** tiles;	//per-quad pointer to its first tile id inside tstore
	size_t tcap;		//allocated number of uint32_t elements in tstore
	size_t tpos;		//number of elements of tstore currently in use
	uint32_t* tstore;	//shared backing storage for all tile ids
};

static struct quadlist ql_setup(void)
{
	struct quadlist ql;
	ql.cap = 16;
	ql.pos = 0;
	ql.tcap = 8 * 64;
	ql.tpos = 0;
	ql.q =		malloc(ql.cap * sizeof(yrQuad));
	ql.bg =		malloc(ql.cap * sizeof(uint32_t));
	ql.tcount =	malloc(ql.cap * sizeof(uint32_t));
	ql.tiles =	malloc(ql.cap * sizeof(uint32_t*));
	ql.tstore =	malloc(ql.tcap * sizeof(uint32_t));
	if(!ql.q || !ql.bg || !ql.tcount || !ql.tiles || !ql.tstore)
	{
		free(ql.q);
		free(ql.bg);
		free(ql.tcount);
		free(ql.tiles);
		free(ql.tstore);
		ql.q = NULL;
		ql.bg = NULL;
		ql.tcount = NULL;
		ql.tiles = NULL;
		ql.tstore = NULL;
	}
	return ql;
}

/*
 * Enlarge the per-quad arrays (q, bg, tcount, tiles) to 1.5x their capacity.
 *
 * Returns 0 on success and -1 when any reallocation fails. On failure the
 * list stays fully usable at its old capacity: arrays that were already
 * moved by realloc are kept (realloc has released their old storage, so the
 * old pointers must not be used or freed again), and `cap` is left unchanged.
 */
static int ql_grow(struct quadlist* ql)
{
	size_t newcap = ql->cap + (ql->cap >> 1);
	int failed = 0;

	//store every successful result right away so ql never holds a stale pointer
	yrQuad* newq = realloc(ql->q, newcap * sizeof(yrQuad));
	if(newq) ql->q = newq; else failed = 1;

	uint32_t* newbg = realloc(ql->bg, newcap * sizeof(uint32_t));
	if(newbg) ql->bg = newbg; else failed = 1;

	uint32_t* newtcount = realloc(ql->tcount, newcap * sizeof(uint32_t));
	if(newtcount) ql->tcount = newtcount; else failed = 1;

	uint32_t** newtiles = realloc(ql->tiles, newcap * sizeof(uint32_t*));
	if(newtiles) ql->tiles = newtiles; else failed = 1;

	if(failed)
	{
		yrLog(0, "Out of memory");
		return -1;
	}
	ql->cap = newcap;
	return 0;
}

/*
 * Enlarge the shared tile id buffer (tstore) to 1.5x its capacity and
 * re-point every quad's `tiles` entry into the new buffer.
 *
 * Returns 0 on success. Returns -1 when the reallocation fails, in which case
 * the list is left untouched.
 */
static int ql_tgrow(struct quadlist* ql)
{
	size_t newtcap = ql->tcap + (ql->tcap >> 1);
	uint32_t* newtstore = realloc(ql->tstore, newtcap * sizeof(uint32_t));
	if(!newtstore)
	{
		yrLog(0, "Out of memory");
		return -1;
	}
	//The old buffer may be gone, so its address cannot be used to rebase the
	//per-quad pointers. ql_add stores the tile ids of consecutive quads back
	//to back, so each quad's offset is the sum of the preceding tile counts.
	size_t offset = 0;
	for(size_t i = 0; i < ql->pos; i += 1) {
		ql->tiles[i] = newtstore + offset;
		offset += ql->tcount[i];
	}
	ql->tstore = newtstore;
	ql->tcap = newtcap;
	return 0;
}

/*
 * Release all storage owned by the list. The struct itself is not modified,
 * so the list must not be used for anything but reading its counters afterwards.
 */
static void ql_cleanup(struct quadlist* ql)
{
	//shared tile buffer first, then the per-quad arrays
	free(ql->tstore);
	free(ql->tiles);
	free(ql->tcount);
	free(ql->bg);
	free(ql->q);
}

/*
 * Append a quad and a copy of its tile ids to the list.
 *
 * q       quad to store
 * tcount  number of entries in `tiles`
 * tiles   tile ids to copy into the list's shared tile buffer
 *
 * Returns 0 on success, or the non-zero result of ql_grow / ql_tgrow when the
 * list could not be enlarged; in that case nothing is added.
 */
static int ql_add(struct quadlist* ql, yrQuad q, uint32_t tcount, uint32_t* tiles)
{
	//make room for one more quad
	if(ql->pos == ql->cap) {
		int grow_err = ql_grow(ql);
		if(grow_err) return grow_err;
	}
	//make room for its tile ids
	if(ql->tpos + tcount > ql->tcap) {
		int tgrow_err = ql_tgrow(ql);
		if(tgrow_err) return tgrow_err;
	}

	size_t slot = ql->pos;
	uint32_t* dst = ql->tstore + ql->tpos;
	memcpy(dst, tiles, tcount * sizeof(uint32_t));

	ql->q[slot] = q;
	ql->bg[slot] = 0;
	ql->tcount[slot] = tcount;
	ql->tiles[slot] = dst;

	ql->pos = slot + 1;
	ql->tpos += tcount;
	return 0;
}

/*
 * Import every board described in `importfile` into the scene `scenefile`.
 *
 * The import file is read completely into memory and processed line by line
 * (lines are separated by '\r' and/or '\n', empty lines are skipped). Every
 * line is parsed into a quad; lines that cannot be turned into a valid quad
 * are reported on stdout and skipped. All valid quads are then inserted into
 * the scene file in one call.
 *
 * Returns 0 on success, otherwise one of the ERRIMP_* codes or the error
 * returned by yrQuadStore_cold_insert.
 */
int yrQuadImport_multi(const char* scenefile, const char* importfile)
{
	int err = 0;
	yrSceneFile* sf  = NULL;
	yrFile* f = NULL;
	char* fdata = NULL;
	size_t nlines = 0;
	int64_t flen = 0;
	size_t rw = 0;
	struct quadlist ql = {0};
	err = yrImgLoad_init();								if(err)			{ err = ERRIMP_GENERIC; goto onerror; }
	ql = ql_setup();									if(!ql.q)		{ yrLog(0,"Out of memory"); err = ERRIMP_MEM; goto onerror; }
	//open scenefile
	sf = yrSceneFile_open(scenefile, 0);				if(!sf)			{ err = ERRIMP_GENERIC; goto onerror; }
	//open import and get data
	f = yrFile_open(importfile, yrF_read);				if(!f)			{ err = ERRIMP_GENERIC; goto onerror; }
	flen = yrFile_seek(f, 0, yrF_seekend);
	if(flen < 0)	{ yrLog(0,"Could not determine size of import file, aborting"); err = ERRIMP_FILE; goto onerror; }
	if(!flen)		{ yrLog(0,"Empty import file, aborting"); err = ERRIMP_FILE; goto onerror; }
	//two extra bytes: the loop below steps one past the end of the last line
	fdata = malloc((size_t)flen+2);						if(!fdata)		{ yrLog(0,"Out of memory"); err = ERRIMP_MEM; goto onerror; }
	yrFile_seek(f, 0, yrF_seekset);
	rw = yrFile_read(f, flen, fdata);					if(rw != (size_t)flen)	{ err = ERRIMP_FILE; goto onerror; }
	yrFile_close(f);
	f = NULL;
	fdata[flen+0] = 0;
	fdata[flen+1] = 0;
	//parse lines -> build quad, insert tiles
	char* line_start = fdata;
	char* line_end = fdata;
	while(*line_start) {
		line_end = strpbrk(line_start, "\r\n");
		if(!line_end) line_end = strchr(line_start, 0);
		if(line_end != line_start) {
			*line_end = 0;
			nlines += 1;
			printf("Processing line #%zu.\n", nlines);
			uint32_t tcount = 0;
			uint32_t tiles[64];
			memset(tiles, -1, sizeof(tiles));
			yrQuad newq = multiimport_parseline(line_start, sf, &tcount, tiles);
			if(newq.flags & QUAD_VALID) {
				int qlerr = ql_add(&ql, newq, tcount, tiles);
				if(qlerr) newq.flags = 0;
			}
			if(!(newq.flags & QUAD_VALID)) {
				printf("Board #%zu could not be added.\n", nlines);
			}
		}
		line_start = line_end + 1;
	}
	free(fdata);
	fdata = NULL;	//the error path below frees fdata again; avoid a double free
	//insert all quads
	err = yrQuadStore_cold_insert(sf, ql.pos, ql.q, ql.bg, ql.tcount, ql.tiles, 0);		if(err) goto onerror;
	ql_cleanup(&ql);
	yrSceneFile_close(sf);
	//done
	yrImgLoad_shutdown();
	printf("Successfully added %zu boards out of %zu described in '%s' to '%s'.", ql.pos, nlines, importfile, scenefile);
	return 0;
onerror:
	free(fdata);
	if(f) yrFile_close(f);
	if(sf) yrSceneFile_close(sf);
	ql_cleanup(&ql);
	yrImgLoad_shutdown();
	return err;
}