#version 430


// Rotates p by angle a (radians) in the XZ plane.
// The y and w components are passed through unchanged.
vec4 rotateXZ(vec4 p, float a) {
  float c = cos(a);
  float s = sin(a);
  return vec4(c*p.x - s*p.z, p.y, s*p.x + c*p.z, p.w);
}

// vec3 variant of rotateXZ: rotates p by angle a (radians) in the XZ plane,
// leaving y untouched.
vec3 rotateXZ3(vec3 p, float a) {
  float c = cos(a);
  float s = sin(a);
  return vec3(c*p.x - s*p.z, p.y, s*p.x + c*p.z);
}

// Rotates p by angle a (radians) in the XY plane.
// The z and w components are passed through unchanged.
vec4 rotateXY(vec4 p, float a) {
  float c = cos(a);
  float s = sin(a);
  return vec4(c*p.x - s*p.y, s*p.x + c*p.y, p.z, p.w);
}

// Rotates p by angle a (radians) in the YZ plane.
// The x and w components are passed through unchanged.
vec4 rotateYZ(vec4 p, float a) {
  float c = cos(a);
  float s = sin(a);
  return vec4(p.x, c*p.y - s*p.z, s*p.y + c*p.z, p.w);
}

// Geometry stage interface: one input triangle per invocation, output as
// triangle strips with at most 32 vertices per invocation.
layout(triangles) in;
layout(triangle_strip, max_vertices = 32) out;

// Per-vertex inputs, one entry per triangle corner. main() reads the
// per-primitive state from corner 0:
//   origCenter: xyz is the pivot used for scaling, w is the branches-done count
//   genInfo:    y is the age counter (advanced by g_timeStep*1000.0 each pass)
//   emitInfo:   x emit time, y emit time counter, z branch level, w times processed with emit
//   branchInfo: xyz branch main direction, w branch time counter
in vec3 position[3];
in vec3 normal[3];
in vec2 uv[3];
in vec3 tangent[3];
in vec4 origCenter[3];
in vec4 genInfo[3];
in vec4 emitInfo[3];
in vec4 branchInfo[3];

// Earlier, unpacked float outputs; kept for reference only.
//out lowp vec3 posG;
//out lowp vec3 normalG;
//out lowp vec2 uvG;
//out lowp vec3 tangentG;
//out lowp vec3 origCenterG;
//out lowp vec4 genInfoG;
//out lowp vec4 emitInfoG;
//out lowp vec4 branchInfoG;
// Active outputs: each attribute is packed into 16-bit halves by doPack()
// (two floats per uint via packHalf2x16).
// NOTE(review): presumably captured with transform feedback — confirm on the host side.
out uvec2 positionGI;
out uvec2 normalGI;
out uint uvGI;
out uvec2 tangentGI;
out uvec2 origCenterGI;
out uvec2 genInfoGI;
out uvec2 emitInfoGI;
out uvec2 branchInfoGI;

// Current time; enters the phase of the sin/cos motion terms.
uniform float g_time;
// Time step of this pass. main() multiplies it by 1000.0 before adding it to the
// millisecond counters, and adds it unscaled to the branch time counter.
uniform float g_timeStep;
// Not referenced by the code in this file.
uniform float g_emit = 1.0;

// Not referenced by the code in this file.
uniform float windowWidth;
uniform float windowHeight;


// Interval between emitted replicas; the emit time counter is compared against it.
uniform float g_emitRateMS = 40.0;

// Triangles with any edge longer than this are dropped.
uniform float g_discardEdgeLen = 50.0;
// Interval between side branches; divided by 1000.0 before it is compared with
// the branch time counter.
uniform float g_branchRateMS = 500.0;
// Variation of the branch interval. Currently has no effect on the branching
// test in main(), which uses g_branchRateMS directly.
uniform float g_branchRateVar = 100.0;
uniform float g_branchRateVarFreq = 10.0;
// A segment may branch while branchesDone < g_branchMax - branchLevel.
uniform float g_branchMax = 6.0;
// A segment may branch while its branch level is below this value.
uniform float g_maxLevel = 3.0;
// Triangles whose age exceeds g_maxAge*1000.0 are dropped.
uniform float g_maxAge = 10.0;
// Triangles whose emit time exceeds g_maxEmitTime*1000.0 are dropped.
uniform float g_maxEmitTime = 10.0;
// Base frequency of the movement oscillation (getMoveSpeed / movePos).
uniform float g_branchMoveFreq = 1.0;
// Strength of the time-varying frequency wobble in movePos.
uniform float g_branchMoveFreqVar = 1.0;
// Growth factor per 0.05 of time step, applied by scaPos / rotPos.
uniform float g_branchGrowScale = 1.002;
// Amplitude, frequency and phase of the sinusoidal variation added to the growth factor.
uniform float g_branchGrowScaleVar = 0.0;
uniform float g_branchGrowScaleVarFreq = 1.0;
uniform float g_branchGrowScaleVarPhase = 0.0;

// Branch movement. x: oscillation amplitude used by getMoveSpeed();
// yzw: per-axis multipliers that main() copies into calcEmitMove.
uniform vec4 g_branchMove = vec4(0.08, 1.0, 1.0, 1.0);
// Palette-driven axis shuffle of the yzw multipliers. x: enables the lookup when
// > 0.0 and scales the sampled value; y: multiplies the age in the lookup coordinate.
uniform vec4 g_branchMovePal = vec4(0.0, 0.0, 0.0, 0.0);
// Lookup coordinate terms. x: time frequency, z: extra time frequency per branch
// level, y: constant offset, w: extra offset per branch level.
uniform vec4 g_branchMovePalFreqOfs = vec4(1.0, 0.0, 0.0, 0.0);
// Added to the amplitude per branch level (side branches only).
uniform float g_branchMovePerLevel = -0.005;
// Multiplied with g_branchScalePerLevel and raised to the branch level to scale
// the side-branch amplitude.
uniform float g_branchMoveMulLevel = 1.0;

// Constant offset added to the movement amplitude, plus its per-level increment.
uniform float g_branchMoveOfs = 0.10;
uniform float g_branchMoveOfsPerLevel = -0.003;

// Scale applied to a new side-branch triangle around its center; also used as a
// power base for the branch direction length and the side-branch amplitude.
uniform float g_branchScalePerLevel = 0.5;

// Gains of the two sway rotations applied in movePos, and their per-level increments.
uniform float g_branchRot1=1.0;
uniform float g_branchRot2=1.0;
uniform float g_branchRot1Level=1.0;
uniform float g_branchRot2Level=1.0;

// Angle of the child branch: 0.0 is 90 degrees from the parent's direction, 1.0 is along the parent's direction and -1.0 is directly against it.
// .x: angle, .y: angleVar, .z: angleVarFreq
// Not used by active code in this file.
uniform vec4 g_branchAngle = vec4(0.0, 0.0, 0.0, 0.0);

// Primitives with a vertex farther than this from the origin are abandoned.
uniform float g_maxDist = 100.0;


// Blend weight between two directions (see movePos and main), plus its per-level increment.
uniform float g_rotAmount = 0.5;
uniform float g_rotAmountLevel = 0.0;

// Not used by active code in this file.
uniform float g_ageFadeMS = 100.0;
uniform float g_emitTimeFadeMS = 100.0;
uniform float g_discardLenFadeMS = 100.0;


// Not used by active code in this file.
uniform mat4 projectionMatrix;
uniform mat4 modelViewMatrix;


// scaPos stops growing a vertex as its distance from the center approaches
// g_maxSca; the fade starts at g_maxSca*(1.0-g_maxScaSmooth).
uniform float g_maxSca = 0.20;
uniform float g_maxScaSmooth = 0.5;



// tex is not sampled in this file; tex3 is sampled (green channel) by main()
// when g_branchMovePal.x > 0.0.
layout(binding=0) uniform sampler2D tex;
layout(binding=3) uniform sampler2D tex3;


// Unpacks four half floats: v.x holds xy, v.y holds zw.
vec4 up4(uvec2 v) {
    return vec4(unpackHalf2x16(v.x), unpackHalf2x16(v.y));
}
// Unpacks three half floats: v.x holds xy, the low half of v.y holds z.
vec3 up3(uvec2 v) {
    vec2 xy = unpackHalf2x16(v.x);
    vec2 zw = unpackHalf2x16(v.y);
    return vec3(xy, zw.x);
}
// Unpacks two half floats from one 32-bit word.
vec2 up2(uint v) {
    vec2 halves = unpackHalf2x16(v);
    return halves;
}

// Packs two floats as half floats into one 32-bit word.
uint p2(vec2 v) {
    uint word = packHalf2x16(v);
    return word;
}
// Packs four floats as half floats: xy into .x, zw into .y.
uvec2 p4(vec4 v) {
    return uvec2(packHalf2x16(v.xy), packHalf2x16(v.zw));
}
// Packs three floats as half floats: xy into .x, z (padded with 0.0) into .y.
uvec2 p3(vec3 v) {
    return uvec2(packHalf2x16(v.xy), packHalf2x16(vec2(v.z, 0.0)));
}

// Angle of the point (x, y) that avoids dividing by zero.
//   x > 0  : atan(y/x)
//   x < 0  : atan(y/x) + pi
//   x == 0 : +pi/2 when y >= 0, otherwise -pi/2
// Uses the truncated constant 3.141592 for pi. Note that the x < 0 case yields
// values in (pi/2, 3*pi/2), which is not the same range as two-argument atan().
float atanSafe(float y, float x) {
    const float PI_APPROX = 3.141592;
    if (x != 0.0) {
        float base = atan(y/x);
        return (x > 0.0) ? base : base + PI_APPROX;
    }
    return (y >= 0.0) ? 0.5*PI_APPROX : -0.5*PI_APPROX;
}

// Rotates vec around axis by alpha (radians) using Rodrigues' formula:
//   axis*(axis.vec)*(1-cos) + vec*cos + (axis x vec)*sin
// axis is used as given; the formula only describes a rotation when it has
// unit length, so callers must normalize it themselves.
vec3 rotAroundAxis(vec3 vec, vec3 axis, float alpha) {
  float ca = cos(alpha);
  float sa = sin(alpha);
  float along = dot(axis, vec);
  return axis*along*(1.0-ca) + vec*ca + cross(axis, vec)*sa;
}

// Movement amplitude for a segment of the given branch level, evaluated at
// time g_time + dt.
//   branchLevel < 0.5 (main branch): g_branchMove.x * sin(...) + g_branchMoveOfs
//   otherwise (side branch): amplitude and offset get per-level terms and the
//   oscillation frequency is multiplied by the level.
// Both cases are scaled by pow(1.3, (g_timeStep+dt)/0.05).
float getMoveSpeed(float branchLevel, float dt) {
    float t = g_time+dt;
    float stepScale = pow(1.3, (g_timeStep+dt)/0.05);

    if (branchLevel < 0.5) {
        float trunkAmp = g_branchMove.x*sin(t*0.5*g_branchMoveFreq)+g_branchMoveOfs;
        return trunkAmp*stepScale;
    }

    float levelAmp = (g_branchMove.x+g_branchMovePerLevel*branchLevel)*pow(g_branchMoveMulLevel*g_branchScalePerLevel,branchLevel);
    float sideAmp = levelAmp*sin(t*0.5*2.0*branchLevel*g_branchMoveFreq)+g_branchMoveOfs+g_branchMoveOfsPerLevel*branchLevel;
    return sideAmp*stepScale;
}

// Per-axis multipliers applied to every movement vector produced by movePos().
// Starts at zero; main() fills it from g_branchMove.yzw before emitting replicas.
vec3 calcEmitMove = vec3(0.0);


// Returns p displaced by the movement a segment makes at time g_time + dt.
//   p          position to move
//   oc         center whose coordinates drive the sway phase
//   branchType branch level; values below 0.5 use the main-branch formulas
//   branchInfo xyz is the branch direction (w is not read)
//   dt         time offset added to g_time
// The direction is a normalized blend of +Y and branchInfo.xyz, scaled by
// getMoveSpeed(), swayed by two rotations and finally multiplied per axis by
// calcEmitMove.
vec3 movePos(vec3 p, vec3 oc, float branchType, vec4 branchInfo, float dt) {
    float t = g_time+dt;

    float rotAmount = g_rotAmount+g_rotAmountLevel*branchType;
    vec3 mDir = normalize(vec3(0.0, 1.0, 0.0)*rotAmount+branchInfo.xyz*(1.0-rotAmount));
    vec3 moveDir = mDir*getMoveSpeed(branchType, dt);

    // Only the wobble phase, the two frequency multipliers and the two rotation
    // gains differ between the main branch and side branches.
    float wobblePhase;
    float yFreqMul;
    float xFreqMul;
    float rot1Gain;
    float rot2Gain;
    if (branchType < 0.5) {
        wobblePhase = 4.7*g_branchMoveFreq*cos(t);
        yFreqMul = 2.0;
        xFreqMul = 1.50;
        rot1Gain = g_branchRot1;
        rot2Gain = g_branchRot2;
    } else {
        wobblePhase = 4.7*cos(t*2.0*branchType*g_branchMoveFreq);
        yFreqMul = 2.0+branchType;
        xFreqMul = 1.50+branchType;
        rot1Gain = g_branchRot1+branchType*g_branchRot1Level;
        rot2Gain = g_branchRot2+branchType*g_branchRot2Level;
    }

    float fre = (0.15+g_branchMoveFreqVar*0.1*sin(t+wobblePhase))*g_branchMoveFreq;
    float swayAmp = 0.40+oc.y*0.03;

    // The swizzles pick which pair of moveDir axes rotateXZ3 rotates.
    moveDir.yxz = rotateXZ3(moveDir.yxz, swayAmp*sin(oc.y*yFreqMul*fre+t)*rot1Gain);
    moveDir.zyx = rotateXZ3(moveDir.zyx, swayAmp*cos(oc.x*xFreqMul*fre+oc.z*fre)*rot2Gain);

    moveDir *= calcEmitMove;

    return p+moveDir;
}

// Unit direction of the movement at p, estimated as the normalized difference
// between movePos() evaluated at dt = 0.01 and at dt = 0.0.
vec3 getDir(vec3 p, vec3 oc, float branchType, vec4 branchInfo) {
    const float PROBE_DT = 0.01;
    vec3 here = movePos(p, oc, branchType, branchInfo, 0.0);
    vec3 ahead = movePos(p, oc, branchType, branchInfo, PROBE_DT);
    return normalize(ahead-here);
}

// Scales p by the growth factor for this time step. Despite the name, no
// rotation is applied: oc, di and ta are accepted but not read, and only bi.x
// enters the sinusoidal growth variation.
vec3 rotPos(vec3 p, vec3 oc, vec4 bi, vec3 di, vec3 ta) {
    float variation = g_branchGrowScaleVar*sin(bi.x*g_branchGrowScaleVarFreq+bi.x+g_branchGrowScaleVarPhase);
    float growScale = g_branchGrowScale+variation;
    return p*pow(growScale, g_timeStep/0.05);
}


// Grows the offset p by the (time-varying) growth factor, fading the growth
// out to 1.0 as length(p) goes from g_maxSca*(1.0-g_maxScaSmooth) to g_maxSca.
// oc is accepted but not read; only bi.x enters the growth variation.
vec3 scaPos(vec3 p, vec3 oc, vec4 bi) {
    float variation = g_branchGrowScaleVar*sin((bi.x+g_time*0.01)*g_branchGrowScaleVarFreq+g_branchGrowScaleVarPhase);
    float growScale = g_branchGrowScale+variation;

    // 1.0 well inside the limit, 0.0 at or beyond g_maxSca.
    float fadeStart = g_maxSca*(1.0-g_maxScaSmooth);
    float fa = 1.0-smoothstep(fadeStart, g_maxSca, length(p));

    float factor = fa*growScale+(1.0-fa);
    return p*pow(factor, g_timeStep/0.05);
}

// Unpacked staging values for the vertex being built; doPack() converts them
// into the packed stage outputs.
vec3 posG;
vec3 normalG;
vec2 uvG;
vec3 tangentG;
vec4 origCenterG;
vec4 genInfoG;
vec4 emitInfoG;
vec4 branchInfoG;

// Writes every staging value to its packed output as 16-bit half floats.
// vec3 values use a 0.0 in the unused fourth half. Call before each EmitVertex().
void doPack() {
    positionGI = uvec2(packHalf2x16(posG.xy), packHalf2x16(vec2(posG.z, 0.0)));
    normalGI = uvec2(packHalf2x16(normalG.xy), packHalf2x16(vec2(normalG.z, 0.0)));
    uvGI = packHalf2x16(uvG);
    tangentGI = uvec2(packHalf2x16(tangentG.xy), packHalf2x16(vec2(tangentG.z, 0.0)));
    origCenterGI = uvec2(packHalf2x16(origCenterG.xy), packHalf2x16(origCenterG.zw));
    genInfoGI = uvec2(packHalf2x16(genInfoG.xy), packHalf2x16(genInfoG.zw));
    emitInfoGI = uvec2(packHalf2x16(emitInfoG.xy), packHalf2x16(emitInfoG.zw));
    branchInfoGI = uvec2(packHalf2x16(branchInfoG.xy), packHalf2x16(branchInfoG.zw));
}



// Geometry-shader entry point. For each input triangle (one segment of a
// growing branch) it:
//   1. drops the triangle if an edge is too long, it is too old, or its emit
//      time is past the limit;
//   2. re-emits the triangle, grown by scaPos() around origCenter[0], with its
//      age, emit counter and branch counter advanced;
//   3. if the triangle is a tip (its processed-with-emit count is still zero)
//      and the emit interval elapsed, emits displaced replicas that extend the
//      branch; the last replica gets a zero count and becomes the new tip;
//   4. if a side branch is due as well, emits a scaled copy that starts a
//      branch one level deeper.
// Per-primitive state is read from vertex 0:
//   genInfo.y   age (ms)
//   emitInfo    x: birth time, y: emit time counter (ms),
//               z: branch level (0.0 = main branch, >= 1.0 = side branch),
//               w: number of times processed with emit
//   branchInfo  xyz: branch main direction, w: branch time counter
//   origCenter  xyz: center of the segment, w: number of branches done
void main(void) {

    // 1a. Discard triangles with an over-long edge (compared as squared lengths).
    float dgl = g_discardEdgeLen*g_discardEdgeLen;
    vec3 dd = position[1]-position[0];
    if (dot(dd,dd) > dgl) {
        return;
    }
    dd = position[2]-position[0];
    if (dot(dd,dd) > dgl) {
        return;
    }
    dd = position[2]-position[1];
    if (dot(dd,dd) > dgl) {
        return;
    }

    float age = genInfo[0].y;
    float emitTime = emitInfo[0].x;
    float emitTimeCounter = emitInfo[0].y;
    float branchType = emitInfo[0].z;
    float processedWithEmitCount = emitInfo[0].w;
    vec3 mainDir = branchInfo[0].xyz;
    float branchTimeCounter = branchInfo[0].w;
    float branchesDone = origCenter[0].w;

    // 1b. Discard triangles past their lifetime limits (limits are scaled by 1000.0).
    if (age > g_maxAge*1000.0) {
        return;
    }
    if (emitTime > g_maxEmitTime*1000.0) {
        return;
    }

    // Number of whole emit intervals that elapsed during this step.
    int emitNew = 0;
    emitTimeCounter += g_timeStep*1000.0;
    if (emitTimeCounter > g_emitRateMS) {
        emitNew = int(floor(emitTimeCounter/g_emitRateMS));
        emitTimeCounter -= float(emitNew)*g_emitRateMS; // keep the fractional remainder
    }

    // A side branch is due when the branch interval elapsed and the level and
    // per-segment branch limits still allow one.
    // NOTE(review): g_branchRateVar / g_branchRateVarFreq are not applied here;
    // the interval is the fixed g_branchRateMS. Confirm whether that is intended.
    bool doBranch = false;
    float branchInterval = g_branchRateMS/1000.0;
    branchTimeCounter += g_timeStep;
    if (branchTimeCounter > branchInterval && branchType < g_maxLevel && branchesDone < (g_branchMax-branchType)) {
        doBranch = true;
        branchTimeCounter -= branchInterval; // carry the remainder into the next interval
        branchesDone += 1.0;
    }

    // 2. Pass the triangle through with updated state.
    for (int i = 0; i < gl_in.length(); ++i) {
        gl_Position = gl_in[i].gl_Position;
        origCenterG = origCenter[i];

        // Grow the vertex offset around the center of vertex 0.
        posG = scaPos(position[i]-origCenter[0].xyz, origCenter[0].xyz, emitInfo[0])+origCenter[0].xyz;

        // Abandon the whole primitive once a vertex leaves the allowed radius.
        if (dot(posG, posG) > g_maxDist*g_maxDist) {
            return;
        }

        normalG = normal[i];
        uvG = uv[i];
        tangentG = tangent[i];
        genInfoG = genInfo[i];
        emitInfoG = emitInfo[i];
        branchInfoG.xyz = mainDir;

        if (emitNew > 0) {
            emitInfoG.w = processedWithEmitCount + float(emitNew);
        }
        emitInfoG.y = emitTimeCounter;
        genInfoG.y += g_timeStep*1000.0; // advance the age
        branchInfoG.w = branchTimeCounter;
        origCenterG.w = branchesDone;

        doPack();
        EmitVertex();
    }
    EndPrimitive();

    // Nothing more to do unless time advanced and at least one emit is due.
    if (!(g_timeStep > 0.0 && emitNew > 0)) {
        return;
    }

    // Per-axis movement multipliers, optionally shuffled by a palette lookup.
    calcEmitMove.xyz = g_branchMove.yzw;
    if (g_branchMovePal.x > 0.0) {
        vec4 fo = g_branchMovePalFreqOfs;
        vec2 palCoord = vec2(g_branchMovePal.y*age+(fo.x+fo.z*branchType)*g_time+fo.y+fo.w*branchType, 0.0);
        // texture() replaces texture2D(), which is not available in the core
        // profile selected by "#version 430".
        float palMove = fract(g_branchMovePal.x*texture(tex3, palCoord).g);
        if (palMove < 0.333) {
            // lower third: keep the axis order
        } else if (palMove < 0.667) {
            calcEmitMove.xyz = calcEmitMove.zxy;
        } else {
            calcEmitMove.xyz = calcEmitMove.yzx;
        }
    }

    // Only the tip, which has not been replicated yet, extends the branch.
    if (!(processedWithEmitCount < 0.5)) {
        return;
    }

    // The output layout declares max_vertices = 32. The pass-through triangle
    // used 3 vertices and a due side branch needs 3 more. Vertices emitted past
    // the limit may be ignored, which would silently drop the zero-count tip
    // replica (it is emitted last), so cap the replica count to what fits.
    const int MAX_OUT_VERTICES = 32;
    int vertexBudget = MAX_OUT_VERTICES - 3 - (doBranch ? 3 : 0);
    int replicas = min(emitNew, vertexBudget/3);

    // Running positions/centers: every replica continues from the previous one.
    vec3 positionP[3];
    vec3 centerP[3];
    for (int i = 0; i < gl_in.length(); i++) {
        positionP[i] = position[i];
        centerP[i] = origCenter[i].xyz;
    }

    // 3. Emit the replicas.
    for (int ie = 0; ie < replicas; ie++) {
        float remaining = float(replicas-1-ie);
        emitInfoG.w = remaining; // the last replica gets zero and becomes the tip
        emitInfoG.x = emitInfoG.x+g_emitRateMS;
        genInfoG.y = remaining*g_emitRateMS; // age

        // Blend the current movement direction with the branch main direction.
        vec3 dir = getDir(origCenterG.xyz, origCenterG.xyz, emitInfoG.z, branchInfoG);
        float rotAmount = g_rotAmount+g_rotAmountLevel*branchType;
        dir = normalize(dir*rotAmount+mainDir.xyz*(1.0-rotAmount));

        float moveSpeed = getMoveSpeed(emitInfoG.z, 0.0);
        dir *= calcEmitMove;

        // Ease the stored branch direction towards the new direction.
        branchInfoG.xyz += (dir-branchInfoG.xyz)*(0.25*pow(1.05, g_timeStep/0.04));

        vec3 advance = dir*moveSpeed;
        for (int i = 0; i < gl_in.length(); ++i) {
            gl_Position = gl_in[i].gl_Position;

            positionP[i] += advance;
            centerP[i] += advance;

            posG = positionP[i];
            origCenterG.xyz = centerP[i];
            uvG = uv[i];
            normalG = normal[i];
            tangentG = tangent[i];

            doPack();
            EmitVertex();
        }
        EndPrimitive();
    }

    // 4. Start a side branch one level deeper.
    if (doBranch) {
        emitInfoG.x = emitInfo[0].x+g_emitRateMS;
        emitInfoG.z = emitInfoG.z+1.0;
        emitInfoG.w = 0.0;
        genInfoG.y = 0.0; // age
        origCenterG.w = 0.0; // branches done for the new level

        vec3 dir = getDir(origCenterG.xyz, origCenterG.xyz, emitInfoG.z, branchInfoG);
        float rotAmount = g_rotAmount+g_rotAmountLevel*branchType;
        dir = normalize(dir*rotAmount+branchInfoG.xyz*(1.0-rotAmount));

        // Grow direction: with angFact fixed at 0.0 this is the normalized
        // cross product of dir and +Y. The g_branchAngle-driven factor
        // (g_branchAngle.x+g_branchAngle.y*sin(g_time*g_branchAngle.z)) is disabled.
        vec3 tang = normalize(cross(dir, vec3(0.0, 1.0, 0.0)));
        float angFact = 0.0;
        branchInfoG.xyz = normalize(dir*angFact+(1.0-angFact)*tang)*pow(g_branchScalePerLevel, branchType);

        origCenterG.xyz = origCenter[0].xyz;

        for (int i = 0; i < gl_in.length(); ++i) {
            gl_Position = gl_in[i].gl_Position;

            // Shrink the input triangle around the center of vertex 0.
            posG = (position[i]-origCenterG.xyz)*g_branchScalePerLevel+origCenterG.xyz;

            normalG = normal[i];
            uvG = uv[i];
            tangentG = tangent[i];

            doPack();
            EmitVertex();
        }
        EndPrimitive();
    }
}
