#version 460
// NOTE(review): the visible source uses no 64-bit integer types, no 64-bit atomics and no
// `scalar` block layout; the next three `require`s may be stricter than necessary — confirm
// against the build pipeline before relaxing them.
#extension GL_EXT_shader_atomic_int64 : require
#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require
#extension GL_EXT_scalar_block_layout : require
// Needed for the subgroupQuadSwap* calls in SpdReduceQuad.
#extension GL_KHR_shader_subgroup_quad : require
// 256 invocations per workgroup, all along x; SpdDownsample derives 2D coordinates from
// gl_LocalInvocationIndex.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Per-dispatch parameters read once in main() and forwarded to SpdDownsample.
layout(set = 0, binding = 2, std140) uniform SpdConstants
{
    // Compared against fixed mip indices to decide how many downsample stages run.
    uint mips;
    // SpdExitWorkgroup compares the atomic ticket against numWorkGroups - 1.
    uint numWorkGroups;
    // Added to gl_WorkGroupID.xy (after conversion to uvec2) before any coordinates are derived.
    ivec2 workGroupOffset;
    // Scale applied to integer source coordinates in SpdLoadSourceImage.
    // Presumably 1 / source size in texels — confirm with the host code.
    vec2 invInputSize;
} spdConstants;

// Global ticket counters, one per slice. The slice index comes from gl_WorkGroupID.z in main(),
// so dispatches must keep z below 6 to stay inside the array.
layout(set = 0, binding = 1, std430) coherent buffer SpdGlobalAtomicBuffer
{
    // Incremented with atomicAdd in SpdIncreaseAtomicCounter and zeroed in SpdResetAtomicCounter.
    uint counter[6];
} spdGlobalAtomic;

// Source texture; only read through textureLod at LOD 0 in SpdLoadSourceImage.
layout(set = 0, binding = 15) uniform sampler2D imgSrc;
// Destination images, indexed by the `mip` argument of SpdStore; imgDst[5] is also read back
// by SpdLoad.
// NOTE(review): imgDst[5] is written by the workgroups running SpdDownsampleMip_5 and later read
// by the single workgroup that continues past SpdExitWorkgroup, yet this declaration has no
// `coherent` qualifier — confirm cross-workgroup visibility of those writes is ensured elsewhere.
layout(set = 0, binding = 3, rgba8) uniform image2D imgDst[12];

// Holds the value returned by atomicAdd in SpdIncreaseAtomicCounter so that, after a barrier,
// every invocation of the workgroup can read it through SpdGetAtomicCounter.
shared uint spdCounter;
// Per-channel 16x16 scratch arrays; written by SpdStoreIntermediate and read by
// SpdLoadIntermediate, both indexed as [x][y].
shared float spdIntermediateR[16][16];
shared float spdIntermediateG[16][16];
shared float spdIntermediateB[16][16];
shared float spdIntermediateA[16][16];

// Bit-field extract: returns the `bits`-wide field of `src` that starts at bit `off`.
uint ABfe(uint src, uint off, uint bits)
{
    int firstBit = int(off);
    int bitCount = int(bits);
    return bitfieldExtract(src, firstBit, bitCount);
}

// Bit-field insert at bit 0: returns `src` with its lowest `bits` bits replaced by the lowest
// `bits` bits of `ins`.
uint ABfiM(uint src, uint ins, uint bits)
{
    const int lowestBit = 0;
    int bitCount = int(bits);
    return bitfieldInsert(src, ins, lowestBit, bitCount);
}

// Remaps a linear index `a` (the caller passes localInvocationIndex % 64) to a 2D coordinate.
// Bit usage of `a`, most significant first:
//   x = { bit 4, bit 3, bit 0 }
//   y = { bit 5, bit 2, bit 1 }
uvec2 ARmpRed8x8(uint a)
{
    // Take bits 2..4, then overwrite the lowest bit with bit 0 of `a`.
    uint col = ABfiM(ABfe(a, 2u, 3u), a, 1u);
    // Take bits 3..5, then overwrite the lowest two bits with bits 1..2 of `a`.
    uint row = ABfiM(ABfe(a, 3u, 3u), ABfe(a, 1u, 2u), 2u);
    return uvec2(col, row);
}

// Samples imgSrc at LOD 0 at coordinate p * invInputSize + invInputSize.
// `slice` is accepted for signature compatibility but is not used here.
vec4 SpdLoadSourceImage(ivec2 p, uint slice)
{
    vec2 uv = vec2(p) * spdConstants.invInputSize;
    uv += spdConstants.invInputSize;
    return textureLod(imgSrc, uv, 0.0);
}

// Fetches one source sample for the unsigned coordinate `base` by forwarding to
// SpdLoadSourceImage with the coordinate converted to signed.
vec4 SpdReduceLoadSourceImage(uvec2 base, uint slice)
{
    return SpdLoadSourceImage(ivec2(base), slice);
}

// Writes `value` to texel `p` of destination image imgDst[mip].
// Index 5 goes through a constant index, every other value through the dynamic index.
// `slice` is not used by this function.
void SpdStore(ivec2 p, vec4 value, uint mip, uint slice)
{
    if (mip != 5u)
    {
        imageStore(imgDst[mip], p, value);
    }
    else
    {
        imageStore(imgDst[5], p, value);
    }
}

// Reduction kernel: the component-wise average of four values.
vec4 SpdReduce4(vec4 v0, vec4 v1, vec4 v2, vec4 v3)
{
    // Accumulate left to right so the summation order stays v0, v1, v2, v3.
    vec4 total = v0 + v1;
    total += v2;
    total += v3;
    return total * 0.25;
}

// Reduces `v` across the invocation's subgroup quad: the own value plus the values held by the
// horizontal, vertical and diagonal quad neighbours are combined with SpdReduce4.
vec4 SpdReduceQuad(vec4 v)
{
    vec4 fromHorizontal = subgroupQuadSwapHorizontal(v);
    vec4 fromVertical = subgroupQuadSwapVertical(v);
    vec4 fromDiagonal = subgroupQuadSwapDiagonal(v);
    return SpdReduce4(v, fromHorizontal, fromVertical, fromDiagonal);
}

// Stores the four channels of `value` into the shared scratch arrays at [x][y].
void SpdStoreIntermediate(uint x, uint y, vec4 value)
{
    spdIntermediateR[x][y] = value.r;
    spdIntermediateG[x][y] = value.g;
    spdIntermediateB[x][y] = value.b;
    spdIntermediateA[x][y] = value.a;
}

// First two downsample stages for one workgroup.
//
// Stage 1: each invocation fetches four source samples and stores them to imgDst[0]. The four
//          samples sit at tile offsets (0,0), (1,0), (0,1), (1,1), where a tile step is 32 in
//          source coordinates and 16 in imgDst[0] coordinates.
// Stage 2: skipped when `mip` <= 1. Each of the four values is reduced across the subgroup quad;
//          invocations whose index is a multiple of 4 store the results to imgDst[1] and to the
//          shared scratch arrays (tile step 8).
void SpdDownsampleMips_0_1_Intrinsics(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    vec4 v[4];
    ivec2 srcOrigin = ivec2(workGroupID * uvec2(64u));
    ivec2 mip0Origin = ivec2(workGroupID * uvec2(32u));

    // Tiles are visited in the order (0,0), (1,0), (0,1), (1,1).
    for (uint i = 0u; i < 4u; ++i)
    {
        uint ox = i & 1u;
        uint oy = i >> 1u;
        ivec2 tex = srcOrigin + ivec2(int((x * 2u) + (32u * ox)), int((y * 2u) + (32u * oy)));
        ivec2 pix = mip0Origin + ivec2(int(x + (16u * ox)), int(y + (16u * oy)));
        v[i] = SpdReduceLoadSourceImage(uvec2(tex), slice);
        SpdStore(pix, v[i], 0u, slice);
    }

    if (mip <= 1u)
    {
        return;
    }

    for (uint i = 0u; i < 4u; ++i)
    {
        v[i] = SpdReduceQuad(v[i]);
    }

    bool writesQuadResult = (localInvocationIndex % 4u) == 0u;
    if (writesQuadResult)
    {
        ivec2 mip1Origin = ivec2(workGroupID * uvec2(16u));
        for (uint i = 0u; i < 4u; ++i)
        {
            uint hx = (x / 2u) + (8u * (i & 1u));
            uint hy = (y / 2u) + (8u * (i >> 1u));
            SpdStore(mip1Origin + ivec2(int(hx), int(hy)), v[i], 1u, slice);
            SpdStoreIntermediate(hx, hy, v[i]);
        }
    }
}

// Entry point for the first two stages; forwards unchanged to the subgroup-intrinsics variant.
void SpdDownsampleMips_0_1(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
}

// Workgroup-wide barrier. Callers place it between a stage that writes the shared scratch
// arrays (or spdCounter) and the next stage that reads them.
void SpdWorkgroupShuffleBarrier()
{
    barrier();
}

// Reads back the four channels stored at [x][y] in the shared scratch arrays.
vec4 SpdLoadIntermediate(uint x, uint y)
{
    vec4 value;
    value.r = spdIntermediateR[x][y];
    value.g = spdIntermediateG[x][y];
    value.b = spdIntermediateB[x][y];
    value.a = spdIntermediateA[x][y];
    return value;
}

// Stage run by all invocations: quad-reduces the scratch value at (x, y). Invocations whose
// index is a multiple of 4 store the result to imgDst[mip] at workGroupID * 8 + (x/2, y/2) and
// write it back to scratch at (x + (y/2) % 2, y), where SpdDownsampleMip_3 picks it up.
void SpdDownsampleMip_2(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    vec4 reduced = SpdReduceQuad(SpdLoadIntermediate(x, y));

    bool writesQuadResult = (localInvocationIndex % 4u) == 0u;
    if (writesQuadResult)
    {
        ivec2 dst = ivec2(workGroupID * uvec2(8u)) + ivec2(int(x / 2u), int(y / 2u));
        SpdStore(dst, reduced, mip, slice);

        uint scratchX = x + ((y / 2u) % 2u);
        SpdStoreIntermediate(scratchX, y, reduced);
    }
}

// Stage run by the first 64 invocations: quad-reduces the scratch value at (2x + y % 2, 2y).
// Invocations whose index is a multiple of 4 store the result to imgDst[mip] at
// workGroupID * 4 + (x/2, y/2) and write it back to scratch at (2x + y/2, 2y).
void SpdDownsampleMip_3(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    if (localInvocationIndex < 64u)
    {
        uint scratchY = y * 2u;
        vec4 reduced = SpdReduceQuad(SpdLoadIntermediate((x * 2u) + (y % 2u), scratchY));

        bool writesQuadResult = (localInvocationIndex % 4u) == 0u;
        if (writesQuadResult)
        {
            ivec2 dst = ivec2(workGroupID * uvec2(4u)) + ivec2(int(x / 2u), int(y / 2u));
            SpdStore(dst, reduced, mip, slice);
            SpdStoreIntermediate((x * 2u) + (y / 2u), scratchY, reduced);
        }
    }
}

// Stage run by the first 16 invocations: quad-reduces the scratch value at (4x + y, 4y).
// Invocations whose index is a multiple of 4 store the result to imgDst[mip] at
// workGroupID * 2 + (x/2, y/2) and write it back to scratch row 0 at column x/2 + y.
void SpdDownsampleMip_4(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    if (localInvocationIndex < 16u)
    {
        vec4 reduced = SpdReduceQuad(SpdLoadIntermediate((x * 4u) + y, y * 4u));

        bool writesQuadResult = (localInvocationIndex % 4u) == 0u;
        if (writesQuadResult)
        {
            ivec2 dst = ivec2(workGroupID * uvec2(2u)) + ivec2(int(x / 2u), int(y / 2u));
            SpdStore(dst, reduced, mip, slice);
            SpdStoreIntermediate((x / 2u) + y, 0u, reduced);
        }
    }
}

// Stage run by the first 4 invocations: quad-reduces scratch entries (0..3, 0). The invocation
// whose index is a multiple of 4 stores the single result to imgDst[mip] at texel workGroupID.
void SpdDownsampleMip_5(uvec2 workGroupID, uint localInvocationIndex, uint mip, uint slice)
{
    if (localInvocationIndex < 4u)
    {
        vec4 reduced = SpdReduceQuad(SpdLoadIntermediate(localInvocationIndex, 0u));

        bool writesQuadResult = (localInvocationIndex % 4u) == 0u;
        if (writesQuadResult)
        {
            SpdStore(ivec2(workGroupID), reduced, mip, slice);
        }
    }
}

// Runs up to four consecutive scratch-based stages, writing destination indices baseMip through
// baseMip + 3. Stage k runs only while `mips` > baseMip + k, and each stage is preceded by a
// workgroup barrier so the previous stage's scratch writes are complete before they are read.
void SpdDownsampleNextFour(uint x, uint y, uvec2 workGroupID, uint localInvocationIndex, uint baseMip, uint mips, uint slice)
{
    if (mips <= baseMip)
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);

    if (mips <= (baseMip + 1u))
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1u, slice);

    if (mips <= (baseMip + 2u))
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2u, slice);

    if (mips <= (baseMip + 3u))
    {
        return;
    }
    SpdWorkgroupShuffleBarrier();
    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3u, slice);
}

// Atomically increments the global counter for `slice` and publishes the pre-increment value
// to the workgroup through the shared variable spdCounter.
void SpdIncreaseAtomicCounter(uint slice)
{
    spdCounter = atomicAdd(spdGlobalAtomic.counter[slice], 1u);
}

// Returns the counter value last published to shared memory by SpdIncreaseAtomicCounter.
uint SpdGetAtomicCounter()
{
    uint ticket = spdCounter;
    return ticket;
}

// Decides whether this workgroup is done. Invocation 0 takes a ticket from the global counter;
// after a barrier every invocation reads it. Returns false only for the workgroup whose ticket
// equals numWorkGroups - 1, and true for all others.
bool SpdExitWorkgroup(uint numWorkGroups, uint localInvocationIndex, uint slice)
{
    if (localInvocationIndex == 0u)
    {
        SpdIncreaseAtomicCounter(slice);
    }
    // Make the ticket written by invocation 0 visible to the rest of the workgroup.
    SpdWorkgroupShuffleBarrier();

    uint finalTicket = numWorkGroups - 1u;
    bool drewFinalTicket = SpdGetAtomicCounter() == finalTicket;
    return !drewFinalTicket;
}

// Sets the global counter for `slice` back to zero with a plain (non-atomic) store.
void SpdResetAtomicCounter(uint slice)
{
    const uint initialCount = 0u;
    spdGlobalAtomic.counter[slice] = initialCount;
}

// Reads texel `p` from imgDst[5], the image SpdStore writes for mip index 5.
// `slice` is not used by this function.
vec4 SpdLoad(ivec2 p, uint slice)
{
    vec4 stored = imageLoad(imgDst[5], p);
    return stored;
}

// Loads the four texels i0..i3 through SpdLoad (in that order) and returns their
// SpdReduce4 reduction.
vec4 SpdReduceLoad4(uvec2 i0, uvec2 i1, uvec2 i2, uvec2 i3, uint slice)
{
    vec4 t0 = SpdLoad(ivec2(i0), slice);
    vec4 t1 = SpdLoad(ivec2(i1), slice);
    vec4 t2 = SpdLoad(ivec2(i2), slice);
    vec4 t3 = SpdLoad(ivec2(i3), slice);
    return SpdReduce4(t0, t1, t2, t3);
}

// Reduces the 2x2 texel footprint whose corner is `base`, passing the texels in the order
// (0,0), (0,1), (1,0), (1,1) to the four-coordinate overload.
vec4 SpdReduceLoad4(uvec2 base, uint slice)
{
    uvec2 corner00 = uvec2(base + uvec2(0u));
    uvec2 corner01 = uvec2(base + uvec2(0u, 1u));
    uvec2 corner10 = uvec2(base + uvec2(1u, 0u));
    uvec2 corner11 = uvec2(base + uvec2(1u));
    return SpdReduceLoad4(corner00, corner01, corner10, corner11, slice);
}

// Two stages that read the previously written imgDst[5].
//
// Each invocation reduces four 2x2 footprints whose corners are (4x, 4y) plus (0,0), (2,0),
// (0,2), (2,2), and stores the results to imgDst[6] at (2x, 2y) plus (0,0), (1,0), (0,1), (1,1).
// When `mips` > 7 the four results are reduced once more, stored to imgDst[7] at (x, y) and
// written to the shared scratch arrays at (x, y).
void SpdDownsampleMips_6_7(uint x, uint y, uint mips, uint slice)
{
    vec4 quadrant[4];

    // Footprints are visited in the order (0,0), (1,0), (0,1), (1,1).
    for (uint i = 0u; i < 4u; ++i)
    {
        uint ox = i & 1u;
        uint oy = i >> 1u;
        ivec2 tex = ivec2(int((x * 4u) + (2u * ox)), int((y * 4u) + (2u * oy)));
        ivec2 pix = ivec2(int((x * 2u) + ox), int((y * 2u) + oy));
        quadrant[i] = SpdReduceLoad4(uvec2(tex), slice);
        SpdStore(pix, quadrant[i], 6u, slice);
    }

    if (mips <= 7u)
    {
        return;
    }

    // All four inputs were produced by this same invocation, so no barrier is involved here.
    vec4 v = SpdReduce4(quadrant[0], quadrant[1], quadrant[2], quadrant[3]);
    SpdStore(ivec2(int(x), int(y)), v, 7u, slice);
    SpdStoreIntermediate(x, y, v);
}

// Top-level downsample routine for one workgroup.
//
// 1. Derives (x, y) in [0, 15]^2 from the local index: the low 6 bits go through ARmpRed8x8,
//    bit 6 adds 8 to x and bit 7 adds 8 to y.
// 2. Runs the first two stages, then up to four scratch-based stages starting at index 2.
// 3. If `mips` > 6, every workgroup takes a ticket; only the one for which SpdExitWorkgroup
//    returns false continues. It resets the counter, runs the stages for indices 6 and 7, and
//    then up to four more scratch-based stages starting at index 8 with workgroup origin (0, 0).
void SpdDownsample(uvec2 workGroupID, uint localInvocationIndex, uint mips, uint numWorkGroups, uint slice)
{
    uvec2 remapped = ARmpRed8x8(localInvocationIndex % 64u);
    uint x = remapped.x + (8u * ((localInvocationIndex >> 6u) % 2u));
    uint y = remapped.y + (8u * (localInvocationIndex >> 7u));

    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2u, mips, slice);

    if (mips <= 6u)
    {
        return;
    }

    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice))
    {
        return;
    }

    SpdResetAtomicCounter(slice);

    SpdDownsampleMips_6_7(x, y, mips, slice);
    SpdDownsampleNextFour(x, y, uvec2(0u), localInvocationIndex, 8u, mips, slice);
}

// Overload that shifts the workgroup ID by `workGroupOffset` before running the downsample.
void SpdDownsample(uvec2 workGroupID, uint localInvocationIndex, uint mips, uint numWorkGroups, uint slice, uvec2 workGroupOffset)
{
    uvec2 shiftedID = workGroupID + workGroupOffset;
    SpdDownsample(shiftedID, localInvocationIndex, mips, numWorkGroups, slice);
}

// Shader entry point: gl_WorkGroupID.xy selects the tile, gl_WorkGroupID.z the slice; all other
// parameters come from the SpdConstants uniform block.
void main()
{
    uvec2 groupXY = uvec2(gl_WorkGroupID.xy);
    uint slice = gl_WorkGroupID.z;
    uvec2 groupOffset = uvec2(spdConstants.workGroupOffset);

    SpdDownsample(groupXY, gl_LocalInvocationIndex, spdConstants.mips, spdConstants.numWorkGroups, slice, groupOffset);
}

 