#include <stdio.h>
#include <string.h>
#include "pico/stdlib.h"
#include "hardware/dma.h"
#include "hardware/interp.h"
#include "hardware/divider.h"
#include "hardware/spi.h"
//#include "lib/st7735.h"
//#include "data/scene-copy.h"
#include "data/scene.h"
#include "pico.h"
#include "pico/scanvideo.h"
#include "pico/scanvideo/composable_scanline.h"
#include "pico/multicore.h"

// Build-time switch. In the code visible in this file it is only tested by the `#ifndef VSYNC`
// in main(), which declares an extra `dma_screen` local when the macro is NOT defined.
#define VSYNC

// Scan-converts one filled polygon into `screen`.
//
// Parameters:
//   shape        - interleaved vertex bytes: x at even indices, y at the following odd index.
//   screen       - destination buffer, written as screen[y][x].
//   color        - byte stored into every covered pixel.
//   vertex_count - number of vertices held in `shape`.
//   start_pos    - index into `shape` of the x byte of the vertex to start from; the caller in
//                  main() passes the vertex with the smallest y.
//
// Starting from that vertex, two edge chains are walked at the same time: the "left" chain steps
// forward through `shape` (wrapping to index 0 at the end) and the "right" chain steps backward
// (wrapping to the end at index 0). For each edge the per-row x increment is computed with the
// hardware divider in 23-bit fixed point and loaded into an interpolator (interp0 for the left
// chain, interp1 for the right chain); every row then reads one x from each interpolator and
// fills the span between them, inclusive.
//
// Depends on the interpolator lane configuration performed in main() (shift 23, mask bits 0..7).
// Presumably pop[2] then yields the integer x for the row and advances the accumulator by
// base[0] - TODO confirm against the RP2040 datasheet.
//
// Returns 1 + (2 * vertex_count), converted to char. The callers in this file ignore the value.
//
// NOTE(review): leftcount/rightcount are unsigned, so an edge that goes upward (y2 < y1) wraps
// to a huge row count; the code appears to assume y never decreases along either chain when
// starting from the topmost vertex - confirm the input is always convex.
// NOTE(review): `(x2 - x1) << 23` left-shifts a negative int whenever x decreases along an edge,
// which the C standard leaves undefined; it seems to rely on the compiler's arithmetic-shift
// behaviour - confirm.
char static inline plot_shape(const unsigned char *shape, unsigned char screen[200][256], const unsigned char color, unsigned char vertex_count, unsigned int start_pos)
{
    // Rows remaining on the current left / right edge; 0 means "fetch the next edge".
    unsigned int leftcount = 0;
    unsigned int rightcount = 0;

    // const unsigned int yaspect = ((1LL << 23LL) * 80LL) / 200LL;
    // const unsigned int xaspect = ((1LL << 23LL) * 160LL) / 256LL;

    // Both chains begin at the start vertex; forward_index is advanced past it just below,
    // reverse_index still points at it.
    int forward_index = start_pos;
    int reverse_index = forward_index;

    // (x1, y1) -> (x2, y2) are the endpoints of the current left (l*) and right (r*) edges.
    int lx1;
    int ly1;
    int rx1;
    int ry1;
    int rx2 = shape[forward_index++];
    int ry2 = shape[forward_index++];
    // Wrap the forward cursor when it runs off the end of the vertex list.
    if (forward_index == (2 * vertex_count))
    {
        forward_index = 0;
    }
    int lx2 = rx2;
    int ly2 = ry2;

    // Current output row, starting at the start vertex's y.
    int ypos = ly2;
    int ttn; // time till next vertex, vertically

    // Number of edges still to be consumed; each edge fetched on either side uses one up.
    int vert_left = vertex_count;

    while (vert_left > 0)
    {
        // puts("loop start");
        if (leftcount == 0)
        {
            // Advance the left chain: the previous end point becomes the new start point.
            vert_left--;
            ly1 = ly2;
            lx1 = lx2;
            lx2 = shape[forward_index++];
            ly2 = shape[forward_index++];
            if (forward_index == (2 * vertex_count))
            {
                forward_index = 0;
            }

            leftcount = (ly2 - ly1);
            if (leftcount == 0)
            {
                // Horizontal edge: fill columns lx2..lx1 on row ly1 directly, then restart the
                // loop so the next left edge is fetched.
                unsigned int right = lx1;
                unsigned int left = lx2;
                unsigned int y = ly1;
                // printf("drawing from %d to %d at %d\n", left, right, ly1);
                for (; left <= right; left++)
                {
                    // printf("plot color %d at x:%d y:%d\n", color, left, ypos);
                    screen[y][left] = color;
                }
                continue;
            }
            // Per-row x increment (dx / dy) with 23 fractional bits, via the hardware divider.
            int test = (lx2 - lx1) << 23;
            int ldisplace = hw_divider_s32_quotient_inlined(test, leftcount);
            interp0->base[0] = ldisplace; // increment value
            // printf("(l) x1:%d x2:%d y1:%d y2:%d displacement:%d\n", lx1, lx2, ly1, ly2, ldisplace);
            // printf("%d %d %d %d %d\n", lx1, lx2, ly1, ly2, ldisplace);
            // Accumulator starts at the edge's first x, in the same fixed-point format.
            interp0->accum[0] = lx1 << 23;
        }
        if (rightcount == 0)
        {
            // Advance the right chain, reading the vertex list backward (y byte first, then x).
            vert_left--;
            ry1 = ry2;
            rx1 = rx2;
            // Wrap the reverse cursor to the end of the vertex list.
            if (reverse_index == 0)
            {
                reverse_index = (2 * vertex_count);
            }
            ry2 = shape[--reverse_index];
            rx2 = shape[--reverse_index];

            rightcount = ry2 - ry1;
            if (rightcount == 0)
            {
                // Horizontal edge: fill columns rx1..rx2 on row ry1 directly, then restart the
                // loop so the next right edge is fetched.
                unsigned int right = rx2;
                unsigned int left = rx1;
                unsigned int y = ry1;
                // printf("drawing from %d to %d at %d\n", left, right, ry1);
                for (; left <= right; left++)
                {
                    // printf("plot color %d at x:%d y:%d\n", color, left, ypos);
                    screen[y][left] = color;
                }
                continue;
            }
            // Same slope/accumulator setup as the left edge, but on interp1.
            int test = (rx2 - rx1) << 23;
            int rdisplace = hw_divider_s32_quotient_inlined(test, rightcount);
            interp1->base[0] = rdisplace; // increment value
            // printf("(r) x1:%d x2:%d y1:%d y2:%d displacement:%d\n", rx1, rx2, ry1, ry2, rdisplace);
            // printf("%d %d %d %d %d\n", rx1, rx2, ry1, ry2, rdisplace);
            interp1->accum[0] = rx1 << 23;
        }

        // Draw rows until whichever edge ends first; that side's count becomes 0 so it is
        // refilled on the next pass, the other side keeps its remaining rows.
        if (leftcount < rightcount)
        {
            ttn = leftcount;
            rightcount -= leftcount;
            leftcount = 0;
        }
        else
        {
            ttn = rightcount;
            leftcount -= rightcount;
            rightcount = 0;
        }

        for (; ttn > 0; ttn--, ypos++)
        {
            // One read per row from each interpolator gives that row's span end points.
            unsigned int right = interp1->pop[2];
            unsigned int left = interp0->pop[2];
            //  printf("%d %d\n", left, right);
            //  printf("drawing from %d to %d at %d\n", left, right, ypos);
            for (; left <= right; left++)
            {
                // printf("plot color %d at x:%d y:%d\n", color, left, ypos);
                screen[ypos][left] = color;
            }
        }
    }
    // puts("end of polygon");
    return 1 + (2 * vertex_count);
}

// The two frame buffers, defined in another translation unit: 200 rows of 256 bytes. Each byte
// is used as an index into the matching 16-entry palette when a row is scanned out.
extern unsigned char screenbuf1[200][256];
extern unsigned char screenbuf2[200][256];

// One 16-entry palette of 16-bit output pixel values per frame buffer, defined elsewhere.
extern unsigned short palette1[16];
extern unsigned short palette2[16];

// Video mode handed to scanvideo_setup() in core1setup().
#define vga_mode vga_mode_320x240_60
extern const struct scanvideo_pio_program video_24mhz_composable;

// Selects the buffer pair being displayed: true -> screenbuf1/palette1 are scanned out while
// main() draws into screenbuf2/palette2; false -> the other way round. Toggled once per frame
// by main() under screen_mutex.
bool screen = true;

// NOTE(review): not referenced anywhere in the code visible in this file.
static uint16_t linebuffers[4][256];

// Writes the composable-scanline framing for one line of `width` raw pixels into `dest` and
// returns the address where the caller must store those pixels (16 bits each).
//
// Layout produced, in 32-bit words of dest->data:
//   [0]             COMPOSABLE_COLOR_RUN command
//   [1]             the value 32 in the low half, COMPOSABLE_RAW_RUN command in the high half
//   [2]             raw-run length in the low half; the high half is the first pixel slot
//   [3 ...]         remaining pixels
//   [width / 2 + 2] zeroed up front: supplies the trailing black pixel
//   [width / 2 + 3] COMPOSABLE_EOL_SKIP_ALIGN
//
// `width` must be even and at least 3 (checked with assert only). Call raw_scanline_finish()
// once the pixels have been written.
//
// NOTE(review): the end-of-line word is stored at index width / 2 + 3 while data_used is set to
// that same number; if data_used is a count of words this excludes the last word - confirm
// against the scanvideo documentation.
static inline uint16_t *raw_scanline_prepare(struct scanvideo_scanline_buffer *dest, uint width)
{
    assert(width >= 3);
    assert(width % 2 == 0);

    const uint black_word = width / 2 + 2; // word that holds the trailing black pixel
    const uint eol_word = black_word + 1;  // word that holds the end-of-line command

    dest->data[0] = COMPOSABLE_COLOR_RUN;
    dest->data[1] = 32 | (COMPOSABLE_RAW_RUN << 16);
    // +1 for the black pixel at the end, -3 because the program outputs n+3 pixels.
    dest->data[2] = (width + 1 - 3);

    // After the user pixels: 1 black pixel, then discard remaining FIFO data.
    dest->data[black_word] = 0;
    dest->data[eol_word] = (COMPOSABLE_EOL_SKIP_ALIGN);
    dest->data_used = eol_word;
    assert(dest->data_used <= dest->data_max);

    // The first pixel lives in the upper half-word of data[2].
    uint16_t *halfwords = (uint16_t *)&(dest->data[2]);
    return halfwords + 1;
}

// Completes a line started with raw_scanline_prepare(): exchanges the two 16-bit halves of
// data[2] (run length and first pixel) and marks the buffer SCANLINE_OK.
static inline void raw_scanline_finish(struct scanvideo_scanline_buffer *dest)
{
    // Need to pivot the first pixel with the count so that PIO can keep up
    // with its 1 pixel per 2 clocks
    const uint32_t word = dest->data[2];
    const uint32_t upper_to_lower = word >> 16;
    const uint32_t lower_to_upper = word << 16;

    dest->data[2] = upper_to_lower | lower_to_upper;
    dest->status = SCANLINE_OK;
}

// Guards `screen`: held by fill_scanline_buffer() while it copies one row, and by main() while
// it toggles the flag.
auto_init_mutex(screen_mutex);

// Generates the scan-out data for one video line.
//
// buffer - scanline buffer obtained from scanvideo; its scanline_id selects the line.
//
// Lines above or below the 200-row image get a single-pixel line followed by an end-of-line
// command. Lines inside it are filled with 256 pixels looked up through the palette of the
// buffer pair currently selected by `screen`.
//
// The image occupies lines 20..219 inclusive, so that all 200 rows of the frame buffer are
// shown (the previous upper bound of 219 dropped the last row).
void fill_scanline_buffer(struct scanvideo_scanline_buffer *buffer)
{
    enum
    {
        TOP_MARGIN = 20,    // video lines left blank above the image
        VISIBLE_ROWS = 200, // rows in screenbuf1 / screenbuf2
        VISIBLE_COLS = 256  // columns in screenbuf1 / screenbuf2
    };

    int ypos = scanvideo_scanline_number(buffer->scanline_id);

    if ((ypos < TOP_MARGIN) || (ypos >= TOP_MARGIN + VISIBLE_ROWS))
    {
        buffer->data[0] = COMPOSABLE_RAW_1P;
        buffer->data[1] = COMPOSABLE_EOL_SKIP_ALIGN;
        buffer->data_used = 2;
        buffer->status = SCANLINE_OK;
        return;
    }

    uint16_t *linebuf = raw_scanline_prepare(buffer, VISIBLE_COLS);

    // `screen` cannot change while the mutex is held, so the source row and palette are
    // selected once instead of once per pixel.
    mutex_enter_blocking(&screen_mutex);
    const unsigned short *pal = screen ? palette1 : palette2;
    const unsigned char *row = screen ? screenbuf1[ypos - TOP_MARGIN] : screenbuf2[ypos - TOP_MARGIN];
    for (size_t i = 0; i < VISIBLE_COLS; i++)
    {
        linebuf[i] = pal[row[i]];
    }
    mutex_exit(&screen_mutex);

    raw_scanline_finish(buffer);
}

// Entry point launched on core 1 by main(): sets up scanvideo for `vga_mode`, enables its
// timing, then produces scanlines forever. Never returns.
void core1setup()
{
    scanvideo_setup(&vga_mode);
    scanvideo_timing_enable(true);

    for (;;)
    {
        // Blocking request for the next buffer to fill (argument `true`).
        scanvideo_scanline_buffer_t *line = scanvideo_begin_scanline_generation(true);
        fill_scanline_buffer(line);
        scanvideo_end_scanline_generation(line);
    }
}

// Program entry point (core 0).
//
// Sets the system clock, configures the DMA channel used to clear a frame buffer and the two
// hardware interpolators used by plot_shape(), launches the video scan-out on core 1, then
// decodes the SCENE byte stream forever, drawing each frame into the buffer that is not being
// displayed.
//
// Frame layout as decoded below:
//   control byte: bit 0 = clear the back buffer first
//                 bit 1 = palette update follows (16-bit big-endian mask, then one 16-bit
//                         big-endian colour per set mask bit, most significant bit = entry 0)
//                 bit 2 = indexed mode: a vertex table follows (count byte, then x,y pairs) and
//                         polygons refer to vertices by table index
//   polygons:     descriptor byte (high nibble = colour, low nibble = vertex count) followed by
//                 the vertices (inline x,y pairs, or one index byte each in indexed mode)
//   terminators:  0xFF = end of frame
//                 0xFE = end of frame, next frame starts at the next 64 KiB boundary
//                 0xFD = end of frame, restart the stream from offset 0
//
// Never returns in practice; the trailing `return 0` is unreachable.
int main()
{
    // 5 x 50 MHz = 250 MHz system clock.
    const int base_freq = 50000;
    set_sys_clock_khz(base_freq * 5, true);

    // Fixed word used as the constant (non-incrementing) read source of the clear DMA.
    int *zero = (int *)0x21030000;
    (*zero) = 0;

    // DMA setup: one channel that replicates *zero over a whole frame buffer.
    const uint dma_clear = 1;

    dma_channel_config c = dma_channel_get_default_config(dma_clear);
    channel_config_set_transfer_data_size(&c, DMA_SIZE_32);
    channel_config_set_read_increment(&c, false);
    channel_config_set_write_increment(&c, true);
    dma_channel_configure(dma_clear, &c,
                          screenbuf1,    // write address (re-pointed before every clear)
                          zero,          // read address
                          200 * 256 / 4, // element count (each element is of size transfer_data_size)
                          false);        // don't start yet

    // Interpolator setup: lane 1 of both interpolators keeps the default configuration,
    // lane 0 is configured for the 23-bit fixed-point edge stepping used by plot_shape().
    interp_config cfg = interp_default_config();
    interp_set_config(interp0, 1, &cfg);
    interp_set_config(interp1, 1, &cfg);
    interp_config_set_cross_input(&cfg, false);
    interp_config_set_cross_result(&cfg, false);
    interp_config_set_force_bits(&cfg, 0);
    interp_config_set_mask(&cfg, 0, 7);
    interp_config_set_shift(&cfg, 23);
    interp_config_set_signed(&cfg, false);
    interp_config_set_add_raw(&cfg, true);
    interp_config_set_blend(&cfg, false);
    interp_config_set_clamp(&cfg, false);
    interp_set_config(interp0, 0, &cfg);
    interp_set_config(interp1, 0, &cfg);

    interp0->accum[1] = 0;
    interp0->base[1] = 0;
    interp0->base[2] = 0;
    interp1->accum[1] = 0;
    interp1->base[1] = 0;
    interp1->base[2] = 0;

    // Static storage is zero-initialised, so no explicit clearing is needed.
    // shape: at most 15 vertices (4-bit count) of 2 bytes each.
    static unsigned char shape[30];
    // vertex_buffer: one slot for every possible 8-bit vertex index, so an index of 255 cannot
    // read past the end of the table.
    static unsigned char vertex_buffer[256 * 2];

    unsigned int data_pointer = 0;
    screen = true;
#ifndef VSYNC
    bool dma_screen = screen;
#endif

    memset(palette1, 0, sizeof palette1);
    memset(palette2, 0, sizeof palette2);

    multicore_launch_core1(core1setup);

// The scene data is read through this fixed offset from SCENE.
#define UCSCENE (SCENE + 0x03000000)
    while (1)
    {
        // Flip: the buffer just drawn becomes the displayed one.
        mutex_enter_blocking(&screen_mutex);
        screen = !screen;
        mutex_exit(&screen_mutex);

        // Only this loop writes `screen`, so the back-buffer selection holds for the frame.
        unsigned char(*back)[256] = screen ? screenbuf2 : screenbuf1;
        unsigned short *back_pal = screen ? palette2 : palette1;
        const unsigned short *front_pal = screen ? palette1 : palette2;

        // The new frame starts from the palette that is now on display.
        memcpy(back_pal, front_pal, sizeof palette1);

        unsigned char control = UCSCENE[data_pointer++];

        if (control & 0x1)
        {
            // Clear the back buffer and wait until the DMA has finished.
            dma_channel_set_write_addr(dma_clear, back, true);
            dma_channel_wait_for_finish_blocking(dma_clear);
        }
        if (control & 0x2)
        {
            // Read the two bytes in separate statements: incrementing data_pointer twice
            // inside one expression is unsequenced and leaves the byte order undefined.
            unsigned short mask_hi = (unsigned short)UCSCENE[data_pointer++];
            unsigned short mask_lo = (unsigned short)UCSCENE[data_pointer++];
            unsigned short bitmask = (mask_hi << 8) | mask_lo;

            for (unsigned int pos = 0; bitmask; pos++, bitmask <<= 1)
            {
                if (bitmask & 0x8000)
                {
                    unsigned short color_hi = (unsigned short)UCSCENE[data_pointer++];
                    unsigned short color_lo = (unsigned short)UCSCENE[data_pointer++];
                    unsigned short color = (color_hi << 8) | color_lo;
                    // Move the three 3-bit fields at bits 8-10, 4-6 and 0-2 of the source
                    // word to bits 2-4, 8-10 and 13-15 of the output pixel value.
                    color = ((color & 0x0700) >> 6) | ((color & 0x0070) << 4) | ((color & 0x0007) << 13);
                    back_pal[pos] = color;
                }
            }
        }
        if (control & 0x4)
        {
            // Indexed mode: load this frame's vertex table.
            unsigned int no_verts = (unsigned int)UCSCENE[data_pointer++] * 2;
            for (unsigned int i = 0; i < no_verts; i++)
            {
                vertex_buffer[i] = UCSCENE[data_pointer++];
            }
        }

        while (1)
        {
            unsigned int descriptor = UCSCENE[data_pointer++];
            if (descriptor == 0xFD)
            {
                // End of stream: restart from the beginning.
                data_pointer = 0;
                break;
            }
            else if (descriptor == 0xFE)
            {
                // End of frame: continue at the start of the next 64 KiB block.
                data_pointer = (data_pointer & 0xFFFF0000) + 0x10000;
                break;
            }
            else if (descriptor == 0xFF)
            {
                // End of frame: the next frame follows immediately.
                break;
            }

            unsigned char color = descriptor >> 4;
            unsigned char vertex_count = descriptor & 0xF;

            // Gather the polygon's vertices into `shape` as x,y pairs.
            unsigned int shape_pointer = 0;
            if (control & 0x4)
            {
                for (size_t i = 0; i < vertex_count; i++)
                {
                    unsigned int index = (unsigned int)UCSCENE[data_pointer++] * 2;
                    shape[shape_pointer++] = vertex_buffer[index];
                    shape[shape_pointer++] = vertex_buffer[index + 1];
                }
            }
            else
            {
                for (size_t i = 0; i < vertex_count; i++)
                {
                    shape[shape_pointer++] = UCSCENE[data_pointer++];
                    shape[shape_pointer++] = UCSCENE[data_pointer++];
                }
            }

            // plot_shape() starts at the vertex with the smallest y; on ties the first wins.
            int lowest_pos = 0;
            int lowest = 512;
            for (unsigned int v = 0; v < vertex_count; v++)
            {
                const int y = shape[2 * v + 1];
                if (y < lowest)
                {
                    lowest = y;
                    lowest_pos = (int)(2 * v);
                }
            }

            plot_shape(shape, back, color, vertex_count, lowest_pos);
        }
    }
    return 0;
}
