import struct
import os
import sys
import hashlib
import argparse

# No need for PE-parsing :)
# The layout of the single supported executable is hard-coded below.
# Names ending in "_address" are virtual addresses, names ending in "_pos"
# are offsets into the file on disk.

# Added to the RVAs stored in the relocation table to get virtual addresses.
base_address = 0x400000
# Virtual address and file offset used to translate text-section addresses.
text_address = 0x401000
text_pos = 0x400
# Virtual address and file offset used to translate data-section addresses.
data_address = 0x44d000
data_pos = 0x4b000
# File offset of the relocation table, and the number of bytes that a
# rewritten table is zero-padded to.
reloc_pos = 0x5a600
reloc_padded_size = 0x5400

# Address range [start, end) of the existing function whose relocation
# entries are dropped; its size is the upper limit for the injected code.
replaced_func_start = 0x4126a8
replaced_func_end = 0x41283d
replaced_func_size = replaced_func_end - replaced_func_start

def convert_pos(pos):
    """Translate a virtual address into an offset in the executable file.

    Args:
        pos: a virtual address inside the text or data section, or a raw
            file offset (for example a header field such as 0x124).

    Returns:
        The file offset that corresponds to ``pos``.  Addresses at or above
        ``data_address`` are mapped through the data section, addresses at
        or above ``text_address`` through the text section, and anything
        lower is treated as a file offset already and returned unchanged.
    """
    if pos >= data_address:
        return pos - data_address + data_pos
    # Compare against the section's virtual address, not its file offset:
    # testing against text_pos would turn every value between text_pos and
    # text_address into a negative offset, which silently indexes from the
    # end of the buffer.
    if pos >= text_address:
        return pos - text_address + text_pos
    return pos
    
def write_byte(data, pos, value):
    """Store the single byte ``value`` in ``data`` at location ``pos``.

    ``pos`` is translated to a file offset with ``convert_pos`` first.
    """
    offset = convert_pos(pos)
    data[offset] = value

def read_byte(data, pos):
    """Return the byte of ``data`` found at location ``pos``.

    ``pos`` is translated to a file offset with ``convert_pos`` first.
    """
    offset = convert_pos(pos)
    return data[offset]

def write_bytes(data, pos, values):
    """Write the byte sequence ``values`` into ``data`` starting at ``pos``.

    Every byte goes through ``write_byte`` on its own, so each location is
    translated individually.
    """
    for index, byte in enumerate(values):
        write_byte(data, pos + index, byte)

def read_bytes(data, pos, size):
    """Return a new ``bytearray`` with ``size`` bytes read from ``pos``.

    Every byte is fetched with ``read_byte``, so each location is
    translated individually.
    """
    result = bytearray(size)
    for index in range(size):
        location = pos + index
        result[index] = read_byte(data, location)
    return result
        
def write_dword(data, pos, value):
    """Write ``value`` at ``pos`` as a little-endian 32-bit integer.

    The value is masked to 32 bits first, so negative numbers are stored
    in two's complement form.
    """
    packed = struct.pack('<L', value & 0xffffffff)
    write_bytes(data, pos, packed)

def read_dword(data, pos):
    """Return the little-endian unsigned 32-bit integer stored at ``pos``."""
    raw = read_bytes(data, pos, 4)
    (value,) = struct.unpack('<L', raw)
    return value

def write_float(data, pos, value):
    """Write ``value`` at ``pos`` as a little-endian 32-bit float."""
    packed = struct.pack('<f', value)
    write_bytes(data, pos, packed)

def write_pc32(data, pos, target_address):
    """Write a 32-bit displacement at ``pos`` that reaches ``target_address``.

    The displacement is measured from the end of the four-byte field
    (``pos + 4``), not from ``pos`` itself.
    """
    field_end = pos + 4
    write_dword(data, pos, target_address - field_end)

def read_reloc_table(data):
    """Parse the relocation table into a list of ``(address, type)`` tuples.

    The table size is read from the dword at file offset 0x124 and the
    table itself from file offset ``reloc_pos``.  The table is a sequence
    of blocks: a page RVA and a block size (two little-endian dwords)
    followed by 16-bit entries.  The low 12 bits of an entry are the offset
    inside the page, the high 4 bits are the relocation type.

    Args:
        data: buffer holding the whole executable file.

    Returns:
        A list of ``(address, type)`` tuples in table order, where
        ``address`` is the page RVA plus the entry offset plus
        ``base_address``.

    Raises:
        ValueError: if the table is truncated, a block has an impossible
            size, or anything but zero padding follows a zero-sized
            terminator block.
    """
    # Read size from the section header
    table_size = read_dword(data, 0x124)
    table = data[reloc_pos:reloc_pos + table_size]
    if len(table) != table_size:
        raise ValueError("relocation table extends past the end of the file")

    output = []
    pos = 0
    while pos < table_size:
        if table_size - pos < 8:
            raise ValueError(
                f"truncated relocation block header at table offset {pos:#x}")
        rva, block_size = struct.unpack_from("<LL", table, pos)

        if block_size == 0:
            # A zero-sized block ends the table early; accept it as long as
            # nothing but zero padding follows.
            if any(table[pos:]):
                raise ValueError(
                    f"unexpected data after the relocation table terminator "
                    f"at table offset {pos:#x}")
            break

        block_end = pos + block_size
        # The size includes the 8 byte header and entries are 2 bytes each,
        # so it must be even, at least 8 and stay within the table.
        if block_size < 8 or block_size % 2 or block_end > table_size:
            raise ValueError(
                f"bad relocation block size {block_size:#x} "
                f"at table offset {pos:#x}")

        for entry_pos in range(pos + 8, block_end, 2):
            w, = struct.unpack_from("<H", table, entry_pos)
            output.append(((w & 4095) + rva + base_address, w >> 12))
        pos = block_end

    return output

def write_reloc_table(data, table):
    """Serialise ``table`` and store it as the executable's relocation table.

    Entries are sorted, grouped into one block per 4k page and packed as a
    page RVA, a block size and 16-bit entries (type in the high 4 bits,
    page offset in the low 12 bits).  The resulting size is written to the
    dword at file offset 0x124 and the table, zero-padded to
    ``reloc_padded_size`` bytes, replaces the bytes at ``reloc_pos``.

    Args:
        data: mutable buffer holding the whole executable; modified in
            place.
        table: iterable of ``(address, type)`` tuples, where ``address`` is
            a virtual address (``base_address`` included).

    Raises:
        ValueError: if the serialised table is larger than
            ``reloc_padded_size``.
    """
    # Group into 4k RVAs.  Sorting first makes both the pages and the
    # entries inside each page come out in ascending order.
    pages = {}
    for address, reloc_type in sorted(table):
        if reloc_type == 0:
            # Type 0 entries are padding that the loader skips.  Drop the
            # old ones; padding is regenerated below to match the new
            # entry count of every page.
            continue
        rva = address - base_address
        entry = (rva & 4095) | (reloc_type << 12)
        pages.setdefault(rva & -4096, []).append(entry)

    # Create data
    blocks = []
    for page_rva, entries in pages.items():
        if len(entries) % 2:
            # The PE format requires every block to start on a 32-bit
            # boundary, so pad odd-length blocks with one type 0 entry.
            entries = entries + [0]
        block_size = 8 + 2 * len(entries)
        blocks.append(struct.pack('<LL%dH' % len(entries),
                                  page_rva, block_size, *entries))
    table_data = b''.join(blocks)

    # Checked explicitly (not with assert) so that an oversized table can
    # never overwrite the bytes following the reserved region.
    if len(table_data) > reloc_padded_size:
        raise ValueError(
            f"relocation table needs {len(table_data)} bytes, but only "
            f"{reloc_padded_size} bytes are available")

    # Write new size to the section header
    write_dword(data, 0x124, len(table_data))

    # Pad with 0
    table_data += b'\x00' * (reloc_padded_size - len(table_data))

    # Replace relocation table data
    data[reloc_pos:reloc_pos + len(table_data)] = table_data

    
    

def remove_range_from_reloc_table(table, from_address, to_address):
    """Drop every entry whose address lies in ``[from_address, to_address)``.

    ``table`` is a list of ``(address, type)`` tuples and is modified in
    place; the order of the remaining entries is kept.
    """
    table[:] = [
        entry for entry in table
        if not (from_address <= entry[0] < to_address)
    ]

def add_relocations_from_code(code, code_address, table, addresses):
    """Append a type 3 relocation for every listed address found in ``code``.

    Every byte offset of ``code`` is inspected as a little-endian dword;
    whenever the dword equals one of ``addresses`` the tuple
    ``(code_address + offset, 3)`` is appended to ``table``.  This is a
    plain value search, so it relies on the addresses not showing up by
    coincidence.  Asserts that each address was found at least once.
    """
    wanted = set(addresses)
    found = set()
    for offset in range(len(code) - 3):
        (dword,) = struct.unpack_from('<L', code, offset)
        if dword not in wanted:
            continue
        table.append((code_address + offset, 3))
        found.add(dword)
    assert wanted == found
            
def pop_address(data):
    """Split the trailing little-endian dword off ``data``.

    Returns a ``(remaining, value)`` tuple: ``data`` without its last four
    bytes, and those four bytes decoded as an unsigned 32-bit integer.
    """
    tail = data[-4:]
    (value,) = struct.unpack('<L', tail)
    remaining = data[:-4]
    return remaining, value

def patch_negative_fix(data, reloc_table, negative_fix_address, negative_fix_func_address):
    """Redirect eight indirect calls through a new function pointer.

    Each call site must hold the bytes ``ff 15 90 cc 42 01`` (an indirect
    call through the pointer at 0x0142cc90); this is asserted before the
    site is touched.  The four address bytes after ``ff 15`` are replaced
    with ``negative_fix_address``.  Afterwards
    ``negative_fix_func_address`` is stored at ``negative_fix_address`` and
    a type 3 relocation for that pointer is appended to ``reloc_table``.
    """
    expected_call = bytes([0xff, 0x15, 0x90, 0xcc, 0x42, 0x01])
    call_sites = (
        0x421a14, 0x421cf0, 0x42307e, 0x423385,
        0x423788, 0x423bd9, 0x424145, 0x4244d6,
    )

    # Patch calls: skip the two opcode bytes and replace the pointer operand
    for call_site in call_sites:
        assert read_bytes(data, call_site, 6) == expected_call
        operand = call_site + 2
        write_dword(data, operand, negative_fix_address)

    # Write function pointer; it holds an absolute address, so it needs a
    # relocation entry of its own
    write_dword(data, negative_fix_address, negative_fix_func_address)
    reloc_table.append((negative_fix_address, 3))

def patch(data, code_filename):
    """Apply all fixes to the executable image ``data`` in place.

    ``code_filename`` names a binary blob that holds the replacement
    machine code followed by a trailer of little-endian dwords.  The
    trailer is consumed from the end of the file backwards: first ten
    fixed addresses (``code_address`` is the very last dword of the file),
    then a zero-terminated list of addresses that occur in the code and
    need relocation entries.

    Args:
        data: mutable buffer holding the whole executable.
        code_filename: path of the code blob.

    Returns:
        The same ``data`` object, after patching.

    Raises:
        ValueError: if the code is larger than the function it replaces.

    Prints a message and exits with status 1 if the blob does not exist.
    """
    # Read code
    try:
        with open(code_filename, 'rb') as code_file:
            code = code_file.read()
    except FileNotFoundError:
        print(f"Could not find {code_filename}")
        sys.exit(1)

    # Read relocation table
    reloc_table = read_reloc_table(data)

    # Get addresses from code
    code,code_address = pop_address(code)
    code,memset_fix_address = pop_address(code)
    code,negative_fix_address = pop_address(code)
    code,negative_fix_func_address = pop_address(code)
    code,vblank_wait_address = pop_address(code)
    code,init_graphics_fix_address = pop_address(code)
    code,sync_fix_address = pop_address(code)
    code,init_address = pop_address(code)
    code,vblank_str_address = pop_address(code)
    code,sound_sync_str_address = pop_address(code)

    # Get addresses that should be relocated
    reloc_addresses = []
    code,address = pop_address(code)
    while address != 0:
        reloc_addresses.append(address)
        code,address = pop_address(code)

    # Pad code with nops.  The size is checked explicitly (not with assert)
    # so that oversized code can never spill past the replaced function.
    if len(code) > replaced_func_size:
        raise ValueError(
            f"Patch code is {len(code)} bytes, but only "
            f"{replaced_func_size} bytes are available")
    code += b'\x90' * (replaced_func_size - len(code))

    # Remove window border by adding some style flags in the
    # CreateWindowEx call.
    WS_POPUP = 0x80000000
    WS_CLIPSIBLINGS = 0x04000000
    WS_CLIPCHILDREN = 0x02000000

    flags = read_dword(data, 0x413212+1)
    write_dword(data, 0x413212+1, flags | WS_POPUP | WS_CLIPSIBLINGS | WS_CLIPCHILDREN)

    # Patch call to stosb in memset()
    write_pc32(data, 0x413d60+1, memset_fix_address)

    # Patch all calls that need the other negative fix
    patch_negative_fix(data, reloc_table, negative_fix_address, negative_fix_func_address)

    # Patch call to the timeline function for the sound sync fix
    write_pc32(data, 0x4135d4+1, sync_fix_address)

    # Patch call to init graphics function for the scaling fix
    write_pc32(data, 0x413408+1, init_graphics_fix_address)

    # Replace the function pointer for the function that copies pixels
    # for the vblank fix
    write_dword(data, 0x401381+1, vblank_wait_address)

    # Replace the call to WinMain with a call to our init
    write_pc32(data, 0x42d8c3+1, init_address)

    # Write /vblank and /soundsync string to data
    write_bytes(data, vblank_str_address, b'/vblank\x00')
    write_bytes(data, sound_sync_str_address, b'/soundsync\x00')

    # Remove knot deformation function from relocation table
    remove_range_from_reloc_table(reloc_table, replaced_func_start, replaced_func_end)

    # Add relocations from the new code
    add_relocations_from_code(code, code_address, reloc_table, reloc_addresses)

    # Write the new relocation table
    write_reloc_table(data, reloc_table)

    # Write the new code
    write_bytes(data, code_address, code)

    return data


def main():
    """Command line entry point: patch an executable and save the result.

    Takes the path of the executable as positional argument and the path
    of the code blob via ``-c``/``--code`` (default ``yume2k_fix``).  The
    input is only accepted if its SHA-256 digest matches the expected
    file.  The patched copy is written as ``YUME2K_FIX.EXE`` to the
    directory of the input file; the input itself is left untouched.

    Prints a message and exits with status 1 if the input file is missing
    or has the wrong digest.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--code", default='yume2k_fix')
    parser.add_argument("executable")
    args = parser.parse_args()

    dest_filename = os.path.join(os.path.dirname(args.executable), "YUME2K_FIX.EXE")

    try:
        with open(args.executable, 'rb') as source_file:
            data = bytearray(source_file.read())
    except FileNotFoundError:
        print(f"Could not find {args.executable}")
        sys.exit(1)

    # All patch locations are hard-coded, so refuse any other build.
    if hashlib.sha256(data).hexdigest() != '844aee0a33b36173eeb05674f644e7d6751eb1fbd4c54f61cd6b7baad2bc71c9':
        print("Wrong input file, use YUME2K.EXE from the update")
        sys.exit(1)

    patch(data, args.code)

    # Close the output explicitly so that everything is flushed to disk
    # before the script exits.
    with open(dest_filename, 'wb') as dest_file:
        dest_file.write(data)

    
# Run the patcher only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
       
