// A fork of the cSID music player by Hermit, currently aiming to improve the presentation of the source code.

// cSID by Hermit (Mihaly Horvath), (Year 2016..2017) http://hermit.sidrip.com
// (based on jsSID but totally reworked in C to be cycle-based & oversampled)
// License: WTF - Do what the fuck you want with this code, but I please mention me as its original author.

// This is a modified version of cSID, copyright (C) 2018 UltrasonicMadness

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <unistd.h>
#include <SDL/SDL_config.h>
#include <SDL/SDL.h>
#include <SDL/SDL_audio.h>

// 8-bit unsigned value; used below for file data, emulated memory and CPU registers.
typedef unsigned char byte;

// global constants and variables
#define C64_PAL_CPUCLK 985248
#define MAX_DATA_LEN 65536
#define PAL_FRAMERATE 49.4 // important to match, otherwise some ADSR-sensitive tunes suffer.

// raw output divided by this after multiplied by main volume, this also compensates for filter-resonance emphasis to avoid distortion
// NOTE(review): the definition this comment describes is not present in this view
// (main() modifies OUTPUT_SCALEDOWN) - confirm against the complete file.


const byte FILTSW[9] = {1, 2, 4, 1, 2, 4, 1, 2, 4};
// State arrays with 9 entries are indexed by the `channel` variable in SID();
// presumably 3 SIDs x 3 channels - confirm against SID_CHANNEL_AMOUNT.
byte ADSRstate[9], expcnt[9], envcnt[9], sourceMSBrise[9];
unsigned int clock_ratio = 22, ratecnt[9], prevwfout[9];
unsigned long int phaseaccu[9], prevaccu[9], sourceMSB[3], noise_LFSR[9];
long int prevlowpass[3], prevbandpass[3];
float cutoff_ratio_8580, cutoff_ratio_6581, cutoff_bias_6581;
// SID_model[] holds one model number per SID; requested_SID_model is the
// optional command-line override (-1 when none was given).
int SIDamount = 1, SID_model[3] = {8580, 8580, 8580}, requested_SID_model = -1, sampleratio;
// filedata[] is the raw input file, memory[] the emulated address space that
// CPU() and SID() read and write.
byte filedata[MAX_DATA_LEN], memory[MAX_DATA_LEN], timermode[0x20], SIDtitle[0x20], SIDauthor[0x20], SIDinfo[0x20];
int subtune = 0, tunelength = -1;
// playaddf keeps the play address read from the file header; playaddr is the
// address actually used by play() and may be replaced in init().
unsigned int initaddr, playaddr, playaddf, SID_address[3] = {0xD400, 0, 0};
// NOTE(review): DEFAULT_SAMPLERATE is not defined in the visible part of the file.
long int samplerate = DEFAULT_SAMPLERATE;
int framecnt = 0, frame_sampleperiod = DEFAULT_SAMPLERATE / PAL_FRAMERATE;

// CPU (and CIA/VIC-IRQ) emulation constants and variables - avoiding internal/automatic variables to retain speed
// flagsw[] is indexed by IR >> 5 for the flag set/clear instructions and
// branchflag[] by IR >> 6 for relative branches (see CPU()).
const byte flagsw[] = {0x01, 0x21, 0x04, 0x24, 0x00, 0x40, 0x08, 0x28}, branchflag[] = {0x80, 0x40, 0x01, 0x02};
unsigned int PC = 0, pPC = 0, addr = 0, storadd = 0;
// A and T are wider than 8 bits so carry/borrow can be detected before masking with 0xFF.
short int A = 0, T = 0, SP = 0xFF;

// STATUS-flags: N V - B D I Z C
byte X = 0, Y = 0, IR = 0, ST = 0x00;

char CPUtime = 0, cycles = 0, finished = 0, dynCIA = 0;

// function prototypes
// SID emulation: one-time setup, per-sample output of one SID, register reset.
void cSID_init(int samplerate);
int SID(char num, unsigned int baseaddr);
void initSID();
// Resets the CPU registers and sets the program counter to mempos.
void initCPU(unsigned int mempos);

// Executes one instruction; returns 0xFE/0xFF when an RTI/RTS is reached with
// SP >= 0xFF, otherwise 0.
byte CPU();
// Prepares playback of the given subtune.
void init(byte subtune);

// SDL audio callback (installed via soundspec.callback in main()).
void play(void* userdata, Uint8 *stream, int len);
unsigned int combinedWF(char num, char channel, unsigned int* wfarray, int index, char differ6581);
void createCombinedWF(unsigned int* wfarray, float bitmul, float bitstrength, float treshold);

//----------------------------- MAIN thread ----------------------------

// Program entry point.
// Parses the command line (argv[1] input file, argv[2] subtune number,
// argv[3] requested SID model, argv[4] tune length), reads the input file into
// filedata[], copies its payload into the emulated memory[], prints the header
// fields and opens the SDL audio device with play() as its callback.
// Returns 1 when no input file is given or it cannot be opened, otherwise 0.
// NOTE(review): in this view the block delimiters and several keywords or
// statements (for example the `do` of the read loop and the `else` branches)
// are absent, so nesting is only indicated by indentation. Confirm against the
// complete file before changing any logic here.
int main(int argc, char *argv[])
    // NOTE(review): floating-point literals are used to initialise an int array.
    int readata, strend, subtune_amount, preferred_SID_model[3] = {8580.0, 8580.0, 8580.0};
    unsigned int i, datalen, offs, loadaddr;
    FILE *InputFile;
    // wait a bit to avoid keypress leftover (btw this might not happen in Linux)
    // open and process the file
    if (argc < 2)
        printf("\nUsage: csid <inputfile> [ subtune_number [SID_modelnumber [seconds]] ]\n\n");
        return 1;
    if (argc >= 3)
        sscanf(argv[2], "%d", &subtune);
        // Out-of-range subtune numbers fall back to 0.
        if (subtune < 0 || subtune > 63)
            subtune = 0;
        // NOTE(review): as shown, this assignment is unconditional and discards the
        // parsed value; presumably it belonged to an else branch - confirm.
        subtune = 0;
    if (argc >= 4)
        sscanf(argv[3], "%d", &requested_SID_model);
    if (argc >= 5)
        sscanf(argv[4], "%d", &tunelength);
    InputFile = fopen(argv[1], "rb");
    if (InputFile == NULL)
        printf("File not found.\n");
        return 1;
    // Read the file byte by byte until EOF or until filedata[] is full.
    // The EOF marker itself is stored too, hence the --datalen in the printf below.
    datalen = 0;
        readata = fgetc(InputFile);
        filedata[datalen++] = readata;
    while (readata != EOF && datalen < MAX_DATA_LEN);
    printf("\n%d bytes read (%s subtune %d)", --datalen, argv[1], subtune + 1);
    // Header byte 7 is the offset of the payload inside the file.
    offs = filedata[7];
    // Load address: header bytes 8..9 (high byte first) when non-zero, otherwise the
    // first two payload bytes (low byte first).
    loadaddr = filedata[8] + filedata[9] ? filedata[8] * 256 + filedata[9] : filedata[offs] + filedata[offs + 1] * 256;
    printf("\nOffset: $%4.4X, Loadaddress: $%4.4X \nTimermodes:", offs, loadaddr);
    // Unpack the 32 bits stored in header bytes 0x12..0x15 into timermode[0..31].
    for (i = 0; i < 32; i++)
        timermode[31 - i] = (filedata[0x12 + (i >> 3)] & (byte)pow(2, 7 - i % 8)) ? 1 : 0;
        printf(" %1d", timermode[31 - i]);
    for (i = 0; i < MAX_DATA_LEN; i++)
        memory[i] = 0;
    // Copy the payload (after the two load-address bytes) into emulated memory,
    // skipping anything that would land beyond the end of memory[].
    for (i = offs + 2; i < datalen; i++)
        if (loadaddr + i - (offs + 2) < MAX_DATA_LEN)
            memory[loadaddr + i - (offs + 2)] = filedata[i];
    // The three loops below copy the 32-byte text fields at 0x16, 0x36 and 0x56.
    // NOTE(review): as shown, both assignments follow the same `if`; presumably the
    // second one was an else branch that zero-fills after the terminator - confirm.
    strend = 1;
    for (i = 0; i < 32; i++)
        if (strend != 0)
            strend = SIDtitle[i] = filedata[0x16 + i];
            strend = SIDtitle[i] = 0;
    printf("\nTitle: %s    ", SIDtitle);
    strend = 1;
    for (i = 0; i < 32; i++)
        if (strend != 0)
            strend = SIDauthor[i] = filedata[0x36 + i];
            strend = SIDauthor[i] = 0;
    printf("Author: %s    ", SIDauthor);
    strend = 1;
    for (i = 0; i < 32; i++)
        if (strend != 0)
            strend = SIDinfo[i] = filedata[0x56 + i];
            strend = SIDinfo[i] = 0;
    printf("Info: %s", SIDinfo);
    // Init address: header bytes 0xA..0xB when non-zero, otherwise the load address.
    initaddr = filedata[0xA] + filedata[0xB] ? filedata[0xA] * 256 + filedata[0xB] : loadaddr;
    playaddr = playaddf = filedata[0xC] * 256 + filedata[0xD];
    printf("\nInit:$%4.4X,Play:$%4.4X, ", initaddr, playaddr);
    subtune_amount = filedata[0xF];
    // Preferred chip model per SID, decoded from bit fields of header bytes 0x76/0x77.
    preferred_SID_model[0] = (filedata[0x77] & 0x30) >= 0x20 ? 8580 : 6581;
    printf("Subtunes:%d , preferred SID-model:%d", subtune_amount, preferred_SID_model[0]);
    preferred_SID_model[1] = (filedata[0x77] & 0xC0) >= 0x80 ? 8580 : 6581;
    preferred_SID_model[2] = (filedata[0x76] & 3) >= 3 ? 8580 : 6581;
    // Base addresses of the optional 2nd/3rd SID: 0xD000 + byte * 16 when the byte is
    // in 0x42..0x7F or 0xE0..0xFF, otherwise 0 (that SID is treated as absent below).
    SID_address[1] = filedata[0x7A] >= 0x42 && (filedata[0x7A] < 0x80 || filedata[0x7A] >= 0xE0) ? 0xD000 + filedata[0x7A] * 16 : 0;
    SID_address[2] = filedata[0x7B] >= 0x42 && (filedata[0x7B] < 0x80 || filedata[0x7B] >= 0xE0) ? 0xD000 + filedata[0x7B] * 16 : 0;
    SIDamount = 1 + (SID_address[1] > 0) + (SID_address[2] > 0);
    if (SIDamount >= 2)
        printf("(SID1), %d(SID2:%4.4X)", preferred_SID_model[1], SID_address[1]);
    if (SIDamount == 3)
        printf(", %d(SID3:%4.4X)", preferred_SID_model[2], SID_address[2]);
    if (requested_SID_model != -1)
        printf(" (requested:%d)", requested_SID_model);
    samplerate = DEFAULT_SAMPLERATE;
    // NOTE(review): both operands are integers, so the division truncates before
    // round() is applied.
    sampleratio = round(C64_PAL_CPUCLK / samplerate);
    // NOTE(review): SDL errors are reported but, as shown, execution continues.
    if (SDL_Init(SDL_INIT_AUDIO) < 0)
        fprintf(stderr, "Couldn't initialize SDL: %s\n", SDL_GetError());
    // Mono, signed 16-bit output; play() is called by SDL to fill the buffer.
    SDL_AudioSpec soundspec;
    soundspec.freq = samplerate;
    soundspec.channels = 1;
    soundspec.format = AUDIO_S16;
    soundspec.samples = 16384;
    soundspec.userdata = NULL;
    soundspec.callback = play;
    if (SDL_OpenAudio(&soundspec, NULL) < 0)
        fprintf(stderr, "Couldn't open audio: %s\n", SDL_GetError());
    // A valid requested model overrides the per-SID preference from the file.
    // NOTE(review): as shown, the second assignment always wins; presumably it was
    // an else branch - confirm.
    for (i = 0; i < SIDamount; i++)
        if (requested_SID_model == 8580 || requested_SID_model == 6581)
            SID_model[i] = requested_SID_model;
            SID_model[i] = preferred_SID_model[i];
    // NOTE(review): OUTPUT_SCALEDOWN is not declared in the visible part of the file.
    if (SIDamount == 2)
        OUTPUT_SCALEDOWN /= 0.6;
    else if (SIDamount >= 3)
        OUTPUT_SCALEDOWN /= 0.4;
    if (tunelength != -1)
        printf("Press Enter to abort playback...\n");
    return 0;

// Prepares playback of subtune `subt`.
// Stores the subtune number in the global `subtune` and in the accumulator A,
// runs CPU() for at most 100001 instructions, then derives frame_sampleperiod
// and the play address and resets framecnt, finished and CPUtime.
// NOTE(review): block delimiters and keywords such as `else`/`break` are absent
// in this view, so nesting is only indicated by indentation - confirm against
// the complete file.
void init(byte subt)
    static long int timeout;
    subtune = subt;
    A = subtune;
    memory[1] = 0x37;
    memory[0xDC05] = 0;
    // Bounded run of the emulated CPU so a tune that never returns cannot hang here.
    for (timeout = 100000; timeout >= 0; timeout--)
        if (CPU())
    if (timermode[subtune] || memory[0xDC05]) // && playaddf {   //CIA timing
        if (!memory[0xDC05])
            // C64 startup-default
            memory[0xDC04] = 0x24;
            memory[0xDC05] = 0x40;
        // Frame length in samples from the 16-bit value at 0xDC04/0xDC05.
        frame_sampleperiod = (memory[0xDC04] + memory[0xDC05] * 256) / clock_ratio;
        // Vsync timing
        frame_sampleperiod = samplerate / PAL_FRAMERATE;
    printf("Frame-sampleperiod: %d samples  (%.2fX speed)\n", frame_sampleperiod, (double)(samplerate / PAL_FRAMERATE) / frame_sampleperiod);
    // frame_sampleperiod = (memory[0xDC05] != 0 || (!timermode[subtune] && playaddf)) ? samplerate / PAL_FRAMERATE : (memory[0xDC04] + memory[0xDC05] * 256) / clock_ratio;
    // A zero play address in the file means the play routine is taken from a vector
    // in memory: 0xFFFE/0xFFFF when (memory[1] & 3) < 2, otherwise 0x314/0x315.
    if (playaddf == 0)
        playaddr = ((memory[1] & 3) < 2) ? memory[0xFFFE] + memory[0xFFFF] * 256 : memory[0x314] + memory[0x315] * 256;
        printf("IRQ-playaddress:%4.4X\n", playaddr);
        // player under KERNAL (Crystal Kingdom Dizzy)
        playaddr = playaddf;
        if (playaddr >= 0xE000 && memory[1] == 0x37)
            memory[1] = 0x35;
    framecnt = 1;
    finished = 0;
    CPUtime = 0;

// SDL audio callback: fills `stream` with `len` bytes of 16-bit samples,
// low byte first. `userdata` is not used.
// For every output sample the inner loop runs `sampleratio` times, stepping
// CPU() while the current frame's play routine has not finished and summing
// the output of each active SID; the sum divided by sampleratio is the sample.
// NOTE(review): block delimiters are absent in this view, and the lines after
// the stream writes look like the remains of a block comment whose delimiters
// are missing. Confirm against the complete file.
void play(void* userdata, Uint8 *stream, int len) // called by SDL at samplerate pace
    static int i, j, output;
    static float average;
    for (i = 0; i < len; i += 2)
        // Start of a new frame: restart the play routine at playaddr.
        if (framecnt <= 0)
            framecnt = frame_sampleperiod;
            finished = 0;
            PC = playaddr;
            SP = 0xFF;
        // printf("%d  %f\n",framecnt,playtime); }
        average = 0.0;
        for (j = 0; j < sampleratio; j++)
            // `cycles` counts down the duration of the previously executed instruction.
            if (finished == 0 && --cycles <= 0)
                pPC = PC;
                if (CPU() >= 0xFE || ((memory[1] & 3) > 1 && pPC < 0xE000 && (PC == 0xEA31 || PC == 0xEA81)))
                    finished = 1; // IRQ player ROM return handling
                if ((addr == 0xDC05 || addr == 0xDC04) && (memory[1] & 3) && timermode[subtune])
                    // dynamic CIA-setting (Galway/Rubicon workaround)
                    frame_sampleperiod = (memory[0xDC04] + memory[0xDC05] * 256) / clock_ratio;
                    if (!dynCIA)
                        dynCIA = 1;
                        printf("( Dynamic CIA settings. New frame-sampleperiod: %d samples  (%.2fX speed) )\n", frame_sampleperiod, (double)(samplerate / PAL_FRAMERATE) / frame_sampleperiod);
                if (storadd >= 0xD420 && storadd < 0xD800 && (memory[1] & 3))
                    // CJ in the USA workaround (writing above $d420, except SID2/SID3)
                    if (!(SID_address[1] <= storadd && storadd < SID_address[1] + 0x1F) && !(SID_address[2] <= storadd && storadd < SID_address[2] + 0x1F))
                        memory[storadd & 0xD41F] = memory[storadd]; // write to $D400..D41F if not in SID2/SID3 address-space
            average += SID(0, 0xD400);
            if (SIDamount >= 2)
                average += SID(1, SID_address[1]);
            if (SIDamount == 3)
                average += SID(2, SID_address[2]);
        output = average / sampleratio;
        stream[i] = output & 0xFF;
        stream[i + 1] = output >> 8;
     * mix = SID(0,0xD400);
     * if (SID_address[1])
     *     mix += SID(1, SID_address[1]);
     * if (SID_address[2])
     *     mix += SID(2, SID_address[2]);
     * return mix * volume * SIDamount_vol[SIDamount] + (Math.random() * background_noise - background_noise / 2);

//--------------------------------- CPU emulation -------------------------------------------

void initCPU(unsigned int mempos)
    PC = mempos;
    A = 0;
    X = 0;
    Y = 0;
    ST = 0;
    SP = 0xFF;

// Executes the single instruction found at memory[PC].
// Updates the global register and flag state (A, X, Y, SP, ST, PC), sets
// `cycles` to the cycle count of the instruction, leaves the effective address
// in `addr`, and records the target of STA/SAX, STX and STY in `storadd`
// (which is reset to 0 at the start of every call).
// Returns 0xFE for an RTI and 0xFF for an RTS that is reached while SP >= 0xFF,
// and 0 for every other instruction.
// NOTE(review): in this view the block delimiters, the `break`/`else` keywords
// and the bare increment/decrement statements are absent. For example the
// INY/INX/DEY/DEX/INC/DEC and push/pull cases show only the masking and flag
// updates. Nesting is only indicated by indentation; confirm against the
// complete file before changing anything.
// NOTE(review): expressions of the form memory[++PC] + memory[++PC] * 256
// modify PC twice without an intervening sequence point, so the evaluation
// order of the two operand fetches is not defined by the C standard.
byte CPU() // the CPU emulation for SID/PRG playback (ToDo: CIA/VIC-IRQ/NMI/RESET vectors, BCD-mode)
    // 'IR' is the instruction-register, naming after the hardware-equivalent
    IR = memory[PC];
    // 'cycle': ensure smallest 6510 runtime (for implied/register instructions)
    cycles = 2;
    storadd = 0;
    if (IR & 1) // nybble2:  1/5/9/D:accu.instructions, 3/7/B/F:illegal opcodes
        switch (IR & 0x1F) // addressing modes (begin with more complex cases), PC wraparound not handled inside to save codespace
            case 1:
            case 3:
                // (zp,x)
                addr = memory[memory[++PC] + X] + memory[memory[PC] + X + 1] * 256;
                cycles = 6;
            case 0x11:
            case 0x13:
                // (zp),y
                addr = memory[memory[++PC]] + memory[memory[PC] + 1] * 256 + Y;
                cycles = 6;
            case 0x19:
            case 0x1B:
                // abs,y
                addr = memory[++PC] + memory[++PC] * 256 + Y;
                cycles = 5;
            case 0x1D:
                // abs,x
                addr = memory[++PC] + memory[++PC] * 256 + X;
                cycles = 5;
            case 0xD:
            case 0xF:
                // abs
                addr = memory[++PC] + memory[++PC] * 256;
                cycles = 4;
            case 0x15:
                // zp,x
                addr = memory[++PC] + X;
                cycles = 4;
            case 5:
            case 7:
                // zp
                addr = memory[++PC];
                cycles = 3;
            case 0x17:
                if ((IR & 0xC0) != 0x80)
                    // zp,x for illegal opcodes
                    addr = memory[++PC] + X;
                    cycles = 4;
                    // zp,y for LAX/SAX illegal opcodes
                    addr = memory[++PC] + Y;
                    cycles = 4;
            case 0x1F:
                if ((IR & 0xC0) != 0x80)
                    // abs,x for illegal opcodes
                    addr = memory[++PC] + memory[++PC] * 256 + X;
                    cycles = 5;
                    // abs,y for LAX/SAX illegal opcodes
                    addr = memory[++PC] + memory[++PC] * 256 + Y;
                    cycles = 5;
            case 9:
            case 0xB:
                // immediate
                addr = ++PC;
                cycles = 2;
        // keep the effective address inside the 64 KiB memory[] array
        addr &= 0xFFFF;
        switch (IR & 0xE0)
            case 0x60:
                if ((IR & 0x1F) != 0xB)
                    if ((IR & 3) == 3)
                        // ADC / RRA (ROR+ADC)
                        T = (memory[addr] >> 1) + (ST & 1) * 128;
                        ST &= 124;
                        ST |= (T & 1);
                        memory[addr] = T;
                        cycles += 2;
                    T = A;
                    A += memory[addr] + (ST & 1);
                    ST &= 60;
                    ST |= (A & 128) | (A > 255);
                    A &= 0xFF;
                    ST |= (!A) << 1 | (!((T ^ memory[addr]) & 0x80) & ((T ^ A) & 0x80)) >> 1;
                    // V-flag set by intermediate ADC mechanism: (A&mem)+mem
                    A &= memory[addr];
                    T += memory[addr] + (ST & 1);
                    ST &= 60;
                    ST |= (T > 255) | (!((A ^ memory[addr]) & 0x80) & ((T ^ A) & 0x80)) >> 1;
                    // ARR (AND+ROR, bit0 not going to C, but C and bit7 get exchanged.)
                    T = A;
                    A = (A >> 1) + (ST & 1) * 128;
                    ST |= (A & 128) | (T > 127);
                    ST |= (!A) << 1;
            case 0xE0:
                // SBC / ISC(ISB)=INC+SBC
                if ((IR & 3) == 3 && (IR & 0x1F) != 0xB)
                    cycles += 2;
                T = A;
                A -= memory[addr] + !(ST & 1);
                ST &= 60;
                ST |= (A & 128) | (A >= 0);
                A &= 0xFF;
                ST |= (!A) << 1 | (((T ^ memory[addr]) & 0x80) & ((T ^ A) & 0x80)) >> 1;
            case 0xC0:
                if((IR & 0x1F) != 0xB)
                    // CMP / DCP(DEC+CMP)
                    if ((IR & 3) == 3)
                        cycles += 2;
                    T = A - memory[addr];
                    // SBX(AXS)
                    X = T = (A & X) - memory[addr];
                // SBX (AXS) (CMP+DEX at the same time)
                ST &= 124;
                ST |= (!(T & 0xFF)) << 1 | (T & 128) | (T >= 0);
            case 0x00:
                if ((IR & 0x1F) != 0xB)
                    if ((IR & 3) == 3)
                        ST &= 124;
                        ST |= (memory[addr] > 127);
                        memory[addr] <<= 1;
                        cycles += 2;
                    // ORA / SLO(ASO)=ASL+ORA
                    A |= memory[addr];
                    ST &= 125;
                    ST |= (!A) << 1 | (A & 128);
                    // ANC (AND+Carry=bit7)
                    A &= memory[addr];
                    ST &= 124;
                    ST |= (!A) << 1 | (A & 128) | (A > 127);
            case 0x20:
                if ((IR & 0x1F) != 0xB)
                    if ((IR & 3) == 3)
                        T = (memory[addr] << 1) + (ST & 1);
                        ST &= 124;
                        ST |= (T > 255);
                        T &= 0xFF;
                        memory[addr] = T;
                        cycles += 2;
                    // AND / RLA (ROL+AND)
                    A &= memory[addr];
                    ST &= 125;
                    ST |= (!A) << 1 | (A & 128);
                    // ANC (AND+Carry=bit7)
                    A &= memory[addr];
                    ST &= 124;
                    ST |= (!A) << 1 | (A & 128) | (A > 127);
            case 0x40:
                if ((IR & 0x1F) != 0xB)
                    if ((IR & 3) == 3)
                        ST &= 124;
                        ST |= (memory[addr] & 1);
                        memory[addr] >>= 1;
                        cycles += 2;
                    // EOR / SRE(LSE)=LSR+EOR
                    A ^= memory[addr];
                    ST &= 125;
                    ST |= (!A) << 1 | (A & 128);
                    // ALR(ASR)=(AND+LSR)
                    A &= memory[addr];
                    ST &= 124;
                    ST |= (A & 1);
                    A >>= 1;
                    A &= 0xFF;
                    ST |= (A & 128) | ((!A) << 1);
            case 0xA0:
                if ((IR & 0x1F) != 0x1B)
                    // LDA / LAX (illegal, used by my 1 rasterline player)
                    A = memory[addr];
                    if ((IR & 3) == 3)
                        X = A;
                    // LAS(LAR)
                    A = X = SP = memory[addr] & SP;
                ST &= 125;
                ST |= ((!A) << 1) | (A & 128);
            case 0x80:
                if ((IR & 0x1F) == 0xB)
                    // XAA (TXA+AND), highly unstable on real 6502!
                    A = X & memory[addr];
                    ST &= 125;
                    ST |= (A & 128) | ((!A) << 1);
                else if ((IR & 0x1F) == 0x1B)
                    // TAS(SHS) (SP=A&X, mem=S&H} - unstable on real 6502
                    SP = A & X;
                    memory[addr] = SP & ((addr >> 8) + 1);
                    // STA / SAX (at times same as AHX/SHX/SHY) (illegal)
                    memory[addr] = A & (((IR & 3) == 3) ? X : 0xFF);
                    storadd = addr;
    else if (IR & 2) // nybble2:  2:illegal/LDX, 6:A/X/INC/DEC, A:Accu-shift/reg.transfer/NOP, E:shift/X/INC/DEC
        switch (IR & 0x1F) // addressing modes
            case 0x1E:
                // abs,x / abs,y
                addr = memory[++PC] + memory[++PC] * 256 + (((IR & 0xC0) != 0x80) ? X : Y);
                cycles = 5;
            case 0xE:
                // abs
                addr = memory[++PC] + memory[++PC] * 256;
                cycles = 4;
            case 0x16:
                // zp,x / zp,y
                addr = memory[++PC] + (((IR & 0xC0) != 0x80) ? X : Y);
                cycles = 4;
            case 6:
                // zp
                addr = memory[++PC];
                cycles = 3;
            case 2:
                // imm.
                addr = ++PC;
                cycles = 2;
        addr &= 0xFFFF;
        switch (IR & 0xE0)
            case 0x00:
                ST &= 0xFE;
            case 0x20:
                if ((IR & 0xF) == 0xA)
                    // ASL/ROL (Accu)
                    A = (A << 1) + (ST & 1);
                    ST &= 124;
                    ST |= (A & 128) | (A > 255);
                    A &= 0xFF;
                    ST |= (!A) << 1;
                    // RMW (Read-Modify-Write)
                    T = (memory[addr] << 1) + (ST & 1);
                    ST &= 124;
                    ST |= (T & 128) | (T > 255);
                    T &= 0xFF;
                    ST |= (!T) << 1;
                    memory[addr] = T;
                    cycles += 2;
            case 0x40:
                ST &= 0xFE;
            case 0x60:
                if ((IR & 0xF) == 0xA)
                    // LSR/ROR (Accu)
                    T = A;
                    A = (A >> 1) + (ST & 1) * 128;
                    ST &= 124;
                    ST |= (A & 128) | (T & 1);
                    A &= 0xFF;
                    ST |= (!A) << 1;
                    // memory (RMW)
                    T = (memory[addr] >> 1) + (ST & 1) * 128;
                    ST &= 124;
                    ST |= (T & 128) | (memory[addr] & 1);
                    T &= 0xFF;
                    ST |= (!T) << 1;
                    memory[addr] = T;
                    cycles += 2;
            case 0xC0:
                if (IR & 4)
                    // DEC
                    ST &= 125;
                    ST |= (!memory[addr]) << 1 | (memory[addr] & 128);
                    cycles += 2;
                    // DEX
                    X &= 0xFF;
                    ST &= 125;
                    ST |= (!X) << 1 | (X & 128);
            case 0xA0:
                // LDX/TSX/TAX
                if ((IR & 0xF) != 0xA)
                    X = memory[addr];
                else if (IR & 0x10)
                    X = SP;
                    X = A;
                ST &= 125;
                ST |= (!X) << 1 | (X & 128);
            case 0x80:
                // STX/TXS/TXA
                if (IR & 4)
                    memory[addr] = X;
                    storadd = addr;
                else if (IR & 0x10)
                    SP = X;
                    A = X;
                    ST &= 125;
                    ST |= (!A) << 1 | (A & 128);
            case 0xE0:
                // INC/NOP
                if (IR & 4)
                    ST &= 125;
                    ST |= (!memory[addr]) << 1 | (memory[addr] & 128);
                    cycles += 2;
    else if ((IR & 0xC) == 8) // nybble2:  8:register/status
        switch (IR & 0xF0)
            case 0x60:
                // PLA
                SP &= 0xFF;
                A = memory[0x100 + SP];
                ST &= 125;
                ST |= (!A) << 1 | (A & 128);
                cycles = 4;
            case 0xC0:
                // INY
                Y &= 0xFF;
                ST &= 125;
                ST |= (!Y) << 1 | (Y & 128);
            case 0xE0:
                // INX
                X &= 0xFF;
                ST &= 125;
                ST |= (!X) << 1 | (X & 128);
            case 0x80:
                // DEY
                Y &= 0xFF;
                ST &= 125;
                ST |= (!Y) << 1 | (Y & 128);
            case 0x00:
                // PHP
                memory[0x100 + SP] = ST;
                SP &= 0xFF;
                cycles = 3;
            case 0x20:
                // PLP
                SP &= 0xFF;
                ST = memory[0x100 + SP];
                cycles = 4;
            case 0x40:
                // PHA
                memory[0x100 + SP] = A;
                SP &= 0xFF;
                cycles = 3;
            case 0x90:
                // TYA
                A = Y;
                ST &= 125;
                ST |= (!A) << 1 | (A & 128);
            case 0xA0:
                // TAY
                Y = A;
                ST &= 125;
                ST |= (!Y) << 1 | (Y & 128);
                // CLC/SEC/CLI/SEI/CLV/CLD/SED
                // flagsw[] entry: bit 0x20 selects "set", the remaining bits are the flag mask
                if (flagsw[IR >> 5] & 0x20)
                    ST |= (flagsw[IR >> 5] & 0xDF);
                    ST &= 255 - (flagsw[IR >> 5] & 0xDF);
    else // nybble2:  0: control/branch/Y/compare  4: Y/compare  C:Y/compare/JMP
        if ((IR & 0x1F) == 0x10)
            // BPL/BMI/BVC/BVS/BCC/BCS/BNE/BEQ  relative branch
            T = memory[PC];
            // sign-extend the 8-bit branch offset
            if (T & 0x80)
                T -= 0x100;
            if (IR & 0x20)
                if (ST & branchflag[IR >> 6])
                    PC += T;
                    cycles = 3;
                if (!(ST & branchflag[IR >> 6]))
                    PC += T;
                    cycles = 3;
        else // nybble2:  0:Y/control/Y/compare  4:Y/compare  C:Y/compare/JMP
            switch (IR & 0x1F) // addressing modes
                case 0:
                    // imm. (or abs.low for JSR/BRK)
                    addr = ++PC;
                    cycles = 2;
                case 0x1C:
                    // abs,x
                    addr = memory[++PC] + memory[++PC] * 256 + X;
                    cycles = 5;
                case 0xC:
                    // abs
                    addr = memory[++PC] + memory[++PC] * 256;
                    cycles = 4;
                case 0x14:
                    // zp,x
                    addr = memory[++PC] + X;
                    cycles = 4;
                case 4:
                    // zp
                    addr = memory[++PC];
                    cycles = 3;
            addr &= 0xFFFF;
            switch (IR & 0xE0)
                case 0x00:
                    // BRK
                    memory[0x100 + SP] = PC % 256;
                    SP &= 0xFF;
                    memory[0x100 + SP] = PC / 256;
                    SP &= 0xFF;
                    memory[0x100 + SP] = ST;
                    SP &= 0xFF;
                    PC = memory[0xFFFE] + memory[0xFFFF] * 256 - 1;
                    cycles = 7;
                case 0x20:
                    if (IR & 0xF)
                        // BIT
                        ST &= 0x3D;
                        ST |= (memory[addr] & 0xC0) | (!(A & memory[addr])) << 1;
                        // JSR
                        memory[0x100 + SP] = (PC + 2) % 256;
                        SP &= 0xFF;
                        memory[0x100 + SP] = (PC + 2) / 256;
                        SP &= 0xFF;
                        PC = memory[addr] + memory[addr + 1] * 256 - 1;
                        cycles = 6;
                case 0x40:
                    if (IR & 0xF)
                        // JMP
                        PC = addr - 1;
                        cycles = 3;
                        // RTI
                        // SP >= 0xFF here ends the routine: reported to the caller as 0xFE
                        if (SP >= 0xFF)
                            return 0xFE;
                        SP &= 0xFF;
                        ST = memory[0x100 + SP];
                        SP &= 0xFF;
                        T = memory[0x100 + SP];
                        SP &= 0xFF;
                        PC = memory[0x100 + SP] + T * 256 - 1;
                        cycles = 6;
                case 0x60:
                    if (IR & 0xF)
                        // JMP() (indirect)
                        PC = memory[addr] + memory[addr + 1] * 256 - 1;
                        cycles = 5;
                        // RTS
                        // SP >= 0xFF here ends the routine: reported to the caller as 0xFF
                        if (SP >= 0xFF)
                            return 0xFF;
                        SP &= 0xFF;
                        T = memory[0x100 + SP];
                        SP &= 0xFF;
                        PC = memory[0x100 + SP] + T * 256 - 1;
                        cycles = 6;
                case 0xC0:
                    // CPY
                    T = Y - memory[addr];
                    ST &= 124;
                    ST |= (!(T & 0xFF)) << 1 | (T & 128) | (T >= 0);
                case 0xE0:
                    // CPX
                    T = X - memory[addr];
                    ST &= 124;
                    ST |= (!(T & 0xFF)) << 1 | (T & 128) | (T >= 0);
                case 0xA0:
                    // LDY
                    Y = memory[addr];
                    ST &= 125;
                    ST |= (!Y) << 1 | (Y & 128);
                case 0x80:
                    // STY
                    memory[addr] = Y;
                    storadd = addr;

    // if(IR==0xCB) //test SBX
    // printf("PC:%4.4X IR:%2.2X, addr: %4.4X,%2.2X,  storadd: %4.4X,%2.2X,  A:%2.2X, X:%2.2X, Y:%2.2X ST:%2.2X\n",PC,IR,addr,memory[addr],storadd,memory[storadd],A,X,Y,ST);
    // advance to the next opcode (jump targets above are stored minus one for this reason)
    PC++; // PC &= 0xFFFF;
    return 0;

//----------------------------- SID emulation -----------------------------------------

// Combined-waveform lookup tables; filled by createCombinedWF() in cSID_init().
unsigned int TriSaw_8580[4096], PulseSaw_8580[4096], PulseTriSaw_8580[4096];
// Rate-counter periods, selected in SID() by a 4-bit nibble of voice register 5 or 6.
int ADSRperiods[16] = {9, 32, 63, 95, 149, 220, 267, 313, 392, 977, 1954, 3126, 3907, 11720, 19532, 31251};

// Indexed by the current envelope counter value (envcnt[channel]) in SID() and
// compared there against the per-channel expcnt counter.
const byte ADSR_exptable[256] = {1, 30, 30, 30, 30, 30, 30, 16, 16, 16, 16, 16, 16, 16, 16, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, //pos0:1  pos6:30  pos14:16  pos26:8
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, //pos54:4 //pos93:2
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };

void cSID_init(int samplerate)
    int i;
    clock_ratio = round(C64_PAL_CPUCLK / samplerate);
    cutoff_ratio_8580 = -2 * 3.14 * (12500.0 / 2048) / C64_PAL_CPUCLK;
    cutoff_ratio_6581 = -2 * 3.14 * (20000.0 / 2048) / C64_PAL_CPUCLK;
    // around 220Hz below treshold
    cutoff_bias_6581 = 1 - exp(-2 * 3.14 * 220 / C64_PAL_CPUCLK);
    createCombinedWF(TriSaw_8580, 0.8, 2.4, 0.64);
    createCombinedWF(PulseSaw_8580, 1.4, 1.9, 0.68);
    createCombinedWF(PulseTriSaw_8580, 0.8, 2.5, 0.64);
    for (i = 0; i < 9; i++)
        ADSRstate[i] = HOLDZERO_BITMASK;
        envcnt[i] = 0;
        ratecnt[i] = 0;
        phaseaccu[i] = 0;
        prevaccu[i] = 0;
        expcnt[i] = 0;
        noise_LFSR[i] = 0x7FFFF8;
        prevwfout[i] = 0;
    for (i = 0; i < 3; i++)
        sourceMSBrise[i] = 0;
        sourceMSB[i] = 0;
        prevlowpass[i] = 0;
        prevbandpass[i] = 0;

void initSID()
    int i;
    for (i = 0xD400; i <= 0xD7FF; i++)
        memory[i] = 0;
    for (i = 0xDE00; i <= 0xDFFF; i++)
    for (i = 0; i < 9; i++)
        ADSRstate[i] = HOLDZERO_BITMASK;
        ratecnt[i] = envcnt[i] = expcnt[i] = 0;

// Computes one output value for SID chip `num`, whose registers are read from
// memory[] starting at `baseaddr`. For each of that chip's SID_CHANNEL_AMOUNT
// channels it evaluates the ADSR envelope logic and the oscillator/waveform
// selector, sums the channels into a filtered and an unfiltered path, runs the
// filter and returns the scaled mix.
//
// num:      chip index; selects the channel range and indexes the per-chip
//           arrays (SID_model, sourceMSB, prevlowpass, ...).
// baseaddr: offset of the chip's register block inside memory[].
// returns:  the mixed output limited to -32768..32767.
//
// NOTE(review): this block contains no braces, and in several places two
// statements that look like mutually exclusive branches follow each other with
// no `else` between them, so indentation is the only guide to the intended
// structure. The visible code also never increments ratecnt[], never
// increments/decrements envcnt[] and never counts filterctrl_prescaler[] down.
// These look like lost lines - confirm against a known-good copy before use.
int SID(char num, unsigned int baseaddr)
    // better keep these variables static so they won't slow down the routine like if they were internal automatic variables always recreated
    static byte channel, ctrl, SR, prevgate, wf, test, filterctrl_prescaler[3];
    static byte *sReg, *vReg;
    static unsigned int period, accuadd, pw, wfout;
    static unsigned long int MSB;
    // cutoff must be signed otherwise compiler may make errors in multiplications
    static int nonfilt, filtin, cutoff[3], resonance[3];
    // so if samplerate is smaller, cutoff needs to be 'long int' as its value can exceed 32768
    static long int output, filtout, ftmp;

    // filtin collects channels routed through the filter, nonfilt the rest.
    filtin = nonfilt = 0;
    // sReg addresses the chip's registers, vReg the registers of the current voice.
    sReg = &memory[baseaddr];
    vReg = sReg;
    // One pass per channel of this chip; each voice occupies 7 consecutive registers.
    for (channel = num * SID_CHANNEL_AMOUNT; channel < (num + 1) * SID_CHANNEL_AMOUNT; channel++, vReg += 7)
        ctrl = vReg[4];
        // ADSR envelope generator:
            SR = vReg[6];
            prevgate = (ADSRstate[channel] & GATE_BITMASK);
            // gatebit-change?
            if (prevgate != (ctrl & GATE_BITMASK))
                if (prevgate) // falling edge
                    ADSRstate[channel] &= 0xFF - (GATE_BITMASK | ATTACK_BITMASK | DECAYSUSTAIN_BITMASK);
                else // rising edge, also sets hold_zero_bit=0
                    ADSRstate[channel] = (GATE_BITMASK | ATTACK_BITMASK | DECAYSUSTAIN_BITMASK);
            // Pick the rate period for the current phase: high nibble of voice
            // register 5 while attacking, low nibble of register 5 in decay/sustain.
            // NOTE(review): the `SR & 0xF` assignment below directly follows the
            // decay one; presumably it belongs to a missing `else` - confirm.
            if (ADSRstate[channel] & ATTACK_BITMASK)
                period = ADSRperiods[vReg[5] >> 4];
            else if (ADSRstate[channel] & DECAYSUSTAIN_BITMASK)
                period = ADSRperiods[vReg[5] & 0xF];
                period = ADSRperiods[SR & 0xF];
            // can wrap around (ADSR delay-bug: short 1st frame)
            // NOTE(review): ratecnt is only masked here; no increment is visible.
            ratecnt[channel] &= 0x7FFF;
            if (ratecnt[channel] == period) // ratecounter shot (matches rateperiod) (in genuine SID ratecounter is LFSR)
                // reset rate-counter on period-match
                ratecnt[channel] = 0;
                // During attack every rate match counts; otherwise expcnt must first
                // reach the ADSR_exptable entry for the current envelope level.
                if ((ADSRstate[channel] & ATTACK_BITMASK) || ++expcnt[channel] == ADSR_exptable[envcnt[channel]])
                    expcnt[channel] = 0;
                    if (!(ADSRstate[channel] & HOLDZERO_BITMASK))
                        // NOTE(review): neither branch below changes envcnt in the
                        // visible code; an increment / decrement is presumably missing.
                        if (ADSRstate[channel] & ATTACK_BITMASK)
                            // leave the attack phase once the envelope is at 0xFF
                            if (envcnt[channel] == 0xFF)
                                ADSRstate[channel] &= 0xFF - ATTACK_BITMASK;
                        // (SR >> 4) + (SR & 0xF0) repeats the high nibble of SR in both
                        // halves of a byte; the envelope is compared against that value
                        // while DECAYSUSTAIN_BITMASK is set.
                        else if (!(ADSRstate[channel] & DECAYSUSTAIN_BITMASK) || envcnt[channel] != (SR >> 4) + (SR & 0xF0))
                            // resid adds 1 cycle delay, we omit that pipelining mechanism here
                            if (envcnt[channel] == 0)
                                ADSRstate[channel] |= HOLDZERO_BITMASK;
        // WAVE generation codes (phase accumulator and waveform-selector):
        test = ctrl & TEST_BITMASK;
        // upper nibble of the control register selects the waveform(s)
        wf = ctrl & 0xF0;
        // 16-bit value from voice registers 0 (low) and 1 (high), added to the accumulator
        accuadd = (vReg[0] + vReg[1] * 256);
        // Test bit, or sync with a rising source MSB, clears the 24-bit accumulator.
        // NOTE(review): the `+=` / mask lines presumably sat in a missing `else`.
        if (test || ((ctrl & SYNC_BITMASK) && sourceMSBrise[num]))
            phaseaccu[channel] = 0;
            phaseaccu[channel] += accuadd;
            phaseaccu[channel] &= 0xFFFFFF;
        MSB = phaseaccu[channel] & 0x800000;
        // 1 when the accumulator's top bit went from 0 to 1 since the previous call
        sourceMSBrise[num] = (MSB > (prevaccu[channel] & 0x800000)) ? 1 : 0;
        if (wf & NOISE_BITMASK)
            int tmp = noise_LFSR[channel];
            // shift the LFSR whenever accumulator bit 20 (0x100000) changes
            if (((phaseaccu[channel] & 0x100000) != (prevaccu[channel] & 0x100000)))
                // feedback: bit 22 XOR bit 17 (the latter moved up to bit 22)
                int step = (tmp & 0x400000) ^ ((tmp & 0x20000) << 5);
                tmp = ((tmp << 1) + (step ? 1 : test)) & 0x7FFFFF;
                noise_LFSR[channel] = tmp;
            // output assembled from selected LFSR bits; forced to 0 when any of the
            // other waveform bits (wf & 0x70) is set together with noise
            wfout = (wf & 0x70) ? 0 : ((tmp & 0x100000) >> 5) + ((tmp & 0x40000) >> 4) + ((tmp & 0x4000) >> 1) + ((tmp & 0x800) << 1) + ((tmp & 0x200) << 2) + ((tmp & 0x20) << 5) + ((tmp & 0x04) << 7) + ((tmp & 0x01) << 8);
        else if (wf & PULSE_BITMASK)
            // 12-bit pulse width from voice registers 2/3, scaled by 16 so it can be
            // compared with the upper 16 bits of the accumulator
            pw = (vReg[2] + (vReg[3] & 0xF) * 256) * 16;
            int tmp = phaseaccu[channel] >> 8;
            if (wf == PULSE_BITMASK)
                // NOTE(review): `wfout = 0` presumably belongs to a missing `else`.
                if (test || tmp >= pw)
                    wfout = 0xFFFF;
                    wfout = 0;
            else // combined pulse
                wfout = (tmp >= pw || test) ? 0xFFFF : 0;
                if (wf & TRI_BITMASK) // pulse+triangle
                    // NOTE(review): the two lines after the pulse+saw+triangle lookup
                    // presumably form a missing `else` (pulse+triangle only).
                    if (wf & SAW_BITMASK) // pulse+saw+triangle (waveform nearly identical to tri+saw)
                        wfout = (wfout) ? combinedWF(num, channel, PulseTriSaw_8580, tmp >> 4, 1) : 0;
                        tmp = phaseaccu[channel] ^ (ctrl & RING_BITMASK ? sourceMSB[num] : 0);
                        wfout = (wfout) ? combinedWF(num, channel, PulseSaw_8580, (tmp ^ (tmp & 0x800000 ? 0xFFFFFF : 0)) >> 11, 0) : 0;
                else if (wf & SAW_BITMASK) // pulse+saw
                    wfout = (wfout) ? combinedWF(num, channel, PulseSaw_8580, tmp >> 4, 1) : 0;
        else if (wf & SAW_BITMASK)
            // saw
            wfout = phaseaccu[channel] >> 8;
            if (wf & TRI_BITMASK) // saw+triangle
                wfout = combinedWF(num, channel, TriSaw_8580, wfout >> 4, 1);
        else if (wf & TRI_BITMASK)
            // triangle: optionally XOR in the stored source MSB (ring bit), then
            // invert the lower bits whenever bit 23 is set
            int tmp = phaseaccu[channel] ^ (ctrl & RING_BITMASK ? sourceMSB[num] : 0);
            wfout = (tmp ^ (tmp & 0x800000 ? 0xFFFFFF : 0)) >> 7;
        if (wf)
            prevwfout[channel] = wfout;
        else // emulate waveform 00 floating wave-DAC
            wfout = prevwfout[channel];
        // remember this channel's accumulator and MSB for the next evaluation
        prevaccu[channel] = phaseaccu[channel];
        sourceMSB[num] = MSB;
        // Route the channel: centre the waveform around zero, scale it by the
        // envelope level (envcnt / 256) and add it to the filtered or unfiltered sum.
        if (sReg[0x17] & FILTSW[channel])
            filtin += ((long int)wfout - 0x8000) * envcnt[channel] / 256;
        // an unfiltered channel whose FILTSW bit is 4 is dropped when OFF3_BITMASK is set
        else if ((FILTSW[channel] != 4) || !(sReg[0x18] & OFF3_BITMASK))
            nonfilt += ((long int)wfout - 0x8000) * envcnt[channel] / 256;
    // update readable SID1-registers (some SID tunes might use 3rd channel ENV3/OSC3 value as control)
    // NOTE(review): `num == 0, memory[1] & 3` uses the comma operator, so `num == 0`
    // is evaluated and discarded and only `memory[1] & 3` decides; `&&` was
    // presumably intended. wfout here is whatever the last loop iteration left.
    // envcnt[3] is a fixed index - if SID_CHANNEL_AMOUNT is 3 that is the first
    // channel of the next chip, not this chip's third one (TODO confirm).
    if (num == 0, memory[1] & 3) // OSC3, ENV3 (some players rely on it)
        sReg[0x1B] = wfout >> 8;
        sReg[0x1C] = envcnt[3];
    // FILTER:
    // NOTE(review): the prescaler is reloaded with clock_ratio below, but no
    // decrement is visible in this block.
    if (filterctrl_prescaler[num] == 0)
        // calculate cutoff and resonance curves only at samplerate is still adequate and reduces CPU stress of frequent float calculations
        filterctrl_prescaler[num] = clock_ratio;
        // raw cutoff setting from registers 0x16 (high part) and 0x15 (low 3 bits)
        cutoff[num] = 2 + sReg[0x16] * 8 + (sReg[0x15] & 7);
        // NOTE(review): the second cutoff/resonance pair uses the 6581 constants and
        // presumably sat in a missing `else` for non-8580 models.
        if (SID_model[num] == 8580)
            cutoff[num] = (1 - exp(cutoff[num] * cutoff_ratio_8580)) * 0x10000;
            // resonance could be taken from table as well
            resonance[num] = (pow(2, ((4 - (sReg[0x17] >> 4)) / 8.0))) * 0x100;
            cutoff[num] = (cutoff_bias_6581 + ((cutoff[num] < 192) ? 0 : 1 - exp((cutoff[num] - 192) * cutoff_ratio_6581))) * 0x10000;
            resonance[num] = ((sReg[0x17] > 0x5F) ? 8.0 / (sReg[0x17] >> 4) : 1.41) * 0x100;
    // the filter-calculation itself can't be prescaled because sound-quality would suffer of no 'oversampling'
    // Three stages share ftmp: each stage's value is added to / subtracted from
    // filtout only when its mode bit is set in register 0x18. cutoff is a fixed-point
    // factor scaled by 0x10000, resonance one scaled by 0x100.
    filtout = 0;
    ftmp = filtin + prevbandpass[num] * resonance[num] / 0x100 + prevlowpass[num];
    if (sReg[0x18] & HIGHPASS_BITMASK)
        filtout -= ftmp;
    ftmp = prevbandpass[num] - ftmp * cutoff[num] / 0x10000;
    prevbandpass[num] = ftmp;
    if (sReg[0x18] & BANDPASS_BITMASK)
        filtout -= ftmp;
    ftmp = prevlowpass[num] + ftmp * cutoff[num] / 0x10000;
    prevlowpass[num] = ftmp;
    if (sReg[0x18] & LOWPASS_BITMASK)
        filtout += ftmp;
    // output stage for one SID
    // mix both paths, scale by the low nibble of register 0x18, divide by OUTPUT_SCALEDOWN
    output = (nonfilt + filtout) * (sReg[0x18] & 0xF) / OUTPUT_SCALEDOWN;
    // saturation logic on overload (not needed if the callback handles it)
    if (output >= 32767)
        output = 32767;
    else if (output <= -32768)
        output = -32768;
    // master output
    return (int)output;

unsigned int combinedWF(char num, char channel, unsigned int* wfarray, int index, char differ6581)
    if (differ6581 && SID_model[num] == 6581)
        index &= 0x7FF;
    return wfarray[index];

void createCombinedWF(unsigned int* wfarray, float bitmul, float bitstrength, float treshold)
    int i, j, k;
    for (i = 0; i < 4096; i++)
        wfarray[i] = 0;
        for (j = 0; j < 12; j++)
            float bitlevel = 0;
            for (k = 0; k < 12; k++)
                bitlevel += (bitmul / pow(bitstrength, fabs(k - j))) * (((i >> k) & 1) - 0.5);
            wfarray[i] += (bitlevel >= treshold) ? pow(2, j) : 0;
        wfarray[i] *= 12;

