/*  ax.c
    ===========================================================================
    No Frills un-compactor for C64/C128 style archives           06Jun89  - CS
    ===========================================================================
    This is a simple program to allow users of non-Commodore machines to get
    access to the contents of SDA's or archives as created by the Commodore
    64 or 128. Its no speedster, but then its not the kind of program you or I
    will be using 100 times a day so what the heck.

    Compiles succesfully with Lattice 5.0 on the Amiga, or with any PC ANSI
    compiler. (I used Zortech for the EXE file included here because it gave
    the fastest executable)

    This file and derivatives thereof are placed into the public domain.
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define ULONG unsigned long
#define BYTE  unsigned char
#define WORD  unsigned short 

#ifdef  MPU8086         /* If Manx PC */
#define RB   "r"
#define WB   "w"
#else                   /* If the rest of the civilized world */
#define RB   "rb"
#define WB   "wb"
#endif

#ifdef  AMIGA           /* Compiled with Lattice 5.0 on the Amiga    */
#undef  NORMAL          /* undefine if reverse byte ordering (68000) */
#else
#define NORMAL
#endif


int         Status;     /* I/O status. 0=ok, or EOF */
ULONG       FilePos;    /* Current offset from ARC or SDA file's beginning */
FILE *      fp;         /* archive */
int         BitPos;     /* Bit position within one byte bit buffer */
WORD        crc;        /* checksum */
BYTE        crc2;       /* used in checksum calculation */
ULONG       hc[256];    /* Huffman codes */
BYTE        hl[256];    /* Lengths of huffman codes */
BYTE        hv[256];    /* Character associated with Huffman code */
WORD        hcount;     /* Number of Huffman codes */
WORD        ctrl;       /* Run-Length control character */
WORD        loadaddr;   /* Load address if CBM PRG file */
long        truesize;   /* True file size */
char        RealName[17];   /* True name for BASIC file */


/* Archive entry header */

struct ARC64Header {
    BYTE    version;    /* Version number, must be 1 or 2 */
    BYTE    mode;       /* 0=store, 1=pack, 2=squeeze, 3=crunch, 4=squeeze+pack, 5=crunch in one pass */
    WORD    check;      /* Checksum */
    long    size;       /* Original size. Only three bytes are stored */
    WORD    blocks;     /* Compressed size in CBM disk blocks */
    BYTE    type;       /* File type. P,S,U or R */
    BYTE    fnlen;      /* Filename length */
    char    name[17];   /* Filename. Only fnlen bytes are stored */
    BYTE    rl;         /* Record length if relative file */
    WORD    date;       /* Date archive was created. Same format as in MS-DOS directories */
} entry;
 
/* Lempel Zev compression string table entry */

struct LZ {
    WORD    prefix;     /* Prefix code */
    BYTE    ext;        /* Extension character */
};

/* LZ globals */

#define LZSTKSIZ 512        /* Actually only 40 or 50 bytes is sufficient */

int         State = 0;      /* Set to 0 to reset un-crunch */
struct LZ   lztab[4096];    /* String table */
BYTE        stack[LZSTKSIZ];/* Stack for push/pop */
int         lzstack;        /* Stack pointer */
int         cdlen,          /* Current code size */
            code,           /* Last code rec'd  */
            wtcl;           /* Bump cdlen when code reaches this value */
int         ncodes,         /* Current # of codes in table */
            wttcl;          /* Copy of wtcl */





/*  --------------------------------------------------------------------------
    Shell Sort.  From "C Programmer's Library" by Purdum, Leslie & Stegemoller

    'swap' and 'comp' functions vary depending on the data type being sorted.

    swp(i,j)  - simply swaps array elements i and j.
    cmp(i,j)  - returns 0 if elements are equal.
                      >0 if element[i] > element[j]
                      <0 if    ''      <    ''
    --------------------------------------------------------------------------
    n   = Number of elements to be sorted. (0 is 1st, n-1 is last)
    cmp = Compare two elements. Return result like strcmp()
    swp = Swap to elements
*/

void ssort(unsigned n, int (*cmp)(int,int), void (*swp)(int,int))
{
    int m;
    int h,i,j,k;

    m=n;

    while( (m /= 2) != 0) {
        k = n-m;
        j = 1;
        do {
            i=j;
            do {
                h=i+m;
                if ((*cmp)(i-1,h-1) >0 ) {
                    (*swp)(i-1,h-1);
                    i -= m;
                }
                else
                    break;
            } while (i >= 1);
            j += 1;
        } while(j <= k);
    }
}


/*  ------------------------------------------------
    Compare function for Shell Sort of Huffman codes
    Set up to sort them in reverse order by length
    ------------------------------------------------
*/
    
int hcomp(int x, int y)
{
    return ( hl[y] - hl[x] );
}

void hswap(int x, int y)
{
    unsigned long t0;
    unsigned char t1, t2;

    t0    = hc[x];
    t1    = hv[x];
    t2    = hl[x];
    hc[x] = hc[y];
    hv[x] = hv[y];
    hl[x] = hl[y];
    hc[y] = t0;
    hv[y] = t1;
    hl[y] = t2;
}


/*  -------------------------------------
    Convert CBM ASCII to 'Standard' ASCII
    -------------------------------------
*/

char p2a(int c)
{
    static unsigned cc;
    static unsigned oldcc;

    c &= 0xff;

    oldcc = cc;
    cc    = c;

    switch (cc) {

       case 10: {                                   /* LF           */
                if (oldcc!=13)
                   return 10;
                else
                   return '\0';
                }
       case 13:  return '\n';                       /* CR           */
       case 160: return ' ';                        /* Shift+space  */
       case 164: return '_';
       case 171: return 'Ì';
       case 173: return 'È';
       case 174: return '»';
       case 175: return '~';
       case 176: return 'É';
       case 177: return 'Ê';
       case 178: return 'Ë';
       case 179: return '¹';
       case 189: return '¼';
       case 192: return 'Í';
       case 219: return 'Î';
       case 221: return 'º';
       case 223: return '|';
       default:  {
            if (cc<65 || cc>219)
                return (char) cc;
            if (cc<91)
                return (char) (cc|0x20);
            if (cc<193)
                return (char) cc;
            else
                return (char) (cc&0x7f);
       }
   }
}


/*  -----------------
    Input Subroutines
    -----------------
    These are terribly inefficient
*/


BYTE GetByte()
{
    int c;

    if (Status == EOF)
        return 0;

    if ( (c=fgetc(fp)) == EOF) {
        Status = c;
    }
    return (BYTE) (c & 0xff);
}

WORD GetWord()
{
    union {
        WORD u;
        char c[2];
    } x;

    int i;

    x.u = 0;

#ifdef NORMAL
    for (i=0; i<2; i++)
#else
    for (i=1; i>=0; i--)
#endif
        x.c[i] = GetByte();

    return x.u;
}

long GetThree()
{
    union {
        long u;
        char c[4];
    } x;

    int i;

    x.u = 0;

#ifdef NORMAL
    for (i=0; i<3; i++)
#else
    for (i=3; i; i--)
#endif
        x.c[i] = GetByte();

    return x.u;

}

  
int GetBit()
{
    static int Byte;
    int c;

    if (!BitPos)
        Byte = GetByte();

    c    = Byte & 1;
    Byte = Byte >>1;
    BitPos++;

    if (BitPos == 8)
        BitPos = 0;

    return c;
}

#ifdef AMIGA    /* Lattice doesn't seem to have one of these  */

/*  6502 wants words low to high */

void putw(unsigned cc, FILE * fp)
{
    fputc((cc & 0xff), fp);
    fputc((cc/256),fp);
}

#endif

/*  --------------------------------------------------------
    Fetch huffman code and convert it to what it represents
    --------------------------------------------------------
    I wrote this a long time ago when I thought binary trees
    were things that grew in northern Saskatchewan.
*/

int Huffin()
{
    long hcode = 0;
    long mask  = 1;
    int  size  = 1;
    int  now;

    now = hcount;       /* First non=zero Huffman code */

    do {
        if (GetBit())
            hcode |= mask;

        while( hl[now] == size) {

            if (hc[now] == hcode)
                return hv[now];

            if (--now < 0) {         /* Error in decode table */
                Status = EOF;
                return EOF;
            }
        }
        size++;
        mask = mask << 1;
    } while(size < 24);

    Status = EOF;
    return EOF;                      /* Error. Huffman code too big */
}

/*  ------------------------------------------------------------------
    Fetch ARC64 header. Returns 1 if header is ok, otherwise returns 0
    ------------------------------------------------------------------
*/

int GetHeader()
{
    WORD    w, i;
    char *  LegalTypes = "SPUR";
    ULONG   mask;

    if (feof(fp) || ferror(fp))
        return 0;

    BitPos        = 0;              /* Bit buffer pointer */
    crc           = 0;              /* checksum */
    crc2          = 0;              /* Used in checksum calculation */
    State         = 0;              /* LZW state */
    ctrl          = 254;            /* Run-Length control character */

    entry.version = GetByte();
    entry.mode    = GetByte();
    entry.check   = GetWord();
    entry.size    = GetThree();
    entry.blocks  = GetWord();
    entry.type    = p2a(GetByte());
    entry.type    = toupper(entry.type);
    entry.fnlen   = GetByte();

    /* Check for invalid header, If invalid, then we've input past the end  */
    /* Possibly due to XMODEM padding or whatever                           */

    if  (entry.fnlen > 16)
        return 0;

    for (w=0; w < entry.fnlen; w++) {
        RealName[w] = GetByte();
        entry.name[w] = p2a(RealName[w]);
    }

    RealName[entry.fnlen] = 0;
    entry.name[entry.fnlen] = 0;

    if (entry.version > 1) {
        entry.rl  = GetByte();
        entry.date= GetWord();
    }

    if (Status == EOF)
        return 0;

    if ( (entry.version == 0) || (entry.version >2) )
        return 0;

    if ( entry.version == 1) {                  /* If ARC64 version 1.xx */
        if (entry.mode > 2)                     /* Only store, pack, squeeze */
            return 0;
    }
    if (entry.mode == 1)                        /* If packed get control char */
        ctrl = GetByte();                       /* V2 always uses 0xfe V1 varies */
 
    if (entry.mode > 5)
        return 0;

    if (entry.blocks > 4133)                    /* Largest CBM disk is 8250 */
        return 0;

    if ( (entry.mode == 2) || (entry.mode == 4) ) {   /* if squeezed or squashed */

        hcount = 255;                                 /* Will be first code */
        for (w=0; w<256; w++) {                       /* Fetch Huffman codes */

            hv[w] = w;

            hl[w]=0;
            mask = 1;
            for (i=1; i<6; i++) {
                if (GetBit())
                    hl[w] |= mask;
                mask = mask << 1;
            }

            if (hl[w] > 24)
                return 0;                             /* Code too big */

            hc[w] = 0;
            if (hl[w]) {
                i = 0;
                mask = 1;
                while (i<hl[w]) {
                    if (GetBit())
                        hc[w] |= mask;
                    i++;
                    mask = mask << 1;
                }
            }
            else
                hcount--;
        }
        ssort(256,hcomp,hswap);
    }

    if (strchr(LegalTypes, entry.type) == 0)
        return 0;

    return 1;
}





/*  --------------------------------------------------------------------------
     Get start of data. Ignores SDA header, and returns -1 if not an archive.
     Otherwise return value is the starting position of useful data within the
     file. (Normally 0)
    --------------------------------------------------------------------------
*/

int GetStartPos()
{
    int c;                      /* Temp */
    int cpu;                    /* C64 or C128 if SDA */
    int linenum;                /* Sys line number */
    int skip;                   /* Size of SDA header in bytes */

    fseek(fp,(long) 0,0);       /* Goto start of file */

    if ( (c=GetByte()) == 2)    /* Probably type 2 archive */
        return 0;               /* Data starts at offset 0 */

    if (c != 1)                 /* IBM archive, or not an archive at all */
        return -1;

    /* Check if its an SDA */

    GetByte();                  /* Skip to line number (which is # of header blocks) */
    GetWord();
    linenum = GetWord();
    c = GetByte();

    if (c != 0x9e)              /* Must be BASIC SYS token */
        return 0;               /* Else probably type 1 archive */

    c = GetByte();              /* Get SYS address */
    cpu = GetByte();            /* '2' for C64, '7' for C128 */

    skip = (linenum-6)*254;     /* True except for SDA232.128  */

    if ( (linenum==15) && (cpu=='7') )   /* handle the special case */
        skip -= 1;

    return skip;
}


/*  ----------------------------------------------------------------------
    Un-Crunch a byte
    ----------------------------------------------------------------------
    This is pretty straight forward if you have Terry Welch's article
    "A Technique for High Performance Data Compression" from IEEE Computer
    June 1984

    This implemention reserves code 256 to indicate the end of a crunched
    file, and code 257 was reserved for future considerations. Codes grow
    up to 12 bits and then stay there. There is no reset of the string
    table.
*/

    /* PUSH/POP LZ stack */

void push(int c)
{
    if (lzstack > LZSTKSIZ-1) {
        printf("Lempel Zev Stack Overflow\n");
        exit(1);
    }
    else
        stack[lzstack++] = c;
}

int pop()
{
    if ( !lzstack ) {
        printf("Lempel Zev stack underflow.\n");
        exit(1);
    }
    else
        return stack[--lzstack];
}


int unc()
{
    static int  oldcode, incode;
    static BYTE kay;
    static int  omega;
    static BYTE finchar;

    switch (State) {

        case 0: {                       /* First time. Reset. */

            lzstack = 0;
            ncodes  = 258;              /* 2 reserved codes */
            wtcl    = 256;              /* 256 Bump code size when we get here */
            wttcl   = 254;              /* 1st time only 254 due to resvd codes */
            cdlen   = 9;                /* Start with 9 bit codes */
            oldcode = getcode();

            if (oldcode == 256) {       /* code 256 marks end of this entry */
                Status = EOF;
                return EOF;
            }
            kay = oldcode & 0xff;
            finchar = kay;
            State = 1;
            return kay;
        }
        case 1: {

            incode = getcode();

            if (incode == 256) {
                State = 0;
                Status = EOF;
                return EOF;
            }

            if (incode >= ncodes) {     /* Undefined code, special case */
                kay = finchar;
                push(kay);
                code = oldcode;
                omega = oldcode;
                incode = ncodes;
            }
            while ( code > 255 ) {      /* Decompose string */
                push(lztab[code].ext);
                code = lztab[code].prefix;
            }
            kay = code;
            finchar = code;
            State = 2;
            return kay & 0xff;
        }
        case 2: {

            if (lzstack == 0) {         /* Empty stack */
                omega = oldcode;
                if (ncodes < 4096) {
                    lztab[ncodes].prefix = omega;
                    lztab[ncodes].ext = kay;
                    ncodes++;
                }
                oldcode = incode;
                State = 1;
                return unc();
            }
            else
                return pop();
        }
    }
}





/*  -------------
    Fetch LZ code
    -------------
*/

int getcode()
{
    register i;

    code = 0;
    i = cdlen;

    while(i--)
        code = (code << 1) | GetBit();

    /*  Special case of 1 pass crunch. Checksum and size are at the end */

    if ( (code == 256) && (entry.mode == 5) ) {
        i = 16;
        while(i--)
            entry.check = (entry.check << 1) | GetBit();
        i = 24;
        while(i--)
            entry.size = (entry.size << 1) | GetBit();
    }

    /* Get ready for next time */

    if ( (cdlen<12) ) {
        if ( !(--wttcl) ) {
            wtcl = wtcl << 1;
            cdlen++;
            wttcl = wtcl;
        }
    }

    return code;
}

void UpdateChecksum(int c)
{
    truesize++;

    c &= 0xff;

    if (entry.version == 1)         /* Simple checksum for version 1 */
        crc += c;
    else
        crc += (c ^ (++crc2));      /* A slightly better checksum for version 2 */
}


int UnPack()
{
    switch (entry.mode) {

        case 0:             /* Stored */
        case 1:             /* Packed (Run-Length) */

            return GetByte();

        case 2:             /* Squeezed (Huffman only) */
        case 4:             /* Squashed (Huffman + Run-Length */

            return Huffin();

        case 3:             /* Crunched */
        case 5:             /* Crunched in one pass */

            return unc();

        default:            /* Otherwise ERROR */

            Status = EOF;
            return EOF;
    }
}


/*  ----------------------------------------------
    Try a few default extensions if none was given
    ----------------------------------------------
*/

int MakeName(char * buf, char * arg)
{
    strcpy(buf,arg);

    if (strchr(buf,'.') != NULL)
        return 0;

    strcat(buf,".sda");

    if (!access(buf,0))
        return 0;

    strcpy(buf,arg);
    strcat(buf,".ark");

    if (!access(buf,0))
        return 0;

    strcpy(buf,arg);
    strcat(buf,".arc");

    if (!access(buf,0))
        return 0;

    strcpy(buf,arg);
    return 1;
}

/*  Finally */

void main(int argc, char ** argv)
{
    int     temp, count, i;
    WORD    c;
    BYTE    prev;           /* previous byte for Run Length */
    FILE *  op,             /* File being extracted */
         *  contents,       /* Summary of contents  */
         *  basic,          /* CBM BASIC program to rename to correct names */
         *  batch;          /* Batch file to RENAME for MS-DOS */

    char *  tempname   = "File.001";
    int     filecount  = 0;
    long    bytecount  = 0;
    long    filesize;
    char    sdaname[64];
    WORD    linnum = 1000;      /* For BASIC file */
    WORD    linptr = 0x0801;    /* BASIC line link */

    FilePos = 0;
    Status  = 0;

    if (argc < 2) {
        printf("Usage:   %s filename[.sda | .arc]\n\n",argv[0]);
        printf("Purpose: Dissolves archives made by ARC64/128 on the Commodore 64/128\n");
        exit(1);
    }

    MakeName(sdaname,argv[1]);

    if ( (fp=fopen(sdaname, RB)) == 0 ) {
        printf("\nCan't open '%s'\n",sdaname);
        exit(1);
    }

    if ( (temp=GetStartPos()) < 0 ) {
        printf("\nThis doesn't look like a C64 archive or an SDA\n");
        printf("Perhaps its an IBM or AMIGA archive, or not an archive at all\n");
        exit(1);
    }
    else {
        fseek(fp,(long) temp, 0);
        FilePos = temp;
    }

    if ( (contents = fopen("Contents","w")) == 0) {
        printf("File I/O error\n");
        fclose(fp);
        exit(1);
    }

    if ( (basic = fopen("re-name.bas", WB)) == NULL) {
        printf("File I/O error\n");
        fclose(fp);
        fclose(contents);
        exit(1);
    }

    fputc(0x01,basic);  /* CBM BASIC Load address */
    fputc(0x08,basic);
    linptr = 0x080f;    /* 1000 open 15,8,15 */
    putw(linptr,basic);
    putw(linnum,basic);
    fputc(0x9f,basic);
    fputs(" 15,8,15",basic);
    fputc(0,basic);

    if ( (batch = fopen("re-name.bat","w")) == NULL) {
        printf("File I/O error\n");
        fclose(fp);
        fclose(contents);
        fclose(basic);
        exit(1);
    }

    fprintf(contents,"--------------------------------------------------\n");
    fprintf(contents,"           True Name     Type Bytes\n");
    fprintf(contents,"--------------------------------------------------\n");

    while(GetHeader()) {

        truesize = 0;

        printf("%-16s ",entry.name);

        filesize = entry.size;

        if (entry.mode == 5)        /* If 1 pass crunch size is unknown */
            filesize = 10000000;

        if ( (op = fopen(tempname, WB)) == NULL) {
            printf("File I/O error ... Aborting\n");
            fclose(contents);
            fclose(fp);
            exit(2);
        }

        while( filesize && (Status != EOF) && !feof(fp) ) {

            c = UnPack();

            if (Status == EOF)
                break;

            /* If Run Length is needed */

            if ( (entry.mode != 0) && (entry.mode != 2) ) {

                if ( (c & 0xff) == ctrl) {

                    count = UnPack();
                    prev  = UnPack();
                    c     = prev;

                    if (count == 0) {
                        count = 256;
                        if (entry.version == 1)  /* version 1 problem */
                            count--;
                    }

                    while(count > 1) {
                        fputc(c,op);
                        UpdateChecksum(c);
                        filesize--;
                        if (filesize == 1) {   
                            break;
                        }
                        count--;
                    }
                }
            }
            fputc(c,op);
            filesize--;
            UpdateChecksum(c);
        }

        if (crc != entry.check)
            printf(" <Checksum Error>\n");
        else
            printf(" OK.\n");

        fclose(op);

        bytecount += entry.size;
        filecount ++;

        fprintf(contents,"%-10s %-16s %c %7lu ",tempname,entry.name,entry.type,entry.size);

        if (toupper(entry.type) == 'R')
            fprintf(contents,"Record Length = %d",entry.rl);

        if (crc != entry.check)
            fprintf(contents,"Checksum Error\n");
        else
            fprintf(contents,"Checksum OK\n");

#ifdef AMIGA
        fprintf(batch,"RENAME \"%s\" \"%s\"\n",tempname,entry.name);
#else
        fprintf(batch,"REN %s %s\n",tempname,entry.name);
#endif

        linnum += 10;
        putw(linptr,basic);
        putw(linnum,basic);
        fputc(0x98,basic);
        fputs("15,\"R0:",basic);
        fputs(RealName,basic);
        fputc('=',basic);
        for (i=0; tempname[i]; i++)
            fputc(toupper(tempname[i]),basic);
        fputc('\"',basic);
        fputc(0,basic);
        linptr += (15+strlen(entry.name)+strlen(RealName));

        tempname[7] += 1;
        if (tempname[7] > '9') {
            tempname[7] = '0';
            tempname[6] += 1;
            if (tempname[6] >'9') {
                tempname[6] = '0';
                tempname[5] += 1;
            }
        }

        FilePos += 254*(long)entry.blocks;
        fseek(fp,FilePos,0);

    }

    fprintf(contents,"--------------------------------------------------\n");
    fprintf(contents,"%lu Bytes in %u files.\n",bytecount,filecount);
    fclose(contents);

    linnum += 10;
    putw(linptr,basic);
    putw(linnum,basic);
    fputc(0xa0,basic);              /* close 15 */
    fputs(" 15",basic);
    for (i=0; i<4; i++)
        fputc(0,basic);

    fclose(basic);
    fclose(batch);
    exit(0);
}