 *      
* Juddpeg -- A jpeg decoder for the C64
*
* SLJ 9/17/99
*
* MCM-lace version 10/1/99
* Generic version 10/7/99
* Last update: 11/04/99
* IFLI version: 11/22/99
*

         ORG $1000

RendInit = $2000
Render   = $2003
Display  = $2006

         JMP Start
         JMP IDCT2D

temp     = $fe
quantp   = $fc            ;Quant table
huff     = $fa            ;Huffman pointers
temp2    = $f9
count    = $f8            ;Used by GetBits
                          ;and AddNode

point    = $02
dest     = $04

Bitslo   = $06            ;and Dequantize
Bitshi   = $07

mult1lo  = $08            ;Multiplication tables
mult1hi  = $0A
mult2lo  = $0C
mult2hi  = $0E

vsamp    = $28            ;Desample
hsamp    = $29

index    = $20            ;IDCT stuff
t1       = $22
t2       = $24
t3       = $26
DCT      = $10
*Coeff    =   $30

QT0      = $0340          ;Quantization tables
QT1      = QT0+64
QT2      = QT1+64         ;Only use 3

Huffmem  = $0400          ;Huffman trees

Negmlo   = $0A00
Posmlo   = $0B00          ;Mult tables
Negmhi   = $0D00
Posmhi   = $0E00          ;2 pages

CrTab1   = $8400          ;RGB conversion
CrTab2   = $8480
CbTab1   = $8500
CbTab2   = $8580

trans    = $8600          ;Transform

Veclo    = $8680          ;Vec to be quantized
Vechi    = $86C0

ImgBuf   = $8700          ;Image data buffer
ImgBufSize = $3900
*EmptyBuf =   $9F80

YBuf     = ImgBuf
CbBuf    = ImgBuf+$1300
CrBuf    = ImgBuf+$2600

NOTJPG   = 1              ;Errors
READERR  = 2
BADQT    = 3
BADHT    = 4
HEADERR  = 5
HUFFERR  = 6


Start    
         LDA #$76
         STA $01
         CLC
         XCE              ;100% SCPU baby!
         SEP #$30

         LDA #00
         STA ERROR
         STA AtEOF
         STA SkipFF
         LDA #$FF
         STA filepos
         STA filepos+1
         STA filepos+2
         STA reslen
         STA reslen+1

         LDA #>Posmlo
         STA mult1lo+1
         LDA #>Negmlo
         STA mult2lo+1
         LDA #>Posmhi
         STA mult1hi+1
         LDA #>Negmhi
         STA mult2hi+1

         jsr GetFile
         bcs :rts

         jsr GETIN        ;check JPEG SOI
         cmp #$ff         ;marker = $FFD8
         bne :err1
         jsr GETIN
         cmp #$d8
         bne :err1

         jsr InitHuff
         jsr InitBuff
         jsr OptSCPU

         jsr GetAPP0
:loop    lda ERROR
         bne :err
         jsr DoMarker
         lda AtEOF
         beq :loop

:rts     JSR NoSCPU
         JSR CloseFile
         STZ $D020
         JSR Display
         JMP :done

:err1    LDA #NOTJPG
         DFB $2c
:err3    LDA #READERR
         dfb $2c
:err4    LDA #HEADERR

:err                      ;.A = err no
         PHA
         JSR NoSCPU
         JSR STROUT
         dfb 13
         TXT 'error at pos=$',00
         LDA filepos+2
         JSR HexOut
         LDA filepos+1
         JSR HexOut
         LDA filepos
         JSR HexOut
         LDA #13
         JSR $FFD2
         PLA
         ASL
         TAX
         LDA ERRTAB,X
         STA temp
         LDA ERRTAB+1,X
         STA temp+1
         JSR CloseFile
         LDA #13
         JSR $FFD2
         LDY #00
:loop2   LDA (temp),Y
         BEQ :done
         JSR $FFD2
         INY
         BNE :loop2

:done    LDA #$77
         STA $01
         SEC
         XCE
         LDA #$1B
         STA $D011
         LDA #$14
         STA $D018
         LDA #$08
         STA $D016
         LDA $DD00
         ORA #$03
         STA $DD00
         LDA #13
         JSR $FFD2
         LDA #$00
         STA $C6
         JSR $E3BF
         JSR $E453
         JSR $A65E
         JMP $A474

AtEOF    DFB 00
ERROR    DFB 00

ERRTAB   
         DA 00
         DA :notjpg
         DA :readerr
         DA :dqt
         DA :dht
         DA :head
         da :huff

:notjpg  txt 'not a jpeg file!',00
:readerr txt 'error reading file',00
:dqt     TXT 'error in quantization table',00
:dht     TXT 'error in huffman table',00
:head    TXT 'expected header, got crap',00
:huff    TXT 'huffman decoding error',00

* Bit patterns (masks)

BITP     
         HEX 00
         HEX 0102040810204080
         HEX 0102040810204080

         txt 'wyn wuz here'

* DEBUG -- display code
         do 0
Display  
         LDA $D011
         BMI Display
:l1      LDA $D011
         BPL :l1
         LDA $DD00
         EOR #2
         STA $DD00
         LDA $D016
         EOR #1
         STA $D016
         LDA $D018
         EOR #$40
         STA $D018
         LDA #00
         STA $DC00
         LDA $DC01
         CMP #$FF
         BEQ Display
         RTS
         fin
*-------------------------------

* Zero out image buffer

InitBuff 
         REP #$20
         LDX #ImgBufSize
         LDA #$80
:loop    STA ImgBuf,X
         DEX
         BPL :loop
         SEP #$30
         RTS


*
* Disable SCPU mirroring
*
OptSCPU  
         LDA #$84         ;No mirroring
         STA $D07E
         STA $D0B3
         STA $D07F
         RTS

NoSCPU                    ;Enable
         SEC
         XCE
         STA $D07E
         STA $D077
         STA $D07F
         RTS

*
* DoMarker -- Read Marker and call
*   appropriate routine.
*

Unknown  
* Huffman @ $0400 -> no printing!
         do 0
         jsr strout
         txt 'unknown header:',00
         lda Header
         jsr HexOut
         lda Header+1
         jsr HexOut
         jsr strout
         txt ' trying anyways...'
         dfb 13,00
         fin
         JSR Ignore

DoMarker 
         lda AtEOF
         beq :c1
         rts
:c1      jsr GetHeader    ;find next
         bcs DoMarker
Do2      
         lda Header+1
*         cmp #$fe
*         beq :com
         cmp #$dd
         beq :dri
         cmp #$db
         beq :dqt
         cmp #$c4
         beq :dht
         cmp #$c0
         beq :sof
         cmp #$da
         bne Unknown

:sos     jmp SOS
*:com     jmp Comment
:dri     jmp DRI
:dqt     jmp DQT
:dht     jmp DHT
:sof     jmp SOF

:get     jsr GetHeader
         bcc :rts
         lda #HEADERR
         sta error
:rts     rts

*
* GetAPP0 -- Read JFIF header
*
GetAPP0  
         jsr GetHeader
         bcs :jmp
         lda Header+1
         cmp #$e0         ;APP0 marker
         beq Ignore

         do 0             ;No printing!
         jsr strout
         txt 'not jfif format! '
         txt 'trying anyways...'
         dfb 13,00
         fin

         jmp Do2
:jmp     jmp DoMarker

* Ignore rest of segment

Ignore   
         JSR GetByte
         BCS :rts
         LDA AtEOF
         BNE :rts
         JSR DecLen
         BNE Ignore
:rts     RTS

*
* GetHeader -- Read in header bytes.
* On exit:
*   C set -> error
*   Z set -> end of file
*
Header   da 00            ;hi,lo
Length   da 00            ;lo,hi

GetHeader
         stz Header
         stz Header+1
         jsr GetByte
         bcs :rts
         cmp #$ff
         bne :sec

         jsr GetByte
         sta Header+1
         cmp #$d8         ;Start of JPEG
         beq :clc         ;(just in case)
         cmp #$d9         ;End of file
         bne :c2
         sta AtEOF
         beq :clc
:c2      
         jsr GetByte
         bcs :rts
         sta Length+1
         jsr GetByte
         bcs :rts
         sec
         sbc #2
         sta Length
         bcs :c3
         dec Length+1
:c3      ora Length+1     ;Empty segment
         beq GetHeader
:clc     clc
         rts

:sec     sec
:rts     rts

*
* GetFile -- Open jpeg file.
*
GetFile  
         lda #$00
         sta $d020
         sta $d021
         sta nbits

         jsr strout
         dfb 13,5
         txt 'juddpeg-ifli v.wxyz slj 12/8'
         dfb 13,13
         txt 'renderer by a. gonzalez'
         dfb 13,13
         txt 'sys 4096 to restart'
         dfb 13,13
         txt 'file:',00

         ldy #00
:l2      jsr $ffcf
         sta $0200,y      ;Filename
         iny
         cmp #13
         bne :l2
         jsr $ffd2
         lda #','
         sta $0200,y
         iny
         lda #'r'
         sta $0200,y

         tya
         ldx #00
         ldy #$02
         jsr $ffbd        ;setnam

         jsr strout
         txt 'offset col:0'
         dfb 157,00
         jsr getnum
         sta coloff

         jsr strout
         txt 'offset row:0'
         dfb 157,00
         jsr getnum
         sta rowoff

         lda #2
         tay
         ldx $ba
         jsr $ffba
         jsr $ffc0        ;open
         bcs :error
         lda $90
         bne :error
         ldx #2
         jsr $ffc6        ;chkin
         bcs :error
         rts              ;c set -> error

:error   jsr CloseFile
         jsr strout
         txt 'load error',00
         sec
:rts     rts

getnum   
         lda #00
:loop2   sta temp
:loop    jsr $ffcf
         cmp #13
         beq :done
         sec
         sbc #'0'
         sta temp+1
         ldx #10          ;cheesy *10 routine
         lda #00
         clc
:l2      adc temp
         dex
         bne :l2
         adc temp+1
         jmp :loop2
:done    jsr $ffd2
         lda temp
         rts

CloseFile
         lda #2
         jsr $ffc3        ;close
         jmp $ffcc        ;clrchn

*
* GetByte
*
SkipFF   DFB 00           ;Flag

GetByte  
         inc $d020
         jsr GETIN
         sta Header
         cmp #$ff
         bne :rts
         ldx SkipFF
         beq :rts
         jsr GETIN
         sta Header+1
         cmp #$ff
         beq GetByte      ;$ffff -> skip
         cmp #00          ;$ff00 -> $ff
         bne :rts
         lda #$ff
:rts     ldx $90
         stx AtEOF
         cpx #64
         rts              ;C set -> error

filepos  ds 3

GETIN    
         INC filepos
         BNE :c1
         INC filepos+1
         BNE :c1
         INC filepos+2
:c1      JMP $FFA5

*
* GetBit -- get next bit!
*
nbits    dfb 00           ;# of bits left
byte     dfb 00

GetBit   
         dec nbits
         bpl :get
         lda #7
         sta nbits
         jsr GetByte
         sta byte
:get     asl byte
:rts     rts


* Print string

strout   
         pla
         tay
         pla
         sta :loop+2
:loop    lda $c001,y
         beq :exit
         jsr $ffd2
         iny
         bne :loop
         inc :loop+2
         bne :loop
:exit    lda :loop+2
         cpy #$ff
         iny
         adc #00
         pha
         tya
         pha
         rts

* Print hex number
HexOut   
         PHA
         LSR
         LSR
         LSR
         LSR
         JSR :print
         PLA
         AND #$0F
:print   cmp #10
         BCC :c1
         ADC #6
:c1      ADC #48
         JMP $FFD2

********************************
*
* Main header processing routines
*
********************************

DecLen   
         LDA Length
         BNE :c1
         ORA Length+1
         BEQ :rts
         DEC Length+1
:c1      DEC Length
         LDA Length
         ORA Length+1
:rts     RTS


Comment  
         JSR GetByte
         BCS :rts
         CMP #$0A         ;LF
         BEQ :oops2
:c0      CMP #32
         BCC :oops
         CMP #128
         BCC :c1
:oops    LDA #'@'
         DFB $2C
:oops2   LDA #13
:c1      CMP #65
         BCC :c2
         EOR #32          ;->PETSCII
:c2      JSR $FFD2
         JSR DecLen
         BNE Comment
:rts     LDA #13
         JMP $FFD2

* Lame restart markers

reslen   da 00
cres     da 00

DRI      
         JSR GETIN
         STA reslen+1
         STA cres+1
         JSR GETIN
         STA reslen
         STA cres
         RTS

DecRes   
         LDA reslen+1
         cmp #$ff
         beq :rts

         DEC cres
         BNE :rts
         LDA cres+1
         BEQ :restart
         DEC cres+1
:rts     RTS

:restart 
         STA nbits        ;Skip bits
         JSR GetByte      ;Read $FFxx
         LDA reslen
         STA cres
         LDA reslen+1
         STA cres+1
Restart  
         LDA #00
         LDX #5
:l2      STA DClo,X
         STA DChi,X
         DEX
         BPL :l2

         RTS

* Define Quantization table

DQT      
         JSR DecLen
         BEQ :rts
         JSR GetByte
         BCS :err
         TAY
         AND #$0F         ;number of QT
         BNE :c1
         LDX #<QT0
         LDA #>QT0
         BNE :ok
:c1      CMP #1
         BNE :c2
         LDX #<QT1
         LDA #>QT1
         BNE :ok
:c2      CMP #2
         BNE :err
         LDX #<QT2
         LDA #>QT2

:ok      STX point        ;QT addr
         STA point+1
         TYA
         AND #$F0
         BNE :err         ;0 = 8-bit
         LDY #00
:loop    STY temp         ;Counter
         LDA Length
         ORA Length+1
         BEQ :err
         JSR GetByte
         BCS :err
         LDY temp
         STA (point),Y
         JSR DecLen
         INY
         CPY #64
         BNE :loop
         JMP DQT          ;Multiple QT's allowed

:err     LDA #BADQT       ;Only 0-3 allowed
         STA ERROR
:rts     RTS


* Define Huffman table

symbols  ds 16
hufflen  dfb 00

DHT      
         JSR DecLen
         BEQ :jerr
:getb    JSR GetByte
         BCC :cont
:jerr    JMP :err
:cont    
         TAY              ;Info byte
         AND #$0F
         CMP #$04
         BCS :jerr
         ASL
         TAX              ;table num 0-3
         TYA
         AND #$F0
         BEQ :ok          ;DC table
         CMP #$10
         BNE :jerr
         TXA              ;AC table
         ORA #$08         ;+8
         TAX
:ok      LDA HuffTop
         STA DChuff0,X
         STA huff
         LDA HuffTop+1
         STA DCHuff0+1,X
         STA huff+1
         STX temp2
         LDY #01          ;Right node
         JSR NewNode      ;Root node

         LDX #01
:l1      STX temp
         LDA Length
         ORA Length+1
         BEQ :err
         JSR GetByte
         BCS :err
         LDX temp
         STA symbols-1,X
         JSR DecLen
         INX
         CPX #17
         BNE :l1

         LDA #$FF
         STA huffbits
         STA huffbits+1
         LDA #1
         STA hufflen
:loop    
         INC huffbits+1   ;hi,lo!
         BNE :c1
         INC huffbits
:c1      
:l2      LDX hufflen
         DEC symbols-1,X
         BPL :c2
         CPX #16
         BEQ :next
         ASL huffbits+1
         ROL huffbits
         INC hufflen
         BNE :l2
:c2      
         LDX temp2
         LDA DCHuff0,X
         STA huff
         LDA DCHuff0+1,X
         STA huff+1
         JSR GetByte
         BCS :err
         LDX hufflen
         JSR AddNode
         BCS :rts
         JSR DecLen
         JMP :loop
:next    JSR DecLen
         BEQ :rts
         JMP :getb        ;Multiple HTs

:err     LDA #BADHT
         STA ERROR
:rts     RTS

* Start of Frame

height   da 00
width    da 00
numrows  dfb 00
numcols  dfb 00
ncomps   dfb 00           ;Num components
csampv   ds 6             ;Sampling factors
csamph   ds 6             ;(horizontal)
cquant   ds 6             ;Quantization table

SOF      
         LDX #5
         LDA #00
:l1      STA csampv,X
         STA csamph,X
         DEX
         BPL :l1

         JSR :get
         CMP #8
         BEQ :ok
         LDA #BADQT
         STA ERROR
         RTS
:ok      
         JSR :get
         STA height+1
         JSR :get
         STA height
         SEC
         SBC #1
         STA numrows
         LDA height+1
         SBC #00
         LSR
         ROR numrows
         LSR
         ROR numrows
         LSR
         ROR numrows
         INC numrows

         JSR :get
         STA width+1
         JSR :get
         STA width
         SEC
         SBC #1           ;0..7 instead of 1..8
         STA numcols
         LDA width+1
         SBC #00
         LSR
         ROR numcols
         LSR
         ROR numcols
         LSR
         ROR numcols
         INC numcols      ;0..7 => 1 col, etc.

         JSR :get
         STA ncomps
         STA temp
:loop    JSR :get
         STA temp+1       ;ID
         JSR :get
         LDX temp+1
         PHA
         AND #$0F
         STA csampv,X
         PLA
         LSR
         LSR
         LSR
         LSR
         STA csamph,X
         JSR :get
         LDX temp+1
         STA cquant,X
         DEC temp
         BNE :loop

         LDX #5           ;Find max sample
         LDA #00
:l2      CMP csamph,X
         BCS :c2
         LDA csamph,X
:c2      DEX
         BNE :l2
         STA csamph       ;Store in +0

         LDX #5
         LDA #00
:l3      CMP csampv,X
         BCS :c3
         LDA csampv,X
:c3      DEX
         BNE :l3
         STA csampv

         RTS
:get     
Get      
         LDA Length
         ORA Length+1
         BEQ :err2
         JSR DecLen
         JSR GetByte
         BCC :rts
:err2    PLA
         PLA

:err     LDA #READERR
         STA ERROR
:rts     RTS

* And finally -- start of scan!

DClo     DS 6             ;DC coeffs
DChi     DS 6

row      dfb 00
col      dfb 00
rowoff   dfb 00           ;Row offset
coloff   dfb 00           ;Col offset
buffpt   da 00            ;Image buffer
comp     dfb 00           ;Current component

AChuff   ds 6             ;AC table to use
DChuff   ds 6             ;DC table to use

SOS      
         DEC SkipFF       ;Skip $FF bytes
         JSR RendInit
*        JSR OptSCPU

         JSR Get
         STA temp         ;# of components
         STA ncomps
:l1      JSR Get
         STA temp+1       ;Component ID
         JSR Get
         LDX temp+1
         PHA
         AND #$0F
         STA AChuff,X
         PLA
         LSR
         LSR
         LSR
         LSR
         STA DChuff,X
         DEC temp
         BNE :l1
         JSR Get          ;Scan parameters
         JSR Get          ;(progressive)
         JSR Get          ;(ignore)

* Image data begins here

         STZ row
         STZ col
         JSR Restart

ReadY                     ;Intensity
         LDX #1           ;Component
         LDA #<YBuf
         LDY #>YBuf
         JSR ReadDU

ReadCb                    ;Chrominance
         LDX ncomps
         DEX
         BEQ ReadDone
         LDX #2
         LDA #<CbBuf
         LDY #>CbBuf
         JSR ReadDU

ReadCr                    ;Chrominance
         LDX ncomps
         DEX
         BEQ ReadDone
         LDX #3
         LDA #<CrBuf
         LDY #>CrBuf
         JSR ReadDU

ReadDone 
         JSR DecRes       ;Restart

         LDA AtEof
         BNE :done
         LDA csamph       ;Max sample
         CLC
         ADC col
         STA col
         CMP numcols
         BCC ReadY

         JSR ToRGB

         LDA #00
         STA col

         LDA #<ImgBuf
         LDY #>ImgBuf
         LDX csampv
         STX temp2
:rend    STA temp
         STY temp+1

         LDX row
         CPX rowoff
         BCC :norend

         SEI
         JSR Render       ;unto Ceaser
         CLI

:norend  INC row
         LDA row
         CMP numrows
         BCS :done
         SEC
         SBC rowoff
         BCC :c2
         CMP #25
         BCS :done
:c2      
         LDA temp         ;Next buffer
         CLC
         ADC buflen
         STA temp
         LDA temp+1
         ADC buflen+1
         TAY
         LDA temp
         DEC temp2
         BNE :rend

:jmp     JMP ReadY
:done    INC AtEOF
         RTS

*
* Read in a data unit
*

* buffer size = 38*8 = $0130 * 8 lines

linelen  da $0130
buflen   da $0980

curbuf   da 00
curcomp  dfb 00
rend     dfb 00
currow   dfb 00
curcol   dfb 00
RendFlag dfb 00

ReadDU   
         STA curbuf
         STY curbuf+1
         STX curcomp
         LDA #00
         STA Rend

         ldy #00          ;Compute expansion factors
         tya              ;maxsamp/samp
         clc
:l1      iny
         adc csamph,x
         cmp csamph       ;max
         bcc :l1
         sty hsamp
         lda #00
         tay
         clc
:l2      iny
         adc csampv,x
         cmp csampv
         bcc :l2
         sty vsamp

         LDA csampv,X     ;Vert samp
         STA temp

:loopy   LDX curcomp
         LDA csamph,X     ;Horiz sampling
         STA temp+1
         LDA col
         SEC
         SBC coloff
         STA curcol
:loopx   LDA rend
         STA RendFlag
         JSR Fetch
         LDA ERROR
         BNE :rts
         LDA curcol
         CLC
         ADC hsamp
         STA curcol
         DEC temp+1
         BNE :loopx

         LDX vsamp
:next    LDA curbuf       ;Next row
         CLC
         ADC buflen
         STA curbuf
         LDA curbuf+1
         ADC buflen+1
         STA curbuf+1
         DEX
         BNE :next

         DEC temp
         BNE :loopy
:rts     RTS

*
* Fetch the data
*
Fetch    
         LDA #00
         STA dest+1
         LDA curcol
         CMP #37          ;catches neg too
         ROL RendFlag     ;C set?

         ASL              ;offset = col*8
         ROL dest+1
         ASL
         ROL dest+1
         ASL
         ROL dest+1
         ADC curbuf       ;Ybuf, etc.
         STA dest         ;Data storage
         LDA curbuf+1
         ADC dest+1
         STA dest+1
:decode  
         JSR DecodeDC
         LDA ERROR
         BNE :c1
         JSR DecodeAC
         LDA ERROR
         BNE :c1
         LDA RendFlag
         BNE :c1
         JSR Dequantize
         JSR IDCT2D
         JMP Desample

**** debug
*         JMP PrintDU  ;DEBUG
**** debug

:c1      RTS

*-------------------------------
* DEBUG * DEBUG
         do 0
PrintDU  
         LDY #00
:loop    LDA trans+1,Y
         JSR HexOut
         LDA trans,Y
         JSR HexOut
         LDA #32
         JSR $FFD2
         INY
         INY
         CPY #128
         BCC :loop
         LDA #13
         JMP $FFD2
         fin
*-------------------------------

*
* Decode DC coeff.
*

DecodeDC 
         LDX curcomp      ;Set Huffman
         LDA DCHuff,X
         ASL
         TAX
         LDA DCHuff0,X
         STA huff
         LDA DCHuff0+1,X
         STA huff+1

         JSR GetHuff      ;Get category
         LDX Error
         BNE :rts
         JSR GetBits      ;Get the bits
         LDX curcomp
         LDA Bitslo
         CLC
         ADC DClo,X
         STA DClo,X
         STA Veclo
         LDA DChi,X
         ADC Bitshi
         STA DChi,X
         STA Vechi
:rts     RTS

*
* Decode AC coeffs
*

tmphuf   dfb 00

DecodeAC 
         LDX curcomp      ;Set Huffman
         LDA ACHuff,X
         ASL
         TAX
         STX tmphuf

         LDY #1
:loop    STY temp2        ;Index
         LDX tmphuf
         LDA ACHuff0,X
         STA huff
         LDA ACHuff0+1,X
         STA huff+1

         JSR GetHuff      ;Get RLE len
         BEQ :fill
         LDX ERROR
         BNE :done
         STA count        ;temp
         LSR
         LSR
         LSR
         LSR              ;# of zeros
         BEQ :skip
:fill    TAX
         LDA #00
         LDY temp2
:lout    STA Veclo,Y
         STA Vechi,Y
         INY
         CPY #64
         BCS :done
         DEX
         BNE :lout
         STY temp2
:skip    LDA count
         AND #$0F         ;category
         JSR GetBits
         LDY temp2
         LDA Bitslo
         STA Veclo,Y
         LDA Bitshi
         STA Vechi,Y
         INY
         CPY #64
         BCC :loop
:done    RTS

*
* Dequantize the vector Vec
*
* Mult is 16 bit signed x 8 bit unsigned
* with 16-bit result, so sign etc. are
* taken care of automatically.
*
* result -> trans
*
quanttab 
         DA QT0
         DA QT1
         DA QT2

* Table to un-zigzag coeffs; multiples
* of 2, since 2 byte result.

zigzag   
         dfb 0,2,16,32,18,4,6,20
         dfb 34,48,64,50,36,22,8,10
         dfb 24,38,52,66,80,96,82,68
         dfb 54,40,26,12,14,28,42,56
         dfb 70,84,98,112,114,100,86,72
         dfb 58,44,30,46,60,74,88,102
         dfb 116,118,104,90,76,62,78,92
         dfb 106,120,122,108,94,110,124,126


Dequantize
         LDX curcomp
         LDA cquant,X
         ASL
         TAX
         LDA quanttab,X
         STA quantp
         LDA quanttab+1,X
         STA quantp+1

         LDX #63
:loop    TXA
         TAY
         LDA (quantp),Y
         STA mult1lo
         STA mult1hi
         EOR #$FF
         CLC
         ADC #1
         STA mult2lo
         STA mult2hi

         LDY Veclo,X
         BNE :c1
         STY Bitslo
         STY Bitshi
         BEQ :high
:c1      
         LDA (mult1lo),Y
         SEC
         SBC (mult2lo),Y
         STA Bitslo
         LDA (mult1hi),Y
         SBC (mult2hi),Y
         STA Bitshi

:high    LDY Vechi,X
         LDA (mult1lo),Y
         SEC
         SBC (mult2lo),Y
         CLC
         ADC Bitshi

         LDY zigzag,X     ;Un-zigzag
         INY
         STA trans,Y
         DEY
         LDA Bitslo
         STA trans,Y
         DEX
         BPL :loop
         RTS

*
* Desample -- expand DCT square by sample factor
* and reorg data.
*
* On entry:
*   dest = destination buffer
*

*curpos   da  00       ;buffer position
*bufcol   dfb 00       ;buffer column

Desample 
         lda #00
:newrow  ldx vsamp
         stx huff         ;temporary
:oldrow  sta temp2        ;current element
         lda #8
         sta count        ;column
         ldy #00
:l1      ldx temp2
         lda trans,x
         ldx hsamp
:expand  
         sta (dest),y
         iny
         dex
         bne :expand
         inc temp2
         dec count
         bne :l1

         lda dest         ;next scanline
         clc
         adc linelen
         sta dest
         lda dest+1
         adc linelen+1
         sta dest+1

         lda temp2
         sec
         sbc #8           ;Start of row
         dec huff         ;horizonal sampling
         bne :oldrow
         lda temp2
         cmp #64
         bne :newrow
         rts

*
* Convert to RGB
*

Ypoint   = point
CbPoint  = dest
CrPoint  = Bitslo

ToRGB    
         LDA ncomps
         CMP #1           ;grayscale
         BNE :color
         REP #$30
         LDX #YBuf        ;Copy YBuf to
         LDY #CbBuf       ;CbBuf and CrBuf
         LDA #CrBuf-YBuf-1
         MVN 0,0
         SEP #$30
         RTS
:color   
         REP #$20
         LDA #Ybuf
         STA Ypoint
         LDA #CbBuf
         STA Cbpoint
         LDA #CrBuf
         STA Crpoint
         SEP #$30

         LDY #00
:loop    LDA #00
         STA temp+1
         LDA (CbPoint),Y
         EOR #$80
         BPL :posCb
:negCb   EOR #$FF
         CLC
         ADC #01
         TAX
         LDA (Ypoint),Y
         CLC
         ADC CbTab1,X
         STA temp
         BCC :c1
         INC temp+1       ;high byte
:c1      LDA (Ypoint),Y
         SEC
         SBC CbTab2,X
         BCS :cont
         LDA #00          ;Underflow
         BEQ :cont

:posCb   TAX
         LDA (Ypoint),Y
         SEC
         SBC CbTab1,X
         STA temp
         BCS :c2
         DEC temp+1
:c2      LDA (Ypoint),Y
         CLC
         ADC CbTab2,X
         BCC :cont
         LDA #255
:cont    STA temp2

         LDA (CrPoint),Y
         EOR #$80
         BPL :posCr
:negCr   EOR #$FF
         CLC
         ADC #01
         TAX
         LDA temp
         CLC
         ADC CrTab2,X
         STA temp
         LDA temp+1
         ADC #00
         BEQ :c3
         BPL :p1
         LDA #00
         DFB $2C
:p1      LDA #255
         DFB $2C
:c3      LDA temp
         STA (CbPoint),Y  ;Green
         LDA (YPoint),Y
         SEC
         SBC CrTab1,X
         BCS :done
         LDA #00
         BEQ :done

:posCr   TAX
         LDA temp
         SEC
         SBC CrTab2,X
         STA temp
         LDA temp+1
         SBC #00
         BEQ :c4
         BPL :p2
         LDA #00
         DFB $2C
:p2      LDA #255
         DFB $2C
:c4      LDA temp
         STA (CbPoint),Y
         LDA (YPoint),Y
         CLC
         ADC CrTab1,X
         BCC :done
         LDA #255
:done    STA (YPoint),Y   ;Red
         LDA temp2
         STA (CrPoint),Y  ;Blue
         INY
         BEQ :inc
:jmp     JMP :loop
:inc     INC YPoint+1
         INC CbPoint+1
         INC CrPoint+1
         LDA YPoint+1
         CMP #>CbBuf
         BCC :jmp
         RTS

*
* Retrieve .A bits and convert
* to signed number in (Bitslo, Bitshi)
*
sign     DFB 00

GetBits  
         STA count
         TAX
         BEQ :zero
         JSR GetBit
         LDA #00
         BCS :c1
         LDA #$FF         ;0-> negative
:c1      STA Bitshi
         ROL
         STA Bitslo
         STA sign
         DEC count
         BEQ :done
:loop    JSR GetBit
         ROL Bitslo
         ROL Bitshi
         DEC count
         BNE :loop
:done    LDA sign
         BPL :rts
         INC Bitslo       ;Make 2's comp
         BNE :rts
         INC Bitshi
:rts     RTS

:zero    STA Bitslo
         STA Bitshi
         RTS

*
* Huffman tree routines.
*
* The Huffman tree is implemented as
* a series of 2-byte nodes.  Left
* nodes are at huff+2, right nodes
* are at (huff) if link < $8000.
* Link = $80xx means xx=leaf value,
* link = $FFxx means no right link,
* link+2 = HuffTop -> no left link.
*

DCHuff0  da 00            ;Addresses
DCHuff1  da 00
DCHuff2  da 00
DCHuff3  da 00
ACHuff0  da 00
ACHuff1  da 00
ACHuff2  da 00
ACHuff3  da 00

HuffTop  da 00            ;End of Huffman tree

InitHuff 
         LDA #<Huffmem
         STA HuffTop
         LDA #>Huffmem
         STA HuffTop+1
         RTS

* Create new node; make current node
* point to it.
*
* On entry: .Y = 0 -> right node,
*            otherwise left node

NewNode  
         PHY
         PHX

         REP #$20

         TYX
         BNE :skip
         LDA HuffTop
         SEC
         SBC huff
         STA (huff)       ;point -> new node
:skip    
         LDA HuffTop
         STA point
         CLC
         ADC #2
         STA HuffTop
         LDA #$FFFF
         STA (point)      ;Init new node

         SEP #$30
         PLX
         PLY
         CLC
         RTS

* Add a new node; .X = length
* (huff) -> tree root

huffbits da 00            ;hi,lo
huffval  da $8000         ;high byte = $80

AddNode  
         STA huffval
:loop    
         SEP #$30
         LDY #1
         CPX #9
         BCC :c1
         DEY
:c1      LDA BITP,X
         AND huffbits,Y
         BNE :right

:left    REP #$20
         LDA huff         ;Check if at end
         CLC
         ADC #2
         CMP HuffTop
         BCC :skip1       ;Not a new node
         PHA
         LDY #$80         ;Create left node
         JSR NewNode      ;m set
         REP #$20
         PLA
:skip1   STA huff
         BRA :dex

:right   LDY #1
         LDA (huff),Y     ;Check for rt ptr
         BPL :skip2
         DEY              ;.Y=0 -> rt node
         JSR NewNode
:skip2   REP #$20
         LDA (huff)
         CLC
         ADC huff
         STA huff

:dex     DEX
         BNE :loop
         LDA huffval      ;high byte = $80
         STA (huff)
         SEP #$30
         CLC
         RTS

*
* GetHuff -- Get valid Huffman code
*   from (huff)
*

GetHuff  
         SEP #$30
         LDY #01
         LDA (huff),Y
         CMP #$80
         BEQ :found

         JSR GetBit
         BCS :right
         REP #$20
         LDA huff
         ADC #2           ;C clear
         CMP HuffTop
         BCS :err
         STA huff
         BRA GetHuff

*        LDA huff
*        ADC #2       ;C clear
*        TAX
*        LDA huff+1
*        ADC #00
*        TAY
*        CPX HuffTop
*        SBC HuffTop+1
*        BCS :err
*        STY huff+1
*        STX huff
*        BCC GetHuff

:right   REP #$20
         LDA (huff)
         BMI :err
         CLC
         ADC huff
         STA huff
         BRA GetHuff

*:right   LDY #01
*         LDA (huff),Y
*         BMI :err
*         PHA
*         DEY
*         LDA (huff),Y
*         CLC
*         ADC huff
*         STA huff
*         PLA
*         ADC huff+1
*         STA huff+1
*         BNE GetHuff

:found   SEP #$30
         LDA (huff)
         RTS

:err     SEP #$30
         LDA #HUFFERR
         STA ERROR
         RTS

********************************
*
* IDCT routines
*
********************************


*trans    =   $9000    ;Transform
                          ;128 bytes

a1lo     = $2900          ;cos(2a), a=pi/8
a1hi     = $2A00
a2lo     = $2B00          ;cos(a)-cos(3a)
a2hi     = $2C00
a3lo     = a1lo           ;cos(2a)
a3hi     = a1hi
a4lo     = $2D00          ;cos(a)+cos(3a)
a4hi     = $2E00
a4gh     = $2F00
a5lo     = $3000          ;cos(3a)
a5hi     = $3100

sec1     = $3200
sec2     = $3400
sec3     = $3600
sec4     = $3800
sec5     = $3A00
sec6     = $3C00
sec7     = $3E00

a1216    = 46341          ;a1*2^16
a2216    = 35468          ;a2*2^16
a3216    = a1216
a4216    = 20091          ;...
a5216    = 25080

Coeff    DS 16

IDCT2D   
*         CLC
*         XCE

*         SEP #$10     ;16 bit XY
         REP #$20

* First the columns

cols     
         LDX #00
:l0      STX index
         LDY #00
:l1      LDA trans,X
         STA DCT,Y
         TXA
         CLC
         ADC #16
         TAX
         INY
         INY
         CPY #16
         BNE :l1
         JSR IDCT
         LDY #0
         LDX index
:l1b     LDA Coeff,Y
         STA trans,X
         TXA
         CLC
         ADC #16
         TAX
         INY
         INY
         CPY #16
         BNE :l1b
         LDX index
         INX
         INX
         CPX #16
         BCC :l0

* Then the rows
rows     
         LDX #00
         STX index
         STX count
:l0      LDY #00
:l1      LDA trans,X
         STA DCT,Y
         INX
         INX
         INY
         INY
         CPY #16
         BNE :l1
         STX index
         JSR IDCT
         LDX #0
         LDY count
:l1b     LDA Coeff,X
         CMP #$8000
         ROR
         CMP #$8000
         ROR
         ADC #128         ;C determines rounding
         BPL :c1          ;range check
         LDA #00
:c1      CMP #256
         BCC :c2
         LDA #255
:c2      
         SEP #$20         ;8-bit result
* STA (dest)
         STA trans,Y
         REP #$20
*         INC dest
         INY
         INX
         INX
         CPX #16
         BNE :l1b
         STY count
         LDX index
         CPX #128
         BCC :l0

*         XCE
         SEP #$30
         RTS

F0       = DCT
F1       = DCT+2
F2       = DCT+4
F3       = DCT+6
F4       = DCT+8
F5       = DCT+10
F6       = DCT+12
F7       = DCT+14

C0       = Coeff
C1       = Coeff+2
C2       = Coeff+4
C3       = Coeff+6
C4       = Coeff+8
C5       = Coeff+10
C6       = Coeff+12
C7       = Coeff+14

*
* Compute the inverse DCT (1D)
*
* Uses modified reversed flowgraph from
* Pennebaker & Mitchell, p. 52
*
* Input: DCT coeffs contained in Flo/Fhi
* Output: Original coeffs in COEFFS
*

IDCT     
         JSR PrepDat      ;Shift and such

* Stage 1: F(5) <- F(5) - F(3)
*          F(1) <- F(1) + F(7)
*          F(7) <- F(1) - F(7)
*          F(3) <- F(5) + F(3)

         LDA F5
         SEC
         SBC F3
         STA t1

         LDA F1
         CLC
         ADC F7
         STA t2

         LDA F1
         SEC
         SBC F7
         STA t3

         LDA F5
         CLC
         ADC F3
         STA F3

         LDA t3
         STA F7

         LDA t2
         STA F1

         LDA t1
         STA F5

* Stage 2: F(2) <- F(2) - F(6)
*          F(6) <- F(2) + F(6)
*          F(1) <- F(1) - F(3)
*          F(3) <- F(1) + F(3)

         LDA F2
         SEC
         SBC F6
         STA t1

         LDA F2
         CLC
         ADC F6
         STA F6

         LDA t1
         STA F2

         LDA F1
         SEC
         SBC F3
         STA t1

         LDA F1
         CLC
         ADC F3
         STA F3

         LDA t1
         STA F1


* Stage 3: F(2) <- a1*F(2)
*          F(5) <- -a2*F(5) + t1
*          F(1) <- a3*F(1)
*          F(7) <- a4*F(7) + t1
* where t1 = -a5*(F(5) + F(7))


* F(2) <- a1*F(2)

         SEP #$30         ;8-bit .AXY
         LDX F2           ;Lo
         LDY F2+1         ;Hi
         LDA a1lo,Y
         CLC
         ADC a1hi,X
         XBA              ;lo byte
         LDA a1hi,Y
         ADC #00
         XBA              ;hi byte
         REP #$20
         CPY #$80
         BCC :pos1
         SBC #a1216
:pos1    STA F2

* F(1) = a3*F(1)

         SEP #$30         ;8-bit .A
         LDX F1           ;Lo
         LDY F1+1         ;Hi
         LDA a3lo,Y
         CLC
         ADC a3hi,X
         XBA
         LDA a3hi,Y
         ADC #00
         XBA
         REP #$20
         CPY #$80
         BCC :pos1b
         SBC #a3216
:pos1b   STA F1


* t1 = -a5*(F(5) + F(7))

         LDA F5
         CLC
         ADC F7
         SEP #$30         ;8-bit .A
         TAX              ;Lo
         XBA
         TAY              ;Hi
         LDA a5lo,Y
         CLC
         ADC a5hi,X
         XBA
         LDA a5hi,Y
         ADC #00
         XBA
         REP #$20
         CPY #$80
         BCC :pos2
         SBC #a5216
:pos2    EOR #$FFFF
         DFB $1A          ;Note to self: fix INC!
         STA t1

* F(5) = t1 - a2*F(5)

         SEP #$30         ;8-bit .A
         LDX F5           ;Lo
         LDY F5+1         ;Hi
         LDA a2lo,Y
         CLC
         ADC a2hi,X
         XBA
         LDA a2hi,Y
         ADC #00
         XBA
         REP #$20
         CPY #$80
         BCC :pos3
         SBC #a2216
:pos3    EOR #$FFFF
         DFB $1a          ;INC
         CLC
         ADC t1
         STA F5

* F(7) = a4*F(7) + t1

         SEP #$30         ;8-bit .A
         LDX F7           ;Lo
         LDY F7+1         ;Hi
         LDA a4lo,Y
         CLC
         ADC a4hi,X
         XBA
         LDA a4hi,Y
         ADC a4gh,X       ;a4*.X can be >255
         XBA
         REP #$20
         CPY #$80
         BCC :pos4
         SBC #a4216
:pos4    CLC
         ADC t1
         STA F7

* Stage 4:
*   F(0) <- F(0) + F(4)
*   F(4) <- F(0) - F(4)
*   F(6) <- F(2) + F(6)

         LDA F0
         CLC
         ADC F4
         STA t1

         LDA F0
         SEC
         SBC F4
         STA F4

         LDA t1
         STA F0

         LDA F2
         CLC
         ADC F6
         STA F6

* Stage 5:
*   F(0) <- F(0) + F(6)
*   F(4) <- F(2) + F(4)
*   F(2) <- F(4) - F(2)
*   F(6) <- F(0) - F(6)
*   F(3) <- F(3) + F(7)
*   F(7) <- F(7) + F(1)
*   F(1) <- F(1) - F(5)
*   F(5) <- -F(5)

         LDA F0
         CLC
         ADC F6
         STA t1

         LDA F0
         SEC
         SBC F6
         STA F6
         LDA t1
         STA F0

         LDA F4
         CLC
         ADC F2
         STA t1

         LDA F4
         SEC
         SBC F2
         STA F2
         LDA t1
         STA F4

         LDA F3
         CLC
         ADC F7
         STA F3

         LDA F7
         CLC
         ADC F1
         STA F7

         LDA F1
         SEC
         SBC F5
         STA F1

         LDA #0000
         SEC
         SBC F5
         STA F5

* Final stage:
*   c(0) = F(0) + F(3)
*   c(1) = F(4) + F(7)
*   c(2) = F(2) + F(1)
*   c(3) = F(6) + F(5)
*   c(4) = F(6) - F(5)
*   c(5) = F(2) - F(1)
*   c(6) = F(4) - F(7)
*   c(7) = F(0) - F(3)
*
* Note: values are offset -128

         LDA F0
         CLC
         ADC F3
         STA C0

         LDA F4
         CLC
         ADC F7
         STA C1

         LDA F2
         CLC
         ADC F1
         STA C2

         LDA F6
         CLC
         ADC F5
         STA C3

         LDA F6
         SEC
         SBC F5
         STA C4

         LDA F2
         SEC
         SBC F1
         STA C5

         LDA F4
         SEC
         SBC F7
         STA C6

         LDA F0
         SEC
         SBC F3
         STA C7
         RTS              ;Sheeew!

*
* Since the algorithm is really an
* FFT converted into a DCT, the
* coefficients need a little massaging
* before tranformation.
*
* Specifically,
*   F(i) = S(i)/(2cos(i*pi/16)) i=0..7
* with F(0)=F(0)*2/sqrt(2), which can
* be combined with the first step
* using the table for i=4.
*
* These multipliers can in part be
* incorporated in the quantization
* table, but for now they're out in
* the open.
*


PrepDat  
         SEP #$10         ;.X/Y 8-bits

         LDX #00
         LDA #sec4
         STA point
         LDA F0
         JSR PMult
         STA F0

         LDX #00
         LDA #sec1
         STA point
         LDA F1
         JSR PMult
         STA F1

         LDX #00
         LDA #sec2
         STA point
         LDA F2
         JSR PMult
         STA F2

         LDX #00
         LDA #sec3
         STA point
         LDA F3
         JSR PMult
         STA F3

         LDX #00
         LDA #sec4
         STA point
         LDA F4
         JSR Pmult
         STA F4

         LDX #00
         LDA #sec5
         STA point
         LDA F5
         JSR Pmult
         STA F5

         LDX #00
         LDA #sec6
         STA point
         LDA F6
         JSR Pmult
         STA F6

         LDX #00
         LDA #sec7
         STA point
         LDA F7
         JSR Pmult
         STA F7
         RTS

PMult    
         BMI :neg
         CMP #256
         BCC :ok
:l1      INX              ;Shift count
         LSR
         CMP #256
         BCS :l1
:ok      ASL
         ADC point
         STA point
         LDA (point)
         DEX
         BMI :rts
:l1b     ASL
         DEX
         BPL :l1b
:rts     RTS

:neg     EOR #$FFFF
         INC
         CMP #256
         BCC :ok2
:l2      INX              ;Shift count
         LSR
         CMP #256
         BCS :l2
:ok2     ASL
         ADC point
         STA point
         LDA (point)
         DEX
         BMI :rts2
:l2b     ASL
         DEX
         BPL :l2b
:rts2    EOR #$FFFF
         INC
         RTS






