;
; This source handles conversion from yuv to rgb space, which includes
; initialization of the conversion & clamp tables.
;
; Original routines are in 24bit.c, which mainly contained those lovely 
; just-perfectly-fit-for-assembler-kick-ass constructions.
;
; Michael Rausch  14-4-94  1:12:57
;

	SECTION	text,CODE

;************************************************************************************

; The conversion from YCrCb to RGB is done with fixed point arithmetic and
; lookup tables (no macro is involved; the tables are built by
; InitColorDither below), using:
;	R = L + 1.40200*Cr
;	G = L - 0.34414*Cb - 0.71414*Cr
;	B = L + 1.77200*Cb
;

; void InitColorDither(void)
;
; Builds the two chroma lookup tables stored at Cb_r_tab and the clamp table
; stored around _clamp.  No arguments, no return value.
; d0/d1/a0/a1 are used as scratch; d2 is saved and restored.
;
; Chroma tables: 256 longwords each, indexed by the raw chroma byte i, with
; x = i-128.  Every longword packs two signed words.  All values are doubled
; so that they can be used directly as byte offsets into the word sized
; clamp table.
;
;   Cb_r_tab[i]             = { 2*((x* 179)>>7) , 2*((x* -91)>>7) }  "cb_r cb_g"
;   (Cb_r_tab+Cr_gb_off)[i] = { 2*((x* -44)>>7) , 2*((x* 226)>>7) }  "cr_g cr_b"
;
; The pairing matters: the converters fetch one longword per chroma sample
; and expect the red/blue term and the green term that belong to the SAME
; chroma channel next to each other.
;
	XDEF	@InitColorDither
@InitColorDither:			

	move.l	d2,-(sp)		; the converters in this file preserve d2-d7/a2-a6;
					; do the same for the one extra register used here

	lea	Cb_r_tab,a0		; table read as "cb_r cb_g"
Cr_gb_off EQU	256*4
	lea	Cb_r_tab+Cr_gb_off,a1	; table read as "cr_g cr_b"

	move.w	#255,d0			; 256 entries
	moveq	#-128,d1		; x = index-128
crgbtabs:
	move.w	d1,d2
	muls.w	#179,d2		; 1.40200
	asr.w	#7,d2
	add.w	d2,d2		; *2: byte offset into a table of words
	move.w	d2,(a0)+	;br
	move.w	d1,d2
	muls.w	#-91,d2		; -0.71414
	asr.w	#7,d2
	add.w	d2,d2
	move.w	d2,(a0)+	;bg	(same longword as br)
	move.w	d1,d2
	muls.w	#-44,d2		; -0.34414
	asr.w	#7,d2
	add.w	d2,d2
	move.w	d2,(a1)+	;rg
	move.w	d1,d2
	muls.w	#226,d2		; 1.77200
	asr.w	#7,d2
	add.w	d2,d2
	move.w	d2,(a1)+	;rb	(same longword as rg)
	addq.l	#1,d1
	dbra	d0,crgbtabs

;
; create the clamp tables, including the shifted one
;
; Layout (word entries, index relative to _clamp):
;	-clampsize .. -1		$0000
;	0 .. 255			index<<8
;	256 .. 255+clampsize		$ff00
;
; IMPORTANT: The first versions had a MUCH too low clampsize, resulting in really
; annoying purple artifacts !!!
;
; The largest chroma offset produced above is 226 (x=-128, factor 226/128), so
; clampsize has to be at least 226 to keep every lookup inside the table.
;
clampsize EQU	256

	moveq	#0,d1
	lea	_clamp-clampsize*2,a0			; start of the lower guard area
	move.w	#clampsize-1,d0
fill_ct1:move.w	#$ff00,((clampsize+256)*2,a0)		; upper guard area: saturated
	move.w	d1,(a0)+				; lower guard area: zero
	dbra	d0,fill_ct1
	move.w	#255,d0
fill_ct2:move.w	d1,(a0)+				; entries 0..255: value in the high byte
	add.w	#$100,d1
	dbra	d0,fill_ct2

	move.l	(sp)+,d2
	rts

;************************************************************************************

;void ColorDitherImage(unsigned char *lum, unsigned char *cr, unsigned char *cb,
;		      unsigned char *out, int rows, int cols)
;
; "lores" variant, 2 output bytes per pixel.
;
; One cr byte and one cb byte are consumed for every 2x2 block of luminance
; bytes.  For each block one longword is written to each of two adjacent
; output rows; its four bytes are
;	r(left pixel), g(left pixel), b(right pixel), g(right pixel)
; rows and cols are processed in pairs; nothing is done if either one is
; smaller than 2.  The lookup tables must have been set up by
; InitColorDither.  d2-d7/a2-a6 are preserved.
;
	XDEF @ColorDitherImage_lores
@ColorDitherImage_lores:

;    a0  unsigned char *lum
;    a1  unsigned char *cr
; 4(sp)  unsigned char *cb
; 8(sp)  unsigned char *out
;    d0  int rows
;    d1  int cols

cdi_regs REG	d2-d7/a2-a6	; 6+5=11
	movem.l	cdi_regs,-(sp)

; stack frame offsets (relative to sp after the local area is reserved)
;saved7	EQU	0
saverows EQU	4
savecols EQU	8
localvars EQU	savecols+4
cb_offset EQU	localvars+11*4+4	; locals + 11 saved registers + return address
out_offset EQU	cb_offset+4

	sub.w	#localvars,sp

; Both loops count with dbra, which only looks at the low 16 bits and stops
; at -1: with less than one row pair or column pair the counter would start
; at -1 and run 65536 times.  Leave early instead.
	cmp.l	#2,d0
	blt.w	cdi_done_lores
	cmp.l	#2,d1
	blt.w	cdi_done_lores

	move.l	d1,(savecols,sp)
	lsr.l	#1,d0			; two rows per pass
	subq.l	#1,d0			; dbra counter
	move.l	d0,d7

	move.l	a0,a2			; lum1
	move.l	a2,a3
	add.l	d1,a3			; lum2 = lum1+cols

	move.l	a1,a4			; cr channel
	move.l	cb_offset(sp),d5
	sub.l	a4,d5			; cb channel is addressed via its offset to the cr channel

	lea	Cb_r_tab,a5		; conversion tab(s)

	move.l	(out_offset,sp),a0	; row1
	move.l	a0,a1
	add.l	d1,d1			
	add.l	d1,a1			; row2=row1+cols*2

all_rows_lores:
	move.l	(savecols,sp),d6
	lsr.l	#1,d6			; two columns per pass
	subq.l	#1,d6			; dbra counter
all_cols_lores:

; free: d4

	moveq	#0,d1
	moveq	#0,d2
	move.b	(a4,d5.l),d1		; CB channel
	move.b	(a4)+,d2		; CR channel

	move.l	(a5,d1.w*4),d0			; cb_r cb_g
	move.l	(Cr_gb_off,a5,d2.w*4),d1	; cr_g cr_b

	move.w	d1,d2			; d2.w = cr_b
	swap	d1			; d1.w = cr_g
	add.w	d0,d1			; d1.w = cr_g+cb_g
	swap 	d0			; d0.w = cb_r


	sub.w	d1,d0			; make r and b relative to g, because the
	sub.w	d1,d2			; g offset is folded into the base address

	ext.l	d1
	add.l	#_clamp,d1

; d0   cb_r - (cr_g+cb_g)		(byte offset, i.e. *2)
; d1   &_clamp + (cr_g+cb_g)		(byte offset, i.e. *2)
; d2   cr_b - (cr_g+cb_g)		(byte offset, i.e. *2)

	moveq	#0,d3			; *lum++
	move.b	(a2)+,d3
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.l	d3,d3
	add.l	d3,a6
	move.w	(a6,d0.w),d3		; r	(value in the high byte, low byte 0)
	or.b	(a6),d3			; g	(first byte of the clamp word)

	swap	d3			; upper word done; lower word is 0 again

	move.b	(a2)+,d3		; *lum++
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.w	d3,d3
	add.w	d3,a6
	move.w	(a6,d2.w),d3		; b
	or.b	(a6),d3			; g

	move.l	d3,(a0)+		; *row1++


	moveq	#0,d3			; *lum2++
	move.b	(a3)+,d3
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.l	d3,d3
	add.l	d3,a6
	move.w	(a6,d0.w),d3		; r
	move.b	(a6),d3			; g

	swap	d3

	move.b	(a3)+,d3		; *lum2++
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.w	d3,d3
	add.w	d3,a6
	move.w	(a6,d2.w),d3		; b
	move.b	(a6),d3			; g

	move.l	d3,(a1)+		; *row2++


	dbra	d6,all_cols_lores

	move.l	savecols(sp),d0		; next line in the luminance channel
	add.l	d0,a2			; (skip the line the other pointer just did)
	add.l	d0,a3

	add.l	d0,d0			; 2 bytes per pixel
	add.l	d0,a0			; next line in the output row
	add.l	d0,a1

	dbra	d7,all_rows_lores

cdi_done_lores:
	add.w	#localvars,sp
	movem.l	(sp)+,cdi_regs
	rts

;************************************************************************************
	XDEF @ColorDitherImage_RGB
;
; Converter writing one longword per pixel, bytes in memory: 0, r, g, b.
;
;    a0  unsigned char *lum
;    a1  unsigned char *cr
; 4(sp)  unsigned char *cb
; 8(sp)  unsigned char *out
;    d0  int rows
;    d1  int cols
;
; One cr byte and one cb byte are consumed for every 2x2 block of luminance
; bytes.  rows and cols are processed in pairs; nothing is done if either one
; is smaller than 2.  The lookup tables must have been set up by
; InitColorDither.  d2-d7/a2-a6 are preserved.
;
@ColorDitherImage_RGB:
	movem.l	cdi_regs,-(sp)

	sub.w	#localvars,sp

; Both loops count with dbra, which only looks at the low 16 bits and stops
; at -1: with less than one row pair or column pair the counter would start
; at -1 and run 65536 times.  Leave early instead.
	cmp.l	#2,d0
	blt.w	cdi_done_rgb
	cmp.l	#2,d1
	blt.w	cdi_done_rgb

	move.l	d1,(savecols,sp)
	lsr.l	#1,d0			; two rows per pass
	subq.l	#1,d0			; dbra counter
	move.l	d0,d7

	move.l	a0,a2			; lum1
	move.l	a2,a3
	add.l	d1,a3			; lum2 = lum1+cols

	move.l	a1,a4			; cr channel
	move.l	cb_offset(sp),d5
	sub.l	a4,d5			; cb channel is addressed via its offset to the cr channel

	lea	Cb_r_tab,a5		; conversion tab(s)

	move.l	(out_offset,sp),a0	; row1
	move.l	a0,a1
	lsl.l	#2,d1
	add.l	d1,a1			; row2=row1+cols*4


all_rows_rgb:
	move.l	(savecols,sp),d6
	lsr.l	#1,d6			; two columns per pass
	subq.l	#1,d6			; dbra counter
all_cols_rgb:

; still free: d4

	moveq	#0,d1
	moveq	#0,d2
	move.b	(a4,d5.l),d1		; CB channel
	move.b	(a4)+,d2		; CR channel

	move.l	(a5,d1.w*4),d0			; cb_r cb_g
	move.l	(Cr_gb_off,a5,d2.w*4),d1	; cr_g cr_b

	move.w	d1,d2			; d2.w = cr_b
	swap	d1			; d1.w = cr_g
	add.w	d0,d1			; d1.w = cr_g+cb_g
	swap 	d0			; d0.w = cb_r

	sub.w	d2,d0			; make r and g relative to b, because the
	sub.w	d2,d1			; b offset is folded into the base address

	ext.l	d2
	add.l	#_clamp,d2

	exg	a4,d2			; a4 = clamp base, d2 keeps the cr pointer

; d0   cb_r - cr_b			(byte offset, i.e. *2)
; d1   (cr_g+cb_g) - cr_b		(byte offset, i.e. *2)
; a4   &_clamp + cr_b			(byte offset, i.e. *2)
; d2   saved cr channel pointer

	moveq	#0,d3			; *lum++
	move.b	(a2)+,d3
	lea	(a4,d3.w*2),a6		; a6 = clamp base incl. b offset, plus 2*L

	move.b	(a6,d0.w),d3		; r	-> 000000rr
	swap	d3			;	-> 00rr0000
	move.w	(a6,d1.w),d3		; g	-> 00rrgg00
	move.b	(a6),d3			; b	-> 00rrggbb
	move.l	d3,(a0)+		; *row1++

	moveq	#0,d3			; *lum++
	move.b	(a2)+,d3
	lea	(a4,d3.w*2),a6

	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.b	(a6),d3			; b
	move.l	d3,(a0)+		; *row1++

	moveq	#0,d3			; *lum2++
	move.b	(a3)+,d3
	lea	(a4,d3.w*2),a6

	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.b	(a6),d3			; b
	move.l	d3,(a1)+		; *row2++

	moveq	#0,d3			; *lum2++
	move.b	(a3)+,d3
	lea	(a4,d3.w*2),a6

	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.b	(a6),d3			; b
	move.l	d3,(a1)+		; *row2++

	move.l	d2,a4			; back to the cr channel pointer

	dbra	d6,all_cols_rgb

	move.l	savecols(sp),d0		; next line in the luminance channel
	add.l	d0,a2			; (skip the line the other pointer just did)
	add.l	d0,a3

	lsl.l	#2,d0			; 4 bytes per pixel
	add.l	d0,a0			; next line in the output row
	add.l	d0,a1

	dbra	d7,all_rows_rgb

cdi_done_rgb:
	add.w	#localvars,sp
	movem.l	(sp)+,cdi_regs
	rts


;************************************************************************************

	XDEF @ColorDitherImage
;
; Converter writing one longword per pixel, bytes in memory: b, r, g, 0.
;
;    a0  unsigned char *lum
;    a1  unsigned char *cr
; 4(sp)  unsigned char *cb
; 8(sp)  unsigned char *out
;    d0  int rows
;    d1  int cols
;
; One cr byte and one cb byte are consumed for every 2x2 block of luminance
; bytes.  rows and cols are processed in pairs; nothing is done if either one
; is smaller than 2.  The lookup tables must have been set up by
; InitColorDither.  d2-d7/a2-a6 are preserved.
;
@ColorDitherImage:
	movem.l	cdi_regs,-(sp)

	sub.w	#localvars,sp

; Both loops count with dbra, which only looks at the low 16 bits and stops
; at -1: with less than one row pair or column pair the counter would start
; at -1 and run 65536 times.  Leave early instead.
	cmp.l	#2,d0
	blt.w	cdi_done
	cmp.l	#2,d1
	blt.w	cdi_done

	move.l	d1,(savecols,sp)
	lsr.l	#1,d0			; two rows per pass
	subq.l	#1,d0			; dbra counter
	move.l	d0,d7

	move.l	a0,a2			; lum1
	move.l	a2,a3
	add.l	d1,a3			; lum2 = lum1+cols

	move.l	a1,a4			; cr channel
	move.l	cb_offset(sp),d5
	sub.l	a4,d5			; cb channel is addressed via its offset to the cr channel

	lea	Cb_r_tab,a5		; conversion tab(s)

	move.l	(out_offset,sp),a0	; row1
	move.l	a0,a1
	lsl.l	#2,d1
	add.l	d1,a1			; row2=row1+cols*4


all_rows:
	move.l	(savecols,sp),d6
	lsr.l	#1,d6			; two columns per pass
	subq.l	#1,d6			; dbra counter
all_cols:

; still free: d4

	moveq	#0,d1
	moveq	#0,d2
	move.b	(a4,d5.l),d1		; CB channel
	move.b	(a4)+,d2		; CR channel

	move.l	(a5,d1.w*4),d0			; cb_r cb_g
	move.l	(Cr_gb_off,a5,d2.w*4),d1	; cr_g cr_b

	move.w	d1,d2			; d2.w = cr_b
	swap	d1			; d1.w = cr_g
	add.w	d0,d1			; d1.w = cr_g+cb_g
	swap 	d0			; d0.w = cb_r

	sub.w	d2,d0			; make r and g relative to b, because the
	sub.w	d2,d1			; b offset is folded into the base address

	ext.l	d2
	add.l	#_clamp,d2

	exg	a4,d2			; a4 = clamp base, d2 keeps the cr pointer

; d0   cb_r - cr_b			(byte offset, i.e. *2)
; d1   (cr_g+cb_g) - cr_b		(byte offset, i.e. *2)
; a4   &_clamp + cr_b			(byte offset, i.e. *2)
; d2   saved cr channel pointer

	moveq	#0,d3			; *lum++
	move.b	(a2)+,d3
	lea	(a4,d3.w*2),a6		; a6 = clamp base incl. b offset, plus 2*L

	move.w	(a6),d3			; b	BRG0	-> 0000bb00
	move.b	(a6,d0.w),d3		; r		-> 0000bbrr
	swap	d3			;		-> bbrr0000
	move.w	(a6,d1.w),d3		; g		-> bbrrgg00
	move.l	d3,(a0)+		; *row1++

	moveq	#0,d3			; *lum++
	move.b	(a2)+,d3
	lea	(a4,d3.w*2),a6

	move.w	(a6),d3			; b	BRG0
	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.l	d3,(a0)+		; *row1++

	moveq	#0,d3			; *lum2++
	move.b	(a3)+,d3
	lea	(a4,d3.w*2),a6

	move.w	(a6),d3			; b	BRG0
	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.l	d3,(a1)+		; *row2++

	moveq	#0,d3			; *lum2++
	move.b	(a3)+,d3
	lea	(a4,d3.w*2),a6

	move.w	(a6),d3			; b	BRG0
	move.b	(a6,d0.w),d3		; r
	swap	d3
	move.w	(a6,d1.w),d3		; g
	move.l	d3,(a1)+		; *row2++

	move.l	d2,a4			; back to the cr channel pointer

	dbra	d6,all_cols

	move.l	savecols(sp),d0		; next line in the luminance channel
	add.l	d0,a2			; (skip the line the other pointer just did)
	add.l	d0,a3

	lsl.l	#2,d0			; 4 bytes per pixel
	add.l	d0,a0			; next line in the output row
	add.l	d0,a1

	dbra	d7,all_rows

cdi_done:
	add.w	#localvars,sp
	movem.l	(sp)+,cdi_regs
	rts


;************************************************************************************

	XDEF @ColorDitherImage_12bit
;
; Converter writing one 16 bit word per pair of horizontally adjacent pixels.
; The four nibbles of the word are the upper 4 bits of
;	r(left pixel), g(left pixel), b(right pixel), g(right pixel)
;
;    a0  unsigned char *lum
;    a1  unsigned char *cr
; 4(sp)  unsigned char *cb
; 8(sp)  unsigned char *out
;    d0  int rows
;    d1  int cols
;
; One cr byte and one cb byte are consumed for every 2x2 block of luminance
; bytes.  rows and cols are processed in pairs; nothing is done if either one
; is smaller than 2.  The lookup tables must have been set up by
; InitColorDither.  d2-d7/a2-a6 are preserved.
;
@ColorDitherImage_12bit:
	movem.l	cdi_regs,-(sp)

	sub.w	#localvars,sp

; Both loops count with dbra, which only looks at the low 16 bits and stops
; at -1: with less than one row pair or column pair the counter would start
; at -1 and run 65536 times.  Leave early instead.
	cmp.l	#2,d0
	blt.w	cdi_done_12bit
	cmp.l	#2,d1
	blt.w	cdi_done_12bit

	move.l	d1,(savecols,sp)
	lsr.l	#1,d0			; two rows per pass
	subq.l	#1,d0			; dbra counter
	move.l	d0,d7

	move.l	a0,a2			; lum1
	move.l	a2,a3
	add.l	d1,a3			; lum2 = lum1+cols

	move.l	a1,a4			; cr channel
	move.l	cb_offset(sp),d5
	sub.l	a4,d5			; cb channel is addressed via its offset to the cr channel

	lea	Cb_r_tab,a5		; conversion tab(s)

	move.l	(out_offset,sp),a0	; row1
	move.l	a0,a1
	add.l	d1,a1			; row2=row1+cols

all_rows_12bit:
	move.l	(savecols,sp),d6
	lsr.l	#1,d6			; two columns per pass
	subq.l	#1,d6			; dbra counter
all_cols_12bit:

	moveq	#0,d1
	moveq	#0,d2
	move.b	(a4,d5.l),d1		; CB channel
	move.b	(a4)+,d2		; CR channel

	move.l	(a5,d1.w*4),d0			; cb_r cb_g
	move.l	(Cr_gb_off,a5,d2.w*4),d1	; cr_g cr_b

	move.w	d1,d2			; d2.w = cr_b
	swap	d1			; d1.w = cr_g
	add.w	d0,d1			; d1.w = cr_g+cb_g
	swap 	d0			; d0.w = cb_r

	sub.w	d1,d0			; make r and b relative to g, because the
	sub.w	d1,d2			; g offset is folded into the base address

	ext.l	d1
	add.l	#_clamp,d1

; d0   cb_r - (cr_g+cb_g)		(byte offset, i.e. *2)
; d1   &_clamp + (cr_g+cb_g)		(byte offset, i.e. *2)
; d2   cr_b - (cr_g+cb_g)		(byte offset, i.e. *2)

	moveq	#0,d3			; *lum++
	moveq	#0,d4
	move.b	(a2)+,d3
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.l	d3,d3
	add.l	d3,a6
	move.w	(a6,d0.w),d3		; r	-> 0000rr00
	lsl.l	#4,d3			;	-> 000rr000
	move.w	(a6),d3			; g	-> 000rgg00
	lsl.l	#4,d3			;	-> 00rgg000

	move.b	(a2)+,d4		; *lum++
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.w	d4,d4
	add.l	d4,a6
	move.w	(a6,d2.w),d3		; b	-> 00rgbb00
	lsr.l	#4,d3			;	-> 000rgbb0
	move.b	(a6),d3			; g	-> 000rgbgg
	lsr.l	#4,d3			;	-> 0000rgbg
	move.w	d3,(a0)+		; *row1++


	moveq	#0,d3			; *lum2++
	moveq	#0,d4
	move.b	(a3)+,d3
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.l	d3,d3
	add.l	d3,a6
	move.w	(a6,d0.w),d3		; r
	lsl.l	#4,d3
	move.w	(a6),d3			; g
	lsl.l	#4,d3

	move.b	(a3)+,d4		; *lum2++
	move.l	d1,a6			; a6 = clamp base incl. g offset, plus 2*L
	add.w	d4,d4
	add.l	d4,a6
	move.w	(a6,d2.w),d3		; b
	lsr.l	#4,d3
	move.b	(a6),d3			; g
	lsr.l	#4,d3
	move.w	d3,(a1)+		; *row2++


	dbra	d6,all_cols_12bit

	move.l	savecols(sp),d0		; next line in the luminance channel
	add.l	d0,a2			; (skip the line the other pointer just did)
	add.l	d0,a3

	add.l	d0,a0			; next line in the output row (1 byte per pixel)
	add.l	d0,a1

	dbra	d7,all_rows_12bit

cdi_done_12bit:
	add.w	#localvars,sp
	movem.l	(sp)+,cdi_regs
	rts

;************************************************************************************

	SECTION	__MERGED,BSS

;
; uv conversion tables: two consecutive tables of 256 longwords each, indexed
; by a chroma byte.  Every longword packs two signed 16 bit offsets.  The
; second table starts Cr_gb_off bytes after Cb_r_tab.  Both are filled in by
; InitColorDither; one chroma sample serves four luminance samples.
;
Cb_r_tab: ds.l	2*256


; clamp table
;
; 16 bit entries, indexed relative to _clamp from -clampsize to 255+clampsize
; and filled in by InitColorDither: entries below 0 hold $0000, entries 0..255
; hold the index in their high byte (low byte 0), entries above 255 hold $ff00.
; For a saturated entry this means:
;
;    clamp[x]    = 0xff00	(word at the entry)
; *(&clamp[x]-1) = 0x00ff	(word fetched one byte earlier)
;
; an offset into this clamp table could very easily implement overall brightness control!
;
; we can reuse this special construct in video.c/sutils.s
;

	XDEF _clamp


	ds.w	clampsize		; guard area for negative indices
_clamp:	ds.w	256+clampsize		; entries 0..255 plus guard area for overflow


	END