;
; This source handles the conversion from 24-bit data to HAM8, initialization
; of the screen and so on.
;
; Support for Kaiko is not yet built in, but would be very easy to add, as it
; makes flush_chunkyloop completely obsolete. Looking forward to seeing
; custom logic like this, possibly more flexible, built into all Amigas.
;
; Phew! This one consumes 43% of the overall processor time in hires, 33% in
; lores. I want this Kaiko thingy!
;
;
; Michael Rausch  14-4-94  1:15:00
;


	SECTION	text,CODE

	include	"graphics/rastport.i"
	include	"graphics/gfx.i"

	XREF	_max_x
	XREF	_max_y


	XDEF	@HAM6_Init_lores
;
; @HAM6_Init_lores	(rastport=a0)
;
; Remembers the rastport and prepares all six bitplanes of its bitmap:
; planes 0-3 are cleared, planes 4 and 5 are filled with the repeating
; four-pixel patterns P7 and P8.
;
; Side effects: writes HAM8_rastport, HAM8_offset (bm_BytesPerRow) and
; HAM8_screen (address of the bm_Planes pointer array, NOT of plane 0).
; d2-d7/a2-a6 are preserved; d0, d1, a0 and a1 are not.
;
; Every plane is filled as one contiguous run of Rows * (BytesPerRow/4)
; longs, so the rows of a plane are assumed to follow each other directly.
; NOTE(review): BytesPerRow is truncated to whole longs - confirm callers
; only pass bitmaps whose row size is a multiple of 4 bytes.
;
@HAM6_Init_lores:
	movem.l	d2-d7/a2-a6,-(sp)

;		 RGBG  BRGB  R=8 G=C B=4
P7	set	%0111
P8	set	%1101

	move.l	a0,HAM8_rastport
	movea.l	(rp_BitMap,a0),a0	; a0 = bitmap from here on

	moveq	#0,d1
	move.w	(bm_BytesPerRow,a0),d1
	move.l	d1,HAM8_offset		; we have advanced one complete line when accessing this
	lsr.l	#2,d1			; bytes -> longs
	subq.l	#1,d1			; dbra count for one row

	move.w	(bm_Rows,a0),d0
	subq.w	#1,d0			; dbra count for all rows

	lea	(bm_Planes,a0),a6
	move.l	a6,HAM8_screen
	movem.l	(a6),a1-a6		; a1..a6 = plane 0..5

	moveq	#0,d4			; fill value for planes 0-3
	move.l	#P7*$11111111,d5	; P7 repeated in all eight nibbles (plane 4)
	move.l	#P8*$11111111,d6	; P8 repeated in all eight nibbles (plane 5)

h6_fill_row:
	move.l	d1,d2
h6_fill_long:
	move.l	d4,(a1)+
	move.l	d4,(a2)+
	move.l	d4,(a3)+
	move.l	d4,(a4)+
	move.l	d5,(a5)+
	move.l	d6,(a6)+
	dbra	d2,h6_fill_long

	dbra	d0,h6_fill_row

	movem.l	(sp)+,d2-d7/a2-a6
	rts


	XDEF	@HAM8_Init_lores
;
; @HAM8_Init_lores	(rastport=a0)
; @HAM8_Init		(rastport=a0)
;
; Both entry points prepare an interleaved 8-plane bitmap row by row:
; planes 0-5 of each row are cleared, plane 6 is filled with the P7
; pattern and plane 7 with the P8 pattern. They differ only in the
; patterns loaded into d5/d6 before joining at ham8cont.
;
; std_init stores the rastport and the layout variables and returns
;	d0.w = rows-1, d1 = longs per plane row - 1, d4 = 0,
;	a1   = plane 0, a2 = distance between two planes (0 if rejected).
;
; d2-d7/a2 are preserved; d0, d1, a0 and a1 are not.
;
@HAM8_Init_lores:
	movem.l	d2-d7/a2,-(sp)

;		 RGBG  BRGB  R=8 G=C B=4
P7	set	%0111
P8	set	%1101

	move.l	#(P7<<28)|(P7<<24)|(P7<<20)|(P7<<16)|(P7<<12)|(P7<<8)|(P7<<4)|P7,d5
	move.l	#(P8<<28)|(P8<<24)|(P8<<20)|(P8<<16)|(P8<<12)|(P8<<8)|(P8<<4)|P8,d6
	bra.s	ham8cont

	XDEF	@HAM8_Init
@HAM8_Init:				; init from rastport data
	movem.l	d2-d7/a2,-(sp)

;		 BRGB  R=8 G=C B=4
P7	set	%1011
P8	set	%0110

	move.l	#(P7<<28)|(P7<<24)|(P7<<20)|(P7<<16)|(P7<<12)|(P7<<8)|(P7<<4)|P7,d5
	move.l	#(P8<<28)|(P8<<24)|(P8<<20)|(P8<<16)|(P8<<12)|(P8<<8)|(P8<<4)|P8,d6

ham8cont:
	bsr	std_init

	; std_init returns d1 = -1 when it rejected the plane layout (plane
	; delta forced to 0). Used as a dbra count that would mean 65536
	; passes over unrelated memory, so leave the bitmap untouched instead.
	tst.l	d1
	bmi.s	ham8_init_done

prep_rows:

	move.l	d1,d2
prep_nulls:				; six longs per pass; d1+1 passes cover
	move.l	d4,(a1)+		; planes 0-5 of this row, which lie back
	move.l	d4,(a1)+		; to back in an interleaved bitmap
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	dbra	d2,prep_nulls

	move.l	a1,a0			; a1 = plane 6 of this row
	add.l	a2,a0			; a0 = plane 7 of this row

	; One long per plane per pass. The former two-longs-per-pass loop with a
	; halved count wrote one long too many whenever a row held an odd
	; number of longs, corrupting the start of plane 7 and overrunning the
	; bitmap on the last row.
	move.l	d1,d2
prep_cols67:
	move.l	d5,(a1)+
	move.l	d6,(a0)+
	dbra	d2,prep_cols67

	add.l	a2,a1			; skip plane 7 -> plane 0 of the next row
	dbra	d0,prep_rows

ham8_init_done:
	movem.l	(sp)+,d2-d7/a2
	rts


	XDEF	@HAM8_Init_x3
;
; @HAM8_Init_x3	(rastport=a0)
;
; Like @HAM8_Init, but planes 6 and 7 receive a pattern that repeats every
; three pixels instead of every four. Because 3 does not divide 32, the
; pattern is taken from the three-long tables scale3tab1 (plane 6) and
; scale3tab2 (plane 7), cycling through them long by long. The table
; pointer is reloaded for every row, so each row starts at the same phase.
;
; d2-d7/a2-a3 are preserved; d0, d1, a0 and a1 are not.
;
@HAM8_Init_x3:				; init from rastport data
	movem.l	d2-d7/a2-a3,-(sp)

	bsr	std_init

	; std_init returns d1 = -1 when it rejected the plane layout (plane
	; delta forced to 0). Used as a dbra count that would mean 65536
	; passes over unrelated memory, so leave the bitmap untouched instead.
	tst.l	d1
	bmi.s	ham8x3_init_done

prep_rows_3:

	move.l	d1,d2
prep_nulls_3:				; clear planes 0-5 of this row
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	move.l	d4,(a1)+
	dbra	d2,prep_nulls_3

	move.l	a1,a0			; a1 = plane 6 of this row
	add.l	a2,a0			; a0 = plane 7 of this row

	lea	(scale3tab1,pc),a3	; restart the pattern for every row
	move.l	d1,d2			; d2 = longs still to write - 1
prep_cols67_3:
	move.l	(scale3tab2-scale3tab1,a3),(a0)+	; table long 0
	move.l	(a3)+,(a1)+
	subq.w	#1,d2
	bmi.s	pc67_ready_3		; row complete after the first long
	move.l	(scale3tab2-scale3tab1,a3),(a0)+	; table long 1
	move.l	(a3)+,(a1)+
	subq.w	#1,d2
	bmi.s	pc67_ready_3		; row complete after the second long
	move.l	(scale3tab2-scale3tab1,a3),(a0)+	; table long 2
	move.l	(a3)+,(a1)+
	sub.w	#3*4,a3			; rewind to table long 0
	dbra	d2,prep_cols67_3
pc67_ready_3:

	add.l	a2,a1			; skip plane 7 -> plane 0 of the next row
	dbra	d0,prep_rows_3

ham8x3_init_done:
	movem.l	(sp)+,d2-d7/a2-a3
	rts

;
; Pattern tables for @HAM8_Init_x3.
;
; Each table is one 3-bit pattern (P7 resp. P8) repeated 32 times, i.e. 96
; bits, cut into three longs:
;   long 0: ten whole copies in bits 31..2, then the top two pattern bits
;   long 1: the lowest pattern bit in bit 31, ten whole copies, then the
;           top pattern bit in bit 0
;   long 2: the low two pattern bits in bits 31..30, then ten whole copies
; The (Px<<31) and (Px<<30) terms rely on the assembler truncating the
; expression to 32 bits.
; scale3tab2 must directly follow scale3tab1: the code addresses it as
; (scale3tab2-scale3tab1,a3) relative to its scale3tab1 pointer.
;
;		 BRG    R=8 G=C B=4
P7	set	%101
P8	set	%011

scale3tab1:
	dc.l	(P7<<29)|(P7<<26)|(P7<<23)|(P7<<20)|(P7<<17)|(P7<<14)|(P7<<11)|(P7<<8)|(P7<<5)|(P7<<2)|(P7>>1)
	dc.l	(P7<<31)|(P7<<28)|(P7<<25)|(P7<<22)|(P7<<19)|(P7<<16)|(P7<<13)|(P7<<10)|(P7<<7)|(P7<<4)|(P7<<1)|(P7>>2)
	dc.l	(P7<<30)|(P7<<27)|(P7<<24)|(P7<<21)|(P7<<18)|(P7<<15)|(P7<<12)|(P7<<9)|(P7<<6)|(P7<<3)|P7
scale3tab2:
	dc.l	(P8<<29)|(P8<<26)|(P8<<23)|(P8<<20)|(P8<<17)|(P8<<14)|(P8<<11)|(P8<<8)|(P8<<5)|(P8<<2)|(P8>>1)
	dc.l	(P8<<31)|(P8<<28)|(P8<<25)|(P8<<22)|(P8<<19)|(P8<<16)|(P8<<13)|(P8<<10)|(P8<<7)|(P8<<4)|(P8<<1)|(P8>>2)
	dc.l	(P8<<30)|(P8<<27)|(P8<<24)|(P8<<21)|(P8<<18)|(P8<<15)|(P8<<12)|(P8<<9)|(P8<<6)|(P8<<3)|P8


;
; std_init - common front end of the HAM8 init routines.
;
; In:	a0 = rastport
; Out:	a0   = bitmap of that rastport
;	a1   = bitplane 0
;	a2   = distance plane 0 -> plane 1, or 0 if that distance is not
;	       smaller than bm_BytesPerRow (bitmap not interleaved, or laid
;	       out in some other unexpected way)
;	d0.w = bm_Rows - 1
;	d1   = (a2 / 4) - 1, longs per plane row as a dbra count;
;	       this is -1 when a2 was forced to 0
;	d4   = 0
; Also stores HAM8_rastport, HAM8_screen (plane 0), HAM8_offset
; (bm_BytesPerRow) and HAM8_next (a2).
;
std_init:
	move.l	a0,HAM8_rastport
	movea.l	(rp_BitMap,a0),a0

	moveq	#0,d4

	move.w	(bm_Rows,a0),d0
	subq.w	#1,d0

	movea.l	(bm_Planes,a0),a1
	move.l	a1,HAM8_screen

	moveq	#0,d1
	move.w	(bm_BytesPerRow,a0),d1
	move.l	d1,HAM8_offset		; we have advanced one complete line when accessing this

	movea.l	(bm_Planes+4,a0),a2
	suba.l	a1,a2			; plane delta

	cmpa.l	d1,a2
	blt.s	std_init_delta_ok	; delta < bytes per row: interleaved
	suba.l	a2,a2			; RED ALERT, no interleaved bitmap or strange, weirdo layout!
std_init_delta_ok:
	move.l	a2,HAM8_next

	move.l	a2,d1
	lsr.l	#2,d1			; longs
	subq.l	#1,d1
	rts

********************************************************

	XDEF	@HAM6_draw_lores	; 	(data=a0, x=d0, y=d1)
;
; Convert chunky source words to the four data bitplanes of the screen set
; up by @HAM6_Init_lores.
;
; In:	a0 = source data, read one word at a time
;	d0 = picture width (x), d1 = picture height (y)
;	a4 = base register through which _max_x / _max_y are read
;	     (presumably the compiler's data base register - confirm)
;
; At most min(y,_max_y) rows and min(x,_max_x) columns are drawn, the width
; being rounded down to a multiple of 16. Every block of eight source words
; yields one long for each of the four planes.
;
; d2-d7/a2-a4 are preserved; d0, d1, a0 and a1 are not.
;
; NOTE(review): width < 16 or height < 1 makes the dbra counts start at -1,
; i.e. 65536 passes - confirm callers never pass such sizes.
;
@HAM6_draw_lores:
	movem.l	d2-d7/a2-a4,-(sp)

	move.l	_max_y(a4),d7
	cmp.l	d7,d1
	bge.s	h6_max128lores
	move.l	d1,d7		; height
h6_max128lores:
	subq.l	#1,d7		; d7 = min(y,_max_y)-1, dbra count for the rows

	move.l	_max_x(a4),d1
	cmp.l	d1,d0
	bge.s	h6_max160lores
	move.l	d0,d1		; width
h6_max160lores:			; d1 = min(x,_max_x)

	move.l	d1,d6
	lsr.w	#4,d6
	subq.w	#1,d6		; (width / 16) - 1, dbra count for the blocks of a row

	and.w	#$fff0,d1	; correct eol of over-large pix
	sub.l	d1,d0		; d0 = columns of the picture that are not drawn

	move.l	d0,d2		; this is some quick fix for 360 pixels wide b0.mpg, which is invalid!
	and.w	#8,d2
	add.w	d2,d0		; add 8 more if bit 3 of the remainder is set

	add.l	d0,d0		; 1 word per pixel
	move.l	d0,line_offset	; bytes to skip in the source after each row
				; NOTE(review): a row consumes (width/16)*8 words here,
				; i.e. half a word per counted pixel - confirm that
				; doubling the remainder matches the source layout

	; HAM8_screen holds the address of the bm_Planes pointer array here
	; (as stored by @HAM6_Init_lores), not the address of plane 0.
	move.l	HAM8_screen,a1
	addq.l	#4,a1
	move.l	(a1)+,a2	; a2 = plane 1
	move.l	(a1)+,a3	; a3 = plane 2
	move.l	(a1),a4		; a4 = plane 3 (the base register is given up from here on)
	move.l	-12(a1),a1	; a1 = plane 0

	move.l	HAM8_offset,d2
	move.l	d6,d0
	addq.l	#1,d0
	lsl.l	#2,d0		; writing one long per main loop
	sub.l	d0,d2
	move.l	d2,draw_offset	; bytes per row minus bytes written per row;
				; computed, but its use below is commented out

h6_draw_all_rows_lores:
	move.l	d7,-(sp)	; d7 doubles as the source word register below
	move.l	d6,d5		; d5 = block counter for this row

h6_blocks_loop_lores:

	moveq	#7,d4		; eight source words per block
h6_flush_chunkyloop_lores:
	move.w	(a0)+,d7		; RGBG

	; Deal the 16 bits of the word out from the top, one bit at a time,
	; to d0,d1,d2,d3 in turn: bit 3 of every nibble ends up in d0, bit 0
	; in d3. 8 words * 4 bits shift all 32 old bits out of each register,
	; so d0-d3 need no clearing.
	rept	4
	add.w	d7,d7
	addx.l	d0,d0
	add.w	d7,d7
	addx.l	d1,d1
	add.w	d7,d7
	addx.l	d2,d2
	add.w	d7,d7
	addx.l	d3,d3
	endr

	dbra	d4,h6_flush_chunkyloop_lores

	move.l	d3,(a1)+	; nibble bit 0 -> plane 0
	move.l	d2,(a2)+	; nibble bit 1 -> plane 1
	move.l	d1,(a3)+	; nibble bit 2 -> plane 2
	move.l	d0,(a4)+	; nibble bit 3 -> plane 3

	dbra	d5,h6_blocks_loop_lores

	add.l	line_offset,a0

	; The per-row destination advance is disabled, so the rows are written
	; back to back into each plane.
	; NOTE(review): that only lines up if a drawn row fills a whole bitmap
	; row - confirm this is intended.
;	move.l	draw_offset,d1
;	add.l	d1,a1
;	add.l	d1,a2
;	add.l	d1,a3
;	add.l	d1,a4

	move.l	(sp)+,d7
	dbra	d7,h6_draw_all_rows_lores


	movem.l	(sp)+,d2-d7/a2-a4
	rts


********************************************************


	XDEF	@HAM8_draw_lores	; 	(data=a0, x=d0, y=d1)
;
; Convert chunky source longs to bitplanes 0-5 of the interleaved screen
; set up by one of the HAM8 init routines.
;
; In:	a0 = source data, read one long at a time
;	d0 = picture width (x), d1 = picture height (y)
;	a4 = base register through which _max_x / _max_y are read
;	     (presumably the compiler's data base register - confirm)
;
; At most min(y,_max_y) rows and min(x,_max_x) columns are drawn, the width
; being rounded down to a multiple of 16. Each source long carries four
; bytes of which the upper six bits are used; eight longs yield one long
; for each of the six planes.
;
; d2-d7/a2-a6 are preserved; d0, d1, a0 and a1 are not.
;
; NOTE(review): width < 16 or height < 1 makes the dbra counts start at -1,
; i.e. 65536 passes - confirm callers never pass such sizes.
;
@HAM8_draw_lores:
	movem.l	d2-d7/a2-a6,-(sp)

	move.l	_max_y(a4),d7
	cmp.l	d7,d1
	bge.s	max128lores
	move.l	d1,d7		; height
max128lores:
	subq.l	#1,d7		; d7 = min(y,_max_y)-1, dbra count for the rows

	move.l	_max_x(a4),d1
	cmp.l	d1,d0
	bge.s	max160lores
	move.l	d0,d1		; width
max160lores:			; d1 = min(x,_max_x)

	move.l	d1,d6
	lsr.w	#4,d6
	subq.w	#1,d6		; (width / 16) - 1, dbra count for the blocks of a row

	and.w	#$fff0,d1	; correct eol of over-large pix
	sub.l	d1,d0		; d0 = columns of the picture that are not drawn

	move.l	d0,d2		; this is some quick fix for 360 pixels wide b0.mpg, which is invalid!
	and.w	#8,d2
	add.w	d2,d0		; add 8 more if bit 3 of the remainder is set

	add.l	d0,d0		; 1 word per pixel
	move.l	d0,line_offset	; bytes to skip in the source after each row


	move.l	HAM8_screen,a1	; get addr, prep next line
	move.l	HAM8_next,d1	; distance between two planes
	move.l	a1,a2
	add.l	d1,a2		; a2 = plane 1
	move.l	a2,a3
	add.l	d1,a3		; a3 = plane 2
	move.l	a3,a4
	add.l	d1,a4		; a4 = plane 3 (the base register is given up from here on)
	move.l	a4,a5
	add.l	d1,a5		; a5 = plane 4
	move.l	a5,a6
	add.l	d1,a6		; a6 = plane 5

	move.l	HAM8_offset,d2
	move.l	d6,d0
	addq.l	#1,d0
	lsl.l	#2,d0		; writing one long per main loop
	sub.l	d0,d2
	move.l	d2,draw_offset	; bytes per bitmap row minus bytes written per plane and row


draw_all_rows_lores:
	move.l	d7,-(sp)	; d7 doubles as the source long register below
	move.l	d6,-(sp)	; keep the blocks-per-row count for the next row

blocks_loop_lores:
	move.l	d6,-(sp)	; d6 doubles as the inner loop counter

	moveq	#7,d6		; eight source longs per block
flush_chunkyloop_lores:
	move.l	(a0)+,d7		; RGBG

	; First byte: its upper six bits go, from the top, to d0..d5.
	add.l	d7,d7
	addx.l	d0,d0
	add.l	d7,d7
	addx.l	d1,d1
	add.l	d7,d7
	addx.l	d2,d2
	add.l	d7,d7
	addx.l	d3,d3
	add.l	d7,d7
	addx.l	d4,d4
	add.l	d7,d7
	addx.l	d5,d5

	; Remaining three bytes: lsl #3 drops the two unused low bits of the
	; previous byte and leaves the top bit of the next byte in X.
	; 8 longs * 4 bits shift all 32 old bits out of each register, so
	; d0-d5 need no clearing.
	rept	3
	lsl.l	#3,d7
	addx.l	d0,d0
	add.l	d7,d7
	addx.l	d1,d1
	add.l	d7,d7
	addx.l	d2,d2
	add.l	d7,d7
	addx.l	d3,d3
	add.l	d7,d7
	addx.l	d4,d4
	add.l	d7,d7
	addx.l	d5,d5
	endr

	dbra	d6,flush_chunkyloop_lores
	
	move.l	d5,(a1)+	; lowest used bit of every byte -> plane 0
	move.l	d4,(a2)+
	move.l	d3,(a3)+
	move.l	d2,(a4)+
	move.l	d1,(a5)+
	move.l	d0,(a6)+	; top bit of every byte -> plane 5

	move.l	(sp)+,d6
	dbra	d6,blocks_loop_lores

	add.l	line_offset,a0	; skip the undrawn rest of the source row

	move.l	draw_offset,d1	; advance all plane pointers to the next row
	add.l	d1,a1
	add.l	d1,a2
	add.l	d1,a3
	add.l	d1,a4
	add.l	d1,a5
	add.l	d1,a6

	move.l	(sp)+,d6
	move.l	(sp)+,d7
	dbra	d7,draw_all_rows_lores


	movem.l	(sp)+,d2-d7/a2-a6
	rts


********************************************************


	XDEF	@HAM8_draw_hires	; (data=a0, x=d0, y=d1)
;
; Convert chunky source longs to bitplanes 0-5 of the interleaved screen
; set up by one of the HAM8 init routines, four screen pixels per source
; long.
;
; In:	a0 = source data, one long per pixel
;	d0 = picture width (x), d1 = picture height (y)
;	a4 = base register through which _max_x / _max_y are read
;	     (presumably the compiler's data base register - confirm)
;
; At most min(y,_max_y) rows and min(x,_max_x) columns are drawn, the width
; being rounded down to a multiple of 8. Of each source long the upper six
; bits of the first three bytes are used; they become screen pixels 1-3 of
; a group of four, and pixel 4 is a copy of pixel 1.
;
; d2-d7/a2-a6 are preserved; d0, d1, a0 and a1 are not.
;
; NOTE(review): width < 8 or height < 1 makes the dbra counts start at -1,
; i.e. 65536 passes - confirm callers never pass such sizes.
;
@HAM8_draw_hires:
	movem.l	d2-d7/a2-a6,-(sp)

	move.l	_max_y(a4),d7
	cmp.l	d7,d1
	bge.s	max128
	move.l	d1,d7		; height
max128:	subq.l	#1,d7		; d7 = min(y,_max_y)-1, dbra count for the rows

	move.l	_max_x(a4),d1
	cmp.l	d1,d0
	bge.s	max160
	move.l	d0,d1		; width
max160:				; d1 = min(x,_max_x)

	move.l	d1,d6
	lsr.w	#3,d6
	subq.w	#1,d6		; width in 8 pixel chunks

	and.w	#$fff8,d1	; correct eol of over-large pix
	sub.l	d1,d0		; d0 = columns of the picture that are not drawn
	lsl.l	#2,d0		; 1 long per pixel
	move.l	d0,line_offset	; bytes to skip in the source after each row


	move.l	HAM8_screen,a1		; get addr, prep next line
	move.l	HAM8_next,d1		; distance between two planes
	move.l	a1,a2
	add.l	d1,a2			; a2 = plane 1
	move.l	a2,a3
	add.l	d1,a3			; a3 = plane 2
	move.l	a3,a4
	add.l	d1,a4			; a4 = plane 3 (the base register is given up from here on)
	move.l	a4,a5
	add.l	d1,a5			; a5 = plane 4
	move.l	a5,a6
	add.l	d1,a6			; a6 = plane 5

	move.l	HAM8_offset,d2
	move.l	d6,d0
	addq.l	#1,d0
	lsl.l	#2,d0			; one long per plane is written per block
	sub.l	d0,d2
	move.l	d2,draw_offset		; bytes per bitmap row minus bytes written per plane and row

draw_all_rows:
	move.l	d7,-(sp)		; d7 doubles as the source long register below
	move.l	d6,-(sp)		; keep the blocks-per-row count for the next row

blocks_loop:
	move.l	d6,-(sp)		; d6 doubles as inner counter and as mask


	moveq	#7,d6			; eight source longs per block
flush_chunkyloop:
	move.l	(a0)+,d7		; BRG0

	; First byte: its upper six bits go, from the top, to d0..d5.
	add.l	d7,d7
	addx.l	d0,d0
	add.l	d7,d7
	addx.l	d1,d1
	add.l	d7,d7
	addx.l	d2,d2
	add.l	d7,d7
	addx.l	d3,d3
	add.l	d7,d7
	addx.l	d4,d4
	add.l	d7,d7
	addx.l	d5,d5

	; Second and third byte: lsl #3 drops the two unused low bits of the
	; previous byte and leaves the top bit of the next byte in X.
	rept	2
	lsl.l	#3,d7
	addx.l	d0,d0
	add.l	d7,d7
	addx.l	d1,d1
	add.l	d7,d7
	addx.l	d2,d2
	add.l	d7,d7
	addx.l	d3,d3
	add.l	d7,d7
	addx.l	d4,d4
	add.l	d7,d7
	addx.l	d5,d5
	endr

	; Open a fourth, still empty pixel slot in every plane register; the
	; fourth source byte is not used.
	add.l	d0,d0
	add.l	d1,d1
	add.l	d2,d2
	add.l	d3,d3
	add.l	d4,d4
	add.l	d5,d5

	dbra	d6,flush_chunkyloop


	; Fill the empty slot: copy bit 3 of every nibble (first pixel of the
	; group) into bit 0 of the same nibble (fourth pixel).
	move.l	#$11111111,d6
copcol	macro	(reg)
	move.l	\1,d7
	lsr.l	#3,d7			; BRGB
	and.l	d6,d7
	or.l	d7,\1
	endm
	copcol	d0
	copcol	d1
	copcol	d2
	copcol	d3
	copcol	d4
	copcol	d5
	
	move.l	d5,(a1)+		; lowest used bit of every byte -> plane 0
	move.l	d4,(a2)+
	move.l	d3,(a3)+
	move.l	d2,(a4)+
	move.l	d1,(a5)+
	move.l	d0,(a6)+		; top bit of every byte -> plane 5

	move.l	(sp)+,d6
	dbra	d6,blocks_loop

	add.l	line_offset,a0		; skip the undrawn rest of the source row

	move.l	draw_offset,d1		; advance all plane pointers to the next row
	add.l	d1,a1
	add.l	d1,a2
	add.l	d1,a3
	add.l	d1,a4
	add.l	d1,a5
	add.l	d1,a6

	move.l	(sp)+,d6
	move.l	(sp)+,d7
	dbra	d7,draw_all_rows


	movem.l	(sp)+,d2-d7/a2-a6
	rts


********************************************************


	SECTION	__MERGED,BSS

;
; State written by the init routines and read by the draw routines.
;

HAM8_rastport:	ds.l	1			; original rastport struct
HAM8_screen:	ds.l	1			; HAM8 inits: first bitplane; HAM6 init: address of the bm_Planes pointer array
HAM8_offset:	ds.l	1			; next line in the first bitplane (bm_BytesPerRow)
HAM8_next:	ds.l	1			; next bitplane (plane 1 - plane 0), 0 if std_init rejected the layout

draw_offset:	ds.l	1			; destination bytes to add after each drawn row
line_offset:	ds.l	1			; source bytes to skip after each drawn row

is_interleaved: ds.w	1			; not referenced by any code visible in this file

	END