vx32

Local 9vx git repository for patches.
git clone git://r-36.net/vx32
Log | Files | Refs

draw.c (56885B)


      1 #include "u.h"
      2 #include "lib.h"
      3 #include "draw.h"
      4 #include "memdraw.h"
      5 
      6 int drawdebug;
      7 static int	tablesbuilt;
      8 
      9 /* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
     10 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
     11 
     12 /*
     13  * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
     14  * We add another 127 to round to the nearest value rather
     15  * than truncate.
     16  *
     17  * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
     18  * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
     19  */
     20 #define CALC11(a, v, tmp) \
     21  (tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)
     22 #define CALC12(a1, v1, a2, v2, tmp) \
     23  (tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)
     24 #define MASK 0xFF00FF
     25 
     26 #define CALC21(a, vvuu, tmp) \
     27  (tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
     28 
     29 #define CALC41(a, rgba, tmp1, tmp2) \
     30  (CALC21(a, rgba & MASK, tmp1) | \
     31  (CALC21(a, (rgba>>8)&MASK, tmp2)<<8))
     32 
     33 #define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
     34  (tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)
     35 
     36 #define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
     37  (CALC22(a1, rgba1 & MASK, a2, rgba2 & MASK, tmp1) | \
     38  (CALC22(a1, (rgba1>>8) & MASK, a2, (rgba2>>8) & MASK, tmp2)<<8))
     39 
     40 static void mktables(void);
     41 typedef int Subdraw(Memdrawparam*);
     42 static Subdraw chardraw, alphadraw, memoptdraw;
     43 
     44 static Memimage*	memones;
     45 static Memimage*	memzeros;
     46 Memimage *memwhite;
     47 Memimage *memblack;
     48 Memimage *memtransparent;
     49 Memimage *memopaque;
     50 
     51 int	__ifmt(Fmt*);
     52 
     53 void
     54 _memimageinit(void)
     55 {
     56 	static int didinit = 0;
     57 
     58 	if(didinit)
     59 		return;
     60 
     61 	didinit = 1;
     62 
     63 
     64 	mktables();
     65 	_memmkcmap();
     66 
     67 	fmtinstall('R', Rfmt); 
     68 	fmtinstall('P', Pfmt);
     69 	fmtinstall('b', __ifmt);
     70 
     71 	memones = allocmemimage(Rect(0,0,1,1), GREY1);
     72 	memones->flags |= Frepl;
     73 	memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
     74 	*byteaddr(memones, ZP) = ~0;
     75 
     76 	memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
     77 	memzeros->flags |= Frepl;
     78 	memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
     79 	*byteaddr(memzeros, ZP) = 0;
     80 
     81 	if(memones == nil || memzeros == nil)
     82 		assert(0 /*cannot initialize memimage library */);	/* RSC BUG */
     83 
     84 	memwhite = memones;
     85 	memblack = memzeros;
     86 	memopaque = memones;
     87 	memtransparent = memzeros;
     88 }
     89 
     90 
     91 #define DBG if(0)
     92 Memdrawparam*
     93 _memimagedrawsetup(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
     94 {
     95 	static Memdrawparam par;
     96 
     97 	if(mask == nil)
     98 		mask = memopaque;
     99 
    100 DBG	print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
    101 
    102 	if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
    103 //		if(drawdebug)
    104 //			iprint("empty clipped rectangle\n");
    105 		return nil;
    106 	}
    107 
    108 	if(op < Clear || op > SoverD){
    109 //		if(drawdebug)
    110 //			iprint("op out of range: %d\n", op);
    111 		return nil;
    112 	}
    113 
    114 	par.op = op;
    115 	par.dst = dst;
    116 	par.r = r;
    117 	par.src = src;
    118 	/* par.sr set by drawclip */
    119 	par.mask = mask;
    120 	/* par.mr set by drawclip */
    121 
    122 	par.state = 0;
    123 	if(src->flags&Frepl){
    124 		par.state |= Replsrc;
    125 		if(Dx(src->r)==1 && Dy(src->r)==1){
    126 			par.sval = pixelbits(src, src->r.min);
    127 			par.state |= Simplesrc;
    128 			par.srgba = _imgtorgba(src, par.sval);
    129 			par.sdval = _rgbatoimg(dst, par.srgba);
    130 			if((par.srgba&0xFF) == 0xFF)
    131 				par.state |= Fullsrc;
    132 			if((par.srgba&0xFF) == 0 && (op&DoutS)){
    133 //				if (drawdebug) iprint("fill with transparent source\n");
    134 				return nil;	/* no-op successfully handled */
    135 			}
    136 		}
    137 	}
    138 
    139 	if(mask->flags & Frepl){
    140 		par.state |= Replmask;
    141 		if(Dx(mask->r)==1 && Dy(mask->r)==1){
    142 			par.mval = pixelbits(mask, mask->r.min);
    143 			if(par.mval == 0 && (op&DoutS)){
    144 //				if(drawdebug) iprint("fill with zero mask\n");
    145 				return nil;	/* no-op successfully handled */
    146 			}
    147 			par.state |= Simplemask;
    148 			if(par.mval == ~0)
    149 				par.state |= Fullmask;
    150 			par.mrgba = _imgtorgba(mask, par.mval);
    151 		}
    152 	}
    153 
    154 //	if(drawdebug)
    155 //		iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
    156 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
    157 
    158 	return &par;
    159 }
    160 
    161 void
    162 _memimagedraw(Memdrawparam *par)
    163 {
    164 	if(par == nil)
    165 		return;
    166 
    167 	/*
    168 	 * Now that we've clipped the parameters down to be consistent, we 
    169 	 * simply try sub-drawing routines in order until we find one that was able
    170 	 * to handle us.  If the sub-drawing routine returns zero, it means it was
    171 	 * unable to satisfy the request, so we do not return.
    172 	 */
    173 
    174 	/*
    175 	 * Hardware support.  Each video driver provides this function,
    176 	 * which checks to see if there is anything it can help with.
    177 	 * There could be an if around this checking to see if dst is in video memory.
    178 	 */
    179 DBG print("test hwdraw\n");
    180 	if(hwdraw(par)){
    181 //if(drawdebug) iprint("hw handled\n");
    182 DBG print("hwdraw handled\n");
    183 		return;
    184 	}
    185 	/*
    186 	 * Optimizations using memmove and memset.
    187 	 */
    188 DBG print("test memoptdraw\n");
    189 	if(memoptdraw(par)){
    190 //if(drawdebug) iprint("memopt handled\n");
    191 DBG print("memopt handled\n");
    192 		return;
    193 	}
    194 
    195 	/*
    196 	 * Character drawing.
    197 	 * Solid source color being painted through a boolean mask onto a high res image.
    198 	 */
    199 DBG print("test chardraw\n");
    200 	if(chardraw(par)){
    201 //if(drawdebug) iprint("chardraw handled\n");
    202 DBG print("chardraw handled\n");
    203 		return;
    204 	}
    205 
    206 	/*
    207 	 * General calculation-laden case that does alpha for each pixel.
    208 	 */
    209 DBG print("do alphadraw\n");
    210 	alphadraw(par);
    211 //if(drawdebug) iprint("alphadraw handled\n");
    212 DBG print("alphadraw handled\n");
    213 }
    214 #undef DBG
    215 
    216 /*
    217  * Clip the destination rectangle further based on the properties of the 
    218  * source and mask rectangles.  Once the destination rectangle is properly
    219  * clipped, adjust the source and mask rectangles to be the same size.
    220  * Then if source or mask is replicated, move its clipped rectangle
    221  * so that its minimum point falls within the repl rectangle.
    222  *
    223  * Return zero if the final rectangle is null.
    224  */
    225 int
    226 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
    227 {
    228 	Point rmin, delta;
    229 	int splitcoords;
    230 	Rectangle omr;
    231 
    232 	if(r->min.x>=r->max.x || r->min.y>=r->max.y)
    233 		return 0;
    234 	splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
    235 	/* clip to destination */
    236 	rmin = r->min;
    237 	if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
    238 		return 0;
    239 	/* move mask point */
    240 	p1->x += r->min.x-rmin.x;
    241 	p1->y += r->min.y-rmin.y;
    242 	/* move source point */
    243 	p0->x += r->min.x-rmin.x;
    244 	p0->y += r->min.y-rmin.y;
    245 	/* map destination rectangle into source */
    246 	sr->min = *p0;
    247 	sr->max.x = p0->x+Dx(*r);
    248 	sr->max.y = p0->y+Dy(*r);
    249 	/* sr is r in source coordinates; clip to source */
    250 	if(!(src->flags&Frepl) && !rectclip(sr, src->r))
    251 		return 0;
    252 	if(!rectclip(sr, src->clipr))
    253 		return 0;
    254 	/* compute and clip rectangle in mask */
    255 	if(splitcoords){
    256 		/* move mask point with source */
    257 		p1->x += sr->min.x-p0->x;
    258 		p1->y += sr->min.y-p0->y;
    259 		mr->min = *p1;
    260 		mr->max.x = p1->x+Dx(*sr);
    261 		mr->max.y = p1->y+Dy(*sr);
    262 		omr = *mr;
    263 		/* mr is now rectangle in mask; clip it */
    264 		if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
    265 			return 0;
    266 		if(!rectclip(mr, mask->clipr))
    267 			return 0;
    268 		/* reflect any clips back to source */
    269 		sr->min.x += mr->min.x-omr.min.x;
    270 		sr->min.y += mr->min.y-omr.min.y;
    271 		sr->max.x += mr->max.x-omr.max.x;
    272 		sr->max.y += mr->max.y-omr.max.y;
    273 		*p1 = mr->min;
    274 	}else{
    275 		if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
    276 			return 0;
    277 		if(!rectclip(sr, mask->clipr))
    278 			return 0;
    279 		*p1 = sr->min;
    280 	}
    281 
    282 	/* move source clipping back to destination */
    283 	delta.x = r->min.x - p0->x;
    284 	delta.y = r->min.y - p0->y;
    285 	r->min.x = sr->min.x + delta.x;
    286 	r->min.y = sr->min.y + delta.y;
    287 	r->max.x = sr->max.x + delta.x;
    288 	r->max.y = sr->max.y + delta.y;
    289 
    290 	/* move source rectangle so sr->min is in src->r */
    291 	if(src->flags&Frepl) {
    292 		delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
    293 		delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
    294 		sr->min.x += delta.x;
    295 		sr->min.y += delta.y;
    296 		sr->max.x += delta.x;
    297 		sr->max.y += delta.y;
    298 	}
    299 	*p0 = sr->min;
    300 
    301 	/* move mask point so it is in mask->r */
    302 	*p1 = drawrepl(mask->r, *p1);
    303 	mr->min = *p1;
    304 	mr->max.x = p1->x+Dx(*sr);
    305 	mr->max.y = p1->y+Dy(*sr);
    306 
    307 	assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
    308 	assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
    309 	assert(ptinrect(*p0, src->r));
    310 	assert(ptinrect(*p1, mask->r));
    311 	assert(ptinrect(r->min, dst->r));
    312 
    313 	return 1;
    314 }
    315 
    316 /*
    317  * Conversion tables.
    318  */
    319 static uchar replbit[1+8][256];		/* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
    320 static uchar conv18[256][8];		/* conv18[x][y] is the yth pixel in the depth-1 pixel x */
    321 static uchar conv28[256][4];		/* ... */
    322 static uchar conv48[256][2];
    323 
    324 /*
    325  * bitmap of how to replicate n bits to fill 8, for 1 ≤ n ≤ 8.
    326  * the X's are where to put the bottom (ones) bit of the n-bit pattern.
    327  * only the top 8 bits of the result are actually used.
    328  * (the lower 8 bits are needed to get bits in the right place
    329  * when n is not a divisor of 8.)
    330  *
    331  * Should check to see if it's easier to just refer to replmul than
    332  * use the precomputed values in replbit.  On PCs it may well
    333  * be; on machines with slow multiply instructions it probably isn't.
    334  */
    335 #define a ((((((((((((((((0
    336 #define X *2+1)
    337 #define _ *2)
    338 static int replmul[1+8] = {
    339 	0,
    340 	a X X X X X X X X X X X X X X X X,
    341 	a _ X _ X _ X _ X _ X _ X _ X _ X,
    342 	a _ _ X _ _ X _ _ X _ _ X _ _ X _,
    343 	a _ _ _ X _ _ _ X _ _ _ X _ _ _ X,
    344 	a _ _ _ _ X _ _ _ _ X _ _ _ _ X _,
    345 	a _ _ _ _ _ X _ _ _ _ _ X _ _ _ _, 
    346 	a _ _ _ _ _ _ X _ _ _ _ _ _ X _ _,
    347 	a _ _ _ _ _ _ _ X _ _ _ _ _ _ _ X,
    348 };
    349 #undef a
    350 #undef X
    351 #undef _
    352 
    353 static void
    354 mktables(void)
    355 {
    356 	int i, j, mask, sh, small;
    357 		
    358 	if(tablesbuilt)
    359 		return;
    360 
    361 	fmtinstall('R', Rfmt);
    362 	fmtinstall('P', Pfmt);
    363 	tablesbuilt = 1;
    364 
    365 	/* bit replication up to 8 bits */
    366 	for(i=0; i<256; i++){
    367 		for(j=0; j<=8; j++){	/* j <= 8 [sic] */
    368 			small = i & ((1<<j)-1);
    369 			replbit[j][i] = (small*replmul[j])>>8;
    370 		}
    371 	}
    372 
    373 	/* bit unpacking up to 8 bits, only powers of 2 */
    374 	for(i=0; i<256; i++){
    375 		for(j=0, sh=7, mask=1; j<8; j++, sh--)
    376 			conv18[i][j] = replbit[1][(i>>sh)&mask];
    377 
    378 		for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
    379 			conv28[i][j] = replbit[2][(i>>sh)&mask];
    380 
    381 		for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
    382 			conv48[i][j] = replbit[4][(i>>sh)&mask];
    383 	}
    384 }
    385 
    386 static uchar ones = 0xff;
    387 
    388 /*
    389  * General alpha drawing case.  Can handle anything.
    390  */
    391 typedef struct	Buffer	Buffer;
    392 struct Buffer {
    393 	/* used by most routines */
    394 	uchar	*red;
    395 	uchar	*grn;
    396 	uchar	*blu;
    397 	uchar	*alpha;
    398 	uchar	*grey;
    399 	uint32	*rgba;
    400 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
    401 
    402 	/* used by boolcalc* for mask data */
    403 	uchar	*m;		/* ptr to mask data r.min byte; like p->bytermin */
    404 	int		mskip;	/* no. of left bits to skip in *m */
    405 	uchar	*bm;		/* ptr to mask data img->r.min byte; like p->bytey0s */
    406 	int		bmskip;	/* no. of left bits to skip in *bm */
    407 	uchar	*em;		/* ptr to mask data img->r.max.x byte; like p->bytey0e */
    408 	int		emskip;	/* no. of right bits to skip in *em */
    409 };
    410 
    411 typedef struct	Param	Param;
    412 typedef Buffer	Readfn(Param*, uchar*, int);
    413 typedef void	Writefn(Param*, uchar*, Buffer);
    414 typedef Buffer	Calcfn(Buffer, Buffer, Buffer, int, int, int);
    415 
    416 enum {
    417 	MAXBCACHE = 16
    418 };
    419 
    420 /* giant rathole to customize functions with */
    421 struct Param {
    422 	Readfn	*replcall;
    423 	Readfn	*greymaskcall;	
    424 	Readfn	*convreadcall;
    425 	Writefn	*convwritecall;
    426 
    427 	Memimage *img;
    428 	Rectangle	r;
    429 	int	dx;	/* of r */
    430 	int	needbuf;
    431 	int	convgrey;
    432 	int	alphaonly;
    433 
    434 	uchar	*bytey0s;		/* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
    435 	uchar	*bytermin;	/* byteaddr(Pt(r.min.x, img->r.min.y)) */
    436 	uchar	*bytey0e;		/* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
    437 	int		bwidth;
    438 
    439 	int	replcache;	/* if set, cache buffers */
    440 	Buffer	bcache[MAXBCACHE];
    441 	uint32	bfilled;
    442 	uchar	*bufbase;
    443 	int	bufoff;
    444 	int	bufdelta;
    445 
    446 	int	dir;
    447 
    448 	int	convbufoff;
    449 	uchar	*convbuf;
    450 	Param	*convdpar;
    451 	int	convdx;
    452 };
    453 
    454 static Readfn	greymaskread, replread, readptr;
    455 static Writefn	nullwrite;
    456 static Calcfn	alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
    457 static Calcfn	boolcalc14, boolcalc236789, boolcalc1011;
    458 
    459 static Readfn*	readfn(Memimage*);
    460 static Readfn*	readalphafn(Memimage*);
    461 static Writefn*	writefn(Memimage*);
    462 
    463 static Calcfn*	boolcopyfn(Memimage*, Memimage*);
    464 static Readfn*	convfn(Memimage*, Param*, Memimage*, Param*, int*);
    465 
    466 static Calcfn *alphacalc[Ncomp] = 
    467 {
    468 	alphacalc0,		/* Clear */
    469 	alphacalc14,		/* DoutS */
    470 	alphacalc2810,		/* SoutD */
    471 	alphacalc3679,		/* DxorS */
    472 	alphacalc14,		/* DinS */
    473 	alphacalc5,		/* D */
    474 	alphacalc3679,		/* DatopS */
    475 	alphacalc3679,		/* DoverS */
    476 	alphacalc2810,		/* SinD */
    477 	alphacalc3679,		/* SatopD */
    478 	alphacalc2810,		/* S */
    479 	alphacalc11,		/* SoverD */
    480 };
    481 
    482 static Calcfn *boolcalc[Ncomp] =
    483 {
    484 	alphacalc0,		/* Clear */
    485 	boolcalc14,		/* DoutS */
    486 	boolcalc236789,		/* SoutD */
    487 	boolcalc236789,		/* DxorS */
    488 	boolcalc14,		/* DinS */
    489 	alphacalc5,		/* D */
    490 	boolcalc236789,		/* DatopS */
    491 	boolcalc236789,		/* DoverS */
    492 	boolcalc236789,		/* SinD */
    493 	boolcalc236789,		/* SatopD */
    494 	boolcalc1011,		/* S */
    495 	boolcalc1011,		/* SoverD */
    496 };
    497 
    498 /*
    499  * Avoid standard Lock, QLock so that can be used in kernel.
    500  */
    501 typedef struct Dbuf Dbuf;
    502 struct Dbuf
    503 {
    504 	uchar *p;
    505 	int n;
    506 	Param spar, mpar, dpar;
    507 	int inuse;
    508 };
    509 static Dbuf dbuf[10];
    510 extern int _tas(int*);
    511 
    512 static Dbuf*
    513 allocdbuf(void)
    514 {
    515 	int i;
    516 
    517 	for(i=0; i<nelem(dbuf); i++){
    518 		if(dbuf[i].inuse)
    519 			continue;
    520 		if(!_tas(&dbuf[i].inuse))
    521 			return &dbuf[i];
    522 	}
    523 	return nil;
    524 }
    525 
    526 static void
    527 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
    528 {
    529 	int nbuf;
    530 
    531 	memset(p, 0, sizeof *p);
    532 
    533 	p->img = img;
    534 	p->r = r;
    535 	p->dx = Dx(r);
    536 	p->needbuf = needbuf;
    537 	p->convgrey = convgrey;
    538 
    539 	assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
    540 
    541 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
    542 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
    543 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
    544 	p->bwidth = sizeof(uint32)*img->width;
    545 
    546 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
    547 
    548 	if(p->r.min.x == p->img->r.min.x)
    549 		assert(p->bytermin == p->bytey0s);
    550 
    551 	nbuf = 1;
    552 	if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
    553 		p->replcache = 1;
    554 		nbuf = Dy(img->r);
    555 	}
    556 	p->bufdelta = 4*p->dx;
    557 	p->bufoff = *ndrawbuf;
    558 	*ndrawbuf += p->bufdelta*nbuf;
    559 }
    560 
    561 static void
    562 clipy(Memimage *img, int *y)
    563 {
    564 	int dy;
    565 
    566 	dy = Dy(img->r);
    567 	if(*y == dy)
    568 		*y = 0;
    569 	else if(*y == -1)
    570 		*y = dy-1;
    571 	assert(0 <= *y && *y < dy);
    572 }
    573 
    574 static void
    575 dumpbuf(char *s, Buffer b, int n)
    576 {
    577 	int i;
    578 	uchar *p;
    579 	
    580 	print("%s", s);
    581 	for(i=0; i<n; i++){
    582 		print(" ");
    583 		if((p=b.grey)){
    584 			print(" k%.2uX", *p);
    585 			b.grey += b.delta;
    586 		}else{	
    587 			if((p=b.red)){
    588 				print(" r%.2uX", *p);
    589 				b.red += b.delta;
    590 			}
    591 			if((p=b.grn)){
    592 				print(" g%.2uX", *p);
    593 				b.grn += b.delta;
    594 			}
    595 			if((p=b.blu)){
    596 				print(" b%.2uX", *p);
    597 				b.blu += b.delta;
    598 			}
    599 		}
    600 		if((p=b.alpha) != &ones){
    601 			print(" α%.2uX", *p);
    602 			b.alpha += b.delta;
    603 		}
    604 	}
    605 	print("\n");
    606 }
    607 
    608 /*
    609  * For each scan line, we expand the pixels from source, mask, and destination
    610  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
    611  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
    612  * the readers need not copy the data: they can simply return pointers to the data.
    613  * If the destination image is grey and the source is not, it is converted using the NTSC
    614  * formula.
    615  *
    616  * Once we have all the channels, we call either rgbcalc or greycalc, depending on 
    617  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
    618  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
    619  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
    620  * the calculator, and that buffer is passed to a function to write it to the destination.
    621  * If the buffer is already pointing at the destination, the writing function is a no-op.
    622  */
    623 #define DBG if(0)
    624 static int
    625 alphadraw(Memdrawparam *par)
    626 {
    627 	int isgrey, starty, endy, op;
    628 	int needbuf, dsty, srcy, masky;
    629 	int y, dir, dx, dy, ndrawbuf;
    630 	uchar *drawbuf;
    631 	Buffer bsrc, bdst, bmask;
    632 	Readfn *rdsrc, *rdmask, *rddst;
    633 	Calcfn *calc;
    634 	Writefn *wrdst;
    635 	Memimage *src, *mask, *dst;
    636 	Rectangle r, sr, mr;
    637 	Dbuf *z;
    638 
    639 	r = par->r;
    640 	dx = Dx(r);
    641 	dy = Dy(r);
    642 
    643 	z = allocdbuf();
    644 	if(z == nil)
    645 		return 0;
    646 
    647 	src = par->src;
    648 	mask = par->mask;	
    649 	dst = par->dst;
    650 	sr = par->sr;
    651 	mr = par->mr;
    652 	op = par->op;
    653 
    654 	isgrey = dst->flags&Fgrey;
    655 
    656 	/*
    657 	 * Buffering when src and dst are the same bitmap is sufficient but not 
    658 	 * necessary.  There are stronger conditions we could use.  We could
    659 	 * check to see if the rectangles intersect, and if simply moving in the
    660 	 * correct y direction can avoid the need to buffer.
    661 	 */
    662 	needbuf = (src->data == dst->data);
    663 
    664 	ndrawbuf = 0;
    665 	getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
    666 	getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
    667 	getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
    668 
    669 	dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
    670 	z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
    671 
    672 	/*
    673 	 * If the mask is purely boolean, we can convert from src to dst format
    674 	 * when we read src, and then just copy it to dst where the mask tells us to.
    675 	 * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
    676 	 *
    677 	 * The computation is accomplished by assigning the function pointers as follows:
    678 	 *	rdsrc - read and convert source into dst format in a buffer
    679 	 * 	rdmask - convert mask to bytes, set pointer to it
    680 	 * 	rddst - fill with pointer to real dst data, but do no reads
    681 	 *	calc - copy src onto dst when mask says to.
    682 	 *	wrdst - do nothing
    683 	 * This is slightly sleazy, since things aren't doing exactly what their names say,
    684 	 * but it avoids a fair amount of code duplication to make this a case here
    685 	 * rather than have a separate booldraw.
    686 	 */
    687 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
    688 	if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
    689 //if(drawdebug) iprint("boolcopy...");
    690 		rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
    691 		rddst = readptr;
    692 		rdmask = readfn(mask);
    693 		calc = boolcopyfn(dst, mask);
    694 		wrdst = nullwrite;
    695 	}else{
    696 		/* usual alphadraw parameter fetching */
    697 		rdsrc = readfn(src);
    698 		rddst = readfn(dst);
    699 		wrdst = writefn(dst);
    700 		calc = alphacalc[op];
    701 
    702 		/*
    703 		 * If there is no alpha channel, we'll ask for a grey channel
    704 		 * and pretend it is the alpha.
    705 		 */
    706 		if(mask->flags&Falpha){
    707 			rdmask = readalphafn(mask);
    708 			z->mpar.alphaonly = 1;
    709 		}else{
    710 			z->mpar.greymaskcall = readfn(mask);
    711 			z->mpar.convgrey = 1;
    712 			rdmask = greymaskread;
    713 
    714 			/*
    715 			 * Should really be above, but then boolcopyfns would have
    716 			 * to deal with bit alignment, and I haven't written that.
    717 			 *
    718 			 * This is a common case for things like ellipse drawing.
    719 			 * When there's no alpha involved and the mask is boolean,
    720 			 * we can avoid all the division and multiplication.
    721 			 */
    722 			if(mask->chan == GREY1 && !(src->flags&Falpha))
    723 				calc = boolcalc[op];
    724 			else if(op == SoverD && !(src->flags&Falpha))
    725 				calc = alphacalcS;
    726 		}
    727 	}
    728 
    729 	/*
    730 	 * If the image has a small enough repl rectangle,
    731 	 * we can just read each line once and cache them.
    732 	 */
    733 	if(z->spar.replcache){
    734 		z->spar.replcall = rdsrc;
    735 		rdsrc = replread;
    736 	}
    737 	if(z->mpar.replcache){
    738 		z->mpar.replcall = rdmask;
    739 		rdmask = replread;
    740 	}
    741 
    742 	if(z->n < ndrawbuf){
    743 		free(z->p);
    744 		if((z->p = mallocz(ndrawbuf, 0)) == nil){
    745 			z->inuse = 0;
    746 			return 0;
    747 		}
    748 		z->n = ndrawbuf;
    749 	}
    750 	drawbuf = z->p;
    751 
    752 	/*
    753 	 * Before we were saving only offsets from drawbuf in the parameter
    754 	 * structures; now that drawbuf has been grown to accomodate us,
    755 	 * we can fill in the pointers.
    756 	 */
    757 	z->spar.bufbase = drawbuf+z->spar.bufoff;
    758 	z->mpar.bufbase = drawbuf+z->mpar.bufoff;
    759 	z->dpar.bufbase = drawbuf+z->dpar.bufoff;
    760 	z->spar.convbuf = drawbuf+z->spar.convbufoff;
    761 
    762 	if(dir == 1){
    763 		starty = 0;
    764 		endy = dy;
    765 	}else{
    766 		starty = dy-1;
    767 		endy = -1;
    768 	}
    769 
    770 	/*
    771 	 * srcy, masky, and dsty are offsets from the top of their
    772 	 * respective Rectangles.  they need to be contained within
    773 	 * the rectangles, so clipy can keep them there without division.
    774  	 */
    775 	srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
    776 	masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
    777 	dsty = starty + r.min.y - dst->r.min.y;
    778 
    779 	assert(0 <= srcy && srcy < Dy(src->r));
    780 	assert(0 <= masky && masky < Dy(mask->r));
    781 	assert(0 <= dsty && dsty < Dy(dst->r));
    782 
    783 	for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
    784 		clipy(src, &srcy);
    785 		clipy(dst, &dsty);
    786 		clipy(mask, &masky);
    787 
    788 		bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
    789 DBG print("[");
    790 		bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
    791 DBG print("]\n");
    792 		bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
    793 DBG		dumpbuf("src", bsrc, dx);
    794 DBG		dumpbuf("mask", bmask, dx);
    795 DBG		dumpbuf("dst", bdst, dx);
    796 		bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
    797 		wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
    798 	}
    799 
    800 	z->inuse = 0;
    801 	return 1;
    802 }
    803 #undef DBG
    804 
    805 static Buffer
    806 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
    807 {
    808 	USED(grey);
    809 	USED(op);
    810 	USED(b1);
    811 	USED(b2);
    812 	memset(bdst.rgba, 0, dx*bdst.delta);
    813 	return bdst;
    814 }
    815 
    816 /*
    817  * Do the channels in the buffers match enough
    818  * that we can do word-at-a-time operations
    819  * on the pixels?
    820  */
    821 static int
    822 chanmatch(Buffer *bdst, Buffer *bsrc)
    823 {
    824 	uchar *drgb, *srgb;
    825 	
    826 	/*
    827 	 * first, r, g, b must be in the same place
    828 	 * in the rgba word.
    829 	 */
    830 	drgb = (uchar*)bdst->rgba;
    831 	srgb = (uchar*)bsrc->rgba;
    832 	if(bdst->red - drgb != bsrc->red - srgb
    833 	|| bdst->blu - drgb != bsrc->blu - srgb
    834 	|| bdst->grn - drgb != bsrc->grn - srgb)
    835 		return 0;
    836 	
    837 	/*
    838 	 * that implies alpha is in the same place,
    839 	 * if it is there at all (it might be == &ones).
    840 	 * if the destination is &ones, we can scribble
    841 	 * over the rgba slot just fine.
    842 	 */
    843 	if(bdst->alpha == &ones)
    844 		return 1;
    845 	
    846 	/*
    847 	 * if the destination is not ones but the src is,
    848 	 * then the simultaneous calculation will use
    849 	 * bogus bytes from the src's rgba.  no good.
    850 	 */
    851 	if(bsrc->alpha == &ones)
    852 		return 0;
    853 	
    854 	/*
    855 	 * otherwise, alphas are in the same place.
    856 	 */
    857 	return 1;
    858 }
    859 
    860 static Buffer
    861 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    862 {
    863 	Buffer obdst;
    864 	int fd, sadelta;
    865 	int i, sa, ma, q;
    866 	uint32 t, t1;
    867 
    868 	obdst = bdst;
    869 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    870 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    871 
    872 	for(i=0; i<dx; i++){
    873 		sa = *bsrc.alpha;
    874 		ma = *bmask.alpha;
    875 		fd = CALC11(sa, ma, t);
    876 		if(op == DoutS)
    877 			fd = 255-fd;
    878 
    879 		if(grey){
    880 			*bdst.grey = CALC11(fd, *bdst.grey, t);
    881 			bsrc.grey += bsrc.delta;
    882 			bdst.grey += bdst.delta;
    883 		}else{
    884 			if(q){
    885 				*bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
    886 				bsrc.rgba++;
    887 				bdst.rgba++;
    888 				bsrc.alpha += sadelta;
    889 				bmask.alpha += bmask.delta;
    890 				continue;
    891 			}
    892 			*bdst.red = CALC11(fd, *bdst.red, t);
    893 			*bdst.grn = CALC11(fd, *bdst.grn, t);
    894 			*bdst.blu = CALC11(fd, *bdst.blu, t);
    895 			bsrc.red += bsrc.delta;
    896 			bsrc.blu += bsrc.delta;
    897 			bsrc.grn += bsrc.delta;
    898 			bdst.red += bdst.delta;
    899 			bdst.blu += bdst.delta;
    900 			bdst.grn += bdst.delta;
    901 		}
    902 		if(bdst.alpha != &ones){
    903 			*bdst.alpha = CALC11(fd, *bdst.alpha, t);
    904 			bdst.alpha += bdst.delta;
    905 		}
    906 		bmask.alpha += bmask.delta;
    907 		bsrc.alpha += sadelta;
    908 	}
    909 	return obdst;
    910 }
    911 
    912 static Buffer
    913 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    914 {
    915 	Buffer obdst;
    916 	int fs, sadelta;
    917 	int i, ma, da, q;
    918 	uint32 t, t1;
    919 
    920 	obdst = bdst;
    921 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    922 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    923 
    924 	for(i=0; i<dx; i++){
    925 		ma = *bmask.alpha;
    926 		da = *bdst.alpha;
    927 		if(op == SoutD)
    928 			da = 255-da;
    929 		fs = ma;
    930 		if(op != S)
    931 			fs = CALC11(fs, da, t);
    932 
    933 		if(grey){
    934 			*bdst.grey = CALC11(fs, *bsrc.grey, t);
    935 			bsrc.grey += bsrc.delta;
    936 			bdst.grey += bdst.delta;
    937 		}else{
    938 			if(q){
    939 				*bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
    940 				bsrc.rgba++;
    941 				bdst.rgba++;
    942 				bmask.alpha += bmask.delta;
    943 				bdst.alpha += bdst.delta;
    944 				continue;
    945 			}
    946 			*bdst.red = CALC11(fs, *bsrc.red, t);
    947 			*bdst.grn = CALC11(fs, *bsrc.grn, t);
    948 			*bdst.blu = CALC11(fs, *bsrc.blu, t);
    949 			bsrc.red += bsrc.delta;
    950 			bsrc.blu += bsrc.delta;
    951 			bsrc.grn += bsrc.delta;
    952 			bdst.red += bdst.delta;
    953 			bdst.blu += bdst.delta;
    954 			bdst.grn += bdst.delta;
    955 		}
    956 		if(bdst.alpha != &ones){
    957 			*bdst.alpha = CALC11(fs, *bsrc.alpha, t);
    958 			bdst.alpha += bdst.delta;
    959 		}
    960 		bmask.alpha += bmask.delta;
    961 		bsrc.alpha += sadelta;
    962 	}
    963 	return obdst;
    964 }
    965 
    966 static Buffer
    967 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
    968 {
    969 	Buffer obdst;
    970 	int fs, fd, sadelta;
    971 	int i, sa, ma, da, q;
    972 	uint32 t, t1;
    973 
    974 	obdst = bdst;
    975 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
    976 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
    977 
    978 	for(i=0; i<dx; i++){
    979 		sa = *bsrc.alpha;
    980 		ma = *bmask.alpha;
    981 		da = *bdst.alpha;
    982 		if(op == SatopD)
    983 			fs = CALC11(ma, da, t);
    984 		else
    985 			fs = CALC11(ma, 255-da, t);
    986 		if(op == DoverS)
    987 			fd = 255;
    988 		else{
    989 			fd = CALC11(sa, ma, t);
    990 			if(op != DatopS)
    991 				fd = 255-fd;
    992 		}
    993 
    994 		if(grey){
    995 			*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
    996 			bsrc.grey += bsrc.delta;
    997 			bdst.grey += bdst.delta;
    998 		}else{
    999 			if(q){
   1000 				*bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
   1001 				bsrc.rgba++;
   1002 				bdst.rgba++;
   1003 				bsrc.alpha += sadelta;
   1004 				bmask.alpha += bmask.delta;
   1005 				bdst.alpha += bdst.delta;
   1006 				continue;
   1007 			}
   1008 			*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
   1009 			*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
   1010 			*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
   1011 			bsrc.red += bsrc.delta;
   1012 			bsrc.blu += bsrc.delta;
   1013 			bsrc.grn += bsrc.delta;
   1014 			bdst.red += bdst.delta;
   1015 			bdst.blu += bdst.delta;
   1016 			bdst.grn += bdst.delta;
   1017 		}
   1018 		if(bdst.alpha != &ones){
   1019 			*bdst.alpha = CALC12(fs, sa, fd, da, t);
   1020 			bdst.alpha += bdst.delta;
   1021 		}
   1022 		bmask.alpha += bmask.delta;
   1023 		bsrc.alpha += sadelta;
   1024 	}
   1025 	return obdst;
   1026 }
   1027 
   1028 static Buffer
   1029 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
   1030 {
   1031 	USED(dx);
   1032 	USED(grey);
   1033 	USED(op);
   1034 	USED(b1);
   1035 	USED(b2);
   1036 	return bdst;
   1037 }
   1038 
   1039 static Buffer
   1040 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1041 {
   1042 	Buffer obdst;
   1043 	int fd, sadelta;
   1044 	int i, sa, ma, q;
   1045 	uint32 t, t1;
   1046 
   1047 	USED(op);
   1048 	obdst = bdst;
   1049 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
   1050 	q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
   1051 
   1052 	for(i=0; i<dx; i++){
   1053 		sa = *bsrc.alpha;
   1054 		ma = *bmask.alpha;
   1055 		fd = 255-CALC11(sa, ma, t);
   1056 
   1057 		if(grey){
   1058 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
   1059 			bsrc.grey += bsrc.delta;
   1060 			bdst.grey += bdst.delta;
   1061 		}else{
   1062 			if(q){
   1063 				*bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
   1064 				bsrc.rgba++;
   1065 				bdst.rgba++;
   1066 				bsrc.alpha += sadelta;
   1067 				bmask.alpha += bmask.delta;
   1068 				continue;
   1069 			}
   1070 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
   1071 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
   1072 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
   1073 			bsrc.red += bsrc.delta;
   1074 			bsrc.blu += bsrc.delta;
   1075 			bsrc.grn += bsrc.delta;
   1076 			bdst.red += bdst.delta;
   1077 			bdst.blu += bdst.delta;
   1078 			bdst.grn += bdst.delta;
   1079 		}
   1080 		if(bdst.alpha != &ones){
   1081 			*bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
   1082 			bdst.alpha += bdst.delta;
   1083 		}
   1084 		bmask.alpha += bmask.delta;
   1085 		bsrc.alpha += sadelta;
   1086 	}
   1087 	return obdst;
   1088 }
   1089 
   1090 /*
   1091 not used yet
   1092 source and mask alpha 1
   1093 static Buffer
   1094 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1095 {
   1096 	Buffer obdst;
   1097 	int i;
   1098 
   1099 	USED(op);
   1100 	obdst = bdst;
   1101 	if(bsrc.delta == bdst.delta){
   1102 		memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
   1103 		return obdst;
   1104 	}
   1105 	for(i=0; i<dx; i++){
   1106 		if(grey){
   1107 			*bdst.grey = *bsrc.grey;
   1108 			bsrc.grey += bsrc.delta;
   1109 			bdst.grey += bdst.delta;
   1110 		}else{
   1111 			*bdst.red = *bsrc.red;
   1112 			*bdst.grn = *bsrc.grn;
   1113 			*bdst.blu = *bsrc.blu;
   1114 			bsrc.red += bsrc.delta;
   1115 			bsrc.blu += bsrc.delta;
   1116 			bsrc.grn += bsrc.delta;
   1117 			bdst.red += bdst.delta;
   1118 			bdst.blu += bdst.delta;
   1119 			bdst.grn += bdst.delta;
   1120 		}
   1121 		if(bdst.alpha != &ones){
   1122 			*bdst.alpha = 255;
   1123 			bdst.alpha += bdst.delta;
   1124 		}
   1125 	}
   1126 	return obdst;
   1127 }
   1128 */
   1129 
   1130 /* source alpha 1 */
   1131 static Buffer
   1132 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1133 {
   1134 	Buffer obdst;
   1135 	int fd;
   1136 	int i, ma;
   1137 	uint32 t;
   1138 
   1139 	USED(op);
   1140 	obdst = bdst;
   1141 
   1142 	for(i=0; i<dx; i++){
   1143 		ma = *bmask.alpha;
   1144 		fd = 255-ma;
   1145 
   1146 		if(grey){
   1147 			*bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
   1148 			bsrc.grey += bsrc.delta;
   1149 			bdst.grey += bdst.delta;
   1150 		}else{
   1151 			*bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
   1152 			*bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
   1153 			*bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
   1154 			bsrc.red += bsrc.delta;
   1155 			bsrc.blu += bsrc.delta;
   1156 			bsrc.grn += bsrc.delta;
   1157 			bdst.red += bdst.delta;
   1158 			bdst.blu += bdst.delta;
   1159 			bdst.grn += bdst.delta;
   1160 		}
   1161 		if(bdst.alpha != &ones){
   1162 			*bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
   1163 			bdst.alpha += bdst.delta;
   1164 		}
   1165 		bmask.alpha += bmask.delta;
   1166 	}
   1167 	return obdst;
   1168 }
   1169 
   1170 static Buffer
   1171 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
   1172 {
   1173 	Buffer obdst;
   1174 	int i, ma, zero;
   1175 
   1176 	USED(b1);
   1177 
   1178 	obdst = bdst;
   1179 
   1180 	for(i=0; i<dx; i++){
   1181 		ma = *bmask.alpha;
   1182 		zero = ma ? op == DoutS : op == DinS;
   1183 
   1184 		if(grey){
   1185 			if(zero)
   1186 				*bdst.grey = 0;
   1187 			bdst.grey += bdst.delta;
   1188 		}else{
   1189 			if(zero)
   1190 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1191 			bdst.red += bdst.delta;
   1192 			bdst.blu += bdst.delta;
   1193 			bdst.grn += bdst.delta;
   1194 		}
   1195 		bmask.alpha += bmask.delta;
   1196 		if(bdst.alpha != &ones){
   1197 			if(zero)
   1198 				*bdst.alpha = 0;
   1199 			bdst.alpha += bdst.delta;
   1200 		}
   1201 	}
   1202 	return obdst;
   1203 }
   1204 
   1205 static Buffer
   1206 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1207 {
   1208 	Buffer obdst;
   1209 	int fs, fd;
   1210 	int i, ma, da, zero;
   1211 	uint32 t;
   1212 
   1213 	obdst = bdst;
   1214 	zero = !(op&1);
   1215 
   1216 	for(i=0; i<dx; i++){
   1217 		ma = *bmask.alpha;
   1218 		da = *bdst.alpha;
   1219 		fs = da;
   1220 		if(op&2)
   1221 			fs = 255-da;
   1222 		fd = 0;
   1223 		if(op&4)
   1224 			fd = 255;
   1225 
   1226 		if(grey){
   1227 			if(ma)
   1228 				*bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
   1229 			else if(zero)
   1230 				*bdst.grey = 0;
   1231 			bsrc.grey += bsrc.delta;
   1232 			bdst.grey += bdst.delta;
   1233 		}else{
   1234 			if(ma){
   1235 				*bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
   1236 				*bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
   1237 				*bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
   1238 			}
   1239 			else if(zero)
   1240 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1241 			bsrc.red += bsrc.delta;
   1242 			bsrc.blu += bsrc.delta;
   1243 			bsrc.grn += bsrc.delta;
   1244 			bdst.red += bdst.delta;
   1245 			bdst.blu += bdst.delta;
   1246 			bdst.grn += bdst.delta;
   1247 		}
   1248 		bmask.alpha += bmask.delta;
   1249 		if(bdst.alpha != &ones){
   1250 			if(ma)
   1251 				*bdst.alpha = fs+CALC11(fd, da, t);
   1252 			else if(zero)
   1253 				*bdst.alpha = 0;
   1254 			bdst.alpha += bdst.delta;
   1255 		}
   1256 	}
   1257 	return obdst;
   1258 }
   1259 
   1260 static Buffer
   1261 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
   1262 {
   1263 	Buffer obdst;
   1264 	int i, ma, zero;
   1265 
   1266 	obdst = bdst;
   1267 	zero = !(op&1);
   1268 
   1269 	for(i=0; i<dx; i++){
   1270 		ma = *bmask.alpha;
   1271 
   1272 		if(grey){
   1273 			if(ma)
   1274 				*bdst.grey = *bsrc.grey;
   1275 			else if(zero)
   1276 				*bdst.grey = 0;
   1277 			bsrc.grey += bsrc.delta;
   1278 			bdst.grey += bdst.delta;
   1279 		}else{
   1280 			if(ma){
   1281 				*bdst.red = *bsrc.red;
   1282 				*bdst.grn = *bsrc.grn;
   1283 				*bdst.blu = *bsrc.blu;
   1284 			}
   1285 			else if(zero)
   1286 				*bdst.red = *bdst.grn = *bdst.blu = 0;
   1287 			bsrc.red += bsrc.delta;
   1288 			bsrc.blu += bsrc.delta;
   1289 			bsrc.grn += bsrc.delta;
   1290 			bdst.red += bdst.delta;
   1291 			bdst.blu += bdst.delta;
   1292 			bdst.grn += bdst.delta;
   1293 		}
   1294 		bmask.alpha += bmask.delta;
   1295 		if(bdst.alpha != &ones){
   1296 			if(ma)
   1297 				*bdst.alpha = 255;
   1298 			else if(zero)
   1299 				*bdst.alpha = 0;
   1300 			bdst.alpha += bdst.delta;
   1301 		}
   1302 	}
   1303 	return obdst;
   1304 }
   1305 /*
   1306  * Replicated cached scan line read.  Call the function listed in the Param,
   1307  * but cache the result so that for replicated images we only do the work once.
   1308  */
   1309 static Buffer
   1310 replread(Param *p, uchar *s, int y)
   1311 {
   1312 	Buffer *b;
   1313 
   1314 	USED(s);
   1315 	b = &p->bcache[y];
   1316 	if((p->bfilled & (1<<y)) == 0){
   1317 		p->bfilled |= 1<<y;
   1318 		*b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
   1319 	}
   1320 	return *b;
   1321 }
   1322 
   1323 /*
   1324  * Alpha reading function that simply relabels the grey pointer.
   1325  */
   1326 static Buffer
   1327 greymaskread(Param *p, uchar *buf, int y)
   1328 {
   1329 	Buffer b;
   1330 
   1331 	b = p->greymaskcall(p, buf, y);
   1332 	b.alpha = b.grey;
   1333 	return b;
   1334 }
   1335 
   1336 #define DBG if(0)
   1337 static Buffer
   1338 readnbit(Param *p, uchar *buf, int y)
   1339 {
   1340 	Buffer b;
   1341 	Memimage *img;
   1342 	uchar *repl, *r, *w, *ow, bits;
   1343 	memset(&b, 0, sizeof b); // shut up gcc
   1344 	int i, n, sh, depth, x, dx, npack, nbits;
   1345 
   1346 	b.rgba = (uint32*)buf;
   1347 	b.grey = w = buf;
   1348 	b.red = b.blu = b.grn = w;
   1349 	b.alpha = &ones;
   1350 	b.delta = 1;
   1351 
   1352 	dx = p->dx;
   1353 	img = p->img;
   1354 	depth = img->depth;
   1355 	repl = &replbit[depth][0];
   1356 	npack = 8/depth;
   1357 	sh = 8-depth;
   1358 
   1359 	/* copy from p->r.min.x until end of repl rectangle */
   1360 	x = p->r.min.x;
   1361 	n = dx;
   1362 	if(n > p->img->r.max.x - x)
   1363 		n = p->img->r.max.x - x;
   1364 
   1365 	r = p->bytermin + y*p->bwidth;
   1366 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
   1367 	bits = *r++;
   1368 	nbits = 8;
   1369 	if((i=x&(npack-1))){
   1370 DBG print("throwaway %d...", i);
   1371 		bits <<= depth*i;
   1372 		nbits -= depth*i;
   1373 	}
   1374 	for(i=0; i<n; i++){
   1375 		if(nbits == 0){
   1376 DBG print("(%.2ux)...", *r);
   1377 			bits = *r++;
   1378 			nbits = 8;
   1379 		}
   1380 		*w++ = repl[bits>>sh];
   1381 DBG print("bit %x...", repl[bits>>sh]);
   1382 		bits <<= depth;
   1383 		nbits -= depth;
   1384 	}
   1385 	dx -= n;
   1386 	if(dx == 0)
   1387 		return b;
   1388 
   1389 	assert(x+i == p->img->r.max.x);
   1390 
   1391 	/* copy from beginning of repl rectangle until where we were before. */
   1392 	x = p->img->r.min.x;
   1393 	n = dx;
   1394 	if(n > p->r.min.x - x)
   1395 		n = p->r.min.x - x;
   1396 
   1397 	r = p->bytey0s + y*p->bwidth;
   1398 DBG print("x=%d r=%p...", x, r);
   1399 	bits = *r++;
   1400 	nbits = 8;
   1401 	if((i=x&(npack-1))){
   1402 		bits <<= depth*i;
   1403 		nbits -= depth*i;
   1404 	}
   1405 DBG print("nbits=%d...", nbits);
   1406 	for(i=0; i<n; i++){
   1407 		if(nbits == 0){
   1408 			bits = *r++;
   1409 			nbits = 8;
   1410 		}
   1411 		*w++ = repl[bits>>sh];
   1412 DBG print("bit %x...", repl[bits>>sh]);
   1413 		bits <<= depth;
   1414 		nbits -= depth;
   1415 DBG print("bits %x nbits %d...", bits, nbits);
   1416 	}
   1417 	dx -= n;
   1418 	if(dx == 0)
   1419 		return b;
   1420 
   1421 	assert(dx > 0);
   1422 	/* now we have exactly one full scan line: just replicate the buffer itself until we are done */
   1423 	ow = buf;
   1424 	while(dx--)
   1425 		*w++ = *ow++;
   1426 
   1427 	return b;
   1428 }
   1429 #undef DBG
   1430 
   1431 #define DBG if(0)
   1432 static void
   1433 writenbit(Param *p, uchar *w, Buffer src)
   1434 {
   1435 	uchar *r;
   1436 	uint32 bits;
   1437 	int i, sh, depth, npack, nbits, x, ex;
   1438 
   1439 	assert(src.grey != nil && src.delta == 1);
   1440 
   1441 	x = p->r.min.x;
   1442 	ex = x+p->dx;
   1443 	depth = p->img->depth;
   1444 	npack = 8/depth;
   1445 
   1446 	i=x&(npack-1);
   1447 	bits = i ? (*w >> (8-depth*i)) : 0;
   1448 	nbits = depth*i;
   1449 	sh = 8-depth;
   1450 	r = src.grey;
   1451 
   1452 	for(; x<ex; x++){
   1453 		bits <<= depth;
   1454 DBG print(" %x", *r);
   1455 		bits |= (*r++ >> sh);
   1456 		nbits += depth;
   1457 		if(nbits == 8){
   1458 			*w++ = bits;
   1459 			nbits = 0;
   1460 		}
   1461 	}
   1462 
   1463 	if(nbits){
   1464 		sh = 8-nbits;
   1465 		bits <<= sh;
   1466 		bits |= *w & ((1<<sh)-1);
   1467 		*w = bits;
   1468 	}
   1469 DBG print("\n");
   1470 	return;
   1471 }
   1472 #undef DBG
   1473 
   1474 static Buffer
   1475 readcmap(Param *p, uchar *buf, int y)
   1476 {
   1477 	Buffer b;
   1478 	int a, convgrey, copyalpha, dx, i, m;
   1479 	uchar *q, *cmap, *begin, *end, *r, *w;
   1480 	memset(&b, 0, sizeof b); // shut up gcc
   1481 
   1482 	begin = p->bytey0s + y*p->bwidth;
   1483 	r = p->bytermin + y*p->bwidth;
   1484 	end = p->bytey0e + y*p->bwidth;
   1485 	cmap = p->img->cmap->cmap2rgb;
   1486 	convgrey = p->convgrey;
   1487 	copyalpha = (p->img->flags&Falpha) ? 1 : 0;
   1488 
   1489 	w = buf;
   1490 	dx = p->dx;
   1491 	if(copyalpha){
   1492 		b.alpha = buf++;
   1493 		a = p->img->shift[CAlpha]/8;
   1494 		m = p->img->shift[CMap]/8;
   1495 		for(i=0; i<dx; i++){
   1496 			*w++ = r[a];
   1497 			q = cmap+r[m]*3;
   1498 			r += 2;
   1499 			if(r == end)
   1500 				r = begin;
   1501 			if(convgrey){
   1502 				*w++ = RGB2K(q[0], q[1], q[2]);
   1503 			}else{
   1504 				*w++ = q[2];	/* blue */
   1505 				*w++ = q[1];	/* green */
   1506 				*w++ = q[0];	/* red */
   1507 			}
   1508 		}
   1509 	}else{
   1510 		b.alpha = &ones;
   1511 		for(i=0; i<dx; i++){
   1512 			q = cmap+*r++*3;
   1513 			if(r == end)
   1514 				r = begin;
   1515 			if(convgrey){
   1516 				*w++ = RGB2K(q[0], q[1], q[2]);
   1517 			}else{
   1518 				*w++ = q[2];	/* blue */
   1519 				*w++ = q[1];	/* green */
   1520 				*w++ = q[0];	/* red */
   1521 			}
   1522 		}
   1523 	}
   1524 
   1525 	b.rgba = (uint32*)(buf-copyalpha);
   1526 
   1527 	if(convgrey){
   1528 		b.grey = buf;
   1529 		b.red = b.blu = b.grn = buf;
   1530 		b.delta = 1+copyalpha;
   1531 	}else{
   1532 		b.blu = buf;
   1533 		b.grn = buf+1;
   1534 		b.red = buf+2;
   1535 		b.grey = nil;
   1536 		b.delta = 3+copyalpha;
   1537 	}
   1538 	return b;
   1539 }
   1540 
   1541 static void
   1542 writecmap(Param *p, uchar *w, Buffer src)
   1543 {
   1544 	uchar *cmap, *red, *grn, *blu;
   1545 	int i, dx, delta;
   1546 
   1547 	cmap = p->img->cmap->rgb2cmap;
   1548 	
   1549 	delta = src.delta;
   1550 	red= src.red;
   1551 	grn = src.grn;
   1552 	blu = src.blu;
   1553 
   1554 	dx = p->dx;
   1555 	for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
   1556 		*w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
   1557 }
   1558 
   1559 #define DBG if(0)
   1560 static Buffer
   1561 readbyte(Param *p, uchar *buf, int y)
   1562 {
   1563 	Buffer b;
   1564 	Memimage *img;
   1565 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
   1566 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
   1567 	uchar ured, ugrn, ublu;
   1568 	uint32 u;
   1569 
   1570 	img = p->img;
   1571 	begin = p->bytey0s + y*p->bwidth;
   1572 	r = p->bytermin + y*p->bwidth;
   1573 	end = p->bytey0e + y*p->bwidth;
   1574 
   1575 	w = buf;
   1576 	dx = p->dx;
   1577 	nb = img->depth/8;
   1578 
   1579 	convgrey = p->convgrey;	/* convert rgb to grey */
   1580 	isgrey = img->flags&Fgrey;
   1581 	alphaonly = p->alphaonly;
   1582 	copyalpha = (img->flags&Falpha) ? 1 : 0;
   1583 
   1584 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
   1585 	/* if we can, avoid processing everything */
   1586 	if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
   1587 		memset(&b, 0, sizeof b);
   1588 		if(p->needbuf){
   1589 			memmove(buf, r, dx*nb);
   1590 			r = buf;
   1591 		}
   1592 		b.rgba = (uint32*)r;
   1593 		if(copyalpha)
   1594 			b.alpha = r+img->shift[CAlpha]/8;
   1595 		else
   1596 			b.alpha = &ones;
   1597 		if(isgrey){
   1598 			b.grey = r+img->shift[CGrey]/8;
   1599 			b.red = b.grn = b.blu = b.grey;
   1600 		}else{
   1601 			b.red = r+img->shift[CRed]/8;
   1602 			b.grn = r+img->shift[CGreen]/8;
   1603 			b.blu = r+img->shift[CBlue]/8;
   1604 		}
   1605 		b.delta = nb;
   1606 		return b;
   1607 	}
   1608 
   1609 DBG print("2\n");
   1610 	rrepl = replbit[img->nbits[CRed]];
   1611 	grepl = replbit[img->nbits[CGreen]];
   1612 	brepl = replbit[img->nbits[CBlue]];
   1613 	arepl = replbit[img->nbits[CAlpha]];
   1614 	krepl = replbit[img->nbits[CGrey]];
   1615 
   1616 	for(i=0; i<dx; i++){
   1617 		u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
   1618 		if(copyalpha) {
   1619 			*w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
   1620 DBG print("a %x\n", w[-1]);
   1621 		}
   1622 
   1623 		if(isgrey)
   1624 			*w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
   1625 		else if(!alphaonly){
   1626 			ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
   1627 			ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
   1628 			ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
   1629 			if(convgrey){
   1630 DBG print("g %x %x %x\n", ured, ugrn, ublu);
   1631 				*w++ = RGB2K(ured, ugrn, ublu);
   1632 DBG print("%x\n", w[-1]);
   1633 			}else{
   1634 				*w++ = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
   1635 				*w++ = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
   1636 				*w++ = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
   1637 			}
   1638 		}
   1639 		r += nb;
   1640 		if(r == end)
   1641 			r = begin;
   1642 	}
   1643 	
   1644 	b.alpha = copyalpha ? buf : &ones;
   1645 	b.rgba = (uint32*)buf;
   1646 	if(alphaonly){
   1647 		b.red = b.grn = b.blu = b.grey = nil;
   1648 		if(!copyalpha)
   1649 			b.rgba = nil;
   1650 		b.delta = 1;
   1651 	}else if(isgrey || convgrey){
   1652 		b.grey = buf+copyalpha;
   1653 		b.red = b.grn = b.blu = buf+copyalpha;
   1654 		b.delta = copyalpha+1;
   1655 DBG print("alpha %x grey %x\n", b.alpha ? *b.alpha : 0xFF, *b.grey);
   1656 	}else{
   1657 		b.blu = buf+copyalpha;
   1658 		b.grn = buf+copyalpha+1;
   1659 		b.grey = nil;
   1660 		b.red = buf+copyalpha+2;
   1661 		b.delta = copyalpha+3;
   1662 	}
   1663 	return b;
   1664 }
   1665 #undef DBG
   1666 
   1667 #define DBG if(0)
   1668 static void
   1669 writebyte(Param *p, uchar *w, Buffer src)
   1670 {
   1671 	Memimage *img;
   1672 	int i, isalpha, isgrey, nb, delta, dx, adelta;
   1673 	uchar ff, *red, *grn, *blu, *grey, *alpha;
   1674 	uint32 u, mask;
   1675 
   1676 	img = p->img;
   1677 
   1678 	red = src.red;
   1679 	grn = src.grn;
   1680 	blu = src.blu;
   1681 	alpha = src.alpha;
   1682 	delta = src.delta;
   1683 	grey = src.grey;
   1684 	dx = p->dx;
   1685 
   1686 	nb = img->depth/8;
   1687 	mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
   1688 
   1689 	isalpha = img->flags&Falpha;
   1690 	isgrey = img->flags&Fgrey;
   1691 	adelta = src.delta;
   1692 
   1693 	if(isalpha && (alpha == nil || alpha == &ones)){
   1694 		ff = 0xFF;
   1695 		alpha = &ff;
   1696 		adelta = 0;
   1697 	}
   1698 
   1699 	for(i=0; i<dx; i++){
   1700 		u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
   1701 DBG print("u %.8lux...", u);
   1702 		u &= mask;
   1703 DBG print("&mask %.8lux...", u);
   1704 		if(isgrey){
   1705 			u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
   1706 DBG print("|grey %.8lux...", u);
   1707 			grey += delta;
   1708 		}else{
   1709 			u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
   1710 			u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
   1711 			u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
   1712 			red += delta;
   1713 			grn += delta;
   1714 			blu += delta;
   1715 DBG print("|rgb %.8lux...", u);
   1716 		}
   1717 
   1718 		if(isalpha){
   1719 			u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
   1720 			alpha += adelta;
   1721 DBG print("|alpha %.8lux...", u);
   1722 		}
   1723 
   1724 		w[0] = u;
   1725 		w[1] = u>>8;
   1726 		w[2] = u>>16;
   1727 		w[3] = u>>24;
   1728 		w += nb;
   1729 	}
   1730 }
   1731 #undef DBG
   1732 
   1733 static Readfn*
   1734 readfn(Memimage *img)
   1735 {
   1736 	if(img->depth < 8)
   1737 		return readnbit;
   1738 	if(img->nbits[CMap] == 8)
   1739 		return readcmap;
   1740 	return readbyte;
   1741 }
   1742 
   1743 static Readfn*
   1744 readalphafn(Memimage *m)
   1745 {
   1746 	USED(m);
   1747 	return readbyte;
   1748 }
   1749 
   1750 static Writefn*
   1751 writefn(Memimage *img)
   1752 {
   1753 	if(img->depth < 8)
   1754 		return writenbit;
   1755 	if(img->chan == CMAP8)
   1756 		return writecmap;
   1757 	return writebyte;
   1758 }
   1759 
   1760 static void
   1761 nullwrite(Param *p, uchar *s, Buffer b)
   1762 {
   1763 	USED(p);
   1764 	USED(s);
   1765 	USED(b);
   1766 }
   1767 
   1768 static Buffer
   1769 readptr(Param *p, uchar *s, int y)
   1770 {
   1771 	Buffer b;
   1772 	uchar *q;
   1773 
   1774 	memset(&b, 0, sizeof b); // shut up gcc
   1775 	USED(s);
   1776 	q = p->bytermin + y*p->bwidth;
   1777 	b.red = q;	/* ptr to data */
   1778 	b.grn = b.blu = b.grey = b.alpha = nil;
   1779 	b.rgba = (uint32*)q;
   1780 	b.delta = p->img->depth/8;
   1781 	return b;
   1782 }
   1783 
   1784 static Buffer
   1785 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
   1786 {
   1787 	USED(i);
   1788 	USED(o);
   1789 	USED(b1);
   1790 	USED(bsrc);
   1791 	memmove(bdst.red, bsrc.red, dx*bdst.delta);
   1792 	return bdst;
   1793 }
   1794 
   1795 static Buffer
   1796 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1797 {
   1798 	uchar *m, *r, *w, *ew;
   1799 
   1800 	USED(i);
   1801 	USED(o);
   1802 	m = bmask.grey;
   1803 	w = bdst.red;
   1804 	r = bsrc.red;
   1805 	ew = w+dx;
   1806 	for(; w < ew; w++,r++)
   1807 		if(*m++)
   1808 			*w = *r;
   1809 	return bdst;	/* not used */
   1810 }
   1811 
   1812 static Buffer
   1813 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1814 {
   1815 	uchar *m;
   1816 	ushort *r, *w, *ew;
   1817 
   1818 	USED(i);
   1819 	USED(o);
   1820 	m = bmask.grey;
   1821 	w = (ushort*)bdst.red;
   1822 	r = (ushort*)bsrc.red;
   1823 	ew = w+dx;
   1824 	for(; w < ew; w++,r++)
   1825 		if(*m++)
   1826 			*w = *r;
   1827 	return bdst;	/* not used */
   1828 }
   1829 
   1830 static Buffer
   1831 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1832 {
   1833 	uchar *m;
   1834 	uchar *r, *w, *ew;
   1835 
   1836 	USED(i);
   1837 	USED(o);
   1838 	m = bmask.grey;
   1839 	w = bdst.red;
   1840 	r = bsrc.red;
   1841 	ew = w+dx*3;
   1842 	while(w < ew){
   1843 		if(*m++){
   1844 			*w++ = *r++;
   1845 			*w++ = *r++;
   1846 			*w++ = *r++;
   1847 		}else{
   1848 			w += 3;
   1849 			r += 3;
   1850 		}
   1851 	}
   1852 	return bdst;	/* not used */
   1853 }
   1854 
   1855 static Buffer
   1856 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
   1857 {
   1858 	uchar *m;
   1859 	uint32 *r, *w, *ew;
   1860 
   1861 	USED(i);
   1862 	USED(o);
   1863 	m = bmask.grey;
   1864 	w = (uint32*)bdst.red;
   1865 	r = (uint32*)bsrc.red;
   1866 	ew = w+dx;
   1867 	for(; w < ew; w++,r++)
   1868 		if(*m++)
   1869 			*w = *r;
   1870 	return bdst;	/* not used */
   1871 }
   1872 
   1873 static Buffer
   1874 genconv(Param *p, uchar *buf, int y)
   1875 {
   1876 	Buffer b;
   1877 	int nb;
   1878 	uchar *r, *w, *ew;
   1879 
   1880 	/* read from source into RGB format in convbuf */
   1881 	b = p->convreadcall(p, p->convbuf, y);
   1882 
   1883 	/* write RGB format into dst format in buf */
   1884 	p->convwritecall(p->convdpar, buf, b);
   1885 
   1886 	if(p->convdx){
   1887 		nb = p->convdpar->img->depth/8;
   1888 		r = buf;
   1889 		w = buf+nb*p->dx;
   1890 		ew = buf+nb*p->convdx;
   1891 		while(w<ew)
   1892 			*w++ = *r++;
   1893 	}
   1894 
   1895 	b.red = buf;
   1896 	b.blu = b.grn = b.grey = b.alpha = nil;
   1897 	b.rgba = (uint32*)buf;
   1898 	b.delta = 0;
   1899 	
   1900 	return b;
   1901 }
   1902 
   1903 static Readfn*
   1904 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
   1905 {
   1906 	if(dst->chan == src->chan && !(src->flags&Frepl)){
   1907 //if(drawdebug) iprint("readptr...");
   1908 		return readptr;
   1909 	}
   1910 
   1911 	if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
   1912 		/* cheat because we know the replicated value is exactly the color map entry. */
   1913 //if(drawdebug) iprint("Readnbit...");
   1914 		return readnbit;
   1915 	}
   1916 
   1917 	spar->convreadcall = readfn(src);
   1918 	spar->convwritecall = writefn(dst);
   1919 	spar->convdpar = dpar;
   1920 
   1921 	/* allocate a conversion buffer */
   1922 	spar->convbufoff = *ndrawbuf;
   1923 	*ndrawbuf += spar->dx*4;
   1924 
   1925 	if(spar->dx > Dx(spar->img->r)){
   1926 		spar->convdx = spar->dx;
   1927 		spar->dx = Dx(spar->img->r);
   1928 	}
   1929 
   1930 //if(drawdebug) iprint("genconv...");
   1931 	return genconv;
   1932 }
   1933 
   1934 uint32
   1935 _pixelbits(Memimage *i, Point pt)
   1936 {
   1937 	uchar *p;
   1938 	uint32 val;
   1939 	int off, bpp, npack;
   1940 
   1941 	val = 0;
   1942 	p = byteaddr(i, pt);
   1943 	switch(bpp=i->depth){
   1944 	case 1:
   1945 	case 2:
   1946 	case 4:
   1947 		npack = 8/bpp;
   1948 		off = pt.x%npack;
   1949 		val = p[0] >> bpp*(npack-1-off);
   1950 		val &= (1<<bpp)-1;
   1951 		break;
   1952 	case 8:
   1953 		val = p[0];
   1954 		break;
   1955 	case 16:
   1956 		val = p[0]|(p[1]<<8);
   1957 		break;
   1958 	case 24:
   1959 		val = p[0]|(p[1]<<8)|(p[2]<<16);
   1960 		break;
   1961 	case 32:
   1962 		val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
   1963 		break;
   1964 	}
   1965 	while(bpp<32){
   1966 		val |= val<<bpp;
   1967 		bpp *= 2;
   1968 	}
   1969 	return val;
   1970 }
   1971 
   1972 static Calcfn*
   1973 boolcopyfn(Memimage *img, Memimage *mask)
   1974 {
   1975 	if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
   1976 		return boolmemmove;
   1977 
   1978 	switch(img->depth){
   1979 	case 8:
   1980 		return boolcopy8;
   1981 	case 16:
   1982 		return boolcopy16;
   1983 	case 24:
   1984 		return boolcopy24;
   1985 	case 32:
   1986 		return boolcopy32;
   1987 	default:
   1988 		assert(0 /* boolcopyfn */);
   1989 	}
   1990 	return nil;
   1991 }
   1992 
   1993 /*
   1994  * Optimized draw for filling and scrolling; uses memset and memmove.
   1995  */
   1996 #if 0
   1997 static void
   1998 memsetb(void *vp, uchar val, int n)
   1999 {
   2000 	uchar *p, *ep;
   2001 
   2002 	p = vp;
   2003 	ep = p+n;
   2004 	while(p<ep)
   2005 		*p++ = val;
   2006 }
   2007 #endif
   2008 
   2009 static void
   2010 memsets(void *vp, ushort val, int n)
   2011 {
   2012 	ushort *p, *ep;
   2013 
   2014 	p = vp;
   2015 	ep = p+n;
   2016 	while(p<ep)
   2017 		*p++ = val;
   2018 }
   2019 
   2020 static void
   2021 memsetl(void *vp, uint32 val, int n)
   2022 {
   2023 	uint32 *p, *ep;
   2024 
   2025 	p = vp;
   2026 	ep = p+n;
   2027 	while(p<ep)
   2028 		*p++ = val;
   2029 }
   2030 
   2031 static void
   2032 memset24(void *vp, uint32 val, int n)
   2033 {
   2034 	uchar *p, *ep;
   2035 	uchar a,b,c;
   2036 
   2037 	p = vp;
   2038 	ep = p+3*n;
   2039 	a = val;
   2040 	b = val>>8;
   2041 	c = val>>16;
   2042 	while(p<ep){
   2043 		*p++ = a;
   2044 		*p++ = b;
   2045 		*p++ = c;
   2046 	}
   2047 }
   2048 
   2049 uint32
   2050 _imgtorgba(Memimage *img, uint32 val)
   2051 {
   2052 	uchar r, g, b, a;
   2053 	int nb, ov, v;
   2054 	uint32 chan;
   2055 	uchar *p;
   2056 
   2057 	a = 0xFF;
   2058 	r = g = b = 0xAA;	/* garbage */
   2059 	for(chan=img->chan; chan; chan>>=8){
   2060 		nb = NBITS(chan);
   2061 		ov = v = val&((1<<nb)-1);
   2062 		val >>= nb;
   2063 
   2064 		while(nb < 8){
   2065 			v |= v<<nb;
   2066 			nb *= 2;
   2067 		}
   2068 		v >>= (nb-8);
   2069 
   2070 		switch(TYPE(chan)){
   2071 		case CRed:
   2072 			r = v;
   2073 			break;
   2074 		case CGreen:
   2075 			g = v;
   2076 			break;
   2077 		case CBlue:
   2078 			b = v;
   2079 			break;
   2080 		case CAlpha:
   2081 			a = v;
   2082 			break;
   2083 		case CGrey:
   2084 			r = g = b = v;
   2085 			break;
   2086 		case CMap:
   2087 			p = img->cmap->cmap2rgb+3*ov;
   2088 			r = *p++;
   2089 			g = *p++;	
   2090 			b = *p;
   2091 			break;
   2092 		}
   2093 	}
   2094 	return (r<<24)|(g<<16)|(b<<8)|a;	
   2095 }
   2096 
   2097 uint32
   2098 _rgbatoimg(Memimage *img, uint32 rgba)
   2099 {
   2100 	uint32 chan;
   2101 	int d, nb;
   2102 	uint32 v;
   2103 	uchar *p, r, g, b, a, m;
   2104 
   2105 	v = 0;
   2106 	r = rgba>>24;
   2107 	g = rgba>>16;
   2108 	b = rgba>>8;
   2109 	a = rgba;
   2110 	d = 0;
   2111 	for(chan=img->chan; chan; chan>>=8){
   2112 		nb = NBITS(chan);
   2113 		switch(TYPE(chan)){
   2114 		case CRed:
   2115 			v |= (r>>(8-nb))<<d;
   2116 			break;
   2117 		case CGreen:
   2118 			v |= (g>>(8-nb))<<d;
   2119 			break;
   2120 		case CBlue:
   2121 			v |= (b>>(8-nb))<<d;
   2122 			break;
   2123 		case CAlpha:
   2124 			v |= (a>>(8-nb))<<d;
   2125 			break;
   2126 		case CMap:
   2127 			p = img->cmap->rgb2cmap;
   2128 			m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
   2129 			v |= (m>>(8-nb))<<d;
   2130 			break;
   2131 		case CGrey:
   2132 			m = RGB2K(r,g,b);
   2133 			v |= (m>>(8-nb))<<d;
   2134 			break;
   2135 		}
   2136 		d += nb;
   2137 	}
   2138 //	print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
   2139 	return v;
   2140 }
   2141 
   2142 #define DBG if(0)
        /*
         * memoptdraw: try to carry out the draw described by par with one of
         * three special-case fast paths.  Returns 1 when a fast path did the
         * work and 0 when none of them applied.
         *
         *  1. solid fill: simple source, simple full mask, opaque source colour
         *     (low byte of par->srgba is 0xFF) and op S or SoverD; the
         *     destination rectangle is filled with par->sdval.
         *  2. straight copy: full simple mask, non-replicated source of depth
         *     >= 8 with the same channel layout as the destination and no
         *     Falpha flag, op S or SoverD; rows are copied with memmove.
         *  3. 1-bit boolean: GREY1 source, mask and destination whose x origins
         *     share the same bit offset within a byte; combined with byte-wide
         *     bit operations.
         */
   2143 static int
   2144 memoptdraw(Memdrawparam *par)
   2145 {
   2146 	int m, y, dy, dx, op;
   2147 	uint32 v;
   2148 	Memimage *src;
   2149 	Memimage *dst;
   2150 
   2151 	dx = Dx(par->r);
   2152 	dy = Dy(par->r);
   2153 	src = par->src;
   2154 	dst = par->dst;
   2155 	op = par->op;
   2156 
   2157 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
   2158 	/*
   2159 	 * If we have an opaque mask and source is one opaque pixel we can convert to the
   2160 	 * destination format and just replicate with memset.
   2161 	 */
   2162 	m = Simplesrc|Simplemask|Fullmask;
   2163 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
   2164 		uchar *dp, p[4];
   2165 		int d, dwid, ppb, np, nb;
   2166 		uchar lm, rm;
   2167 
   2168 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
        		/* byte step between rows; width is scaled by sizeof(uint32), so presumably counted in 32-bit words */
   2169 		dwid = dst->width*sizeof(uint32);
   2170 		dp = byteaddr(dst, par->r.min);
   2171 		v = par->sdval;
   2172 DBG print("sdval %lud, depth %d\n", v, dst->depth);
   2173 		switch(dst->depth){
   2174 		case 1:
   2175 		case 2:
   2176 		case 4:
        			/* repeat the pixel value until it fills a whole byte */
   2177 			for(d=dst->depth; d<8; d*=2)
   2178 				v |= (v<<d);
   2179 			ppb = 8/dst->depth;	/* pixels per byte */
        			/* ppb is 8, 4 or 2 here, so m masks the pixel index within a byte */
   2180 			m = ppb-1;
   2181 			/* left edge */
   2182 			np = par->r.min.x&m;		/* no. pixels unused on left side of word */
   2183 			dx -= (ppb-np);
   2184 			nb = 8 - np * dst->depth;		/* no. bits used on right side of word */
        			/* nb >= dst->depth here, so lm always has at least one bit set */
   2185 			lm = (1<<nb)-1;
   2186 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);	
   2187 
   2188 			/* right edge */
   2189 			np = par->r.max.x&m;	/* no. pixels used on left side of word */
   2190 			dx -= np;
   2191 			nb = 8 - np * dst->depth;		/* no. bits unused on right side of word */
        			/* when r.max.x is byte aligned np is 0 and rm becomes 0 after truncation to uchar */
   2192 			rm = ~((1<<nb)-1);
   2193 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);	
   2194 
   2195 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
   2196 			/* lm, rm are masks that are 1 where we should touch the bits */
        			/*
        			 * Throughout, x ^= (v ^ x) & mask copies v's bits into x
        			 * where mask is 1 and leaves the other bits of x alone.
        			 * dx now counts the pixels lying in whole bytes between
        			 * the two edge bytes.
        			 */
   2197 			if(dx < 0){	/* just one byte */
   2198 				lm &= rm;
   2199 				for(y=0; y<dy; y++, dp+=dwid)
   2200 					*dp ^= (v ^ *dp) & lm;
   2201 			}else if(dx == 0){	/* no full bytes */
        				/* the loop body steps dp past the left byte, so shorten the row step to match */
   2202 				if(lm)
   2203 					dwid--;
   2204 
   2205 				for(y=0; y<dy; y++, dp+=dwid){
   2206 					if(lm){
   2207 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
   2208 						*dp ^= (v ^ *dp) & lm;
   2209 						dp++;
   2210 					}
        					/* NOTE(review): with rm == 0 this still reads and rewrites the byte after the rectangle, unchanged -- confirm that byte is always addressable */
   2211 					*dp ^= (v ^ *dp) & rm;
   2212 				}
   2213 			}else{		/* full bytes in middle */
        				/* convert dx from pixels to whole bytes; dwid becomes the step from the right-edge byte to the next row's first byte */
   2214 				dx /= ppb;
   2215 				if(lm)
   2216 					dwid--;
   2217 				dwid -= dx;
   2218 
   2219 				for(y=0; y<dy; y++, dp+=dwid){
   2220 					if(lm){
   2221 						*dp ^= (v ^ *dp) & lm;
   2222 						dp++;
   2223 					}
   2224 					memset(dp, v, dx);
   2225 					dp += dx;
   2226 					*dp ^= (v ^ *dp) & rm;
   2227 				}
   2228 			}
   2229 			return 1;
   2230 		case 8:
        			/* one byte per pixel: each row is a plain memset of dx bytes */
   2231 			for(y=0; y<dy; y++, dp+=dwid)
   2232 				memset(dp, v, dx);
   2233 			return 1;
   2234 		case 16:
        			/*
        			 * Lay the value out low byte first, then read it back as a
        			 * native ushort so that storing it reproduces that byte order.
        			 * NOTE(review): p is a uchar array read through a ushort
        			 * lvalue; this relies on the compiler and target tolerating
        			 * the type pun and the array's alignment.
        			 */
   2235 			p[0] = v;		/* make little endian */
   2236 			p[1] = v>>8;
   2237 			v = *(ushort*)p;
   2238 DBG print("dp=%p; dx=%d; for(y=0; y<%d; y++, dp+=%d)\nmemsets(dp, v, dx);\n",
   2239 	dp, dx, dy, dwid);
   2240 			for(y=0; y<dy; y++, dp+=dwid)
   2241 				memsets(dp, v, dx);
   2242 			return 1;
   2243 		case 24:
   2244 			for(y=0; y<dy; y++, dp+=dwid)
   2245 				memset24(dp, v, dx);
   2246 			return 1;
   2247 		case 32:
        			/* same byte-order normalisation as the 16-bit case, over four bytes (same NOTE(review) applies) */
   2248 			p[0] = v;		/* make little endian */
   2249 			p[1] = v>>8;
   2250 			p[2] = v>>16;
   2251 			p[3] = v>>24;
   2252 			v = *(uint32*)p;
   2253 			for(y=0; y<dy; y++, dp+=dwid)
   2254 				memsetl(dp, v, dx);
   2255 			return 1;
   2256 		default:
        			/* if assert does not abort, control leaves the switch and tries the remaining fast paths */
   2257 			assert(0 /* bad dest depth in memoptdraw */);
   2258 		}
   2259 	}
   2260 
   2261 	/*
   2262 	 * If no source alpha, an opaque mask, we can just copy the
   2263 	 * source onto the destination.  If the channels are the same and
   2264 	 * the source is not replicated, memmove suffices.
   2265 	 */
   2266 	m = Simplemask|Fullmask;
   2267 	if((par->state&(m|Replsrc))==m && src->depth >= 8 
   2268 	&& src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
   2269 		uchar *sp, *dp;
   2270 		long swid, dwid, nb;
   2271 		int dir;
   2272 
        		/*
        		 * When both images share storage and the destination starts
        		 * at a higher address than the source, copy the rows from
        		 * last to first.
        		 */
   2273 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
   2274 			dir = -1;
   2275 		else
   2276 			dir = 1;
   2277 
   2278 		swid = src->width*sizeof(uint32);
   2279 		dwid = dst->width*sizeof(uint32);
   2280 		sp = byteaddr(src, par->sr.min);
   2281 		dp = byteaddr(dst, par->r.min);
   2282 		if(dir == -1){
        			/* start on the last row and negate the steps so the loop walks upward */
   2283 			sp += (dy-1)*swid;
   2284 			dp += (dy-1)*dwid;
   2285 			swid = -swid;
   2286 			dwid = -dwid;
   2287 		}
        		/* bytes per row of the rectangle */
   2288 		nb = (dx*src->depth)/8;
   2289 		for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
   2290 			memmove(dp, sp, nb);
   2291 		return 1;
   2292 	}
   2293 
   2294 	/*
   2295 	 * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
   2296 	 * they're all bit aligned, we can just use bit operators.  This happens
   2297 	 * when we're manipulating boolean masks, e.g. in the arc code.
   2298 	 */
   2299 	if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0 
   2300 	&& dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1 
   2301 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
   2302 		uchar *sp, *dp, *mp;
   2303 		uchar lm, rm;
   2304 		long swid, dwid, mwid;
   2305 		int i, x, dir;
   2306 
   2307 		sp = byteaddr(src, par->sr.min);
   2308 		dp = byteaddr(dst, par->r.min);
   2309 		mp = byteaddr(par->mask, par->mr.min);
   2310 		swid = src->width*sizeof(uint32);
   2311 		dwid = dst->width*sizeof(uint32);
   2312 		mwid = par->mask->width*sizeof(uint32);
   2313 
        		/* same direction rule as the copy path: go backwards when dst follows src in shared storage */
   2314 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
   2315 			dir = -1;
   2316 		}else
   2317 			dir = 1;
   2318 
        		/*
        		 * lm selects the bits of the first byte at and after r.min.x;
        		 * rm selects the bits of the last byte before r.max.x and is 0
        		 * (after truncation to uchar) when r.max.x is byte aligned.
        		 * The masks treat the high bit of a byte as its leftmost pixel.
        		 * dx is reduced to the pixels lying in whole middle bytes; it
        		 * goes negative when both edges fall in the same byte.
        		 */
   2319 		lm = 0xFF>>(par->r.min.x&7);
   2320 		rm = 0xFF<<(8-(par->r.max.x&7));
   2321 		dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
   2322 
   2323 		if(dx < 0){	/* one byte wide */
   2324 			lm &= rm;
   2325 			if(dir == -1){
   2326 				dp += dwid*(dy-1);
   2327 				sp += swid*(dy-1);
   2328 				mp += mwid*(dy-1);
   2329 				dwid = -dwid;
   2330 				swid = -swid;
   2331 				mwid = -mwid;
   2332 			}
   2333 			for(y=0; y<dy; y++){
        				/* take the source bit wherever both the mask byte and lm are 1 */
   2334 				*dp ^= (*dp ^ *sp) & *mp & lm;
   2335 				dp += dwid;
   2336 				sp += swid;
   2337 				mp += mwid;
   2338 			}
   2339 			return 1;
   2340 		}
   2341 
        		/* from here on dx counts whole middle bytes */
   2342 		dx /= 8;
   2343 		if(dir == 1){
        			/* i is the number of bytes visited per row; the row steps are reduced by it because the pointers advance inside the loop */
   2344 			i = (lm!=0)+dx+(rm!=0);
   2345 			mwid -= i;
   2346 			swid -= i;
   2347 			dwid -= i;
   2348 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
   2349 				if(lm){
   2350 					*dp ^= (*dp ^ *sp++) & *mp++ & lm;
   2351 					dp++;
   2352 				}
   2353 				for(x=0; x<dx; x++){
   2354 					*dp ^= (*dp ^ *sp++) & *mp++;
   2355 					dp++;
   2356 				}
   2357 				if(rm){
   2358 					*dp ^= (*dp ^ *sp++) & *mp++ & rm;
   2359 					dp++;
   2360 				}
   2361 			}
   2362 			return 1;
   2363 		}else{
   2364 		/* dir == -1 */
        			/* start at the last visited byte of the last row and walk each row right to left, bottom row first */
   2365 			i = (lm!=0)+dx+(rm!=0);
   2366 			dp += dwid*(dy-1)+i-1;
   2367 			sp += swid*(dy-1)+i-1;
   2368 			mp += mwid*(dy-1)+i-1;
   2369 			dwid = -dwid+i;
   2370 			swid = -swid+i;
   2371 			mwid = -mwid+i;
   2372 			for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
   2373 				if(rm){
   2374 					*dp ^= (*dp ^ *sp--) & *mp-- & rm;
   2375 					dp--;
   2376 				}
   2377 				for(x=0; x<dx; x++){
   2378 					*dp ^= (*dp ^ *sp--) & *mp--;
   2379 					dp--;
   2380 				}
   2381 				if(lm){
   2382 					*dp ^= (*dp ^ *sp--) & *mp-- & lm;
   2383 					dp--;
   2384 				}
   2385 			}
   2386 		}
   2387 		return 1;
   2388 	}
        	/* no fast path matched */
   2389 	return 0;	
   2390 }
   2391 #undef DBG
   2392 
   2393 /*
   2394  * Boolean character drawing.
   2395  * Solid opaque color through a 1-bit greyscale mask.
   2396  */
   2397 #define DBG if(0)
   2398 static int
   2399 chardraw(Memdrawparam *par)
   2400 {
   2401 	uint32 bits;
   2402 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
   2403 	uint32 v, maskwid, dstwid;
   2404 	uchar *wp, *rp, *q, *wc;
   2405 	ushort *ws;
   2406 	uint32 *wl;
   2407 	uchar sp[4];
   2408 	Rectangle r, mr;
   2409 	Memimage *mask, *src, *dst;
   2410 
   2411 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
   2412 		par->mask->flags, par->mask->depth, par->src->flags, 
   2413 		Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
   2414 
   2415 	mask = par->mask;
   2416 	src = par->src;
   2417 	dst = par->dst;
   2418 	r = par->r;
   2419 	mr = par->mr;
   2420 	op = par->op;
   2421 
   2422 	if((par->state&(Replsrc|Simplesrc|Fullsrc|Replmask)) != (Replsrc|Simplesrc|Fullsrc)
   2423 	|| mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
   2424 	|| op != SoverD)
   2425 		return 0;
   2426 
   2427 //if(drawdebug) iprint("chardraw...");
   2428 
   2429 	depth = mask->depth;
   2430 	maskwid = mask->width*sizeof(uint32);
   2431 	rp = byteaddr(mask, mr.min);
   2432 	npack = 8/depth;
   2433 	bsh = (mr.min.x % npack) * depth;
   2434 
   2435 	wp = byteaddr(dst, r.min);
   2436 	dstwid = dst->width*sizeof(uint32);
   2437 DBG print("bsh %d\n", bsh);
   2438 	dy = Dy(r);
   2439 	dx = Dx(r);
   2440 
   2441 	ddepth = dst->depth;
   2442 
   2443 	/*
   2444 	 * for loop counts from bsh to bsh+dx
   2445 	 *
   2446 	 * we want the bottom bits to be the amount
   2447 	 * to shift the pixels down, so for n≡0 (mod 8) we want 
   2448 	 * bottom bits 7.  for n≡1, 6, etc.
   2449 	 * the bits come from -n-1.
   2450 	 */
   2451 
   2452 	bx = -bsh-1;
   2453 	ex = -bsh-1-dx;
   2454 	bits=0;
   2455 	v = par->sdval;
   2456 
   2457 	/* make little endian */
   2458 	sp[0] = v;
   2459 	sp[1] = v>>8;
   2460 	sp[2] = v>>16;
   2461 	sp[3] = v>>24;
   2462 
   2463 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
   2464 	for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
   2465 		q = rp;
   2466 		if(bsh)
   2467 			bits = *q++;
   2468 		switch(ddepth){
   2469 		case 8:
   2470 //if(drawdebug) iprint("8loop...");
   2471 			wc = wp;
   2472 			for(x=bx; x>ex; x--, wc++){
   2473 				i = x&7;
   2474 				if(i == 8-1)
   2475 					bits = *q++;
   2476 DBG print("bits %lux sh %d...", bits, i);
   2477 				if((bits>>i)&1)
   2478 					*wc = v;
   2479 			}
   2480 			break;
   2481 		case 16:
   2482 			ws = (ushort*)wp;
   2483 			v = *(ushort*)sp;
   2484 			for(x=bx; x>ex; x--, ws++){
   2485 				i = x&7;
   2486 				if(i == 8-1)
   2487 					bits = *q++;
   2488 DBG print("bits %lux sh %d...", bits, i);
   2489 				if((bits>>i)&1)
   2490 					*ws = v;
   2491 			}
   2492 			break;
   2493 		case 24:
   2494 			wc = wp;
   2495 			for(x=bx; x>ex; x--, wc+=3){
   2496 				i = x&7;
   2497 				if(i == 8-1)
   2498 					bits = *q++;
   2499 DBG print("bits %lux sh %d...", bits, i);
   2500 				if((bits>>i)&1){
   2501 					wc[0] = sp[0];
   2502 					wc[1] = sp[1];
   2503 					wc[2] = sp[2];
   2504 				}
   2505 			}
   2506 			break;
   2507 		case 32:
   2508 			wl = (uint32*)wp;
   2509 			v = *(uint32*)sp;
   2510 			for(x=bx; x>ex; x--, wl++){
   2511 				i = x&7;
   2512 				if(i == 8-1)
   2513 					bits = *q++;
   2514 DBG iprint("bits %lux sh %d...", bits, i);
   2515 				if((bits>>i)&1)
   2516 					*wl = v;
   2517 			}
   2518 			break;
   2519 		}
   2520 	}
   2521 
   2522 DBG print("\n");	
   2523 	return 1;	
   2524 }
   2525 #undef DBG
   2526 
   2527 
   2528 /*
   2529  * Fill entire byte with replicated (if necessary) copy of source pixel,
   2530  * assuming destination ldepth is >= source ldepth.
   2531  *
   2532  * This code is just plain wrong for >8bpp.
   2533  *
   2534 uint32
   2535 membyteval(Memimage *src)
   2536 {
   2537 	int i, val, bpp;
   2538 	uchar uc;
   2539 
   2540 	unloadmemimage(src, src->r, &uc, 1);
   2541 	bpp = src->depth;
   2542 	uc <<= (src->r.min.x&(7/src->depth))*src->depth;
   2543 	uc &= ~(0xFF>>bpp);
   2544 	// pixel value is now in high part of byte. repeat throughout byte 
   2545 	val = uc;
   2546 	for(i=bpp; i<8; i<<=1)
   2547 		val |= val>>i;
   2548 	return val;
   2549 }
   2550  * 
   2551  */
   2552 
   2553 void
   2554 _memfillcolor(Memimage *i, uint32 val)
   2555 {
   2556 	uint32 bits;
   2557 	int d, y;
   2558 
   2559 	if(val == DNofill)
   2560 		return;
   2561 
   2562 	bits = _rgbatoimg(i, val);
   2563 	switch(i->depth){
   2564 	case 24:	/* 24-bit images suck */
   2565 		for(y=i->r.min.y; y<i->r.max.y; y++)
   2566 			memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
   2567 		break;
   2568 	default:	/* 1, 2, 4, 8, 16, 32 */
   2569 		for(d=i->depth; d<32; d*=2)
   2570 			bits = (bits << d) | bits;
   2571 		memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
   2572 		break;
   2573 	}
   2574 }
   2575