xmlpull

Simple XML parsing for Plan 9 and Linux.
git clone git://r-36.net/xmlpull
Log | Files | Refs | LICENSE

xmlpull.c (7087B)


      1 /*
      2  * Copy me if you can.
      3  * by 20h
      4  */
      5 
      6 #ifndef PLAN9
      7 #include <stdio.h>
      8 #include <stdlib.h>
      9 #include <fcntl.h>
     10 #include <string.h>
     11 #endif
     12 #ifdef PLAN9
     13 #include <u.h>
     14 #include <libc.h>
     15 #endif
     16 #include "xmlpull.h"
     17 
     18 void *
     19 reallocp(void *p, int s, short d)
     20 {
     21 
     22 	p = realloc(p, s);
     23 	if(p == nil) {
     24 		perror("realloc");
     25 		exits("realloc");
     26 	}
     27 
     28 	if(d != 0)
     29 		memset(p, 0, s);
     30 
     31 	return (void *)p;
     32 }
     33 
     34 void
     35 freexmlpull(xmlpull *x)
     36 {
     37 	if(x != nil){
     38 		if(x->na != nil)
     39 			free(x->na);
     40 		if(x->va != nil)
     41 			free(x->va);
     42 		free(x);
     43 	}
     44 
     45 	return;
     46 }
     47 
     48 xmlpull *
     49 openxmlpull(int fd)
     50 {
     51 	xmlpull *ret;
     52 
     53 	ret = reallocp(nil, sizeof(xmlpull), 2);
     54 	ret->na = nil;
     55 	ret->va = nil;
     56 	ret->lm = nil;
     57 	ret->ln = 0;
     58 	ret->lv = 0;
     59 	ret->la = 0;
     60 	ret->ev = START_DOCUMENT;
     61 	ret->nev = START_DOCUMENT;
     62 	ret->fd = fd;
     63 
     64 	return ret;
     65 }
     66 
     67 char
     68 getchara(xmlpull *x)
     69 {
     70 	char g;
     71 
     72 	if(read(x->fd, &g, 1) <= 0){
     73 		x->ev = END_DOCUMENT;
     74 		return (char)0;
     75 	}
     76 
     77 	return g;
     78 }
     79 
     80 char *
     81 addchara(char *b, int *l, char c)
     82 {
     83 	b = reallocp(b, ++(*l) + 1, 0);
     84 	b[(*l) - 1] = c;
     85 	b[*l] = '\0';
     86 
     87 	return b;
     88 }
     89 
     90 char *
     91 readuntilstr(xmlpull *x, char *str)
     92 {
     93 	char g, *u;
     94 	int p;
     95 
     96 	u = reallocp(nil, strlen(str) + 1, 2);
     97 	p = 0;
     98 
     99 	while((g = getchara(x)) != 0) {
    100 		u[p++] = g;
    101 		if(p < strlen(str))
    102 			continue;
    103 		if(!strncmp(u, str, strlen(str))) {
    104 			free(u);
    105 			return x->na;
    106 		}
    107 		p--;
    108 
    109 		x->na = addchara(x->na, &x->ln, u[0]);
    110 		memmove(u, u + 1, strlen(str) - 1);
    111 	}
    112 	free(u);
    113 
    114 	return nil;
    115 }
    116 
    117 char *
    118 readuntil(xmlpull *x, char *b, int *l, char w, char t)
    119 {
    120 	char g;
    121 
    122 	while((g = getchara(x)) != 0) {
    123 		//print("||%c>%c||", g, w);
    124 		if(g == w){
    125 			b = addchara(b, l, '\0');
    126 			return b;
    127 		}
    128 
    129 		switch(g) {
    130 		case '/':
    131 		case '>':
    132 			if(t != 0) {
    133 				addchara(b, l, g);
    134 				return nil;
    135 			}
    136 		case '\t':
    137 		case '\r':
    138 		case '\n':
    139 		case ' ':
    140 			if(t != 0)
    141 				return b;
    142 			b = addchara(b, l, g);
    143 			break;
    144 		case '\\':
    145 			g = getchara(x);
    146 			//print("%c", g);
    147 			if(g == 0)
    148 				return nil;
    149 			b = addchara(b, l, g);
    150 			break;
    151 		default:
    152 			b = addchara(b, l, g);
    153 			break;
    154 		}
    155 	}
    156 
    157 	return nil;
    158 }
    159 
    160 char *
    161 parseattrib(xmlpull *x)
    162 {
    163 	char g, *b;
    164 
    165 	while((g = getchara(x)) != 0) {
    166 		//print("%c", g);
    167 		switch(g){
    168 		case '\t':
    169 		case '\r':
    170 		case '\n':
    171 		case ' ':
    172 			continue;
    173 		case '/':
    174 		case '>':
    175 			x->na = addchara(x->na, &x->ln, g);
    176 			return nil;
    177 		default:
    178 			x->na = addchara(x->na, &x->ln, g);
    179 			g = (char)0;
    180 		}
    181 		if(g == (char)0)
    182 			break;
    183 	}
    184 
    185 	if((b = readuntil(x, x->na, &x->ln, '=', 2)) == nil)
    186 		return nil;
    187 	x->na = b;
    188 
    189 	if((g = getchara(x)) == 0)
    190 		return nil;
    191 
    192 	//print("magic char: %c\n", g);
    193 	switch(g) {
    194 	case '"':
    195 	case '\'':
    196 		if((b = readuntil(x, x->va, &x->lv, g, 0)) == nil)
    197 			return nil;
    198 		x->va = b;
    199 		return x->va;
    200 	default:
    201 		if((b = readuntil(x, x->va, &x->lv, '>', 2)) == nil)
    202 			return nil;
    203 		x->va = b;
    204 		return x->na;
    205 	}
    206 
    207 	return x->na;
    208 }
    209 
    210 char *
    211 readname(xmlpull *x)
    212 {
    213 	char g;
    214 
    215 	while((g = getchara(x)) != 0){
    216 		//print("%c", g);
    217 		switch(g){
    218 		case '\n':
    219 		case '\t':
    220 		case '\r':
    221 		case ' ':
    222 		case '>':
    223 		case '/':
    224 			x->na = addchara(x->na, &x->ln, g);
    225 			return x->na;
    226 		default:
    227 			x->na = addchara(x->na, &x->ln, g);
    228 		}
    229 	}
    230 
    231 	return nil;
    232 }
    233 
    234 xmlpull *
    235 nextxmlpull(xmlpull *x)
    236 {
    237 	char g;
    238 
    239 	if(x->va != nil)
    240 		free(x->va);
    241 
    242 	if(x->ev == START_TAG){
    243 		if(x->lm != nil)
    244 			free(x->lm);
    245 		x->lm = x->na;
    246 		x->la = x->ln;
    247 	} else
    248 		if(x->na != nil)
    249 			free(x->na);
    250 
    251 	x->na = nil;
    252 	x->va = nil;
    253 	x->ln = 0;
    254 	x->lv = 0;
    255 	g = '\0';
    256 
    257 	switch(x->nev){
    258 	case START_DOCUMENT:
    259 		if((x->na = readuntil(x, x->na, &x->ln, '<', 0)) == nil)
    260 			x->nev = END_DOCUMENT;
    261 		else
    262 			x->nev = START_TAG;
    263 		x->ev = START_DOCUMENT;
    264 		break;
    265 	case START_TAG:
    266 		g = getchara(x);
    267 		//print("%c", g);
    268 		if(g == '/')
    269 			x->ev = END_TAG;
    270 		else {
    271 			x->na = addchara(x->na, &x->ln, g);
    272 			x->ev = START_TAG;
    273 		}
    274 
    275 		if(readname(x) == nil)
    276 			x->nev = END_DOCUMENT;
    277 		else {
    278 			if(!strncmp(x->na, "![CDATA[", 8)) {
    279 				memmove(x->na, x->na + 8, strlen(x->na) - 8);
    280 				x->ln -= 8;
    281 				x->na = readuntilstr(x, "]]>");
    282 				x->ev = TEXT;
    283 				x->nev = TEXT;
    284 				return x;
    285 			}
    286 			if(!strncmp(x->na, "!--", 3)) {
    287 				x->na[x->ln - 1] = '\0';
    288 				x->nev = TEXT_C;
    289 				return x;
    290 			}
    291 			if(x->ev == END_TAG){
    292 				x->na[x->ln - 1] = '\0';
    293 				x->nev = TEXT;
    294 			} else {
    295 				switch(x->na[x->ln - 1]){
    296 				case '/':
    297 					getchara(x);
    298 					x->ev = START_END_TAG;
    299 					x->nev = TEXT;
    300 					x->na[x->ln - 1] = '\0';
    301 					break;
    302 				case '>':
    303 					x->nev = TEXT;
    304 					x->na[x->ln - 1] = '\0';
    305 					break;
    306 				default:
    307 					x->na[x->ln - 1] = '\0';
    308 					x->nev = ATTR;
    309 				}
    310 			}
    311 		}
    312 		break;
    313 	case TEXT_C:
    314 		g = '>';
    315 	case TEXT:
    316 		if(g != '>')
    317 			g = '<';
    318 
    319 		if((x->na = readuntil(x, x->na, &x->ln, g, 0)) == nil){
    320 			x->ev = END_DOCUMENT;
    321 			x->nev = END_DOCUMENT + 1;
    322 		} else {
    323 			if(x->nev == TEXT_C)
    324 				x->nev = TEXT;
    325 			else
    326 				x->nev = START_TAG;
    327 			x->ev = TEXT;
    328 		}
    329 		break;
    330 	case ATTR:
    331 		if(parseattrib(x) == nil){
    332 			//print("%c\n", x->na[x->ln - 1]);
    333 			switch(x->na[x->ln - 1]){
    334 			case '/':
    335 				free(x->na);
    336 				x->na = x->lm;
    337 				x->ln = x->la;
    338 				x->lm = nil;
    339 				x->la = 0;
    340 
    341 				getchara(x);
    342 				x->ev = END_TAG;
    343 				x->nev = TEXT;
    344 				return x;
    345 			case '>':
    346 			default:
    347 				x->na[x->ln - 1] = '\0';
    348 			}
    349 			x->ev = ATTR;
    350 			x->nev = TEXT;
    351 			return nextxmlpull(x);
    352 		} else
    353 			x->nev = ATTR;
    354 		x->ev = ATTR;
    355 		break;
    356 	case END_DOCUMENT:
    357 		x->ev = END_DOCUMENT;
    358 		x->nev = END_DOCUMENT + 1;
    359 		break;
    360 	default:
    361 		return nil;
    362 	}
    363 
    364 	return x;
    365 }
    366 
    367 xmlpull *
    368 writexmlpull(xmlpull *x)
    369 {
    370 	char *b;
    371 
    372 	b = nil;
    373 
    374 	switch(x->nev){
    375 	case START_DOCUMENT:
    376 		if(write(x->fd, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", 39) < 0)
    377 			return nil;
    378 		return x;
    379 	case START_TAG:
    380 		if(x->na == nil)
    381 			return nil;
    382 
    383 		b = reallocp(b, x->ln + 3, 2);
    384 		snprint(b, x->ln + 3, "<%s ", x->na);
    385 		if(write(x->fd, b, strlen(b)) < 0){
    386 			free(b);
    387 			return nil;
    388 		}
    389 		free(b);
    390 		return x;
    391 	case START_END_TAG:
    392 		if(x->na == nil)
    393 			return nil;
    394 
    395 		b = reallocp(b, x->ln + 4, 2);
    396 		snprint(b, x->ln + 4, "<%s/>", x->na);
    397 		if(write(x->fd, b, strlen(b)) < 0){
    398 			free(b);
    399 			return nil;
    400 		}
    401 		free(b);
    402 		return x;
    403 	case TEXT:
    404 		if(x->na == nil)
    405 			return nil;
    406 		if(write(x->fd, x->na, x->ln) < 0)
    407 			return nil;
    408 		return x;
    409 	case TEXT_C:
    410 		if(x->na == nil)
    411 			return nil;
    412 
    413 		b = reallocp(b, x->ln + 5, 2);
    414 		snprint(b, x->ln + 5, "%s -->", x->na);
    415 		if(write(x->fd, b, strlen(b)) < 0){
    416 			free(b);
    417 			return nil;
    418 		}
    419 		free(b);
    420 		return x;
    421 	case ATTR:
    422 		if(x->na == nil)
    423 			return nil;
    424 
    425 		b = reallocp(b, x->ln + x->lv + 5, 2);
    426 		snprint(b, x->ln + x->lv + 5, "%s=\"%s\" ", x->na, (x->va == nil) ? "" : x->va);
    427 		if(write(x->fd, b, strlen(b)) < 0){
    428 			free(b);
    429 			return nil;
    430 		}
    431 		free(b);
    432 		return x;
    433 	case END_TAG:
    434 		if(x->na == nil)
    435 			return nil;
    436 
    437 		b = reallocp(b, x->ln + 4, 2);
    438 		snprint(b, x->ln + 4, "</%s>", x->na);
    439 		if(write(x->fd, b, strlen(b)) < 0){
    440 			free(b);
    441 			return nil;
    442 		}
    443 		free(b);
    444 		return x;
    445 	case END_TAG_S:
    446 		if(write(x->fd, "/>", 2) < 0)
    447 			return nil;
    448 		return x;
    449 	case END_TAG_N:
    450 		if(write(x->fd, ">", 1) < 0)
    451 			return nil;
    452 		return x;
    453 	case END_DOCUMENT:
    454 		close(x->fd);
    455 		return nil;	
    456 	default:
    457 		break;
    458 	}
    459 
    460 	return nil;
    461 }
    462