mime.c (26639B)
1 /* 2 * Copy me if you can. 3 * by 20h 4 */ 5 6 #include <unistd.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <strings.h> 11 #include <ctype.h> 12 #include <iconv.h> 13 #include <errno.h> 14 #include <time.h> 15 16 #include "ind.h" 17 #include "llist.h" 18 #include "mime.h" 19 #include "parser.h" 20 #include "base64.h" 21 #include "quote.h" 22 #include "param.h" 23 #include "dos.h" 24 25 enum { 26 HEADER = 0x01, 27 HEADERVALUE, 28 }; 29 30 mime_t * 31 mime_new(void) 32 { 33 mime_t *part; 34 35 part = mallocz(sizeof(mime_t), 2); 36 part->hdrs = llist_new(); 37 part->parts = llist_new(); 38 part->state = HEADER; 39 40 return part; 41 } 42 43 void 44 mime_subfree(mime_t *mime, llistelem_t *elem) 45 { 46 forllist(mime->parts, elem) { 47 if (elem->key == NULL) 48 mime_free((mime_t *)elem->data); 49 elem->data = NULL; 50 } 51 llist_free(mime->parts); 52 } 53 54 void 55 mime_free(mime_t *mime) 56 { 57 mime_subfree(mime, NULL); 58 llist_free(mime->hdrs); 59 60 if (mime->body != NULL) 61 free(mime->body); 62 if (mime->partid != NULL) 63 free(mime->partid); 64 if (mime->ct != NULL) 65 free(mime->ct); 66 if (mime->cte != NULL) 67 free(mime->cte); 68 if (mime->charset != NULL) 69 free(mime->charset); 70 if (mime->boundary != NULL) 71 free(mime->boundary); 72 if (mime->rawhdrs != NULL) 73 free(mime->rawhdrs); 74 free(mime); 75 } 76 77 struct tm * 78 mime_parsedate(char *str) 79 { 80 struct tm tim; 81 82 memset(&tim, 0, sizeof(tim)); 83 if (strptime(str, "%a, %d %b %Y %T %z", &tim) != NULL) 84 return memdup(&tim, sizeof(tim)); 85 86 if (!strncmp(str, "Date: ", 6)) 87 str += 6; 88 89 /* 90 * Malformatted dates seen in the wild. 91 */ 92 if (strptime(str, "%a, %d %b %Y %T %Z", &tim) != NULL) 93 return memdup(&tim, sizeof(tim)); 94 95 if (strptime(str, "%d %b %Y %T %z", &tim) != NULL) 96 return memdup(&tim, sizeof(tim)); 97 98 if (strptime(str, "%a, %d %b %Y, %T %z", &tim) != NULL) 99 return memdup(&tim, sizeof(tim)); 100 101 if (strptime(str, "%d.%m.%Y", &tim) != NULL) 102 return memdup(&tim, sizeof(tim)); 103 104 return memdup(&tim, sizeof(tim)); 105 } 106 107 char * 108 mime_iconv(char *str, char *from, char *to) 109 { 110 iconv_t *ifd; 111 size_t left, avail, nconv; 112 char *outb, *strp, *outbp; 113 int olen, sd; 114 115 ifd = iconv_open(to, from); 116 if (ifd == (iconv_t)-1) 117 return NULL; 118 119 //printf("mime_iconv: '%s'; from='%s'\n", str, from); 120 121 left = strlen(str); 122 olen = left / 2; 123 avail = olen; 124 outb = mallocz(olen+1, 1); 125 outbp = outb; 126 strp = str; 127 for (;;) { 128 nconv = iconv(ifd, &strp, &left, &outbp, &avail); 129 if (nconv == (size_t)-1) { 130 if (errno == E2BIG) { 131 olen += 5; 132 sd = outbp - outb; 133 outb = reallocz(outb, olen+1, 0); 134 outbp = &outb[sd]; 135 avail += 5; 136 continue; 137 } 138 if (errno == EILSEQ || errno == EINVAL) 139 return NULL; 140 free(outb); 141 iconv_close(ifd); 142 return NULL; 143 } 144 break; 145 } 146 147 iconv_close(ifd); 148 if (outbp != NULL) 149 outbp[0] = '\0'; 150 return outb; 151 } 152 153 char * 154 mime_decodeheaderext(char *value) 155 { 156 char *work, *cret, *ret, *cs, *str, *enc, *ast, *dstr; 157 int len, slen; 158 159 len = strlen(value); 160 161 ret = memdupz(value, len); 162 work = memdupz(value, len); 163 164 if (!(work[0] == '=' && work[1] == '?' && work[len-1] == '=' 165 && work[len-2] == '?')) { 166 free(work); 167 return ret; 168 } 169 cs = &work[2]; 170 171 work[len-2] = '\0'; 172 enc = strchr(&work[2], '?'); 173 if (enc == NULL) { 174 free(work); 175 return ret; 176 } 177 enc[0] = '\0'; 178 enc++; 179 str = strchr(enc, '?'); 180 if (str == NULL) { 181 free(work); 182 return ret; 183 } 184 str[0] = '\0'; 185 str++; 186 187 /* 188 * RFC 2231 :( 189 * See: https://en.wikipedia.org/wiki/Mr._Mime 190 */ 191 ast = strchr(enc, '*'); 192 if (ast != NULL) 193 ast[0] = '\0'; 194 195 slen = strlen(str); 196 if (slen == 0) { 197 free(work); 198 free(ret); 199 return memdupz("", 1); 200 } 201 202 cret = NULL; 203 switch(enc[0]) { 204 case 'B': 205 case 'b': 206 cret = b64dec(str, &slen); 207 break; 208 case 'Q': 209 case 'q': 210 cret = qpdec(str, &slen, 1); 211 break; 212 } 213 214 //printf("mime_decodeheader: mime_iconv str='%s'; cret='%s';\n", str, cret); 215 if (cret != NULL) { 216 free(ret); 217 if (strcasecmp(cs, "utf-8")) { 218 dstr = mime_iconv(cret, cs, "UTF-8"); 219 if (dstr == NULL) { 220 str = smprintf("ERR(%s)", str); 221 } else { 222 str = dstr; 223 } 224 free(cret); 225 } else { 226 str = cret; 227 } 228 } else { 229 str = ret; 230 } 231 free(work); 232 233 return str; 234 } 235 236 int 237 mime_isextws(char *str, int len) 238 { 239 int i; 240 241 for (i = 0; i < len; i++) { 242 switch (str[i]) { 243 case '\n': 244 case '\r': 245 case ' ': 246 case '\t': 247 break; 248 default: 249 return 0; 250 } 251 } 252 return 1; 253 } 254 255 char * 256 mime_decodeheader(char *value) 257 { 258 char *work, *extp, *extw, *extb, *exte, *extr, *ret, *q1, *q2; 259 int vlen, rlen, elen, wasenc, i; 260 261 //printf("mime_decodeheader\n"); 262 ret = NULL; 263 rlen = 0; 264 vlen = strlen(value); 265 work = memdup(value, vlen+1); 266 extp = work; 267 wasenc = 0; 268 269 /* 270 * Avoid being tricked by malformed headers. 271 */ 272 for (i = 0; i < 32; i++) { 273 extb = strstr(extp, "=?"); 274 if (extb != NULL) { 275 elen = extb - extp; 276 if (extp != extb && (!wasenc || 277 !mime_isextws(extp, elen))) { 278 extw = memdupz(extp, elen); 279 ret = memdupcat(ret, rlen, extw, elen+1); 280 free(extw); 281 rlen += elen; 282 } 283 284 exte = NULL; 285 q1 = strchr(&extb[2], '?'); 286 if (q1 != NULL) { 287 q2 = strchr(&q1[1], '?'); 288 if (q2 != NULL) 289 exte = strstr(&q2[1], "?="); 290 } 291 if (exte != NULL) { 292 elen = &exte[2] - extb; 293 extw = memdupz(extb, elen); 294 extr = mime_decodeheaderext(extw); 295 free(extw); 296 elen = strlen(extr); 297 ret = memdupcat(ret, rlen, extr, elen+1); 298 rlen += elen; 299 free(extr); 300 extp = &exte[2]; 301 wasenc = 1; 302 continue; 303 } 304 } 305 break; 306 } 307 if ((extp - work) < vlen) 308 ret = memdupcat(ret, rlen, extp, strlen(extp)+1); 309 free(work); 310 311 /* Remove any space character, like newline. */ 312 if (ret != NULL) 313 strnormalizespace(ret); 314 315 return ret; 316 } 317 318 char *cstries[] = { 319 "utf-8", 320 "iso-8859-1", 321 "windows-1252", 322 "koi8", 323 "euc-jp" 324 "shift_jis", 325 "big5", 326 "iso-8859-15", 327 NULL 328 }; 329 330 char * 331 mime_guesscharset(char *str) 332 { 333 int i, eq; 334 char *itry; 335 336 for (i = 0; i < nelem(cstries); i++) { 337 itry = mime_iconv(str, cstries[i], cstries[i]); 338 if (itry == NULL) 339 continue; 340 eq = strcmp(str, itry); 341 free(itry); 342 if (!eq) 343 break; 344 } 345 346 return cstries[i]; 347 } 348 349 char * 350 mime_guessheader(char *value) 351 { 352 char *nvalue, *gcs; 353 354 gcs = NULL; 355 356 //printf("mime_guessheader '%s'\n", value); 357 358 nvalue = value; 359 if (!strisascii(value)) { 360 /* 361 * Guessing begins. Some major MUA developers did not read any 362 * RFCs. 363 */ 364 365 gcs = mime_guesscharset(value); 366 if (gcs != NULL) { 367 nvalue = mime_iconv(value, gcs, "utf-8"); 368 if (nvalue == NULL) { 369 nvalue = value; 370 gcs = NULL; 371 } 372 } 373 } 374 375 value = mime_decodeheader(nvalue); 376 if (gcs != NULL) 377 free(nvalue); 378 return value; 379 } 380 381 char * 382 mime_decodeparam(char *value) 383 { 384 char *work, *cret, *ret, *cs, *str, *lang, *dstr; 385 int len, slen; 386 387 len = strlen(value); 388 ret = memdup(value, len+1); 389 work = memdup(value, len+1); 390 391 cs = work; 392 lang = strchr(work, '\''); 393 if (lang == NULL) { 394 free(work); 395 return ret; 396 } 397 lang[0] = '\0'; 398 lang++; 399 str = strchr(lang, '\''); 400 if (str == NULL) { 401 free(work); 402 return ret; 403 } 404 str[0] = '\0'; 405 str++; 406 407 slen = strlen(str); 408 cret = paramdec(str, &slen); 409 410 if (cret != NULL) { 411 if (strcasecmp(cs, "utf-8")) { 412 free(ret); 413 dstr = mime_iconv(cret, cs, "UTF-8"); 414 if (dstr == NULL) { 415 str = smprintf("ERR(%s)", str); 416 } else { 417 str = dstr; 418 } 419 free(cret); 420 } else { 421 free(ret); 422 str = cret; 423 } 424 } else { 425 str = ret; 426 } 427 free(work); 428 429 return str; 430 } 431 432 char * 433 mime_encodestring(char *value) 434 { 435 char *b64, *ret; 436 437 if (strisascii(value)) 438 return memdups(value); 439 440 b64 = b64enc(value, strlen(value)); 441 ret = smprintf("=?UTF-8?b?%s?=", b64); 442 free(b64); 443 444 return ret; 445 } 446 447 char * 448 mime_encodeheader(char *header, char *value) 449 { 450 char *ret, *b64, *p, *mp, *str; 451 int hlen, lmax, isascii, firstline, slen; 452 453 isascii = 0; 454 455 /* 456 * RFC 2047: 457 * One encoded word should be at max. 75 characters. 458 * One encoded line is limited to 76 characters. 459 */ 460 hlen = strlen(header) + 2; 461 if (strisascii(value)) { 462 isascii = 1; 463 lmax = 75 - hlen; 464 } else { 465 lmax = 63 - hlen; 466 } 467 slen = strlen(value); 468 469 ret = NULL; 470 for (p = value, firstline = 1; slen > 0; slen -= lmax, p = mp) { 471 if (firstline) { 472 lmax += hlen; 473 firstline = 0; 474 } 475 476 mp = findlimitws(p, lmax); 477 if (mp == NULL) { 478 str = memdupz(p, slen); 479 } else { 480 str = memdupz(p, mp - p); 481 } 482 483 if (!isascii) { 484 b64 = b64enc(str, strlen(str)); 485 free(str); 486 mp = smprintf("=?UTF-8?b?%s?=", b64); 487 free(b64); 488 str = mp; 489 } 490 491 if (ret != NULL) { 492 mp = smprintf("%s %s", ret, str); 493 free(ret); 494 ret = mp; 495 } else { 496 ret = smprintf("%s", str); 497 } 498 } 499 500 return ret; 501 } 502 503 int 504 mime_paramsort(llistelem_t *elem1, llistelem_t *elem2) 505 { 506 int a, b; 507 char *n1, *n2; 508 509 n1 = strrchr(elem1->key, '*'); 510 if (n1 == NULL) 511 a = -1; 512 else 513 a = atoi(&n1[1]); 514 n2 = strrchr(elem2->key, '*'); 515 if (n2 == NULL) 516 b = -1; 517 else 518 b = atoi(&n2[1]); 519 520 return a - b; 521 } 522 523 /* 524 * Order and concatenate ordered params. 525 */ 526 llist_t * 527 mime_sanitizeparams(llist_t *params) 528 { 529 llistelem_t *param, *hit, *nparam; 530 llist_t *reorder, *hits; 531 char *nvalue, *dvalue; 532 int i, n; 533 //char *key; 534 //int klen; 535 536 reorder = llist_new(); 537 //printf("mime_sanitizeparams: start\n"); 538 n = 0; 539 forllist(params, param) { 540 if (n == 0) { 541 //printf("first key: %s\n", param->key); 542 n++; 543 continue; 544 } 545 546 //key = param->key; 547 //printf("key = %s\n", key); 548 //klen = strlen(key); 549 550 nvalue = strrchr(param->key, '*'); 551 if (nvalue == NULL) 552 continue; 553 for (i = 1; nvalue[i]; i++) 554 if (!isdigit(nvalue[i])) 555 break; 556 if (nvalue[i]) 557 continue; 558 //printf("nvalue = %s\n", nvalue); 559 560 dvalue = mime_decodeparam((char *)param->data); 561 if (dvalue != NULL) { 562 //printf("decoded: %s\n", dvalue); 563 free(param->data); 564 param->data = dvalue; 565 param->datalen = strlen(dvalue)+1; 566 } 567 568 nvalue[0] = '\0'; 569 //printf("key after = %s\n", key); 570 if (llist_get(reorder, param->key) != NULL) 571 llist_add(reorder, param->key, NULL, 0); 572 nvalue[0] = '*'; 573 } 574 575 /* 576 * Sort and concatenate the return list. 577 */ 578 forllist(reorder, param) { 579 hits = llist_new(); 580 forllist(params, nparam) { 581 if (!strncmp(nparam->key, param->key, 582 strlen(param->key))) { 583 //printf("nparam->key = %s\n", nparam->key); 584 llist_add(hits, nparam->key, nparam->data, 585 nparam->datalen); 586 } 587 } 588 if (hits->len < 1) { 589 llist_free(hits); 590 continue; 591 } 592 593 nparam = llistelem_new(param->key, NULL, 0); 594 hits = llist_internsort(hits, mime_paramsort); 595 596 forllist(hits, hit) { 597 nparam->data = memdupcat(nparam->data, 598 nparam->datalen, hit->data, 599 hit->datalen); 600 nparam->datalen += hit->datalen-1; 601 } 602 603 params = llist_listdel(params, hits); 604 llist_free(hits); 605 llist_addelem(params, nparam); 606 } 607 llist_free(reorder); 608 609 return params; 610 } 611 612 llist_t * 613 mime_parseheader(char *field) 614 { 615 char *tok, *buf, *key, *value, *sep, *eq, quot; 616 llist_t *ret; 617 int tlen; 618 619 buf = memdups(field); 620 621 tok = buf; 622 ret = llist_new(); 623 //printf("mime_parseheader: buf = '%s'\n", buf); 624 while (tok[0] != '\0') { 625 key = NULL; 626 value = NULL; 627 628 /* 629 * 0.) Sanitize the beginning and the end. 630 */ 631 while (isspace(tok[0])) 632 tok++; 633 tlen = strlen(tok); 634 while (isspace(tok[tlen-1])) { 635 tok[tlen-1] = '\0'; 636 tlen--; 637 } 638 //printf("mime_parseheader: after sanitize: tok = '%s'\n", tok); 639 640 /* 641 * 1.) ([\t\r\v\f ]*)key 642 */ 643 key = tok + strspn(tok, "\t\r\v\f "); 644 //printf("mime_parseheader: key = '%s'\n", tok); 645 646 /* 647 * 2.) key 648 */ 649 tok = key + strcspn(key, "\t\r\v\f =;"); 650 if (tok[0] == ';' || tok[0] == '\0') { 651 quot = tok[0]; 652 tok[0] = '\0'; 653 if (strlen(key) > 0) { 654 //printf("mime_parseheader: add key '%s'\n", key); 655 llist_add(ret, key, NULL, 0); 656 } 657 if (quot != '\0') 658 tok++; 659 continue; 660 } 661 662 //printf("mime_parseheader: tok = '%s'\n", tok); 663 if (tok[0] == '=') { 664 eq = tok; 665 } else { 666 /* 667 * 3.) key([\t\r\v\f ]*)= 668 */ 669 tok[0] = '\0'; 670 eq = tok + 1 + strspn(tok+1, "\t\r\v\f ;"); 671 if (eq[0] == ';') { 672 if (strlen(key) > 0) 673 llist_add(ret, key, NULL, 0); 674 tok++; 675 continue; 676 } 677 678 if (eq[0] != '=') { 679 /* 680 * 3.1.) key; 681 */ 682 if (strlen(key) > 0) 683 llist_add(ret, key, NULL, 0); 684 tok++; 685 continue; 686 } 687 } 688 tok[0] = '\0'; 689 /* 690 * 4.) key=([\t\r\v\f ]*)("|)value 691 */ 692 tok = eq + 1 + strspn(eq+1, "\t\r\v\f "); 693 switch (tok[0]) { 694 case '"': 695 case '\'': 696 quot = tok[0]; 697 for (sep = tok+1; sep[0] != '\0'; sep++) { 698 if (sep[0] == quot) { 699 sep[0] = '\0'; 700 sep++; 701 break; 702 } 703 if (sep[0] == '\\' && sep[1] != '\0') 704 memmove(&sep[1], sep, strlen(&sep[1])); 705 } 706 value = &tok[1]; 707 tok = sep; 708 709 sep = tok + strcspn(tok, ";"); 710 if (sep[0] == ';') { 711 tok = sep + 1; 712 } else { 713 tok = sep; 714 } 715 break; 716 default: 717 /* 718 * 4.1.) value 719 */ 720 value = tok; 721 sep = tok + strcspn(tok, "\t\r\v\f ;"); 722 if (sep[0] == ';') { 723 sep[0] = '\0'; 724 tok = sep + 1; 725 } else { 726 tok = sep; 727 } 728 break; 729 } 730 731 //printf("mime_parseheader: add %s = '%s'\n", key, value); 732 llist_add(ret, key, value, strlen(value)+1); 733 } 734 free(buf); 735 736 //printf("ret->len = %d\n", ret->len); 737 if (ret->len > 0) 738 return mime_sanitizeparams(ret); 739 740 llist_free(ret); 741 return NULL; 742 } 743 744 void 745 mime_mkpartidsintern(mime_t *mime, char *sect, int pid) 746 { 747 llistelem_t *part; 748 749 mime->partid = smprintf("%s%d", sect, pid); 750 sect = smprintf("%s.", mime->partid); 751 752 pid = 1; 753 forllist(mime->parts, part) { 754 mime_mkpartidsintern((mime_t *)part->data, sect, pid); 755 pid++; 756 } 757 free(sect); 758 } 759 760 void 761 mime_mkpartids(mime_t *mime) 762 { 763 int pid; 764 llistelem_t *part; 765 766 mime->partid = memdupz("0", 1); 767 pid = 1; 768 forllist(mime->parts, part) 769 mime_mkpartidsintern((mime_t *)part->data, "", pid++); 770 } 771 772 /* 773 * This functions searches for the next boundary occurence. It will 774 * return *choice = 1, if it was an end boundary; otherwise 0. 775 */ 776 char * 777 mime_sgetbound(char *bound, char **p, char *max, int *len, int *choice) 778 { 779 char *ret, *op; 780 int slen, isenl, isend, sublen; 781 782 ret = NULL; 783 784 //printf("bound = '%s'\n", bound); 785 //printf("p = '%s'\n", *p); 786 slen = strlen(bound); 787 *choice = 0; 788 isenl = 0; 789 isend = 0; 790 sublen = 0; 791 792 for (;;) { 793 op = memmem(*p, (max-(*p)), bound, slen); 794 if (op == NULL) 795 return ret; 796 797 if (!strncmp(op+slen, "--", 2)) { 798 isend = 1; 799 if (op[slen+2] == '\n') 800 isenl = 1; 801 } else if (op[slen] == '\n') { 802 isenl = 1; 803 } 804 //printf("isenl = %d, isend = %d\n", isenl, isend); 805 806 if (op == *p) 807 break; 808 809 if (op > (*p + 1) && op[-2] == '\r' && op[-1] == '\n') 810 sublen = 2; 811 if (op > *p && op[-2] != '\r' && op[-1] == '\n') 812 sublen = 1; 813 //printf("sublen = %d\n", sublen); 814 break; 815 } 816 817 if (isend) { 818 *choice = 1; 819 slen += 2; 820 } 821 822 *len = op - *p - sublen; 823 ret = memdupz(*p, *len); 824 825 *p = op + slen + (isend * 2) + (2 - isenl); 826 827 //printf("p = '%s'\n", *p); 828 829 return ret; 830 } 831 832 mime_t * 833 mime_preparepart(mime_t *mime) 834 { 835 llistelem_t *hdr, *field; 836 llist_t *hdrf; 837 838 //printf("mime = %p\n", mime); 839 hdr = llist_ciget(mime->hdrs, "content-type"); 840 if (hdr != NULL && hdr->data != NULL && strlen(hdr->data) > 0) { 841 //printf("content-type: %s\n", (char *)hdr->data); 842 hdrf = mime_parseheader(hdr->data); 843 //printf("hdrf = %p\n", hdrf); 844 //printf("%s\n", hdrf->first->key); 845 if (hdrf != NULL) { 846 if (!strncasecmp(hdrf->first->key, "multipart", 9)) { 847 //printf("is multipart\n"); 848 field = llist_ciget(hdrf, "boundary"); 849 if (field == NULL) { 850 return NULL; 851 //die("Could not find boundary " 852 // "in multipart!\n"); 853 } 854 mime->boundary = smprintf("--%s", 855 (char *)field->data); 856 //printf("boundary: \"%s\"\n", mime->boundary); 857 } 858 mime->ct = memdups(hdrf->first->key); 859 860 field = llist_ciget(hdrf, "charset"); 861 if (field != NULL && field->data != NULL) { 862 mime->charset = memdupz(field->data, 863 field->datalen); 864 } 865 866 llist_free(hdrf); 867 } 868 } 869 870 if (mime->ct == NULL) 871 mime->ct = memdupz("text/plain", 10); 872 //printf("mime->ct = %s\n", mime->ct); 873 if (mime->charset == NULL) 874 mime->charset = memdupz("iso8859-1", 9); 875 //printf("mime->charset = %s\n", mime->charset); 876 877 hdr = llist_ciget(mime->hdrs, "Content-Transfer-Encoding"); 878 if (hdr != NULL && hdr->data != NULL) { 879 mime->cte = memdupz(hdr->data, hdr->datalen); 880 } else { 881 mime->cte = memdupz("7bit", 4); 882 } 883 //printf("mime->cte = %s\n", mime->cte); 884 885 return mime; 886 } 887 888 mime_t * 889 mime_parsebufintern(mime_t *mime, char *str, int len) 890 { 891 int i, partlen, isend, blen; 892 char *p, *rp, buf[1025], *key, *value, *tvalue, *part; 893 llistelem_t *hdr; 894 mime_t *partm; 895 896 rp = str; 897 p = str; 898 for (; (rp = sgets(buf, sizeof(buf)-1, &p));) { 899 blen = strlen(buf); 900 if (buf[blen-1] == '\r') 901 buf[blen-1] = '\0'; 902 //printf("line '%s'\n", buf); 903 904 switch (mime->state) { 905 case HEADERVALUE: 906 switch (buf[0]) { 907 case ' ': 908 case '\t': 909 case '\r': 910 case '\f': 911 case '\v': 912 //printf("hdrvalue: %s (%d)\n", buf, 913 // (int)strlen(buf)); 914 /* 915 * " value" 916 */ 917 sscanf(buf, "%*[ \t\r\v\f]%1024m[^\n]", 918 &value); 919 if (value != NULL && hdr != NULL) { 920 if (hdr->data != NULL) { 921 part = memdup(value, strlen(value)+1); 922 923 /* Adding a space. */ 924 hdr->data = memdupcat(hdr->data, 925 hdr->datalen-1, 926 " ", 1); 927 hdr->datalen++; 928 929 /* Adding the next line. */ 930 i = strlen(part); 931 key = memdupcat(hdr->data, 932 hdr->datalen-1, 933 part, i+1); 934 free(part); 935 hdr->data = key; 936 hdr->datalen += i; 937 //printf("%s = %s\n", hdr->key, 938 // (char *)hdr->data); 939 } 940 free(value); 941 } 942 goto mimeparsebufagain; 943 default: 944 break; 945 } 946 947 if (hdr != NULL) 948 hdr = NULL; 949 mime->state = HEADER; 950 /* FALL THROUGH: No header value found. */ 951 case HEADER: 952 //printf("hdr: %s\n", buf); 953 954 /* 955 * End of headers. 956 */ 957 if (strlen(buf) == 0) { 958 //printf("end of headers '%c' + '%c'\n", p[0], p[1]); 959 /* 960 * Heuristics for ugly e-mail generators 961 * follow. 962 */ 963 /* 964 * Does the line begin with "--"? Looks 965 * like a boundary. Go to next body part. 966 */ 967 if (p[0] == '-' && p[1] == '-') { 968 mime->rawhdrs = memdupz(str, (p - str)); 969 mime->rawhdrslen = p - str; 970 goto mimeparsebufbodyparse; 971 } 972 /* 973 * Does the line have some "header: 974 * value\n" form? Go on parsing headers. 975 */ 976 for (key = p; key[0] != '\n'; key++) { 977 //printf("key[0] = '%c'\n", key[0]); 978 if (key[0] == ':') 979 break; 980 if (key[0] == ' ') { 981 mime->rawhdrs = memdupz(str, (p - str)); 982 mime->rawhdrslen = p - str; 983 goto mimeparsebufbodyparse; 984 } 985 } 986 /* 987 * A line simply ended with no header 988 * suspicion. 989 */ 990 if (key[0] == '\n') { 991 mime->rawhdrs = memdupz(str, (p - str)); 992 mime->rawhdrslen = p - str; 993 goto mimeparsebufbodyparse; 994 } 995 } 996 997 /* 998 * "key: value" 999 */ 1000 key = NULL; 1001 value = NULL; 1002 tvalue = NULL; 1003 sscanf(buf, "%1024m[^: \t\r\v\f]:" 1004 "%1024m[^\n]", &key, &value); 1005 if (value == NULL) 1006 value = memdupz(" ", 2); 1007 //printf("%s = %s\n", key, value); 1008 if (key != NULL && value != NULL) { 1009 tvalue = value + strspn(value, 1010 " \t\r\v\f"); 1011 hdr = llistelem_new(key, tvalue, 1012 strlen(tvalue)+1); 1013 llist_addelem(mime->hdrs, hdr); 1014 mime->state = HEADERVALUE; 1015 } 1016 if (key != NULL) 1017 free(key); 1018 if (value != NULL) 1019 free(value); 1020 break; 1021 default: 1022 mimeparsebufagain: 1023 break; 1024 } 1025 } 1026 //printf("return mime_preparepart\n"); 1027 return mime_preparepart(mime); 1028 1029 mimeparsebufbodyparse: 1030 //printf("body parsing begins.\n"); 1031 mime = mime_preparepart(mime); 1032 if (mime == NULL) 1033 return NULL; 1034 1035 /* 1036 * It is not a multipart message, so take the remainder 1037 * of the given message. 1038 */ 1039 if (mime->boundary == NULL) { 1040 //printf("No boundary there. Taking the remainder.\n"); 1041 partlen = str - p + len; 1042 mime->body = memdupz(p, partlen); 1043 mime->bodylen = partlen; 1044 //printf("strlen = %ld; partlen = %d;\n", strlen(mime->body), 1045 // partlen); 1046 //printf("mime->body = \"%s\"\n", mime->body); 1047 1048 return mime; 1049 } else { 1050 //printf("There is a boundary.\n"); 1051 } 1052 1053 partlen = 0; 1054 //printf("p = \"%s\"\n", p); 1055 mime->body = mime_sgetbound(mime->boundary, &p, str + len - 1, 1056 &partlen, &isend); 1057 mime->bodylen = partlen; 1058 if (isend) { 1059 /* 1060 * This is an end boundary at the beginning 1061 * of a multipart message. Abort. 1062 */ 1063 //die("End boundary at beginning of multipart.\n"); 1064 return mime; 1065 } 1066 if (mime->body == NULL) { 1067 //die("Could not find beginning MIME content.\n"); 1068 return mime; 1069 } 1070 //printf("mime->body = \"%s\"\n", mime->body); 1071 1072 for(;;) { 1073 partlen = 0; 1074 part = mime_sgetbound(mime->boundary, &p, str + len - 1, 1075 &partlen, &isend); 1076 //printf("part = \"%s\"\n", part); 1077 if (part == NULL) { 1078 /* 1079 * There maybe no ending boundary. Some e-mail 1080 * signing applications forget this. 1081 */ 1082 if (p < (str + len - 1)) { 1083 partlen = str - p + len; 1084 part = memdupz(p, partlen); 1085 p = str + len - 1; 1086 } else { 1087 break; 1088 } 1089 } 1090 1091 partm = mime_new(); 1092 partm = mime_parsebufintern(partm, part, partlen); 1093 if (partm != NULL) 1094 llist_addraw(mime->parts, NULL, partm, sizeof(partm)); 1095 free(part); 1096 1097 if (isend) 1098 break; 1099 } 1100 1101 return mime; 1102 } 1103 1104 mime_t * 1105 mime_parsebuf(char *str, int len) 1106 { 1107 mime_t *ret, *pret; 1108 1109 ret = mime_new(); 1110 pret = mime_parsebufintern(ret, str, len); 1111 if (pret == NULL) { 1112 mime_free(ret); 1113 return NULL; 1114 } 1115 1116 mime_mkpartids(ret); 1117 1118 return ret; 1119 } 1120 1121 char * 1122 mime_searchsplit(char *data, int klen) 1123 { 1124 char *p, *op; 1125 int incomment; 1126 1127 if (strlen(data) + klen <= 74) 1128 return NULL; 1129 1130 p = &data[73 - klen]; 1131 op = p; 1132 incomment = 0; 1133 1134 for (;;) { 1135 switch (p[0]) { 1136 case '"': 1137 case '\'': 1138 /* 1139 * This is meant to be broken. 1140 * It's just heuristics. 1141 */ 1142 incomment = !incomment; 1143 break; 1144 case ' ': 1145 case '\t': 1146 case '\f': 1147 case '\n': 1148 case '\r': 1149 if (incomment) 1150 break; 1151 return p; 1152 case '\0': 1153 return &data[73 - klen]; 1154 } 1155 1156 if (p == data) { 1157 p = op; 1158 op = NULL; 1159 continue; 1160 } 1161 1162 if (op != NULL) { 1163 p--; 1164 } else { 1165 p++; 1166 } 1167 } 1168 1169 return NULL; 1170 } 1171 1172 char * 1173 mime_printheader(llistelem_t *hdr) 1174 { 1175 char *buf, *sp, *osp; 1176 int blen, splen; 1177 1178 blen = 0; 1179 sp = mime_searchsplit((char *)hdr->data, strlen(hdr->key) + 2); 1180 if (sp != NULL) { 1181 buf = smprintf("%s: ", hdr->key); 1182 blen = strlen(buf); 1183 1184 buf = memdupcat(buf, blen, (char *)hdr->data, 1185 (sp - (char *)hdr->data)); 1186 blen += (sp - (char *)hdr->data); 1187 buf = memdupcat(buf, blen, "\r\n", 2); 1188 blen += 2; 1189 1190 for (osp = sp;; osp = sp) { 1191 sp = mime_searchsplit(osp, 8); 1192 if (sp == NULL) 1193 break; 1194 1195 buf = memdupcat(buf, blen, "\t", 1); 1196 blen += 1; 1197 buf = memdupcat(buf, blen, osp, (sp - osp)); 1198 blen += (sp - osp); 1199 buf = memdupcat(buf, blen, "\r\n", 2); 1200 blen += 2; 1201 } 1202 1203 if (strlen(osp) > 0) { 1204 buf = memdupcat(buf, blen, "\t", 1); 1205 blen += 1; 1206 splen = strlen(osp); 1207 buf = memdupcat(buf, blen, osp, splen); 1208 blen += splen; 1209 buf = memdupcat(buf, blen, "\r\n", 2); 1210 } 1211 } else { 1212 buf = smprintf("%s: %s\r\n", hdr->key, (char *)hdr->data); 1213 } 1214 1215 return buf; 1216 } 1217 1218 char * 1219 mime_printbuf(mime_t *mime, int *len) 1220 { 1221 llistelem_t *hdr; 1222 char *ret, *abuf; 1223 int rlen, alen; 1224 1225 rlen = 0; 1226 ret = NULL; 1227 1228 forllist(mime->hdrs, hdr) { 1229 abuf = mime_printheader(hdr); 1230 alen = strlen(abuf); 1231 1232 ret = memdupcat(ret, rlen, abuf, alen); 1233 rlen += alen; 1234 free(abuf); 1235 /* 1236 * TODO: Add part handling. 1237 */ 1238 } 1239 1240 ret = memdupcat(ret, rlen, "\r\n", 2); 1241 rlen += 2; 1242 1243 return ret; 1244 } 1245 1246 void 1247 printtabs(int depth) 1248 { 1249 for (; depth; depth--) 1250 printf("\t"); 1251 } 1252 1253 void 1254 mime_printintern(mime_t *mime, int depth) 1255 { 1256 llistelem_t *elem; 1257 1258 printtabs(depth); 1259 printf("partid: %s\n", mime->partid); 1260 printtabs(depth); 1261 printf("hdr:\n"); 1262 forllist(mime->hdrs, elem) { 1263 printtabs(depth); 1264 printf("%s = %s\n", elem->key, (char *)elem->data); 1265 } 1266 1267 printtabs(depth); 1268 printf("body:\n"); 1269 printtabs(depth); 1270 printf("%d\n", mime->bodylen); 1271 printf("%s", mime->body); 1272 1273 if (mime->parts->len > 0) { 1274 printtabs(depth); 1275 printf("parts:\n"); 1276 forllist(mime->parts, elem) 1277 mime_printintern((mime_t *)elem->data, depth+1); 1278 } 1279 } 1280 1281 void 1282 mime_print(mime_t *mime) 1283 { 1284 mime_printintern(mime, 0); 1285 } 1286 1287 char * 1288 mime_decodepartencoding(mime_t *mime, int *len) 1289 { 1290 char *ret; 1291 1292 //printf("ct = \"%s\"\n", mime->ct); 1293 //printf("cte = \"%s\"\n", mime->cte); 1294 ret = NULL; 1295 if (!strcasecmp(mime->cte, "base64")) { 1296 *len = mime->bodylen; 1297 ret = b64dec(mime->body, len); 1298 } else if (!strcasecmp(mime->cte, "quoted-printable")) { 1299 *len = mime->bodylen; 1300 ret = qpdec(mime->body, len, 0); 1301 } else if (!strncasecmp(mime->ct, "text/", 5)) { 1302 /* Convert CRLF to LF. */ 1303 *len = mime->bodylen; 1304 ret = dosdec(mime->body, len); 1305 } 1306 1307 if (ret == NULL && mime->body != NULL && mime->bodylen > 0) { 1308 *len = mime->bodylen; 1309 ret = memdupz(mime->body, mime->bodylen); 1310 } 1311 1312 return ret; 1313 } 1314 1315 char * 1316 mime_decodepart(mime_t *mime, int *len) 1317 { 1318 char *ret, *cret; 1319 1320 if (mime->bodylen == 0) { 1321 *len = 0; 1322 return memdupz("", 1); 1323 } 1324 1325 ret = mime_decodepartencoding(mime, len); 1326 if (ret == NULL) { 1327 *len = 0; 1328 return memdupz("", 1); 1329 } 1330 1331 if (strcasecmp(mime->cte, "binary")) { 1332 if (strcasecmp(mime->charset, "utf-8")) { 1333 cret = mime_iconv(ret, mime->charset, "UTF-8"); 1334 if (cret != NULL) { 1335 free(ret); 1336 ret = cret; 1337 } 1338 *len = strlen(ret); 1339 } 1340 } 1341 1342 return ret; 1343 } 1344 1345 char * 1346 mime_filename(mime_t *mime) 1347 { 1348 char *filename; 1349 llistelem_t *hdr, *name; 1350 llist_t *hdrp; 1351 1352 filename = NULL; 1353 1354 /* 1355 * 1.) The standard. 1356 */ 1357 hdr = llist_ciget(mime->hdrs, "Content-Disposition"); 1358 if (hdr != NULL && hdr->data != NULL) { 1359 hdrp = mime_parseheader((char *)hdr->data); 1360 if (hdrp != NULL) { 1361 name = llist_ciget(hdrp, "filename"); 1362 if (name != NULL && name->data != NULL) { 1363 filename = mime_guessheader( 1364 (char *)name->data); 1365 } 1366 llist_free(hdrp); 1367 } 1368 1369 if (filename != NULL) 1370 return filename; 1371 } 1372 1373 /* 1374 * 2.) The modern age. 1375 */ 1376 hdr = llist_ciget(mime->hdrs, "Content-Type"); 1377 if (hdr != NULL && hdr->data != NULL) { 1378 hdrp = mime_parseheader((char *)hdr->data); 1379 if (hdrp != NULL) { 1380 name = llist_ciget(hdrp, "name"); 1381 if (name != NULL && name->data != NULL) { 1382 filename = mime_guessheader( 1383 (char *)name->data); 1384 } 1385 llist_free(hdrp); 1386 } 1387 1388 if (filename != NULL) 1389 return filename; 1390 } 1391 1392 return NULL; 1393 } 1394 1395 1396 char * 1397 mime_mkfilename(char *id, mime_t *mime) 1398 { 1399 char *filename; 1400 llistelem_t *hdr; 1401 1402 filename = mime_filename(mime); 1403 if (filename != NULL) 1404 return filename; 1405 1406 /* 1407 * 3.) The ugly. 1408 */ 1409 hdr = llist_ciget(mime->hdrs, "Content-Description"); 1410 if (hdr != NULL && hdr->data != NULL) { 1411 filename = mime_guessheader((char *)hdr->data); 1412 if (filename != NULL) 1413 return filename; 1414 } 1415 1416 /* 1417 * 4.) Last resort. 1418 */ 1419 if (id == NULL) 1420 id = "000"; 1421 return smprintf("%s.%s.part", id, mime->partid); 1422 } 1423 1424 char * 1425 mime_mkboundary(void) 1426 { 1427 srand(time(NULL)); 1428 return smprintf("=--= _TUlNRSBTdWNrcyEK/%x_ =--=", rand()); 1429 } 1430