mime.c (26891B)
1 /* 2 * Copy me if you can. 3 * by 20h 4 */ 5 6 #include <unistd.h> 7 #include <stdio.h> 8 #include <stdlib.h> 9 #include <string.h> 10 #include <strings.h> 11 #include <ctype.h> 12 #include <iconv.h> 13 #include <errno.h> 14 #include <time.h> 15 16 #include "ind.h" 17 #include "llist.h" 18 #include "mime.h" 19 #include "parser.h" 20 #include "base64.h" 21 #include "quote.h" 22 #include "param.h" 23 #include "dos.h" 24 25 enum { 26 HEADER = 0x01, 27 HEADERVALUE, 28 }; 29 30 mime_t * 31 mime_new(void) 32 { 33 mime_t *part; 34 35 part = mallocz(sizeof(mime_t), 2); 36 part->hdrs = llist_new(); 37 part->parts = llist_new(); 38 part->state = HEADER; 39 40 return part; 41 } 42 43 void 44 mime_subfree(mime_t *mime, llistelem_t *elem) 45 { 46 forllist(mime->parts, elem) { 47 if (elem->key == NULL) 48 mime_free((mime_t *)elem->data); 49 elem->data = NULL; 50 } 51 llist_free(mime->parts); 52 } 53 54 void 55 mime_free(mime_t *mime) 56 { 57 mime_subfree(mime, NULL); 58 llist_free(mime->hdrs); 59 60 if (mime->body != NULL) 61 free(mime->body); 62 if (mime->partid != NULL) 63 free(mime->partid); 64 if (mime->ct != NULL) 65 free(mime->ct); 66 if (mime->cte != NULL) 67 free(mime->cte); 68 if (mime->charset != NULL) 69 free(mime->charset); 70 if (mime->boundary != NULL) 71 free(mime->boundary); 72 if (mime->rawhdrs != NULL) 73 free(mime->rawhdrs); 74 free(mime); 75 } 76 77 struct tm * 78 mime_parsedate(char *str) 79 { 80 struct tm tim; 81 82 memset(&tim, 0, sizeof(tim)); 83 if (strptime(str, "%a, %d %b %Y %T %z", &tim) != NULL) 84 return memdup(&tim, sizeof(tim)); 85 86 if (!strncmp(str, "Date: ", 6)) 87 str += 6; 88 89 /* 90 * Malformatted dates seen in the wild. 91 */ 92 if (strptime(str, "%a, %d %b %Y %T %Z", &tim) != NULL) 93 return memdup(&tim, sizeof(tim)); 94 95 if (strptime(str, "%d %b %Y %T %z", &tim) != NULL) 96 return memdup(&tim, sizeof(tim)); 97 98 if (strptime(str, "%a, %d %b %Y, %T %z", &tim) != NULL) 99 return memdup(&tim, sizeof(tim)); 100 101 if (strptime(str, "%d.%m.%Y", &tim) != NULL) 102 return memdup(&tim, sizeof(tim)); 103 104 return memdup(&tim, sizeof(tim)); 105 } 106 107 char * 108 mime_iconv(char *str, char *from, char *to) 109 { 110 iconv_t *ifd; 111 size_t left, avail, nconv; 112 char *outb, *strp, *outbp; 113 int olen, sd; 114 115 ifd = iconv_open(to, from); 116 if (ifd == (iconv_t)-1) 117 return NULL; 118 119 //printf("mime_iconv: '%s'; from='%s'\n", str, from); 120 121 left = strlen(str); 122 olen = left / 2; 123 avail = olen; 124 outb = mallocz(olen+1, 1); 125 outbp = outb; 126 strp = str; 127 for (;;) { 128 nconv = iconv(ifd, &strp, &left, &outbp, &avail); 129 if (nconv == (size_t)-1) { 130 if (errno == E2BIG) { 131 olen += 5; 132 sd = outbp - outb; 133 outb = reallocz(outb, olen+1, 0); 134 outbp = &outb[sd]; 135 avail += 5; 136 continue; 137 } 138 if (errno == EILSEQ || errno == EINVAL) 139 return NULL; 140 free(outb); 141 iconv_close(ifd); 142 return NULL; 143 } 144 break; 145 } 146 147 iconv_close(ifd); 148 if (outbp != NULL) 149 outbp[0] = '\0'; 150 return outb; 151 } 152 153 char * 154 mime_decodeheaderext(char *value) 155 { 156 char *work, *cret, *ret, *cs, *str, *enc, *ast, *dstr; 157 int len, slen; 158 159 len = strlen(value); 160 161 ret = memdupz(value, len); 162 work = memdupz(value, len); 163 164 if (!(work[0] == '=' && work[1] == '?' && work[len-1] == '=' 165 && work[len-2] == '?')) { 166 free(work); 167 return ret; 168 } 169 cs = &work[2]; 170 171 work[len-2] = '\0'; 172 enc = strchr(&work[2], '?'); 173 if (enc == NULL) { 174 free(work); 175 return ret; 176 } 177 enc[0] = '\0'; 178 enc++; 179 str = strchr(enc, '?'); 180 if (str == NULL) { 181 free(work); 182 return ret; 183 } 184 str[0] = '\0'; 185 str++; 186 187 /* 188 * RFC 2231 :( 189 * See: https://en.wikipedia.org/wiki/Mr._Mime 190 */ 191 ast = strchr(enc, '*'); 192 if (ast != NULL) 193 ast[0] = '\0'; 194 195 slen = strlen(str); 196 if (slen == 0) { 197 free(work); 198 free(ret); 199 return memdupz("", 1); 200 } 201 202 cret = NULL; 203 switch(enc[0]) { 204 case 'B': 205 case 'b': 206 cret = b64dec(str, &slen); 207 break; 208 case 'Q': 209 case 'q': 210 cret = qpdec(str, &slen, 1); 211 break; 212 } 213 214 //printf("mime_decodeheader: mime_iconv str='%s'; cret='%s';\n", str, cret); 215 if (cret != NULL) { 216 free(ret); 217 if (strcasecmp(cs, "utf-8")) { 218 dstr = mime_iconv(cret, cs, "UTF-8"); 219 if (dstr == NULL) { 220 str = smprintf("ERR(%s)", str); 221 } else { 222 str = dstr; 223 } 224 free(cret); 225 } else { 226 str = cret; 227 } 228 } else { 229 str = ret; 230 } 231 free(work); 232 233 return str; 234 } 235 236 int 237 mime_isextws(char *str, int len) 238 { 239 int i; 240 241 for (i = 0; i < len; i++) { 242 switch (str[i]) { 243 case '\n': 244 case '\r': 245 case ' ': 246 case '\t': 247 break; 248 default: 249 return 0; 250 } 251 } 252 return 1; 253 } 254 255 char * 256 mime_decodeheader(char *value) 257 { 258 char *work, *extp, *extw, *extb, *exte, *extr, *ret, *q1, *q2; 259 int vlen, rlen, elen, wasenc, i; 260 261 //printf("mime_decodeheader\n"); 262 ret = NULL; 263 rlen = 0; 264 vlen = strlen(value); 265 work = memdup(value, vlen+1); 266 extp = work; 267 wasenc = 0; 268 269 /* 270 * Avoid being tricked by malformed headers. 271 */ 272 for (i = 0; i < 32; i++) { 273 extb = strstr(extp, "=?"); 274 if (extb != NULL) { 275 elen = extb - extp; 276 if (extp != extb && (!wasenc || 277 !mime_isextws(extp, elen))) { 278 extw = memdupz(extp, elen); 279 ret = memdupcat(ret, rlen, extw, elen+1); 280 free(extw); 281 rlen += elen; 282 } 283 284 exte = NULL; 285 q1 = strchr(&extb[2], '?'); 286 if (q1 != NULL) { 287 q2 = strchr(&q1[1], '?'); 288 if (q2 != NULL) 289 exte = strstr(&q2[1], "?="); 290 } 291 if (exte != NULL) { 292 elen = &exte[2] - extb; 293 extw = memdupz(extb, elen); 294 extr = mime_decodeheaderext(extw); 295 free(extw); 296 elen = strlen(extr); 297 ret = memdupcat(ret, rlen, extr, elen+1); 298 rlen += elen; 299 free(extr); 300 extp = &exte[2]; 301 wasenc = 1; 302 continue; 303 } 304 } 305 break; 306 } 307 if ((extp - work) < vlen) 308 ret = memdupcat(ret, rlen, extp, strlen(extp)+1); 309 free(work); 310 311 /* Remove any space character, like newline. */ 312 if (ret != NULL) 313 strnormalizespace(ret); 314 315 return ret; 316 } 317 318 char *cstries[] = { 319 "utf-8", 320 "iso-8859-1", 321 "windows-1252", 322 "koi8", 323 "euc-jp" 324 "shift_jis", 325 "big5", 326 "iso-8859-15", 327 NULL 328 }; 329 330 char * 331 mime_guesscharset(char *str) 332 { 333 int i, eq; 334 char *itry; 335 336 for (i = 0; i < nelem(cstries); i++) { 337 itry = mime_iconv(str, cstries[i], cstries[i]); 338 if (itry == NULL) 339 continue; 340 eq = strcmp(str, itry); 341 free(itry); 342 if (!eq) 343 break; 344 } 345 346 return cstries[i]; 347 } 348 349 char * 350 mime_guessheader(char *value) 351 { 352 char *nvalue, *gcs; 353 354 gcs = NULL; 355 356 //printf("mime_guessheader '%s'\n", value); 357 358 nvalue = value; 359 if (!strisascii(value)) { 360 /* 361 * Guessing begins. Some major MUA developers did not read any 362 * RFCs. 363 */ 364 365 gcs = mime_guesscharset(value); 366 if (gcs != NULL) { 367 nvalue = mime_iconv(value, gcs, "utf-8"); 368 if (nvalue == NULL) { 369 nvalue = value; 370 gcs = NULL; 371 } 372 } 373 } 374 375 value = mime_decodeheader(nvalue); 376 if (gcs != NULL) 377 free(nvalue); 378 return value; 379 } 380 381 char * 382 mime_decodeparam(char *value) 383 { 384 char *work, *cret, *ret, *cs, *str, *lang, *dstr; 385 int len, slen; 386 387 len = strlen(value); 388 ret = memdup(value, len+1); 389 work = memdup(value, len+1); 390 391 cs = work; 392 lang = strchr(work, '\''); 393 if (lang == NULL) { 394 free(work); 395 return ret; 396 } 397 lang[0] = '\0'; 398 lang++; 399 str = strchr(lang, '\''); 400 if (str == NULL) { 401 free(work); 402 return ret; 403 } 404 str[0] = '\0'; 405 str++; 406 407 slen = strlen(str); 408 cret = paramdec(str, &slen); 409 410 if (cret != NULL) { 411 if (strcasecmp(cs, "utf-8")) { 412 free(ret); 413 dstr = mime_iconv(cret, cs, "UTF-8"); 414 if (dstr == NULL) { 415 str = smprintf("ERR(%s)", str); 416 } else { 417 str = dstr; 418 } 419 free(cret); 420 } else { 421 free(ret); 422 str = cret; 423 } 424 } else { 425 str = ret; 426 } 427 free(work); 428 429 return str; 430 } 431 432 char * 433 mime_encodestring(char *value) 434 { 435 char *b64, *ret; 436 437 if (strisascii(value)) 438 return memdups(value); 439 440 b64 = b64enc(value, strlen(value)); 441 ret = smprintf("=?UTF-8?b?%s?=", b64); 442 free(b64); 443 444 return ret; 445 } 446 447 char * 448 mime_encodeheader(char *header, char *value) 449 { 450 char *ret, *b64, *p, *mp, *str; 451 int hlen, lmax, isascii, firstline, slen; 452 453 isascii = 0; 454 455 /* 456 * RFC 2047: 457 * One encoded word should be at max. 75 characters. 458 * One encoded line is limited to 76 characters. 459 */ 460 hlen = strlen(header) + 2; 461 if (strisascii(value)) { 462 isascii = 1; 463 lmax = 75 - hlen; 464 } else { 465 lmax = 63 - hlen; 466 } 467 slen = strlen(value); 468 469 ret = NULL; 470 for (p = value, firstline = 1; slen > 0; slen -= lmax, p = mp) { 471 if (firstline) { 472 lmax += hlen; 473 firstline = 0; 474 } 475 476 mp = findlimitws(p, lmax); 477 if (mp == NULL) { 478 str = memdupz(p, slen); 479 } else { 480 str = memdupz(p, mp - p); 481 } 482 483 if (!isascii) { 484 b64 = b64enc(str, strlen(str)); 485 free(str); 486 mp = smprintf("=?UTF-8?b?%s?=", b64); 487 free(b64); 488 str = mp; 489 } 490 491 if (ret != NULL) { 492 mp = smprintf("%s %s", ret, str); 493 free(ret); 494 ret = mp; 495 } else { 496 ret = smprintf("%s", str); 497 } 498 } 499 500 return ret; 501 } 502 503 int 504 mime_paramsort(llistelem_t *elem1, llistelem_t *elem2) 505 { 506 int a, b; 507 char *n1, *n2; 508 509 n1 = strrchr(elem1->key, '*'); 510 if (n1 == NULL) 511 a = -1; 512 else 513 a = atoi(&n1[1]); 514 n2 = strrchr(elem2->key, '*'); 515 if (n2 == NULL) 516 b = -1; 517 else 518 b = atoi(&n2[1]); 519 520 return a - b; 521 } 522 523 /* 524 * Order and concatenate ordered params. 525 */ 526 llist_t * 527 mime_sanitizeparams(llist_t *params) 528 { 529 llistelem_t *param, *hit, *nparam; 530 llist_t *reorder, *hits; 531 char *nvalue, *dvalue; 532 int i, n; 533 //char *key; 534 //int klen; 535 536 reorder = llist_new(); 537 //printf("mime_sanitizeparams: start\n"); 538 n = 0; 539 forllist(params, param) { 540 if (n == 0) { 541 //printf("first key: %s\n", param->key); 542 n++; 543 continue; 544 } 545 546 //key = param->key; 547 //printf("key = %s\n", key); 548 //klen = strlen(key); 549 550 nvalue = strrchr(param->key, '*'); 551 if (nvalue == NULL) 552 continue; 553 for (i = 1; nvalue[i]; i++) 554 if (!isdigit(nvalue[i])) 555 break; 556 if (nvalue[i]) 557 continue; 558 //printf("nvalue = %s\n", nvalue); 559 560 dvalue = mime_decodeparam((char *)param->data); 561 if (dvalue != NULL) { 562 //printf("decoded: %s\n", dvalue); 563 free(param->data); 564 param->data = dvalue; 565 param->datalen = strlen(dvalue)+1; 566 } 567 568 nvalue[0] = '\0'; 569 //printf("key after = %s\n", key); 570 if (llist_get(reorder, param->key) != NULL) 571 llist_add(reorder, param->key, NULL, 0); 572 nvalue[0] = '*'; 573 } 574 575 /* 576 * Sort and concatenate the return list. 577 */ 578 forllist(reorder, param) { 579 hits = llist_new(); 580 forllist(params, nparam) { 581 if (!strncmp(nparam->key, param->key, 582 strlen(param->key))) { 583 //printf("nparam->key = %s\n", nparam->key); 584 llist_add(hits, nparam->key, nparam->data, 585 nparam->datalen); 586 } 587 } 588 if (hits->len < 1) { 589 llist_free(hits); 590 continue; 591 } 592 593 nparam = llistelem_new(param->key, NULL, 0); 594 hits = llist_internsort(hits, mime_paramsort); 595 596 forllist(hits, hit) { 597 nparam->data = memdupcat(nparam->data, 598 nparam->datalen, hit->data, 599 hit->datalen); 600 nparam->datalen += hit->datalen-1; 601 } 602 603 params = llist_listdel(params, hits); 604 llist_free(hits); 605 llist_addelem(params, nparam); 606 } 607 llist_free(reorder); 608 609 return params; 610 } 611 612 llist_t * 613 mime_parseheader(char *field) 614 { 615 char *tok, *buf, *key, *value, *sep, *eq, quot; 616 llist_t *ret; 617 int tlen; 618 619 buf = memdups(field); 620 621 tok = buf; 622 ret = llist_new(); 623 //printf("mime_parseheader: buf = '%s'\n", buf); 624 while (tok[0] != '\0') { 625 key = NULL; 626 value = NULL; 627 628 /* 629 * 0.) Sanitize the beginning and the end. 630 */ 631 while (isspace(tok[0])) 632 tok++; 633 tlen = strlen(tok); 634 while (isspace(tok[tlen-1])) { 635 tok[tlen-1] = '\0'; 636 tlen--; 637 } 638 //printf("mime_parseheader: after sanitize: tok = '%s'\n", tok); 639 640 /* 641 * 1.) ([\t\r\v\f ]*)key 642 */ 643 key = tok + strspn(tok, "\t\r\v\f "); 644 //printf("mime_parseheader: key = '%s'\n", tok); 645 646 /* 647 * 2.) key 648 */ 649 tok = key + strcspn(key, "\t\r\v\f =;"); 650 if (tok[0] == ';' || tok[0] == '\0') { 651 quot = tok[0]; 652 tok[0] = '\0'; 653 if (strlen(key) > 0) { 654 //printf("mime_parseheader: add key '%s'\n", key); 655 llist_add(ret, key, NULL, 0); 656 } 657 if (quot != '\0') 658 tok++; 659 continue; 660 } 661 662 //printf("mime_parseheader: tok = '%s'\n", tok); 663 if (tok[0] == '=') { 664 eq = tok; 665 } else { 666 /* 667 * 3.) key([\t\r\v\f ]*)= 668 */ 669 tok[0] = '\0'; 670 eq = tok + 1 + strspn(tok+1, "\t\r\v\f ;"); 671 if (eq[0] == ';') { 672 if (strlen(key) > 0) 673 llist_add(ret, key, NULL, 0); 674 tok++; 675 continue; 676 } 677 678 if (eq[0] != '=') { 679 /* 680 * 3.1.) key; 681 */ 682 if (strlen(key) > 0) 683 llist_add(ret, key, NULL, 0); 684 tok++; 685 continue; 686 } 687 } 688 tok[0] = '\0'; 689 /* 690 * 4.) key=([\t\r\v\f ]*)("|)value 691 */ 692 tok = eq + 1 + strspn(eq+1, "\t\r\v\f "); 693 switch (tok[0]) { 694 case '"': 695 case '\'': 696 quot = tok[0]; 697 for (sep = tok+1; sep[0] != '\0'; sep++) { 698 if (sep[0] == quot) { 699 sep[0] = '\0'; 700 sep++; 701 break; 702 } 703 if (sep[0] == '\\' && sep[1] != '\0') 704 memmove(&sep[1], sep, strlen(&sep[1])); 705 } 706 value = &tok[1]; 707 tok = sep; 708 709 sep = tok + strcspn(tok, ";"); 710 if (sep[0] == ';') { 711 tok = sep + 1; 712 } else { 713 tok = sep; 714 } 715 break; 716 default: 717 /* 718 * 4.1.) value 719 */ 720 value = tok; 721 sep = tok + strcspn(tok, "\t\r\v\f ;"); 722 if (sep[0] == ';') { 723 sep[0] = '\0'; 724 tok = sep + 1; 725 } else { 726 tok = sep; 727 } 728 break; 729 } 730 731 //printf("mime_parseheader: add %s = '%s'\n", key, value); 732 llist_add(ret, key, value, strlen(value)+1); 733 } 734 free(buf); 735 736 //printf("ret->len = %d\n", ret->len); 737 if (ret->len > 0) 738 return mime_sanitizeparams(ret); 739 740 llist_free(ret); 741 return NULL; 742 } 743 744 void 745 mime_mkpartidsintern(mime_t *mime, char *sect, int pid) 746 { 747 llistelem_t *part; 748 749 mime->partid = smprintf("%s%d", sect, pid); 750 sect = smprintf("%s.", mime->partid); 751 752 pid = 1; 753 forllist(mime->parts, part) { 754 mime_mkpartidsintern((mime_t *)part->data, sect, pid); 755 pid++; 756 } 757 free(sect); 758 } 759 760 void 761 mime_mkpartids(mime_t *mime) 762 { 763 int pid; 764 llistelem_t *part; 765 766 mime->partid = memdupz("0", 1); 767 pid = 1; 768 forllist(mime->parts, part) 769 mime_mkpartidsintern((mime_t *)part->data, "", pid++); 770 } 771 772 /* 773 * This functions searches for the next boundary occurence. It will 774 * return *choice = 1, if it was an end boundary; otherwise 0. 775 */ 776 char * 777 mime_sgetbound(char *bound, char **p, char *max, int *len, int *choice) 778 { 779 char *ret, *op; 780 int slen, isenl, isend, sublen; 781 782 ret = NULL; 783 784 //printf("bound = '%s'\n", bound); 785 //printf("p = '%s'\n", *p); 786 slen = strlen(bound); 787 *choice = 0; 788 isenl = 0; 789 isend = 0; 790 sublen = 0; 791 792 for (;;) { 793 op = memmem(*p, (max-(*p)), bound, slen); 794 if (op == NULL) 795 return ret; 796 797 if (!strncmp(op+slen, "--", 2)) { 798 isend = 1; 799 if (op[slen+2] == '\n') 800 isenl = 1; 801 } else if (op[slen] == '\n') { 802 isenl = 1; 803 } 804 //printf("isenl = %d, isend = %d\n", isenl, isend); 805 806 if (op == *p) 807 break; 808 809 if (op > (*p + 1) && op[-2] == '\r' && op[-1] == '\n') 810 sublen = 2; 811 if (op > *p && op[-2] != '\r' && op[-1] == '\n') 812 sublen = 1; 813 //printf("sublen = %d\n", sublen); 814 break; 815 } 816 817 if (isend) { 818 *choice = 1; 819 slen += 2; 820 } 821 822 *len = op - *p - sublen; 823 ret = memdupz(*p, *len); 824 825 *p = op + slen + (isend * 2) + (2 - isenl); 826 827 //printf("p = '%s'\n", *p); 828 829 return ret; 830 } 831 832 mime_t * 833 mime_preparepart(mime_t *mime) 834 { 835 llistelem_t *hdr, *field; 836 llist_t *hdrf; 837 838 //printf("mime = %p\n", mime); 839 hdr = llist_ciget(mime->hdrs, "content-type"); 840 if (hdr != NULL && hdr->data != NULL && strlen(hdr->data) > 0) { 841 //printf("content-type: %s\n", (char *)hdr->data); 842 hdrf = mime_parseheader(hdr->data); 843 //printf("hdrf = %p\n", hdrf); 844 //printf("%s\n", hdrf->first->key); 845 if (hdrf != NULL) { 846 if (!strncasecmp(hdrf->first->key, "multipart", 9)) { 847 //printf("is multipart\n"); 848 field = llist_ciget(hdrf, "boundary"); 849 if (field == NULL) { 850 return NULL; 851 //die("Could not find boundary " 852 // "in multipart!\n"); 853 } 854 mime->boundary = smprintf("--%s", 855 (char *)field->data); 856 //printf("boundary: \"%s\"\n", mime->boundary); 857 } 858 mime->ct = memdups(hdrf->first->key); 859 860 field = llist_ciget(hdrf, "charset"); 861 if (field != NULL && field->data != NULL) { 862 mime->charset = memdupz(field->data, 863 field->datalen); 864 } 865 866 llist_free(hdrf); 867 } 868 } 869 870 if (mime->ct == NULL) 871 mime->ct = memdupz("text/plain", 10); 872 //printf("mime->ct = %s\n", mime->ct); 873 if (mime->charset == NULL) 874 mime->charset = memdupz("iso8859-1", 9); 875 //printf("mime->charset = %s\n", mime->charset); 876 877 hdr = llist_ciget(mime->hdrs, "Content-Transfer-Encoding"); 878 if (hdr != NULL && hdr->data != NULL) { 879 mime->cte = memdupz(hdr->data, hdr->datalen); 880 } else { 881 mime->cte = memdupz("7bit", 4); 882 } 883 //printf("mime->cte = %s\n", mime->cte); 884 885 return mime; 886 } 887 888 mime_t * 889 mime_parsebufintern(mime_t *mime, char *str, int len) 890 { 891 int i, partlen, isend, blen; 892 char *p, *rp, buf[1025], *key, *value, *tvalue, *part; 893 llistelem_t *hdr; 894 mime_t *partm; 895 896 rp = str; 897 p = str; 898 for (; (rp = sgets(buf, sizeof(buf)-1, &p));) { 899 blen = strlen(buf); 900 if (buf[blen-1] == '\r') 901 buf[blen-1] = '\0'; 902 //printf("line '%s'\n", buf); 903 904 switch (mime->state) { 905 case HEADERVALUE: 906 switch (buf[0]) { 907 case ' ': 908 case '\t': 909 case '\r': 910 case '\f': 911 case '\v': 912 //printf("hdrvalue: %s (%d)\n", buf, 913 // (int)strlen(buf)); 914 /* 915 * " value" 916 */ 917 sscanf(buf, "%*[ \t\r\v\f]%1024m[^\n]", 918 &value); 919 if (value != NULL && hdr != NULL) { 920 if (hdr->data != NULL) { 921 part = memdup(value, strlen(value)+1); 922 923 /* Adding a space. */ 924 hdr->data = memdupcat(hdr->data, 925 hdr->datalen-1, 926 " ", 1); 927 hdr->datalen++; 928 929 /* Adding the next line. */ 930 i = strlen(part); 931 key = memdupcat(hdr->data, 932 hdr->datalen-1, 933 part, i+1); 934 free(part); 935 hdr->data = key; 936 hdr->datalen += i; 937 //printf("%s = %s\n", hdr->key, 938 // (char *)hdr->data); 939 } 940 free(value); 941 } 942 goto mimeparsebufagain; 943 default: 944 break; 945 } 946 947 if (hdr != NULL) 948 hdr = NULL; 949 mime->state = HEADER; 950 /* FALL THROUGH: No header value found. */ 951 case HEADER: 952 //printf("hdr: %s\n", buf); 953 954 /* 955 * End of headers. 956 */ 957 if (strlen(buf) == 0) { 958 //printf("end of headers '%c' + '%c'\n", p[0], p[1]); 959 /* 960 * Heuristics for ugly e-mail generators 961 * follow. 962 */ 963 /* 964 * Does the line begin with "--"? Looks 965 * like a boundary. Go to next body part. 966 */ 967 /* 968 if (p[0] == '-' && p[1] == '-') { 969 mime->rawhdrs = memdupz(str, (p - str)); 970 mime->rawhdrslen = p - str; 971 goto mimeparsebufbodyparse; 972 }*/ 973 /* 974 * TODO: Find Apple and Google developers 975 * and teach them how to not do this. 976 * Does the line have some "header: value\n" 977 * form? Go on parsing headers. 978 */ 979 /*for (key = p; key[0] != '\n'; key++) { 980 //printf("key[0] = '%c'\n", key[0]); 981 if (key[0] == ':' && key[1] == ' ') 982 break; 983 if (key[0] == ' ') { 984 mime->rawhdrs = memdupz(str, (p - str)); 985 mime->rawhdrslen = p - str; 986 goto mimeparsebufbodyparse; 987 } 988 }*/ 989 /* 990 * A line simply ended with no header. 991 * That is suspicious. 992 */ 993 /* 994 if (key[0] == '\n') { 995 mime->rawhdrs = memdupz(str, (p - str)); 996 mime->rawhdrslen = p - str; 997 goto mimeparsebufbodyparse; 998 }*/ 999 mime->rawhdrs = memdupz(str, (p - str)); 1000 mime->rawhdrslen = p - str; 1001 goto mimeparsebufbodyparse; 1002 } 1003 1004 /* 1005 * "key: value" 1006 */ 1007 key = NULL; 1008 value = NULL; 1009 tvalue = NULL; 1010 sscanf(buf, "%1024m[^: \t\r\v\f]:" 1011 "%1024m[^\n]", &key, &value); 1012 if (value == NULL) 1013 value = memdupz(" ", 2); 1014 //printf("%s = %s\n", key, value); 1015 if (key != NULL && value != NULL) { 1016 tvalue = value + strspn(value, 1017 " \t\r\v\f"); 1018 hdr = llistelem_new(key, tvalue, 1019 strlen(tvalue)+1); 1020 llist_addelem(mime->hdrs, hdr); 1021 mime->state = HEADERVALUE; 1022 } 1023 if (key != NULL) 1024 free(key); 1025 if (value != NULL) 1026 free(value); 1027 break; 1028 default: 1029 mimeparsebufagain: 1030 break; 1031 } 1032 } 1033 //printf("return mime_preparepart\n"); 1034 return mime_preparepart(mime); 1035 1036 mimeparsebufbodyparse: 1037 //printf("body parsing begins.\n"); 1038 mime = mime_preparepart(mime); 1039 if (mime == NULL) 1040 return NULL; 1041 1042 /* 1043 * It is not a multipart message, so take the remainder 1044 * of the given message. 1045 */ 1046 if (mime->boundary == NULL) { 1047 //printf("No boundary there. Taking the remainder.\n"); 1048 partlen = str - p + len; 1049 mime->body = memdupz(p, partlen); 1050 mime->bodylen = partlen; 1051 //printf("strlen = %ld; partlen = %d;\n", strlen(mime->body), 1052 // partlen); 1053 //printf("mime->body = \"%s\"\n", mime->body); 1054 1055 return mime; 1056 } else { 1057 //printf("There is a boundary.\n"); 1058 } 1059 1060 partlen = 0; 1061 //printf("p = \"%s\"\n", p); 1062 mime->body = mime_sgetbound(mime->boundary, &p, str + len - 1, 1063 &partlen, &isend); 1064 mime->bodylen = partlen; 1065 if (isend) { 1066 /* 1067 * This is an end boundary at the beginning 1068 * of a multipart message. Abort. 1069 */ 1070 //die("End boundary at beginning of multipart.\n"); 1071 return mime; 1072 } 1073 if (mime->body == NULL) { 1074 //die("Could not find beginning MIME content.\n"); 1075 return mime; 1076 } 1077 //printf("mime->body = \"%s\"\n", mime->body); 1078 1079 for(;;) { 1080 partlen = 0; 1081 part = mime_sgetbound(mime->boundary, &p, str + len - 1, 1082 &partlen, &isend); 1083 //printf("part = \"%s\"\n", part); 1084 if (part == NULL) { 1085 /* 1086 * There maybe no ending boundary. Some e-mail 1087 * signing applications forget this. 1088 */ 1089 if (p < (str + len - 1)) { 1090 partlen = str - p + len; 1091 part = memdupz(p, partlen); 1092 p = str + len - 1; 1093 } else { 1094 break; 1095 } 1096 } 1097 1098 partm = mime_new(); 1099 partm = mime_parsebufintern(partm, part, partlen); 1100 if (partm != NULL) 1101 llist_addraw(mime->parts, NULL, partm, sizeof(partm)); 1102 free(part); 1103 1104 if (isend) 1105 break; 1106 } 1107 1108 return mime; 1109 } 1110 1111 mime_t * 1112 mime_parsebuf(char *str, int len) 1113 { 1114 mime_t *ret, *pret; 1115 1116 ret = mime_new(); 1117 pret = mime_parsebufintern(ret, str, len); 1118 if (pret == NULL) { 1119 mime_free(ret); 1120 return NULL; 1121 } 1122 1123 mime_mkpartids(ret); 1124 1125 return ret; 1126 } 1127 1128 char * 1129 mime_searchsplit(char *data, int klen) 1130 { 1131 char *p, *op; 1132 int incomment; 1133 1134 if (strlen(data) + klen <= 74) 1135 return NULL; 1136 1137 p = &data[73 - klen]; 1138 op = p; 1139 incomment = 0; 1140 1141 for (;;) { 1142 switch (p[0]) { 1143 case '"': 1144 case '\'': 1145 /* 1146 * This is meant to be broken. 1147 * It's just heuristics. 1148 */ 1149 incomment = !incomment; 1150 break; 1151 case ' ': 1152 case '\t': 1153 case '\f': 1154 case '\n': 1155 case '\r': 1156 if (incomment) 1157 break; 1158 return p; 1159 case '\0': 1160 return &data[73 - klen]; 1161 } 1162 1163 if (p == data) { 1164 p = op; 1165 op = NULL; 1166 continue; 1167 } 1168 1169 if (op != NULL) { 1170 p--; 1171 } else { 1172 p++; 1173 } 1174 } 1175 1176 return NULL; 1177 } 1178 1179 char * 1180 mime_printheader(llistelem_t *hdr) 1181 { 1182 char *buf, *sp, *osp; 1183 int blen, splen; 1184 1185 blen = 0; 1186 sp = mime_searchsplit((char *)hdr->data, strlen(hdr->key) + 2); 1187 if (sp != NULL) { 1188 buf = smprintf("%s: ", hdr->key); 1189 blen = strlen(buf); 1190 1191 buf = memdupcat(buf, blen, (char *)hdr->data, 1192 (sp - (char *)hdr->data)); 1193 blen += (sp - (char *)hdr->data); 1194 buf = memdupcat(buf, blen, "\r\n", 2); 1195 blen += 2; 1196 1197 for (osp = sp;; osp = sp) { 1198 sp = mime_searchsplit(osp, 8); 1199 if (sp == NULL) 1200 break; 1201 1202 buf = memdupcat(buf, blen, "\t", 1); 1203 blen += 1; 1204 buf = memdupcat(buf, blen, osp, (sp - osp)); 1205 blen += (sp - osp); 1206 buf = memdupcat(buf, blen, "\r\n", 2); 1207 blen += 2; 1208 } 1209 1210 if (strlen(osp) > 0) { 1211 buf = memdupcat(buf, blen, "\t", 1); 1212 blen += 1; 1213 splen = strlen(osp); 1214 buf = memdupcat(buf, blen, osp, splen); 1215 blen += splen; 1216 buf = memdupcat(buf, blen, "\r\n", 2); 1217 } 1218 } else { 1219 buf = smprintf("%s: %s\r\n", hdr->key, (char *)hdr->data); 1220 } 1221 1222 return buf; 1223 } 1224 1225 char * 1226 mime_printbuf(mime_t *mime, int *len) 1227 { 1228 llistelem_t *hdr; 1229 char *ret, *abuf; 1230 int rlen, alen; 1231 1232 rlen = 0; 1233 ret = NULL; 1234 1235 forllist(mime->hdrs, hdr) { 1236 abuf = mime_printheader(hdr); 1237 alen = strlen(abuf); 1238 1239 ret = memdupcat(ret, rlen, abuf, alen); 1240 rlen += alen; 1241 free(abuf); 1242 /* 1243 * TODO: Add part handling. 1244 */ 1245 } 1246 1247 ret = memdupcat(ret, rlen, "\r\n", 2); 1248 rlen += 2; 1249 1250 return ret; 1251 } 1252 1253 void 1254 printtabs(int depth) 1255 { 1256 for (; depth; depth--) 1257 printf("\t"); 1258 } 1259 1260 void 1261 mime_printintern(mime_t *mime, int depth) 1262 { 1263 llistelem_t *elem; 1264 1265 printtabs(depth); 1266 printf("partid: %s\n", mime->partid); 1267 printtabs(depth); 1268 printf("hdr:\n"); 1269 forllist(mime->hdrs, elem) { 1270 printtabs(depth); 1271 printf("%s = %s\n", elem->key, (char *)elem->data); 1272 } 1273 1274 printtabs(depth); 1275 printf("body:\n"); 1276 printtabs(depth); 1277 printf("%d\n", mime->bodylen); 1278 printf("%s", mime->body); 1279 1280 if (mime->parts->len > 0) { 1281 printtabs(depth); 1282 printf("parts:\n"); 1283 forllist(mime->parts, elem) 1284 mime_printintern((mime_t *)elem->data, depth+1); 1285 } 1286 } 1287 1288 void 1289 mime_print(mime_t *mime) 1290 { 1291 mime_printintern(mime, 0); 1292 } 1293 1294 char * 1295 mime_decodepartencoding(mime_t *mime, int *len) 1296 { 1297 char *ret; 1298 1299 //printf("ct = \"%s\"\n", mime->ct); 1300 //printf("cte = \"%s\"\n", mime->cte); 1301 ret = NULL; 1302 if (!strcasecmp(mime->cte, "base64")) { 1303 *len = mime->bodylen; 1304 ret = b64dec(mime->body, len); 1305 } else if (!strcasecmp(mime->cte, "quoted-printable")) { 1306 *len = mime->bodylen; 1307 ret = qpdec(mime->body, len, 0); 1308 } else if (!strncasecmp(mime->ct, "text/", 5)) { 1309 /* Convert CRLF to LF. */ 1310 *len = mime->bodylen; 1311 ret = dosdec(mime->body, len); 1312 } 1313 1314 if (ret == NULL && mime->body != NULL && mime->bodylen > 0) { 1315 *len = mime->bodylen; 1316 ret = memdupz(mime->body, mime->bodylen); 1317 } 1318 1319 return ret; 1320 } 1321 1322 char * 1323 mime_decodepart(mime_t *mime, int *len) 1324 { 1325 char *ret, *cret; 1326 1327 if (mime->bodylen == 0) { 1328 *len = 0; 1329 return memdupz("", 1); 1330 } 1331 1332 ret = mime_decodepartencoding(mime, len); 1333 if (ret == NULL) { 1334 *len = 0; 1335 return memdupz("", 1); 1336 } 1337 1338 if (strcasecmp(mime->cte, "binary")) { 1339 if (strcasecmp(mime->charset, "utf-8")) { 1340 cret = mime_iconv(ret, mime->charset, "UTF-8"); 1341 if (cret != NULL) { 1342 free(ret); 1343 ret = cret; 1344 } 1345 *len = strlen(ret); 1346 } 1347 } 1348 1349 return ret; 1350 } 1351 1352 char * 1353 mime_filename(mime_t *mime) 1354 { 1355 char *filename; 1356 llistelem_t *hdr, *name; 1357 llist_t *hdrp; 1358 1359 filename = NULL; 1360 1361 /* 1362 * 1.) The standard. 1363 */ 1364 hdr = llist_ciget(mime->hdrs, "Content-Disposition"); 1365 if (hdr != NULL && hdr->data != NULL) { 1366 hdrp = mime_parseheader((char *)hdr->data); 1367 if (hdrp != NULL) { 1368 name = llist_ciget(hdrp, "filename"); 1369 if (name != NULL && name->data != NULL) { 1370 filename = mime_guessheader( 1371 (char *)name->data); 1372 } 1373 llist_free(hdrp); 1374 } 1375 1376 if (filename != NULL) 1377 return filename; 1378 } 1379 1380 /* 1381 * 2.) The modern age. 1382 */ 1383 hdr = llist_ciget(mime->hdrs, "Content-Type"); 1384 if (hdr != NULL && hdr->data != NULL) { 1385 hdrp = mime_parseheader((char *)hdr->data); 1386 if (hdrp != NULL) { 1387 name = llist_ciget(hdrp, "name"); 1388 if (name != NULL && name->data != NULL) { 1389 filename = mime_guessheader( 1390 (char *)name->data); 1391 } 1392 llist_free(hdrp); 1393 } 1394 1395 if (filename != NULL) 1396 return filename; 1397 } 1398 1399 return NULL; 1400 } 1401 1402 1403 char * 1404 mime_mkfilename(char *id, mime_t *mime) 1405 { 1406 char *filename; 1407 llistelem_t *hdr; 1408 1409 filename = mime_filename(mime); 1410 if (filename != NULL) 1411 return filename; 1412 1413 /* 1414 * 3.) The ugly. 1415 */ 1416 hdr = llist_ciget(mime->hdrs, "Content-Description"); 1417 if (hdr != NULL && hdr->data != NULL) { 1418 filename = mime_guessheader((char *)hdr->data); 1419 if (filename != NULL) 1420 return filename; 1421 } 1422 1423 /* 1424 * 4.) Last resort. 1425 */ 1426 if (id == NULL) 1427 id = "000"; 1428 return smprintf("%s.%s.part", id, mime->partid); 1429 } 1430 1431 char * 1432 mime_mkboundary(void) 1433 { 1434 srand(time(NULL)); 1435 return smprintf("=--= _TUlNRSBTdWNrcyEK/%x_ =--=", rand()); 1436 } 1437