/* url.c */
1 /*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.haxx.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 ***************************************************************************/ 22 23 #define MAX_SCHEME_LEN 8 24 25 #include <assert.h> 26 #include <ctype.h> 27 #include <stdarg.h> 28 #include <stdbool.h> 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include <strings.h> 33 #include "escape.h" 34 #include <gmni/url.h> 35 36 /* Provided by gmni */ 37 static char * 38 aprintf(const char *fmt, ...) 
39 { 40 va_list ap; 41 va_start(ap, fmt); 42 int n = vsnprintf(NULL, 0, fmt, ap); 43 va_end(ap); 44 45 char *strp = calloc(n + 1, 1); 46 assert(strp); 47 48 va_start(ap, fmt); 49 n = vsnprintf(strp, n + 1, fmt, ap); 50 va_end(ap); 51 return strp; 52 } 53 54 /* via lib/dotdot.c */ 55 char *Curl_dedotdotify(const char *input) 56 { 57 size_t inlen = strlen(input); 58 char *clone; 59 size_t clen = inlen; /* the length of the cloned input */ 60 char *out = malloc(inlen + 1); 61 char *outptr; 62 char *orgclone; 63 char *queryp; 64 if(!out) 65 return NULL; /* out of memory */ 66 67 *out = 0; /* zero terminates, for inputs like "./" */ 68 69 /* get a cloned copy of the input */ 70 clone = strdup(input); 71 if(!clone) { 72 free(out); 73 return NULL; 74 } 75 orgclone = clone; 76 outptr = out; 77 78 if(!*clone) { 79 /* zero length string, return that */ 80 free(out); 81 return clone; 82 } 83 84 /* 85 * To handle query-parts properly, we must find it and remove it during the 86 * dotdot-operation and then append it again at the end to the output 87 * string. 88 */ 89 queryp = strchr(clone, '?'); 90 if(queryp) 91 *queryp = 0; 92 93 do { 94 95 /* A. If the input buffer begins with a prefix of "../" or "./", then 96 remove that prefix from the input buffer; otherwise, */ 97 98 if(!strncmp("./", clone, 2)) { 99 clone += 2; 100 clen -= 2; 101 } 102 else if(!strncmp("../", clone, 3)) { 103 clone += 3; 104 clen -= 3; 105 } 106 107 /* B. if the input buffer begins with a prefix of "/./" or "/.", where 108 "." is a complete path segment, then replace that prefix with "/" in 109 the input buffer; otherwise, */ 110 else if(!strncmp("/./", clone, 3)) { 111 clone += 2; 112 clen -= 2; 113 } 114 else if(!strcmp("/.", clone)) { 115 clone[1]='/'; 116 clone++; 117 clen -= 1; 118 } 119 120 /* C. if the input buffer begins with a prefix of "/../" or "/..", where 121 ".." 
is a complete path segment, then replace that prefix with "/" in 122 the input buffer and remove the last segment and its preceding "/" (if 123 any) from the output buffer; otherwise, */ 124 125 else if(!strncmp("/../", clone, 4)) { 126 clone += 3; 127 clen -= 3; 128 /* remove the last segment from the output buffer */ 129 while(outptr > out) { 130 outptr--; 131 if(*outptr == '/') 132 break; 133 } 134 *outptr = 0; /* zero-terminate where it stops */ 135 } 136 else if(!strcmp("/..", clone)) { 137 clone[2]='/'; 138 clone += 2; 139 clen -= 2; 140 /* remove the last segment from the output buffer */ 141 while(outptr > out) { 142 outptr--; 143 if(*outptr == '/') 144 break; 145 } 146 *outptr = 0; /* zero-terminate where it stops */ 147 } 148 149 /* D. if the input buffer consists only of "." or "..", then remove 150 that from the input buffer; otherwise, */ 151 152 else if(!strcmp(".", clone) || !strcmp("..", clone)) { 153 *clone = 0; 154 *out = 0; 155 } 156 157 else { 158 /* E. move the first path segment in the input buffer to the end of 159 the output buffer, including the initial "/" character (if any) and 160 any subsequent characters up to, but not including, the next "/" 161 character or the end of the input buffer. */ 162 163 do { 164 *outptr++ = *clone++; 165 clen--; 166 } while(*clone && (*clone != '/')); 167 *outptr = 0; 168 } 169 170 } while(*clone); 171 172 if(queryp) { 173 size_t qlen; 174 /* There was a query part, append that to the output. The 'clone' string 175 may now have been altered so we copy from the original input string 176 from the correct index. 
*/ 177 size_t oindex = queryp - orgclone; 178 qlen = strlen(&input[oindex]); 179 memcpy(outptr, &input[oindex], qlen + 1); /* include the end zero byte */ 180 } 181 182 free(orgclone); 183 return out; 184 } 185 186 /* via lib/url.c */ 187 CURLcode Curl_parse_login_details(const char *login, const size_t len, 188 char **userp, char **passwdp, 189 char **optionsp) 190 { 191 CURLcode result = CURLE_OK; 192 char *ubuf = NULL; 193 char *pbuf = NULL; 194 char *obuf = NULL; 195 const char *psep = NULL; 196 const char *osep = NULL; 197 size_t ulen; 198 size_t plen; 199 size_t olen; 200 201 /* Attempt to find the password separator */ 202 if(passwdp) { 203 psep = strchr(login, ':'); 204 205 /* Within the constraint of the login string */ 206 if(psep >= login + len) 207 psep = NULL; 208 } 209 210 /* Attempt to find the options separator */ 211 if(optionsp) { 212 osep = strchr(login, ';'); 213 214 /* Within the constraint of the login string */ 215 if(osep >= login + len) 216 osep = NULL; 217 } 218 219 /* Calculate the portion lengths */ 220 ulen = (psep ? 221 (size_t)(osep && psep > osep ? osep - login : psep - login) : 222 (osep ? (size_t)(osep - login) : len)); 223 plen = (psep ? 224 (osep && osep > psep ? (size_t)(osep - psep) : 225 (size_t)(login + len - psep)) - 1 : 0); 226 olen = (osep ? 227 (psep && psep > osep ? 
(size_t)(psep - osep) : 228 (size_t)(login + len - osep)) - 1 : 0); 229 230 /* Allocate the user portion buffer */ 231 if(userp && ulen) { 232 ubuf = malloc(ulen + 1); 233 if(!ubuf) 234 result = CURLE_OUT_OF_MEMORY; 235 } 236 237 /* Allocate the password portion buffer */ 238 if(!result && passwdp && plen) { 239 pbuf = malloc(plen + 1); 240 if(!pbuf) { 241 free(ubuf); 242 result = CURLE_OUT_OF_MEMORY; 243 } 244 } 245 246 /* Allocate the options portion buffer */ 247 if(!result && optionsp && olen) { 248 obuf = malloc(olen + 1); 249 if(!obuf) { 250 free(pbuf); 251 free(ubuf); 252 result = CURLE_OUT_OF_MEMORY; 253 } 254 } 255 256 if(!result) { 257 /* Store the user portion if necessary */ 258 if(ubuf) { 259 memcpy(ubuf, login, ulen); 260 ubuf[ulen] = '\0'; 261 free(*userp); 262 *userp = ubuf; 263 } 264 265 /* Store the password portion if necessary */ 266 if(pbuf) { 267 memcpy(pbuf, psep + 1, plen); 268 pbuf[plen] = '\0'; 269 free(*passwdp); 270 *passwdp = pbuf; 271 } 272 273 /* Store the options portion if necessary */ 274 if(obuf) { 275 memcpy(obuf, osep + 1, olen); 276 obuf[olen] = '\0'; 277 free(*optionsp); 278 *optionsp = obuf; 279 } 280 } 281 282 return result; 283 } 284 285 /* Internal representation of CURLU. Point to URL-encoded strings. */ 286 struct Curl_URL { 287 char *scheme; 288 char *user; 289 char *password; 290 char *options; /* IMAP only? 
*/ 291 char *host; 292 char *port; 293 char *path; 294 char *query; 295 char *fragment; 296 297 char *scratch; /* temporary scratch area */ 298 long portnum; /* the numerical version */ 299 }; 300 301 #define DEFAULT_SCHEME "https" 302 303 static void free_urlhandle(struct Curl_URL *u) 304 { 305 free(u->scheme); 306 free(u->user); 307 free(u->password); 308 free(u->options); 309 free(u->host); 310 free(u->port); 311 free(u->path); 312 free(u->query); 313 free(u->fragment); 314 free(u->scratch); 315 } 316 317 /* move the full contents of one handle onto another and 318 free the original */ 319 static void mv_urlhandle(struct Curl_URL *from, 320 struct Curl_URL *to) 321 { 322 free_urlhandle(to); 323 *to = *from; 324 free(from); 325 } 326 327 /* 328 * Find the separator at the end of the host name, or the '?' in cases like 329 * http://www.url.com?id=2380 330 */ 331 static const char *find_host_sep(const char *url) 332 { 333 const char *sep; 334 const char *query; 335 336 /* Find the start of the hostname */ 337 sep = strstr(url, "//"); 338 if(!sep) 339 sep = url; 340 else 341 sep += 2; 342 343 query = strchr(sep, '?'); 344 sep = strchr(sep, '/'); 345 346 if(!sep) 347 sep = url + strlen(url); 348 349 if(!query) 350 query = url + strlen(url); 351 352 return sep < query ? sep : query; 353 } 354 355 /* 356 * Decide in an encoding-independent manner whether a character in an 357 * URL must be escaped. The same criterion must be used in strlen_url() 358 * and strcpy_url(). 359 */ 360 static bool urlchar_needs_escaping(int c) 361 { 362 return !(iscntrl(c) || isspace(c) || isgraph(c)); 363 } 364 365 /* 366 * strlen_url() returns the length of the given URL if the spaces within the 367 * URL were properly URL encoded. 368 * URL encoding should be skipped for host names, otherwise IDN resolution 369 * will fail. 
370 */ 371 size_t Curl_strlen_url(const char *url, bool relative) 372 { 373 const unsigned char *ptr; 374 size_t newlen = 0; 375 bool left = true; /* left side of the ? */ 376 const unsigned char *host_sep = (const unsigned char *) url; 377 378 if(!relative) 379 host_sep = (const unsigned char *) find_host_sep(url); 380 381 for(ptr = (unsigned char *)url; *ptr; ptr++) { 382 383 if(ptr < host_sep) { 384 ++newlen; 385 continue; 386 } 387 388 switch(*ptr) { 389 case '?': 390 left = false; 391 /* FALLTHROUGH */ 392 default: 393 if(urlchar_needs_escaping(*ptr)) 394 newlen += 2; 395 newlen++; 396 break; 397 case ' ': 398 if(left) 399 newlen += 3; 400 else 401 newlen++; 402 break; 403 } 404 } 405 return newlen; 406 } 407 408 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in 409 * the source URL accordingly. 410 * URL encoding should be skipped for host names, otherwise IDN resolution 411 * will fail. 412 */ 413 void Curl_strcpy_url(char *output, const char *url, bool relative) 414 { 415 /* we must add this with whitespace-replacing */ 416 bool left = true; 417 const unsigned char *iptr; 418 char *optr = output; 419 const unsigned char *host_sep = (const unsigned char *) url; 420 421 if(!relative) 422 host_sep = (const unsigned char *) find_host_sep(url); 423 424 for(iptr = (unsigned char *)url; /* read from here */ 425 *iptr; /* until zero byte */ 426 iptr++) { 427 428 if(iptr < host_sep) { 429 *optr++ = *iptr; 430 continue; 431 } 432 433 switch(*iptr) { 434 case '?': 435 left = false; 436 /* FALLTHROUGH */ 437 default: 438 if(urlchar_needs_escaping(*iptr)) { 439 snprintf(optr, 4, "%%%02x", *iptr); 440 optr += 3; 441 } 442 else 443 *optr++=*iptr; 444 break; 445 case ' ': 446 if(left) { 447 *optr++='%'; /* add a '%' */ 448 *optr++='2'; /* add a '2' */ 449 *optr++='0'; /* add a '0' */ 450 } 451 else 452 *optr++='+'; /* add a '+' here */ 453 break; 454 } 455 } 456 *optr = 0; /* zero terminate output buffer */ 457 458 } 459 460 /* 461 * Returns 
true if the given URL is absolute (as opposed to relative) within 462 * the buffer size. Returns the scheme in the buffer if true and 'buf' is 463 * non-NULL. 464 */ 465 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) 466 { 467 size_t i; 468 for(i = 0; i < buflen && url[i]; ++i) { 469 char s = url[i]; 470 if((s == ':') && (url[i + 1] == '/')) { 471 if(buf) 472 buf[i] = 0; 473 return true; 474 } 475 /* RFC 3986 3.1 explains: 476 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 477 */ 478 else if(isalnum(s) || (s == '+') || (s == '-') || (s == '.') ) { 479 if(buf) 480 buf[i] = (char)tolower(s); 481 } 482 else 483 break; 484 } 485 return false; 486 } 487 488 /* 489 * Concatenate a relative URL to a base URL making it absolute. 490 * URL-encodes any spaces. 491 * The returned pointer must be freed by the caller unless NULL 492 * (returns NULL on out of memory). 493 */ 494 char *Curl_concat_url(const char *base, const char *relurl) 495 { 496 /*** 497 TRY to append this new path to the old URL 498 to the right of the host part. Oh crap, this is doomed to cause 499 problems in the future... 
500 */ 501 char *newest; 502 char *protsep; 503 char *pathsep; 504 size_t newlen; 505 bool host_changed = false; 506 507 const char *useurl = relurl; 508 size_t urllen; 509 510 /* we must make our own copy of the URL to play with, as it may 511 point to read-only data */ 512 char *url_clone = strdup(base); 513 514 if(!url_clone) 515 return NULL; /* skip out of this NOW */ 516 517 /* protsep points to the start of the host name */ 518 protsep = strstr(url_clone, "//"); 519 if(!protsep) 520 protsep = url_clone; 521 else 522 protsep += 2; /* pass the slashes */ 523 524 if('/' != relurl[0]) { 525 int level = 0; 526 527 /* First we need to find out if there's a ?-letter in the URL, 528 and cut it and the right-side of that off */ 529 pathsep = strchr(protsep, '?'); 530 if(pathsep) 531 *pathsep = 0; 532 533 /* we have a relative path to append to the last slash if there's one 534 available, or if the new URL is just a query string (starts with a 535 '?') we append the new one at the end of the entire currently worked 536 out URL */ 537 if(useurl[0] != '?') { 538 pathsep = strrchr(protsep, '/'); 539 if(pathsep) 540 *pathsep = 0; 541 } 542 543 /* Check if there's any slash after the host name, and if so, remember 544 that position instead */ 545 pathsep = strchr(protsep, '/'); 546 if(pathsep) 547 protsep = pathsep + 1; 548 else 549 protsep = NULL; 550 551 /* now deal with one "./" or any amount of "../" in the newurl 552 and act accordingly */ 553 554 if((useurl[0] == '.') && (useurl[1] == '/')) 555 useurl += 2; /* just skip the "./" */ 556 557 while((useurl[0] == '.') && 558 (useurl[1] == '.') && 559 (useurl[2] == '/')) { 560 level++; 561 useurl += 3; /* pass the "../" */ 562 } 563 564 if(protsep) { 565 while(level--) { 566 /* cut off one more level from the right of the original URL */ 567 pathsep = strrchr(protsep, '/'); 568 if(pathsep) 569 *pathsep = 0; 570 else { 571 *protsep = 0; 572 break; 573 } 574 } 575 } 576 } 577 else { 578 /* We got a new absolute path for this 
server */ 579 580 if((relurl[0] == '/') && (relurl[1] == '/')) { 581 /* the new URL starts with //, just keep the protocol part from the 582 original one */ 583 *protsep = 0; 584 useurl = &relurl[2]; /* we keep the slashes from the original, so we 585 skip the new ones */ 586 host_changed = true; 587 } 588 else { 589 /* cut off the original URL from the first slash, or deal with URLs 590 without slash */ 591 pathsep = strchr(protsep, '/'); 592 if(pathsep) { 593 /* When people use badly formatted URLs, such as 594 "http://www.url.com?dir=/home/daniel" we must not use the first 595 slash, if there's a ?-letter before it! */ 596 char *sep = strchr(protsep, '?'); 597 if(sep && (sep < pathsep)) 598 pathsep = sep; 599 *pathsep = 0; 600 } 601 else { 602 /* There was no slash. Now, since we might be operating on a badly 603 formatted URL, such as "http://www.url.com?id=2380" which doesn't 604 use a slash separator as it is supposed to, we need to check for a 605 ?-letter as well! */ 606 pathsep = strchr(protsep, '?'); 607 if(pathsep) 608 *pathsep = 0; 609 } 610 } 611 } 612 613 /* If the new part contains a space, this is a mighty stupid redirect 614 but we still make an effort to do "right". To the left of a '?' 615 letter we replace each space with %20 while it is replaced with '+' 616 on the right side of the '?' letter. 617 */ 618 newlen = Curl_strlen_url(useurl, !host_changed); 619 620 urllen = strlen(url_clone); 621 622 newest = malloc(urllen + 1 + /* possible slash */ 623 newlen + 1 /* zero byte */); 624 625 if(!newest) { 626 free(url_clone); /* don't leak this */ 627 return NULL; 628 } 629 630 /* copy over the root url part */ 631 memcpy(newest, url_clone, urllen); 632 633 /* check if we need to append a slash */ 634 if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' 
== useurl[0])) 635 ; 636 else 637 newest[urllen++]='/'; 638 639 /* then append the new piece on the right side */ 640 Curl_strcpy_url(&newest[urllen], useurl, !host_changed); 641 642 free(url_clone); 643 644 return newest; 645 } 646 647 /* 648 * parse_hostname_login() 649 * 650 * Parse the login details (user name, password and options) from the URL and 651 * strip them out of the host name 652 * 653 */ 654 static CURLUcode parse_hostname_login(struct Curl_URL *u, 655 char **hostname, 656 unsigned int flags) 657 { 658 CURLUcode result = CURLUE_OK; 659 CURLcode ccode; 660 char *userp = NULL; 661 char *passwdp = NULL; 662 char *optionsp = NULL; 663 664 /* At this point, we're hoping all the other special cases have 665 * been taken care of, so conn->host.name is at most 666 * [user[:password][;options]]@]hostname 667 * 668 * We need somewhere to put the embedded details, so do that first. 669 */ 670 671 char *ptr = strchr(*hostname, '@'); 672 char *login = *hostname; 673 674 if(!ptr) 675 goto out; 676 677 /* We will now try to extract the 678 * possible login information in a string like: 679 * ftp://user:password@ftp.my.site:8021/README */ 680 *hostname = ++ptr; 681 682 /* We could use the login information in the URL so extract it. Only parse 683 options if the handler says we should. Note that 'h' might be NULL! */ 684 ccode = Curl_parse_login_details(login, ptr - login - 1, 685 &userp, &passwdp, NULL); 686 if(ccode) { 687 result = CURLUE_MALFORMED_INPUT; 688 goto out; 689 } 690 691 if(userp) { 692 if(flags & CURLU_DISALLOW_USER) { 693 /* Option DISALLOW_USER is set and url contains username. 
*/ 694 result = CURLUE_USER_NOT_ALLOWED; 695 goto out; 696 } 697 698 u->user = userp; 699 } 700 701 if(passwdp) 702 u->password = passwdp; 703 704 if(optionsp) 705 u->options = optionsp; 706 707 return CURLUE_OK; 708 out: 709 710 free(userp); 711 free(passwdp); 712 free(optionsp); 713 714 return result; 715 } 716 717 static CURLUcode parse_port(struct Curl_URL *u, char *hostname) 718 { 719 char *portptr; 720 char endbracket; 721 int len; 722 723 if((1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.%%]%c%n", 724 &endbracket, &len)) && 725 (']' == endbracket)) { 726 /* this is a RFC2732-style specified IP-address */ 727 portptr = &hostname[len]; 728 if(*portptr) { 729 if(*portptr != ':') 730 return CURLUE_MALFORMED_INPUT; 731 } 732 else 733 portptr = NULL; 734 } 735 else 736 portptr = strchr(hostname, ':'); 737 738 if(portptr) { 739 char *rest; 740 long port; 741 char portbuf[7]; 742 743 if(!isdigit(portptr[1])) 744 return CURLUE_BAD_PORT_NUMBER; 745 746 port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ 747 748 if((port <= 0) || (port > 0xffff)) 749 /* Single unix standard says port numbers are 16 bits long, but we don't 750 treat port zero as OK. */ 751 return CURLUE_BAD_PORT_NUMBER; 752 753 if(rest[0]) 754 return CURLUE_BAD_PORT_NUMBER; 755 756 if(rest != &portptr[1]) { 757 *portptr++ = '\0'; /* cut off the name there */ 758 *rest = 0; 759 /* generate a new to get rid of leading zeroes etc */ 760 snprintf(portbuf, sizeof(portbuf), "%ld", port); 761 u->portnum = port; 762 u->port = strdup(portbuf); 763 if(!u->port) 764 return CURLUE_OUT_OF_MEMORY; 765 } 766 else { 767 /* Browser behavior adaptation. If there's a colon with no digits after, 768 just cut off the name there which makes us ignore the colon and just 769 use the default port. Firefox and Chrome both do that. 
*/ 770 *portptr = '\0'; 771 } 772 } 773 774 return CURLUE_OK; 775 } 776 777 /* scan for byte values < 31 or 127 */ 778 static CURLUcode junkscan(char *part) 779 { 780 char badbytes[]={ 781 /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 782 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 783 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 784 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 785 0x7f, 786 0x00 /* zero terminate */ 787 }; 788 if(part) { 789 size_t n = strlen(part); 790 size_t nfine = strcspn(part, badbytes); 791 if(nfine != n) 792 /* since we don't know which part is scanned, return a generic error 793 code */ 794 return CURLUE_MALFORMED_INPUT; 795 } 796 return CURLUE_OK; 797 } 798 799 static CURLUcode hostname_check(char *hostname, unsigned int flags) 800 { 801 const char *l = NULL; /* accepted characters */ 802 size_t len; 803 size_t hlen = strlen(hostname); 804 (void)flags; 805 806 if(hostname[0] == '[') { 807 hostname++; 808 l = "0123456789abcdefABCDEF::.%"; 809 hlen -= 2; 810 } 811 812 if(l) { 813 /* only valid letters are ok */ 814 len = strspn(hostname, l); 815 if(hlen != len) 816 /* hostname with bad content */ 817 return CURLUE_MALFORMED_INPUT; 818 } 819 else { 820 /* letters from the second string is not ok */ 821 len = strcspn(hostname, " "); 822 if(hlen != len) 823 /* hostname with bad content */ 824 return CURLUE_MALFORMED_INPUT; 825 } 826 return CURLUE_OK; 827 } 828 829 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#')) 830 831 static CURLUcode seturl(const char *url, struct Curl_URL *u, unsigned int flags) 832 { 833 char *path; 834 bool path_alloced = false; 835 char *hostname; 836 char *query = NULL; 837 char *fragment = NULL; 838 CURLUcode result; 839 bool url_has_scheme = false; 840 char schemebuf[MAX_SCHEME_LEN]; 841 char *schemep = NULL; 842 size_t schemelen = 0; 843 size_t urllen; 844 845 if(!url) 846 return CURLUE_MALFORMED_INPUT; 847 848 /************************************************************* 849 * 
Parse the URL. 850 ************************************************************/ 851 /* allocate scratch area */ 852 urllen = strlen(url); 853 path = u->scratch = malloc(urllen * 2 + 2); 854 if(!path) 855 return CURLUE_OUT_OF_MEMORY; 856 857 hostname = &path[urllen + 1]; 858 hostname[0] = 0; 859 860 if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) { 861 url_has_scheme = true; 862 schemelen = strlen(schemebuf); 863 } 864 865 /* handle the file: scheme */ 866 if(url_has_scheme && strcasecmp(schemebuf, "file") == 0) { 867 /* path has been allocated large enough to hold this */ 868 strcpy(path, &url[5]); 869 870 hostname = NULL; /* no host for file: URLs */ 871 u->scheme = strdup("file"); 872 if(!u->scheme) 873 return CURLUE_OUT_OF_MEMORY; 874 875 /* Extra handling URLs with an authority component (i.e. that start with 876 * "file://") 877 * 878 * We allow omitted hostname (e.g. file:/<path>) -- valid according to 879 * RFC 8089, but not the (current) WHAT-WG URL spec. 880 */ 881 if(path[0] == '/' && path[1] == '/') { 882 /* swallow the two slashes */ 883 char *ptr = &path[2]; 884 path = ptr; 885 } 886 } 887 else { 888 /* clear path */ 889 const char *p; 890 const char *hostp; 891 size_t len; 892 path[0] = 0; 893 894 if(url_has_scheme) { 895 int i = 0; 896 p = &url[schemelen + 1]; 897 while(p && (*p == '/') && (i < 4)) { 898 p++; 899 i++; 900 } 901 if((i < 1) || (i>3)) 902 /* less than one or more than three slashes */ 903 return CURLUE_MALFORMED_INPUT; 904 905 schemep = schemebuf; 906 if(junkscan(schemep)) 907 return CURLUE_MALFORMED_INPUT; 908 } 909 else { 910 /* no scheme! 
*/ 911 return CURLUE_MALFORMED_INPUT; 912 } 913 hostp = p; /* host name starts here */ 914 915 while(*p && !HOSTNAME_END(*p)) /* find end of host name */ 916 p++; 917 918 len = p - hostp; 919 if(!len) 920 return CURLUE_MALFORMED_INPUT; 921 922 memcpy(hostname, hostp, len); 923 hostname[len] = 0; 924 925 len = strlen(p); 926 memcpy(path, p, len); 927 path[len] = 0; 928 929 u->scheme = strdup(schemep); 930 if(!u->scheme) 931 return CURLUE_OUT_OF_MEMORY; 932 } 933 934 if(junkscan(path)) 935 return CURLUE_MALFORMED_INPUT; 936 937 query = strchr(path, '?'); 938 if(query) 939 *query++ = 0; 940 941 fragment = strchr(query?query:path, '#'); 942 if(fragment) 943 *fragment++ = 0; 944 945 if(!path[0]) 946 /* if there's no path set, unset */ 947 path = NULL; 948 else if(!(flags & CURLU_PATH_AS_IS)) { 949 /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ 950 char *newp = Curl_dedotdotify(path); 951 if(!newp) 952 return CURLUE_OUT_OF_MEMORY; 953 954 if(strcmp(newp, path)) { 955 /* if we got a new version */ 956 path = newp; 957 path_alloced = true; 958 } 959 else 960 free(newp); 961 } 962 if(path) { 963 u->path = path_alloced?path:strdup(path); 964 if(!u->path) 965 return CURLUE_OUT_OF_MEMORY; 966 } 967 968 if(hostname) { 969 /* 970 * Parse the login details and strip them out of the host name. 
971 */ 972 if(junkscan(hostname)) 973 return CURLUE_MALFORMED_INPUT; 974 975 result = parse_hostname_login(u, &hostname, flags); 976 if(result) 977 return result; 978 979 result = parse_port(u, hostname); 980 if(result) 981 return result; 982 983 result = hostname_check(hostname, flags); 984 if(result) 985 return result; 986 987 u->host = strdup(hostname); 988 if(!u->host) 989 return CURLUE_OUT_OF_MEMORY; 990 } 991 992 if(query && query[0]) { 993 u->query = strdup(query); 994 if(!u->query) 995 return CURLUE_OUT_OF_MEMORY; 996 } 997 if(fragment && fragment[0]) { 998 u->fragment = strdup(fragment); 999 if(!u->fragment) 1000 return CURLUE_OUT_OF_MEMORY; 1001 } 1002 1003 free(u->scratch); 1004 u->scratch = NULL; 1005 1006 return CURLUE_OK; 1007 } 1008 1009 /* 1010 * Parse the URL and set the relevant members of the Curl_URL struct. 1011 */ 1012 static CURLUcode parseurl(const char *url, struct Curl_URL *u, unsigned int flags) 1013 { 1014 CURLUcode result = seturl(url, u, flags); 1015 if(result) { 1016 free_urlhandle(u); 1017 memset(u, 0, sizeof(struct Curl_URL)); 1018 } 1019 return result; 1020 } 1021 1022 /* 1023 */ 1024 struct Curl_URL *curl_url(void) 1025 { 1026 return calloc(sizeof(struct Curl_URL), 1); 1027 } 1028 1029 void curl_url_cleanup(struct Curl_URL *u) 1030 { 1031 if(u) { 1032 free_urlhandle(u); 1033 free(u); 1034 } 1035 } 1036 1037 #define DUP(dest, src, name) \ 1038 if(src->name) { \ 1039 dest->name = strdup(src->name); \ 1040 if(!dest->name) \ 1041 goto fail; \ 1042 } 1043 1044 struct Curl_URL *curl_url_dup(struct Curl_URL *in) 1045 { 1046 struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1); 1047 if(u) { 1048 DUP(u, in, scheme); 1049 DUP(u, in, user); 1050 DUP(u, in, password); 1051 DUP(u, in, options); 1052 DUP(u, in, host); 1053 DUP(u, in, port); 1054 DUP(u, in, path); 1055 DUP(u, in, query); 1056 DUP(u, in, fragment); 1057 u->portnum = in->portnum; 1058 } 1059 return u; 1060 fail: 1061 curl_url_cleanup(u); 1062 return NULL; 1063 } 1064 1065 
CURLUcode curl_url_get(struct Curl_URL *u, CURLUPart what, 1066 char **part, unsigned int flags) 1067 { 1068 char *ptr; 1069 CURLUcode ifmissing = CURLUE_UNKNOWN_PART; 1070 bool urldecode = (flags & CURLU_URLDECODE)?1:0; 1071 bool plusdecode = false; 1072 (void)flags; 1073 if(!u) 1074 return CURLUE_BAD_HANDLE; 1075 if(!part) 1076 return CURLUE_BAD_PARTPOINTER; 1077 *part = NULL; 1078 1079 switch(what) { 1080 case CURLUPART_SCHEME: 1081 ptr = u->scheme; 1082 ifmissing = CURLUE_NO_SCHEME; 1083 urldecode = false; /* never for schemes */ 1084 break; 1085 case CURLUPART_USER: 1086 ptr = u->user; 1087 ifmissing = CURLUE_NO_USER; 1088 break; 1089 case CURLUPART_PASSWORD: 1090 ptr = u->password; 1091 ifmissing = CURLUE_NO_PASSWORD; 1092 break; 1093 case CURLUPART_OPTIONS: 1094 ptr = u->options; 1095 ifmissing = CURLUE_NO_OPTIONS; 1096 break; 1097 case CURLUPART_HOST: 1098 ptr = u->host; 1099 ifmissing = CURLUE_NO_HOST; 1100 break; 1101 case CURLUPART_PORT: 1102 ptr = u->port; 1103 ifmissing = CURLUE_NO_PORT; 1104 urldecode = false; /* never for port */ 1105 break; 1106 case CURLUPART_PATH: 1107 ptr = u->path; 1108 if(!ptr) { 1109 ptr = u->path = strdup("/"); 1110 if(!u->path) 1111 return CURLUE_OUT_OF_MEMORY; 1112 } 1113 break; 1114 case CURLUPART_QUERY: 1115 ptr = u->query; 1116 ifmissing = CURLUE_NO_QUERY; 1117 plusdecode = urldecode; 1118 break; 1119 case CURLUPART_FRAGMENT: 1120 ptr = u->fragment; 1121 ifmissing = CURLUE_NO_FRAGMENT; 1122 break; 1123 case CURLUPART_URL: { 1124 char *url; 1125 char *scheme; 1126 char *options = u->options; 1127 char *port = u->port; 1128 if(u->scheme && strcasecmp("file", u->scheme) == 0) { 1129 url = aprintf("file://%s%s%s", 1130 u->path, 1131 u->fragment? "#": "", 1132 u->fragment? 
u->fragment : ""); 1133 } 1134 else if(!u->host) 1135 return CURLUE_NO_HOST; 1136 else { 1137 if(u->scheme) 1138 scheme = u->scheme; 1139 else 1140 return CURLUE_NO_SCHEME; 1141 1142 options = NULL; 1143 1144 url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 1145 scheme, 1146 u->user ? u->user : "", 1147 u->password ? ":": "", 1148 u->password ? u->password : "", 1149 options ? ";" : "", 1150 options ? options : "", 1151 (u->user || u->password || options) ? "@": "", 1152 u->host, 1153 port ? ":": "", 1154 port ? port : "", 1155 (u->path && (u->path[0] != '/')) ? "/": "", 1156 u->path ? u->path : "/", 1157 u->query? "?": "", 1158 u->query? u->query : "", 1159 u->fragment? "#": "", 1160 u->fragment? u->fragment : ""); 1161 } 1162 if(!url) 1163 return CURLUE_OUT_OF_MEMORY; 1164 *part = url; 1165 return CURLUE_OK; 1166 break; 1167 } 1168 default: 1169 ptr = NULL; 1170 } 1171 if(ptr) { 1172 *part = strdup(ptr); 1173 if(!*part) 1174 return CURLUE_OUT_OF_MEMORY; 1175 if(plusdecode) { 1176 /* convert + to space */ 1177 char *plus; 1178 for(plus = *part; *plus; ++plus) { 1179 if(*plus == '+') 1180 *plus = ' '; 1181 } 1182 } 1183 if(urldecode) { 1184 char *decoded; 1185 size_t dlen; 1186 CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, true); 1187 free(*part); 1188 if(res) { 1189 *part = NULL; 1190 return CURLUE_URLDECODE; 1191 } 1192 *part = decoded; 1193 } 1194 return CURLUE_OK; 1195 } 1196 else 1197 return ifmissing; 1198 } 1199 1200 CURLUcode curl_url_set(struct Curl_URL *u, CURLUPart what, 1201 const char *part, unsigned int flags) 1202 { 1203 char **storep = NULL; 1204 long port = 0; 1205 bool urlencode = (flags & CURLU_URLENCODE)? 
1 : 0; 1206 bool plusencode = false; 1207 bool urlskipslash = false; 1208 bool appendquery = false; 1209 bool equalsencode = false; 1210 1211 if(!u) 1212 return CURLUE_BAD_HANDLE; 1213 if(!part) { 1214 /* setting a part to NULL clears it */ 1215 switch(what) { 1216 case CURLUPART_URL: 1217 break; 1218 case CURLUPART_SCHEME: 1219 storep = &u->scheme; 1220 break; 1221 case CURLUPART_USER: 1222 storep = &u->user; 1223 break; 1224 case CURLUPART_PASSWORD: 1225 storep = &u->password; 1226 break; 1227 case CURLUPART_OPTIONS: 1228 storep = &u->options; 1229 break; 1230 case CURLUPART_HOST: 1231 storep = &u->host; 1232 break; 1233 case CURLUPART_PORT: 1234 storep = &u->port; 1235 break; 1236 case CURLUPART_PATH: 1237 storep = &u->path; 1238 break; 1239 case CURLUPART_QUERY: 1240 storep = &u->query; 1241 break; 1242 case CURLUPART_FRAGMENT: 1243 storep = &u->fragment; 1244 break; 1245 default: 1246 return CURLUE_UNKNOWN_PART; 1247 } 1248 if(storep && *storep) { 1249 free(*storep); 1250 *storep = NULL; 1251 } 1252 return CURLUE_OK; 1253 } 1254 1255 switch(what) { 1256 case CURLUPART_SCHEME: 1257 storep = &u->scheme; 1258 urlencode = false; /* never */ 1259 break; 1260 case CURLUPART_USER: 1261 storep = &u->user; 1262 break; 1263 case CURLUPART_PASSWORD: 1264 storep = &u->password; 1265 break; 1266 case CURLUPART_OPTIONS: 1267 storep = &u->options; 1268 break; 1269 case CURLUPART_HOST: 1270 storep = &u->host; 1271 break; 1272 case CURLUPART_PORT: 1273 urlencode = false; /* never */ 1274 port = strtol(part, NULL, 10); /* Port number must be decimal */ 1275 if((port <= 0) || (port > 0xffff)) 1276 return CURLUE_BAD_PORT_NUMBER; 1277 storep = &u->port; 1278 break; 1279 case CURLUPART_PATH: 1280 urlskipslash = true; 1281 storep = &u->path; 1282 break; 1283 case CURLUPART_QUERY: 1284 plusencode = urlencode; 1285 appendquery = (flags & CURLU_APPENDQUERY)?1:0; 1286 equalsencode = appendquery; 1287 storep = &u->query; 1288 break; 1289 case CURLUPART_FRAGMENT: 1290 storep = 
&u->fragment; 1291 break; 1292 case CURLUPART_URL: { 1293 /* 1294 * Allow a new URL to replace the existing (if any) contents. 1295 * 1296 * If the existing contents is enough for a URL, allow a relative URL to 1297 * replace it. 1298 */ 1299 CURLUcode result; 1300 char *oldurl; 1301 char *redired_url; 1302 struct Curl_URL *handle2; 1303 1304 if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) { 1305 handle2 = curl_url(); 1306 if(!handle2) 1307 return CURLUE_OUT_OF_MEMORY; 1308 result = parseurl(part, handle2, flags); 1309 if(!result) 1310 mv_urlhandle(handle2, u); 1311 else 1312 curl_url_cleanup(handle2); 1313 return result; 1314 } 1315 /* extract the full "old" URL to do the redirect on */ 1316 result = curl_url_get(u, CURLUPART_URL, &oldurl, flags); 1317 if(result) { 1318 /* couldn't get the old URL, just use the new! */ 1319 handle2 = curl_url(); 1320 if(!handle2) 1321 return CURLUE_OUT_OF_MEMORY; 1322 result = parseurl(part, handle2, flags); 1323 if(!result) 1324 mv_urlhandle(handle2, u); 1325 else 1326 curl_url_cleanup(handle2); 1327 return result; 1328 } 1329 1330 /* apply the relative part to create a new URL */ 1331 redired_url = Curl_concat_url(oldurl, part); 1332 free(oldurl); 1333 if(!redired_url) 1334 return CURLUE_OUT_OF_MEMORY; 1335 1336 /* now parse the new URL */ 1337 handle2 = curl_url(); 1338 if(!handle2) { 1339 free(redired_url); 1340 return CURLUE_OUT_OF_MEMORY; 1341 } 1342 result = parseurl(redired_url, handle2, flags); 1343 free(redired_url); 1344 if(!result) 1345 mv_urlhandle(handle2, u); 1346 else 1347 curl_url_cleanup(handle2); 1348 return result; 1349 } 1350 default: 1351 return CURLUE_UNKNOWN_PART; 1352 } 1353 if(storep) { 1354 const char *newp = part; 1355 size_t nalloc = strlen(part); 1356 1357 if(urlencode) { 1358 const char *i; 1359 char *o; 1360 bool free_part = false; 1361 char *enc = malloc(nalloc * 3 + 1); /* for worst case! 
*/ 1362 if(!enc) 1363 return CURLUE_OUT_OF_MEMORY; 1364 for(i = part, o = enc; *i; i++) { 1365 if(Curl_isunreserved(*i) || 1366 ((*i == '/') && urlskipslash) || 1367 ((*i == '=') && equalsencode) || 1368 ((*i == '+') && plusencode)) { 1369 if((*i == '=') && equalsencode) 1370 /* only skip the first equals sign */ 1371 equalsencode = false; 1372 *o = *i; 1373 o++; 1374 } 1375 else { 1376 snprintf(o, 4, "%%%02x", *i); 1377 o += 3; 1378 } 1379 } 1380 *o = 0; /* zero terminate */ 1381 newp = enc; 1382 if(free_part) 1383 free((char *)part); 1384 } 1385 else { 1386 char *p; 1387 newp = strdup(part); 1388 if(!newp) 1389 return CURLUE_OUT_OF_MEMORY; 1390 p = (char *)newp; 1391 while(*p) { 1392 /* make sure percent encoded are lower case */ 1393 if((*p == '%') && isxdigit(p[1]) && isxdigit(p[2]) && 1394 (isupper(p[1]) || isupper(p[2]))) { 1395 p[1] = (char)tolower(p[1]); 1396 p[2] = (char)tolower(p[2]); 1397 p += 3; 1398 } 1399 else 1400 p++; 1401 } 1402 } 1403 1404 if(appendquery) { 1405 /* Append the string onto the old query. Add a '&' separator if none is 1406 present at the end of the exsting query already */ 1407 size_t querylen = u->query ? strlen(u->query) : 0; 1408 bool addamperand = querylen && (u->query[querylen -1] != '&'); 1409 if(querylen) { 1410 size_t newplen = strlen(newp); 1411 char *p = malloc(querylen + addamperand + newplen + 1); 1412 if(!p) { 1413 free((char *)newp); 1414 return CURLUE_OUT_OF_MEMORY; 1415 } 1416 strcpy(p, u->query); /* original query */ 1417 if(addamperand) 1418 p[querylen] = '&'; /* ampersand */ 1419 strcpy(&p[querylen + addamperand], newp); /* new suffix */ 1420 free((char *)newp); 1421 free(*storep); 1422 *storep = p; 1423 return CURLUE_OK; 1424 } 1425 } 1426 1427 free(*storep); 1428 *storep = (char *)newp; 1429 } 1430 /* set after the string, to make it not assigned if the allocation above 1431 fails */ 1432 if(port) 1433 u->portnum = port; 1434 return CURLUE_OK; 1435 }