gmni

a gemini line mode client
git clone https://git.clttr.info/gmni.git
Log (Feed) | Files | Refs (Tags) | README | LICENSE

url.c (35755B)


      1 /***************************************************************************
      2  *                                  _   _ ____  _
      3  *  Project                     ___| | | |  _ \| |
      4  *                             / __| | | | |_) | |
      5  *                            | (__| |_| |  _ <| |___
      6  *                             \___|\___/|_| \_\_____|
      7  *
      8  * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
      9  *
     10  * This software is licensed as described in the file COPYING, which
     11  * you should have received as part of this distribution. The terms
     12  * are also available at https://curl.haxx.se/docs/copyright.html.
     13  *
     14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
     15  * copies of the Software, and permit persons to whom the Software is
     16  * furnished to do so, under the terms of the COPYING file.
     17  *
     18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
     19  * KIND, either express or implied.
     20  *
     21  ***************************************************************************/
     22 
     23 #define MAX_SCHEME_LEN 8
     24 
     25 #include <assert.h>
     26 #include <ctype.h>
     27 #include <stdarg.h>
     28 #include <stdbool.h>
     29 #include <stdio.h>
     30 #include <stdlib.h>
     31 #include <string.h>
     32 #include <strings.h>
     33 #include "escape.h"
     34 #include <gmni/url.h>
     35 
     36 /* Provided by gmni */
     37 static char *
     38 aprintf(const char *fmt, ...)
     39 {
     40 	va_list ap;
     41 	va_start(ap, fmt);
     42 	int n = vsnprintf(NULL, 0, fmt, ap);
     43 	va_end(ap);
     44 
     45 	char *strp = calloc(n + 1, 1);
     46 	assert(strp);
     47 
     48 	va_start(ap, fmt);
     49 	n = vsnprintf(strp, n + 1, fmt, ap);
     50 	va_end(ap);
     51 	return strp;
     52 }
     53 
     54 /* via lib/dotdot.c */
     55 char *Curl_dedotdotify(const char *input)
     56 {
     57   size_t inlen = strlen(input);
     58   char *clone;
     59   size_t clen = inlen; /* the length of the cloned input */
     60   char *out = malloc(inlen + 1);
     61   char *outptr;
     62   char *orgclone;
     63   char *queryp;
     64   if(!out)
     65     return NULL; /* out of memory */
     66 
     67   *out = 0; /* zero terminates, for inputs like "./" */
     68 
     69   /* get a cloned copy of the input */
     70   clone = strdup(input);
     71   if(!clone) {
     72     free(out);
     73     return NULL;
     74   }
     75   orgclone = clone;
     76   outptr = out;
     77 
     78   if(!*clone) {
     79     /* zero length string, return that */
     80     free(out);
     81     return clone;
     82   }
     83 
     84   /*
     85    * To handle query-parts properly, we must find it and remove it during the
     86    * dotdot-operation and then append it again at the end to the output
     87    * string.
     88    */
     89   queryp = strchr(clone, '?');
     90   if(queryp)
     91     *queryp = 0;
     92 
     93   do {
     94 
     95     /*  A.  If the input buffer begins with a prefix of "../" or "./", then
     96         remove that prefix from the input buffer; otherwise, */
     97 
     98     if(!strncmp("./", clone, 2)) {
     99       clone += 2;
    100       clen -= 2;
    101     }
    102     else if(!strncmp("../", clone, 3)) {
    103       clone += 3;
    104       clen -= 3;
    105     }
    106 
    107     /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
    108         "."  is a complete path segment, then replace that prefix with "/" in
    109         the input buffer; otherwise, */
    110     else if(!strncmp("/./", clone, 3)) {
    111       clone += 2;
    112       clen -= 2;
    113     }
    114     else if(!strcmp("/.", clone)) {
    115       clone[1]='/';
    116       clone++;
    117       clen -= 1;
    118     }
    119 
    120     /*  C.  if the input buffer begins with a prefix of "/../" or "/..", where
    121         ".." is a complete path segment, then replace that prefix with "/" in
    122         the input buffer and remove the last segment and its preceding "/" (if
    123         any) from the output buffer; otherwise, */
    124 
    125     else if(!strncmp("/../", clone, 4)) {
    126       clone += 3;
    127       clen -= 3;
    128       /* remove the last segment from the output buffer */
    129       while(outptr > out) {
    130         outptr--;
    131         if(*outptr == '/')
    132           break;
    133       }
    134       *outptr = 0; /* zero-terminate where it stops */
    135     }
    136     else if(!strcmp("/..", clone)) {
    137       clone[2]='/';
    138       clone += 2;
    139       clen -= 2;
    140       /* remove the last segment from the output buffer */
    141       while(outptr > out) {
    142         outptr--;
    143         if(*outptr == '/')
    144           break;
    145       }
    146       *outptr = 0; /* zero-terminate where it stops */
    147     }
    148 
    149     /*  D.  if the input buffer consists only of "." or "..", then remove
    150         that from the input buffer; otherwise, */
    151 
    152     else if(!strcmp(".", clone) || !strcmp("..", clone)) {
    153       *clone = 0;
    154       *out = 0;
    155     }
    156 
    157     else {
    158       /*  E.  move the first path segment in the input buffer to the end of
    159           the output buffer, including the initial "/" character (if any) and
    160           any subsequent characters up to, but not including, the next "/"
    161           character or the end of the input buffer. */
    162 
    163       do {
    164         *outptr++ = *clone++;
    165         clen--;
    166       } while(*clone && (*clone != '/'));
    167       *outptr = 0;
    168     }
    169 
    170   } while(*clone);
    171 
    172   if(queryp) {
    173     size_t qlen;
    174     /* There was a query part, append that to the output. The 'clone' string
    175        may now have been altered so we copy from the original input string
    176        from the correct index. */
    177     size_t oindex = queryp - orgclone;
    178     qlen = strlen(&input[oindex]);
    179     memcpy(outptr, &input[oindex], qlen + 1); /* include the end zero byte */
    180   }
    181 
    182   free(orgclone);
    183   return out;
    184 }
    185 
    186 /* via lib/url.c */
    187 CURLcode Curl_parse_login_details(const char *login, const size_t len,
    188                                   char **userp, char **passwdp,
    189                                   char **optionsp)
    190 {
    191   CURLcode result = CURLE_OK;
    192   char *ubuf = NULL;
    193   char *pbuf = NULL;
    194   char *obuf = NULL;
    195   const char *psep = NULL;
    196   const char *osep = NULL;
    197   size_t ulen;
    198   size_t plen;
    199   size_t olen;
    200 
    201   /* Attempt to find the password separator */
    202   if(passwdp) {
    203     psep = strchr(login, ':');
    204 
    205     /* Within the constraint of the login string */
    206     if(psep >= login + len)
    207       psep = NULL;
    208   }
    209 
    210   /* Attempt to find the options separator */
    211   if(optionsp) {
    212     osep = strchr(login, ';');
    213 
    214     /* Within the constraint of the login string */
    215     if(osep >= login + len)
    216       osep = NULL;
    217   }
    218 
    219   /* Calculate the portion lengths */
    220   ulen = (psep ?
    221           (size_t)(osep && psep > osep ? osep - login : psep - login) :
    222           (osep ? (size_t)(osep - login) : len));
    223   plen = (psep ?
    224           (osep && osep > psep ? (size_t)(osep - psep) :
    225                                  (size_t)(login + len - psep)) - 1 : 0);
    226   olen = (osep ?
    227           (psep && psep > osep ? (size_t)(psep - osep) :
    228                                  (size_t)(login + len - osep)) - 1 : 0);
    229 
    230   /* Allocate the user portion buffer */
    231   if(userp && ulen) {
    232     ubuf = malloc(ulen + 1);
    233     if(!ubuf)
    234       result = CURLE_OUT_OF_MEMORY;
    235   }
    236 
    237   /* Allocate the password portion buffer */
    238   if(!result && passwdp && plen) {
    239     pbuf = malloc(plen + 1);
    240     if(!pbuf) {
    241       free(ubuf);
    242       result = CURLE_OUT_OF_MEMORY;
    243     }
    244   }
    245 
    246   /* Allocate the options portion buffer */
    247   if(!result && optionsp && olen) {
    248     obuf = malloc(olen + 1);
    249     if(!obuf) {
    250       free(pbuf);
    251       free(ubuf);
    252       result = CURLE_OUT_OF_MEMORY;
    253     }
    254   }
    255 
    256   if(!result) {
    257     /* Store the user portion if necessary */
    258     if(ubuf) {
    259       memcpy(ubuf, login, ulen);
    260       ubuf[ulen] = '\0';
    261       free(*userp);
    262       *userp = ubuf;
    263     }
    264 
    265     /* Store the password portion if necessary */
    266     if(pbuf) {
    267       memcpy(pbuf, psep + 1, plen);
    268       pbuf[plen] = '\0';
    269       free(*passwdp);
    270       *passwdp = pbuf;
    271     }
    272 
    273     /* Store the options portion if necessary */
    274     if(obuf) {
    275       memcpy(obuf, osep + 1, olen);
    276       obuf[olen] = '\0';
    277       free(*optionsp);
    278       *optionsp = obuf;
    279     }
    280   }
    281 
    282   return result;
    283 }
    284 
    285 /* Internal representation of CURLU. Point to URL-encoded strings. */
    286 struct Curl_URL {
    287   char *scheme;
    288   char *user;
    289   char *password;
    290   char *options; /* IMAP only? */
    291   char *host;
    292   char *port;
    293   char *path;
    294   char *query;
    295   char *fragment;
    296 
    297   char *scratch; /* temporary scratch area */
    298   long portnum; /* the numerical version */
    299 };
    300 
    301 #define DEFAULT_SCHEME "https"
    302 
    303 static void free_urlhandle(struct Curl_URL *u)
    304 {
    305   free(u->scheme);
    306   free(u->user);
    307   free(u->password);
    308   free(u->options);
    309   free(u->host);
    310   free(u->port);
    311   free(u->path);
    312   free(u->query);
    313   free(u->fragment);
    314   free(u->scratch);
    315 }
    316 
    317 /* move the full contents of one handle onto another and
    318    free the original */
    319 static void mv_urlhandle(struct Curl_URL *from,
    320                          struct Curl_URL *to)
    321 {
    322   free_urlhandle(to);
    323   *to = *from;
    324   free(from);
    325 }
    326 
    327 /*
    328  * Find the separator at the end of the host name, or the '?' in cases like
    329  * http://www.url.com?id=2380
    330  */
    331 static const char *find_host_sep(const char *url)
    332 {
    333   const char *sep;
    334   const char *query;
    335 
    336   /* Find the start of the hostname */
    337   sep = strstr(url, "//");
    338   if(!sep)
    339     sep = url;
    340   else
    341     sep += 2;
    342 
    343   query = strchr(sep, '?');
    344   sep = strchr(sep, '/');
    345 
    346   if(!sep)
    347     sep = url + strlen(url);
    348 
    349   if(!query)
    350     query = url + strlen(url);
    351 
    352   return sep < query ? sep : query;
    353 }
    354 
    355 /*
    356  * Decide in an encoding-independent manner whether a character in an
    357  * URL must be escaped. The same criterion must be used in strlen_url()
    358  * and strcpy_url().
    359  */
    360 static bool urlchar_needs_escaping(int c)
    361 {
    362     return !(iscntrl(c) || isspace(c) || isgraph(c));
    363 }
    364 
    365 /*
    366  * strlen_url() returns the length of the given URL if the spaces within the
    367  * URL were properly URL encoded.
    368  * URL encoding should be skipped for host names, otherwise IDN resolution
    369  * will fail.
    370  */
    371 size_t Curl_strlen_url(const char *url, bool relative)
    372 {
    373   const unsigned char *ptr;
    374   size_t newlen = 0;
    375   bool left = true; /* left side of the ? */
    376   const unsigned char *host_sep = (const unsigned char *) url;
    377 
    378   if(!relative)
    379     host_sep = (const unsigned char *) find_host_sep(url);
    380 
    381   for(ptr = (unsigned char *)url; *ptr; ptr++) {
    382 
    383     if(ptr < host_sep) {
    384       ++newlen;
    385       continue;
    386     }
    387 
    388     switch(*ptr) {
    389     case '?':
    390       left = false;
    391       /* FALLTHROUGH */
    392     default:
    393       if(urlchar_needs_escaping(*ptr))
    394         newlen += 2;
    395       newlen++;
    396       break;
    397     case ' ':
    398       if(left)
    399         newlen += 3;
    400       else
    401         newlen++;
    402       break;
    403     }
    404   }
    405   return newlen;
    406 }
    407 
    408 /* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
    409  * the source URL accordingly.
    410  * URL encoding should be skipped for host names, otherwise IDN resolution
    411  * will fail.
    412  */
    413 void Curl_strcpy_url(char *output, const char *url, bool relative)
    414 {
    415   /* we must add this with whitespace-replacing */
    416   bool left = true;
    417   const unsigned char *iptr;
    418   char *optr = output;
    419   const unsigned char *host_sep = (const unsigned char *) url;
    420 
    421   if(!relative)
    422     host_sep = (const unsigned char *) find_host_sep(url);
    423 
    424   for(iptr = (unsigned char *)url;    /* read from here */
    425       *iptr;         /* until zero byte */
    426       iptr++) {
    427 
    428     if(iptr < host_sep) {
    429       *optr++ = *iptr;
    430       continue;
    431     }
    432 
    433     switch(*iptr) {
    434     case '?':
    435       left = false;
    436       /* FALLTHROUGH */
    437     default:
    438       if(urlchar_needs_escaping(*iptr)) {
    439         snprintf(optr, 4, "%%%02x", *iptr);
    440         optr += 3;
    441       }
    442       else
    443         *optr++=*iptr;
    444       break;
    445     case ' ':
    446       if(left) {
    447         *optr++='%'; /* add a '%' */
    448         *optr++='2'; /* add a '2' */
    449         *optr++='0'; /* add a '0' */
    450       }
    451       else
    452         *optr++='+'; /* add a '+' here */
    453       break;
    454     }
    455   }
    456   *optr = 0; /* zero terminate output buffer */
    457 
    458 }
    459 
    460 /*
    461  * Returns true if the given URL is absolute (as opposed to relative) within
    462  * the buffer size. Returns the scheme in the buffer if true and 'buf' is
    463  * non-NULL.
    464  */
    465 bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen)
    466 {
    467   size_t i;
    468   for(i = 0; i < buflen && url[i]; ++i) {
    469     char s = url[i];
    470     if((s == ':') && (url[i + 1] == '/')) {
    471       if(buf)
    472         buf[i] = 0;
    473       return true;
    474     }
    475     /* RFC 3986 3.1 explains:
    476       scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    477     */
    478     else if(isalnum(s) || (s == '+') || (s == '-') || (s == '.') ) {
    479       if(buf)
    480         buf[i] = (char)tolower(s);
    481     }
    482     else
    483       break;
    484   }
    485   return false;
    486 }
    487 
    488 /*
    489  * Concatenate a relative URL to a base URL making it absolute.
    490  * URL-encodes any spaces.
    491  * The returned pointer must be freed by the caller unless NULL
    492  * (returns NULL on out of memory).
    493  */
    494 char *Curl_concat_url(const char *base, const char *relurl)
    495 {
    496   /***
    497    TRY to append this new path to the old URL
    498    to the right of the host part. Oh crap, this is doomed to cause
    499    problems in the future...
    500   */
    501   char *newest;
    502   char *protsep;
    503   char *pathsep;
    504   size_t newlen;
    505   bool host_changed = false;
    506 
    507   const char *useurl = relurl;
    508   size_t urllen;
    509 
    510   /* we must make our own copy of the URL to play with, as it may
    511      point to read-only data */
    512   char *url_clone = strdup(base);
    513 
    514   if(!url_clone)
    515     return NULL; /* skip out of this NOW */
    516 
    517   /* protsep points to the start of the host name */
    518   protsep = strstr(url_clone, "//");
    519   if(!protsep)
    520     protsep = url_clone;
    521   else
    522     protsep += 2; /* pass the slashes */
    523 
    524   if('/' != relurl[0]) {
    525     int level = 0;
    526 
    527     /* First we need to find out if there's a ?-letter in the URL,
    528        and cut it and the right-side of that off */
    529     pathsep = strchr(protsep, '?');
    530     if(pathsep)
    531       *pathsep = 0;
    532 
    533     /* we have a relative path to append to the last slash if there's one
    534        available, or if the new URL is just a query string (starts with a
    535        '?')  we append the new one at the end of the entire currently worked
    536        out URL */
    537     if(useurl[0] != '?') {
    538       pathsep = strrchr(protsep, '/');
    539       if(pathsep)
    540         *pathsep = 0;
    541     }
    542 
    543     /* Check if there's any slash after the host name, and if so, remember
    544        that position instead */
    545     pathsep = strchr(protsep, '/');
    546     if(pathsep)
    547       protsep = pathsep + 1;
    548     else
    549       protsep = NULL;
    550 
    551     /* now deal with one "./" or any amount of "../" in the newurl
    552        and act accordingly */
    553 
    554     if((useurl[0] == '.') && (useurl[1] == '/'))
    555       useurl += 2; /* just skip the "./" */
    556 
    557     while((useurl[0] == '.') &&
    558           (useurl[1] == '.') &&
    559           (useurl[2] == '/')) {
    560       level++;
    561       useurl += 3; /* pass the "../" */
    562     }
    563 
    564     if(protsep) {
    565       while(level--) {
    566         /* cut off one more level from the right of the original URL */
    567         pathsep = strrchr(protsep, '/');
    568         if(pathsep)
    569           *pathsep = 0;
    570         else {
    571           *protsep = 0;
    572           break;
    573         }
    574       }
    575     }
    576   }
    577   else {
    578     /* We got a new absolute path for this server */
    579 
    580     if((relurl[0] == '/') && (relurl[1] == '/')) {
    581       /* the new URL starts with //, just keep the protocol part from the
    582          original one */
    583       *protsep = 0;
    584       useurl = &relurl[2]; /* we keep the slashes from the original, so we
    585                               skip the new ones */
    586       host_changed = true;
    587     }
    588     else {
    589       /* cut off the original URL from the first slash, or deal with URLs
    590          without slash */
    591       pathsep = strchr(protsep, '/');
    592       if(pathsep) {
    593         /* When people use badly formatted URLs, such as
    594            "http://www.url.com?dir=/home/daniel" we must not use the first
    595            slash, if there's a ?-letter before it! */
    596         char *sep = strchr(protsep, '?');
    597         if(sep && (sep < pathsep))
    598           pathsep = sep;
    599         *pathsep = 0;
    600       }
    601       else {
    602         /* There was no slash. Now, since we might be operating on a badly
    603            formatted URL, such as "http://www.url.com?id=2380" which doesn't
    604            use a slash separator as it is supposed to, we need to check for a
    605            ?-letter as well! */
    606         pathsep = strchr(protsep, '?');
    607         if(pathsep)
    608           *pathsep = 0;
    609       }
    610     }
    611   }
    612 
    613   /* If the new part contains a space, this is a mighty stupid redirect
    614      but we still make an effort to do "right". To the left of a '?'
    615      letter we replace each space with %20 while it is replaced with '+'
    616      on the right side of the '?' letter.
    617   */
    618   newlen = Curl_strlen_url(useurl, !host_changed);
    619 
    620   urllen = strlen(url_clone);
    621 
    622   newest = malloc(urllen + 1 + /* possible slash */
    623                   newlen + 1 /* zero byte */);
    624 
    625   if(!newest) {
    626     free(url_clone); /* don't leak this */
    627     return NULL;
    628   }
    629 
    630   /* copy over the root url part */
    631   memcpy(newest, url_clone, urllen);
    632 
    633   /* check if we need to append a slash */
    634   if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    635     ;
    636   else
    637     newest[urllen++]='/';
    638 
    639   /* then append the new piece on the right side */
    640   Curl_strcpy_url(&newest[urllen], useurl, !host_changed);
    641 
    642   free(url_clone);
    643 
    644   return newest;
    645 }
    646 
    647 /*
    648  * parse_hostname_login()
    649  *
    650  * Parse the login details (user name, password and options) from the URL and
    651  * strip them out of the host name
    652  *
    653  */
    654 static CURLUcode parse_hostname_login(struct Curl_URL *u,
    655                                       char **hostname,
    656                                       unsigned int flags)
    657 {
    658   CURLUcode result = CURLUE_OK;
    659   CURLcode ccode;
    660   char *userp = NULL;
    661   char *passwdp = NULL;
    662   char *optionsp = NULL;
    663 
    664   /* At this point, we're hoping all the other special cases have
    665    * been taken care of, so conn->host.name is at most
    666    *    [user[:password][;options]]@]hostname
    667    *
    668    * We need somewhere to put the embedded details, so do that first.
    669    */
    670 
    671   char *ptr = strchr(*hostname, '@');
    672   char *login = *hostname;
    673 
    674   if(!ptr)
    675     goto out;
    676 
    677   /* We will now try to extract the
    678    * possible login information in a string like:
    679    * ftp://user:password@ftp.my.site:8021/README */
    680   *hostname = ++ptr;
    681 
    682   /* We could use the login information in the URL so extract it. Only parse
    683      options if the handler says we should. Note that 'h' might be NULL! */
    684   ccode = Curl_parse_login_details(login, ptr - login - 1,
    685                                    &userp, &passwdp, NULL);
    686   if(ccode) {
    687     result = CURLUE_MALFORMED_INPUT;
    688     goto out;
    689   }
    690 
    691   if(userp) {
    692     if(flags & CURLU_DISALLOW_USER) {
    693       /* Option DISALLOW_USER is set and url contains username. */
    694       result = CURLUE_USER_NOT_ALLOWED;
    695       goto out;
    696     }
    697 
    698     u->user = userp;
    699   }
    700 
    701   if(passwdp)
    702     u->password = passwdp;
    703 
    704   if(optionsp)
    705     u->options = optionsp;
    706 
    707   return CURLUE_OK;
    708   out:
    709 
    710   free(userp);
    711   free(passwdp);
    712   free(optionsp);
    713 
    714   return result;
    715 }
    716 
    717 static CURLUcode parse_port(struct Curl_URL *u, char *hostname)
    718 {
    719   char *portptr;
    720   char endbracket;
    721   int len;
    722 
    723   if((1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.%%]%c%n",
    724                   &endbracket, &len)) &&
    725      (']' == endbracket)) {
    726     /* this is a RFC2732-style specified IP-address */
    727     portptr = &hostname[len];
    728     if(*portptr) {
    729       if(*portptr != ':')
    730         return CURLUE_MALFORMED_INPUT;
    731     }
    732     else
    733       portptr = NULL;
    734   }
    735   else
    736     portptr = strchr(hostname, ':');
    737 
    738   if(portptr) {
    739     char *rest;
    740     long port;
    741     char portbuf[7];
    742 
    743     if(!isdigit(portptr[1]))
    744       return CURLUE_BAD_PORT_NUMBER;
    745 
    746     port = strtol(portptr + 1, &rest, 10);  /* Port number must be decimal */
    747 
    748     if((port <= 0) || (port > 0xffff))
    749       /* Single unix standard says port numbers are 16 bits long, but we don't
    750          treat port zero as OK. */
    751       return CURLUE_BAD_PORT_NUMBER;
    752 
    753     if(rest[0])
    754       return CURLUE_BAD_PORT_NUMBER;
    755 
    756     if(rest != &portptr[1]) {
    757       *portptr++ = '\0'; /* cut off the name there */
    758       *rest = 0;
    759       /* generate a new to get rid of leading zeroes etc */
    760       snprintf(portbuf, sizeof(portbuf), "%ld", port);
    761       u->portnum = port;
    762       u->port = strdup(portbuf);
    763       if(!u->port)
    764         return CURLUE_OUT_OF_MEMORY;
    765     }
    766     else {
    767       /* Browser behavior adaptation. If there's a colon with no digits after,
    768          just cut off the name there which makes us ignore the colon and just
    769          use the default port. Firefox and Chrome both do that. */
    770       *portptr = '\0';
    771     }
    772   }
    773 
    774   return CURLUE_OK;
    775 }
    776 
    777 /* scan for byte values < 31 or 127 */
    778 static CURLUcode junkscan(char *part)
    779 {
    780   char badbytes[]={
    781     /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    782     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    783     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    784     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    785     0x7f,
    786     0x00 /* zero terminate */
    787   };
    788   if(part) {
    789     size_t n = strlen(part);
    790     size_t nfine = strcspn(part, badbytes);
    791     if(nfine != n)
    792       /* since we don't know which part is scanned, return a generic error
    793          code */
    794       return CURLUE_MALFORMED_INPUT;
    795   }
    796   return CURLUE_OK;
    797 }
    798 
    799 static CURLUcode hostname_check(char *hostname, unsigned int flags)
    800 {
    801   const char *l = NULL; /* accepted characters */
    802   size_t len;
    803   size_t hlen = strlen(hostname);
    804   (void)flags;
    805 
    806   if(hostname[0] == '[') {
    807     hostname++;
    808     l = "0123456789abcdefABCDEF::.%";
    809     hlen -= 2;
    810   }
    811 
    812   if(l) {
    813     /* only valid letters are ok */
    814     len = strspn(hostname, l);
    815     if(hlen != len)
    816       /* hostname with bad content */
    817       return CURLUE_MALFORMED_INPUT;
    818   }
    819   else {
    820     /* letters from the second string is not ok */
    821     len = strcspn(hostname, " ");
    822     if(hlen != len)
    823       /* hostname with bad content */
    824       return CURLUE_MALFORMED_INPUT;
    825   }
    826   return CURLUE_OK;
    827 }
    828 
    829 #define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
    830 
    831 static CURLUcode seturl(const char *url, struct Curl_URL *u, unsigned int flags)
    832 {
    833   char *path;
    834   bool path_alloced = false;
    835   char *hostname;
    836   char *query = NULL;
    837   char *fragment = NULL;
    838   CURLUcode result;
    839   bool url_has_scheme = false;
    840   char schemebuf[MAX_SCHEME_LEN];
    841   char *schemep = NULL;
    842   size_t schemelen = 0;
    843   size_t urllen;
    844 
    845   if(!url)
    846     return CURLUE_MALFORMED_INPUT;
    847 
    848   /*************************************************************
    849    * Parse the URL.
    850    ************************************************************/
    851   /* allocate scratch area */
    852   urllen = strlen(url);
    853   path = u->scratch = malloc(urllen * 2 + 2);
    854   if(!path)
    855     return CURLUE_OUT_OF_MEMORY;
    856 
    857   hostname = &path[urllen + 1];
    858   hostname[0] = 0;
    859 
    860   if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) {
    861     url_has_scheme = true;
    862     schemelen = strlen(schemebuf);
    863   }
    864 
    865   /* handle the file: scheme */
    866   if(url_has_scheme && strcasecmp(schemebuf, "file") == 0) {
    867     /* path has been allocated large enough to hold this */
    868     strcpy(path, &url[5]);
    869 
    870     hostname = NULL; /* no host for file: URLs */
    871     u->scheme = strdup("file");
    872     if(!u->scheme)
    873       return CURLUE_OUT_OF_MEMORY;
    874 
    875     /* Extra handling URLs with an authority component (i.e. that start with
    876      * "file://")
    877      *
    878      * We allow omitted hostname (e.g. file:/<path>) -- valid according to
    879      * RFC 8089, but not the (current) WHAT-WG URL spec.
    880      */
    881     if(path[0] == '/' && path[1] == '/') {
    882       /* swallow the two slashes */
    883       char *ptr = &path[2];
    884       path = ptr;
    885     }
    886   }
    887   else {
    888     /* clear path */
    889     const char *p;
    890     const char *hostp;
    891     size_t len;
    892     path[0] = 0;
    893 
    894     if(url_has_scheme) {
    895       int i = 0;
    896       p = &url[schemelen + 1];
    897       while(p && (*p == '/') && (i < 4)) {
    898         p++;
    899         i++;
    900       }
    901       if((i < 1) || (i>3))
    902         /* less than one or more than three slashes */
    903         return CURLUE_MALFORMED_INPUT;
    904 
    905       schemep = schemebuf;
    906       if(junkscan(schemep))
    907         return CURLUE_MALFORMED_INPUT;
    908     }
    909     else {
    910       /* no scheme! */
    911       return CURLUE_MALFORMED_INPUT;
    912     }
    913     hostp = p; /* host name starts here */
    914 
    915     while(*p && !HOSTNAME_END(*p)) /* find end of host name */
    916       p++;
    917 
    918     len = p - hostp;
    919     if(!len)
    920       return CURLUE_MALFORMED_INPUT;
    921 
    922     memcpy(hostname, hostp, len);
    923     hostname[len] = 0;
    924 
    925     len = strlen(p);
    926     memcpy(path, p, len);
    927     path[len] = 0;
    928 
    929     u->scheme = strdup(schemep);
    930     if(!u->scheme)
    931       return CURLUE_OUT_OF_MEMORY;
    932   }
    933 
    934   if(junkscan(path))
    935     return CURLUE_MALFORMED_INPUT;
    936 
    937   query = strchr(path, '?');
    938   if(query)
    939     *query++ = 0;
    940 
    941   fragment = strchr(query?query:path, '#');
    942   if(fragment)
    943     *fragment++ = 0;
    944 
    945   if(!path[0])
    946     /* if there's no path set, unset */
    947     path = NULL;
    948   else if(!(flags & CURLU_PATH_AS_IS)) {
    949     /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
    950     char *newp = Curl_dedotdotify(path);
    951     if(!newp)
    952       return CURLUE_OUT_OF_MEMORY;
    953 
    954     if(strcmp(newp, path)) {
    955       /* if we got a new version */
    956       path = newp;
    957       path_alloced = true;
    958     }
    959     else
    960       free(newp);
    961   }
    962   if(path) {
    963     u->path = path_alloced?path:strdup(path);
    964     if(!u->path)
    965       return CURLUE_OUT_OF_MEMORY;
    966   }
    967 
    968   if(hostname) {
    969     /*
    970      * Parse the login details and strip them out of the host name.
    971      */
    972     if(junkscan(hostname))
    973       return CURLUE_MALFORMED_INPUT;
    974 
    975     result = parse_hostname_login(u, &hostname, flags);
    976     if(result)
    977       return result;
    978 
    979     result = parse_port(u, hostname);
    980     if(result)
    981       return result;
    982 
    983     result = hostname_check(hostname, flags);
    984     if(result)
    985       return result;
    986 
    987     u->host = strdup(hostname);
    988     if(!u->host)
    989       return CURLUE_OUT_OF_MEMORY;
    990   }
    991 
    992   if(query && query[0]) {
    993     u->query = strdup(query);
    994     if(!u->query)
    995       return CURLUE_OUT_OF_MEMORY;
    996   }
    997   if(fragment && fragment[0]) {
    998     u->fragment = strdup(fragment);
    999     if(!u->fragment)
   1000       return CURLUE_OUT_OF_MEMORY;
   1001   }
   1002 
   1003   free(u->scratch);
   1004   u->scratch = NULL;
   1005 
   1006   return CURLUE_OK;
   1007 }
   1008 
   1009 /*
   1010  * Parse the URL and set the relevant members of the Curl_URL struct.
   1011  */
   1012 static CURLUcode parseurl(const char *url, struct Curl_URL *u, unsigned int flags)
   1013 {
   1014   CURLUcode result = seturl(url, u, flags);
   1015   if(result) {
   1016     free_urlhandle(u);
   1017     memset(u, 0, sizeof(struct Curl_URL));
   1018   }
   1019   return result;
   1020 }
   1021 
   1022 /*
   1023  */
   1024 struct Curl_URL *curl_url(void)
   1025 {
   1026   return calloc(sizeof(struct Curl_URL), 1);
   1027 }
   1028 
   1029 void curl_url_cleanup(struct Curl_URL *u)
   1030 {
   1031   if(u) {
   1032     free_urlhandle(u);
   1033     free(u);
   1034   }
   1035 }
   1036 
   1037 #define DUP(dest, src, name)         \
   1038   if(src->name) {                    \
   1039     dest->name = strdup(src->name);  \
   1040     if(!dest->name)                  \
   1041       goto fail;                     \
   1042   }
   1043 
   1044 struct Curl_URL *curl_url_dup(struct Curl_URL *in)
   1045 {
   1046   struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
   1047   if(u) {
   1048     DUP(u, in, scheme);
   1049     DUP(u, in, user);
   1050     DUP(u, in, password);
   1051     DUP(u, in, options);
   1052     DUP(u, in, host);
   1053     DUP(u, in, port);
   1054     DUP(u, in, path);
   1055     DUP(u, in, query);
   1056     DUP(u, in, fragment);
   1057     u->portnum = in->portnum;
   1058   }
   1059   return u;
   1060   fail:
   1061   curl_url_cleanup(u);
   1062   return NULL;
   1063 }
   1064 
   1065 CURLUcode curl_url_get(struct Curl_URL *u, CURLUPart what,
   1066                        char **part, unsigned int flags)
   1067 {
   1068   char *ptr;
   1069   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
   1070   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
   1071   bool plusdecode = false;
   1072   (void)flags;
   1073   if(!u)
   1074     return CURLUE_BAD_HANDLE;
   1075   if(!part)
   1076     return CURLUE_BAD_PARTPOINTER;
   1077   *part = NULL;
   1078 
   1079   switch(what) {
   1080   case CURLUPART_SCHEME:
   1081     ptr = u->scheme;
   1082     ifmissing = CURLUE_NO_SCHEME;
   1083     urldecode = false; /* never for schemes */
   1084     break;
   1085   case CURLUPART_USER:
   1086     ptr = u->user;
   1087     ifmissing = CURLUE_NO_USER;
   1088     break;
   1089   case CURLUPART_PASSWORD:
   1090     ptr = u->password;
   1091     ifmissing = CURLUE_NO_PASSWORD;
   1092     break;
   1093   case CURLUPART_OPTIONS:
   1094     ptr = u->options;
   1095     ifmissing = CURLUE_NO_OPTIONS;
   1096     break;
   1097   case CURLUPART_HOST:
   1098     ptr = u->host;
   1099     ifmissing = CURLUE_NO_HOST;
   1100     break;
   1101   case CURLUPART_PORT:
   1102     ptr = u->port;
   1103     ifmissing = CURLUE_NO_PORT;
   1104     urldecode = false; /* never for port */
   1105     break;
   1106   case CURLUPART_PATH:
   1107     ptr = u->path;
   1108     if(!ptr) {
   1109       ptr = u->path = strdup("/");
   1110       if(!u->path)
   1111         return CURLUE_OUT_OF_MEMORY;
   1112     }
   1113     break;
   1114   case CURLUPART_QUERY:
   1115     ptr = u->query;
   1116     ifmissing = CURLUE_NO_QUERY;
   1117     plusdecode = urldecode;
   1118     break;
   1119   case CURLUPART_FRAGMENT:
   1120     ptr = u->fragment;
   1121     ifmissing = CURLUE_NO_FRAGMENT;
   1122     break;
   1123   case CURLUPART_URL: {
   1124     char *url;
   1125     char *scheme;
   1126     char *options = u->options;
   1127     char *port = u->port;
   1128     if(u->scheme && strcasecmp("file", u->scheme) == 0) {
   1129       url = aprintf("file://%s%s%s",
   1130                     u->path,
   1131                     u->fragment? "#": "",
   1132                     u->fragment? u->fragment : "");
   1133     }
   1134     else if(!u->host)
   1135       return CURLUE_NO_HOST;
   1136     else {
   1137       if(u->scheme)
   1138         scheme = u->scheme;
   1139       else
   1140         return CURLUE_NO_SCHEME;
   1141 
   1142       options = NULL;
   1143 
   1144       url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
   1145                     scheme,
   1146                     u->user ? u->user : "",
   1147                     u->password ? ":": "",
   1148                     u->password ? u->password : "",
   1149                     options ? ";" : "",
   1150                     options ? options : "",
   1151                     (u->user || u->password || options) ? "@": "",
   1152                     u->host,
   1153                     port ? ":": "",
   1154                     port ? port : "",
   1155                     (u->path && (u->path[0] != '/')) ? "/": "",
   1156                     u->path ? u->path : "/",
   1157                     u->query? "?": "",
   1158                     u->query? u->query : "",
   1159                     u->fragment? "#": "",
   1160                     u->fragment? u->fragment : "");
   1161     }
   1162     if(!url)
   1163       return CURLUE_OUT_OF_MEMORY;
   1164     *part = url;
   1165     return CURLUE_OK;
   1166     break;
   1167   }
   1168   default:
   1169     ptr = NULL;
   1170   }
   1171   if(ptr) {
   1172     *part = strdup(ptr);
   1173     if(!*part)
   1174       return CURLUE_OUT_OF_MEMORY;
   1175     if(plusdecode) {
   1176       /* convert + to space */
   1177       char *plus;
   1178       for(plus = *part; *plus; ++plus) {
   1179         if(*plus == '+')
   1180           *plus = ' ';
   1181       }
   1182     }
   1183     if(urldecode) {
   1184       char *decoded;
   1185       size_t dlen;
   1186       CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, true);
   1187       free(*part);
   1188       if(res) {
   1189         *part = NULL;
   1190         return CURLUE_URLDECODE;
   1191       }
   1192       *part = decoded;
   1193     }
   1194     return CURLUE_OK;
   1195   }
   1196   else
   1197     return ifmissing;
   1198 }
   1199 
   1200 CURLUcode curl_url_set(struct Curl_URL *u, CURLUPart what,
   1201                        const char *part, unsigned int flags)
   1202 {
   1203   char **storep = NULL;
   1204   long port = 0;
   1205   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
   1206   bool plusencode = false;
   1207   bool urlskipslash = false;
   1208   bool appendquery = false;
   1209   bool equalsencode = false;
   1210 
   1211   if(!u)
   1212     return CURLUE_BAD_HANDLE;
   1213   if(!part) {
   1214     /* setting a part to NULL clears it */
   1215     switch(what) {
   1216     case CURLUPART_URL:
   1217       break;
   1218     case CURLUPART_SCHEME:
   1219       storep = &u->scheme;
   1220       break;
   1221     case CURLUPART_USER:
   1222       storep = &u->user;
   1223       break;
   1224     case CURLUPART_PASSWORD:
   1225       storep = &u->password;
   1226       break;
   1227     case CURLUPART_OPTIONS:
   1228       storep = &u->options;
   1229       break;
   1230     case CURLUPART_HOST:
   1231       storep = &u->host;
   1232       break;
   1233     case CURLUPART_PORT:
   1234       storep = &u->port;
   1235       break;
   1236     case CURLUPART_PATH:
   1237       storep = &u->path;
   1238       break;
   1239     case CURLUPART_QUERY:
   1240       storep = &u->query;
   1241       break;
   1242     case CURLUPART_FRAGMENT:
   1243       storep = &u->fragment;
   1244       break;
   1245     default:
   1246       return CURLUE_UNKNOWN_PART;
   1247     }
   1248     if(storep && *storep) {
   1249       free(*storep);
   1250       *storep = NULL;
   1251     }
   1252     return CURLUE_OK;
   1253   }
   1254 
   1255   switch(what) {
   1256   case CURLUPART_SCHEME:
   1257     storep = &u->scheme;
   1258     urlencode = false; /* never */
   1259     break;
   1260   case CURLUPART_USER:
   1261     storep = &u->user;
   1262     break;
   1263   case CURLUPART_PASSWORD:
   1264     storep = &u->password;
   1265     break;
   1266   case CURLUPART_OPTIONS:
   1267     storep = &u->options;
   1268     break;
   1269   case CURLUPART_HOST:
   1270     storep = &u->host;
   1271     break;
   1272   case CURLUPART_PORT:
   1273     urlencode = false; /* never */
   1274     port = strtol(part, NULL, 10);  /* Port number must be decimal */
   1275     if((port <= 0) || (port > 0xffff))
   1276       return CURLUE_BAD_PORT_NUMBER;
   1277     storep = &u->port;
   1278     break;
   1279   case CURLUPART_PATH:
   1280     urlskipslash = true;
   1281     storep = &u->path;
   1282     break;
   1283   case CURLUPART_QUERY:
   1284     plusencode = urlencode;
   1285     appendquery = (flags & CURLU_APPENDQUERY)?1:0;
   1286     equalsencode = appendquery;
   1287     storep = &u->query;
   1288     break;
   1289   case CURLUPART_FRAGMENT:
   1290     storep = &u->fragment;
   1291     break;
   1292   case CURLUPART_URL: {
   1293     /*
   1294      * Allow a new URL to replace the existing (if any) contents.
   1295      *
   1296      * If the existing contents is enough for a URL, allow a relative URL to
   1297      * replace it.
   1298      */
   1299     CURLUcode result;
   1300     char *oldurl;
   1301     char *redired_url;
   1302     struct Curl_URL *handle2;
   1303 
   1304     if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) {
   1305       handle2 = curl_url();
   1306       if(!handle2)
   1307         return CURLUE_OUT_OF_MEMORY;
   1308       result = parseurl(part, handle2, flags);
   1309       if(!result)
   1310         mv_urlhandle(handle2, u);
   1311       else
   1312         curl_url_cleanup(handle2);
   1313       return result;
   1314     }
   1315     /* extract the full "old" URL to do the redirect on */
   1316     result = curl_url_get(u, CURLUPART_URL, &oldurl, flags);
   1317     if(result) {
   1318       /* couldn't get the old URL, just use the new! */
   1319       handle2 = curl_url();
   1320       if(!handle2)
   1321         return CURLUE_OUT_OF_MEMORY;
   1322       result = parseurl(part, handle2, flags);
   1323       if(!result)
   1324         mv_urlhandle(handle2, u);
   1325       else
   1326         curl_url_cleanup(handle2);
   1327       return result;
   1328     }
   1329 
   1330     /* apply the relative part to create a new URL */
   1331     redired_url = Curl_concat_url(oldurl, part);
   1332     free(oldurl);
   1333     if(!redired_url)
   1334       return CURLUE_OUT_OF_MEMORY;
   1335 
   1336     /* now parse the new URL */
   1337     handle2 = curl_url();
   1338     if(!handle2) {
   1339       free(redired_url);
   1340       return CURLUE_OUT_OF_MEMORY;
   1341     }
   1342     result = parseurl(redired_url, handle2, flags);
   1343     free(redired_url);
   1344     if(!result)
   1345       mv_urlhandle(handle2, u);
   1346     else
   1347       curl_url_cleanup(handle2);
   1348     return result;
   1349   }
   1350   default:
   1351     return CURLUE_UNKNOWN_PART;
   1352   }
   1353   if(storep) {
   1354     const char *newp = part;
   1355     size_t nalloc = strlen(part);
   1356 
   1357     if(urlencode) {
   1358       const char *i;
   1359       char *o;
   1360       bool free_part = false;
   1361       char *enc = malloc(nalloc * 3 + 1); /* for worst case! */
   1362       if(!enc)
   1363         return CURLUE_OUT_OF_MEMORY;
   1364       for(i = part, o = enc; *i; i++) {
   1365         if(Curl_isunreserved(*i) ||
   1366            ((*i == '/') && urlskipslash) ||
   1367            ((*i == '=') && equalsencode) ||
   1368            ((*i == '+') && plusencode)) {
   1369           if((*i == '=') && equalsencode)
   1370             /* only skip the first equals sign */
   1371             equalsencode = false;
   1372           *o = *i;
   1373           o++;
   1374         }
   1375         else {
   1376           snprintf(o, 4, "%%%02x", *i);
   1377           o += 3;
   1378         }
   1379       }
   1380       *o = 0; /* zero terminate */
   1381       newp = enc;
   1382       if(free_part)
   1383         free((char *)part);
   1384     }
   1385     else {
   1386       char *p;
   1387       newp = strdup(part);
   1388       if(!newp)
   1389         return CURLUE_OUT_OF_MEMORY;
   1390       p = (char *)newp;
   1391       while(*p) {
   1392         /* make sure percent encoded are lower case */
   1393         if((*p == '%') && isxdigit(p[1]) && isxdigit(p[2]) &&
   1394            (isupper(p[1]) || isupper(p[2]))) {
   1395           p[1] = (char)tolower(p[1]);
   1396           p[2] = (char)tolower(p[2]);
   1397           p += 3;
   1398         }
   1399         else
   1400           p++;
   1401       }
   1402     }
   1403 
   1404     if(appendquery) {
   1405       /* Append the string onto the old query. Add a '&' separator if none is
   1406          present at the end of the exsting query already */
   1407       size_t querylen = u->query ? strlen(u->query) : 0;
   1408       bool addamperand = querylen && (u->query[querylen -1] != '&');
   1409       if(querylen) {
   1410         size_t newplen = strlen(newp);
   1411         char *p = malloc(querylen + addamperand + newplen + 1);
   1412         if(!p) {
   1413           free((char *)newp);
   1414           return CURLUE_OUT_OF_MEMORY;
   1415         }
   1416         strcpy(p, u->query); /* original query */
   1417         if(addamperand)
   1418           p[querylen] = '&'; /* ampersand */
   1419         strcpy(&p[querylen + addamperand], newp); /* new suffix */
   1420         free((char *)newp);
   1421         free(*storep);
   1422         *storep = p;
   1423         return CURLUE_OK;
   1424       }
   1425     }
   1426 
   1427     free(*storep);
   1428     *storep = (char *)newp;
   1429   }
   1430   /* set after the string, to make it not assigned if the allocation above
   1431      fails */
   1432   if(port)
   1433     u->portnum = port;
   1434   return CURLUE_OK;
   1435 }