Selaa lähdekoodia

Rewritten URI parser

Tatsuhiro Tsujikawa 13 vuotta sitten
vanhempi
commit
692f978978
7 muutettua tiedostoa jossa 1031 lisäystä ja 162 poistoa
  1. 1 0
      src/Makefile.am
  2. 54 134
      src/uri.cc
  3. 488 0
      src/uri_split.c
  4. 92 0
      src/uri_split.h
  5. 1 0
      test/Makefile.am
  6. 389 0
      test/UriSplitTest.cc
  7. 6 28
      test/UriTest.cc

+ 1 - 0
src/Makefile.am

@@ -197,6 +197,7 @@ SRCS =  Socket.h\
 	ChunkedDecodingStreamFilter.cc ChunkedDecodingStreamFilter.h\
 	NullSinkStreamFilter.cc NullSinkStreamFilter.h\
 	uri.cc uri.h\
+	uri_split.c uri_split.h\
 	Triplet.h\
 	cookie_helper.cc cookie_helper.h\
 	json.cc json.h\

+ 54 - 134
src/uri.cc

@@ -36,6 +36,7 @@
 #include "A2STR.h"
 #include "FeatureConfig.h"
 #include "util.h"
+#include "uri_split.h"
 
 namespace aria2 {
 
@@ -101,150 +102,69 @@ void swap(UriStruct& lhs, UriStruct& rhs)
 
 bool parse(UriStruct& result, const std::string& uri)
 {
-  // http://user:password@aria2.sourceforge.net:80/dir/file?query#fragment
-  //        |            ||                    || |    |   |     |
-  //        |            ||             hostLast| |    |   |     |
-  //        |            ||              portFirst|    |   |     |
-  //    authorityFirst   ||             authorityLast  |   |     |
-  //                     ||                       |    |   |     |
-  //                   userInfoLast               |    |   |     |
-  //                      |                       |    |   |     |
-  //                     hostPortFirst            |    |   |     |
-  //                                              |    |   |     |
-  //                                       dirFirst dirLast|     |
-  //                                                       |     |
-  //                                                queryFirst fragmentFirst
-
-  // find fragment part
-  std::string::const_iterator fragmentFirst = uri.begin();
-  for(; fragmentFirst != uri.end(); ++fragmentFirst) {
-    if(*fragmentFirst == '#') break;
-  }
-  // find query part
-  std::string::const_iterator queryFirst = uri.begin();
-  for(; queryFirst != fragmentFirst; ++queryFirst) {
-    if(*queryFirst == '?') break;
-  }
-  result.query.assign(queryFirst, fragmentFirst);
-  // find protocol
-  std::string::size_type protocolOffset = uri.find("://");
-  if(protocolOffset == std::string::npos) return false;
-  result.protocol.assign(uri.begin(), uri.begin()+protocolOffset);
-  uint16_t defPort;
-  if((defPort = getDefaultPort(result.protocol)) == 0) {
-    return false;
-  }
-  // find authority
-  std::string::const_iterator authorityFirst = uri.begin()+protocolOffset+3;
-  std::string::const_iterator authorityLast = authorityFirst;
-  for(; authorityLast != queryFirst; ++authorityLast) {
-    if(*authorityLast == '/') break;
-  }
-  if(authorityFirst == authorityLast) {
-    // No authority found
-    return false;
-  }
-  // find userinfo(username and password) in authority if they exist
-  result.username = A2STR::NIL;
-  result.password = A2STR::NIL;
-  result.hasPassword = false;
-  std::string::const_iterator userInfoLast = authorityLast;
-  std::string::const_iterator hostPortFirst = authorityFirst;
-  for(; userInfoLast != authorityFirst-1; --userInfoLast) {
-    if(*userInfoLast == '@') {
-      hostPortFirst = userInfoLast;
-      ++hostPortFirst;
-      std::string::const_iterator userLast = authorityFirst;
-      for(; userLast != userInfoLast; ++userLast) {
-        if(*userLast == ':') {
-          result.password =
-            util::percentDecode(userLast+1,userInfoLast);
-          result.hasPassword = true;
-          break;
-        }
+  uri_split_result res;
+  int rv;
+  const char* p = uri.c_str();
+  rv = uri_split(&res, p);
+  if(rv == 0) {
+    result.protocol.assign(p + res.fields[USR_SCHEME].off,
+                           res.fields[USR_SCHEME].len);
+    result.host.assign(p + res.fields[USR_HOST].off, res.fields[USR_HOST].len);
+    if(res.port == 0) {
+      uint16_t defPort;
+      if((defPort = getDefaultPort(result.protocol)) == 0) {
+        return false;
       }
-      result.username =
-        util::percentDecode(authorityFirst, userLast);
-      break;
+      result.port = defPort;
+    } else {
+      result.port = res.port;
     }
-  }
-  std::string::const_iterator hostLast = hostPortFirst;
-  std::string::const_iterator portFirst = authorityLast;
-  result.ipv6LiteralAddress = false;
-  if(*hostPortFirst == '[') {
-    // Detected IPv6 literal address in square brackets
-    for(; hostLast != authorityLast; ++hostLast) {
-      if(*hostLast == ']') {
-        ++hostLast;
-        if(hostLast == authorityLast) {
-          result.ipv6LiteralAddress = true;
-        } else {
-          if(*hostLast == ':') {
-            portFirst = hostLast;
-            ++portFirst;
-            result.ipv6LiteralAddress = true;
-          }
-        }
-        break;
+    if(res.field_set & (1 << USR_PATH)) {
+      if(res.field_set & (1 << USR_BASENAME)) {
+        result.dir.assign(p + res.fields[USR_PATH].off,
+                          res.fields[USR_PATH].len -
+                          res.fields[USR_BASENAME].len);
+        result.file.assign(p + res.fields[USR_BASENAME].off,
+                           res.fields[USR_BASENAME].len);
+      } else {
+        result.dir.assign(p + res.fields[USR_PATH].off,
+                          res.fields[USR_PATH].len);
+        result.file = A2STR::NIL;
       }
+    } else {
+      result.dir = "/";
+      result.file = A2STR::NIL;
     }
-    if(!result.ipv6LiteralAddress) {
-      return false;
-    }
-  } else {
-    for(; hostLast != authorityLast; ++hostLast) {
-      if(*hostLast == ':') {
-        portFirst = hostLast;
-        ++portFirst;
-        break;
-      }
+    if(res.field_set & (1 << USR_QUERY)) {
+      result.query = "?";
+      result.query.append(p + res.fields[USR_QUERY].off,
+                          res.fields[USR_QUERY].len);
+    } else {
+      result.query = A2STR::NIL;
     }
-  }
-  if(hostPortFirst == hostLast) {
-    // No host
-    return false;
-  }
-  if(portFirst == authorityLast) {
-    // If port is not specified, then we set it to default port of
-    // its protocol..
-    result.port = defPort;
-  } else {
-    uint32_t tempPort;
-    if(util::parseUIntNoThrow(tempPort,
-                              std::string(portFirst, authorityLast))) {
-      if(65535 < tempPort) {
-        return false;
-      }
-      result.port = tempPort;
+    if(res.field_set & (1 << USR_USER)) {
+      result.username.assign(p + res.fields[USR_USER].off,
+                             res.fields[USR_USER].len);
+      result.username = util::percentDecode(result.username.begin(),
+                                            result.username.end());
     } else {
-      return false;
+      result.username = A2STR::NIL;
     }
-  }
-  if(result.ipv6LiteralAddress) {
-    result.host.assign(hostPortFirst+1, hostLast-1);
-  } else {
-    result.host.assign(hostPortFirst, hostLast);
-  }
-  // find directory and file part
-  std::string::const_iterator dirLast = authorityLast;
-  for(std::string::const_iterator i = authorityLast;
-      i != queryFirst; ++i) {
-    if(*i == '/') {
-      dirLast = i+1;
+    if(res.field_set & (1 << USR_PASSWD)) {
+      result.hasPassword = true;
+      result.password.assign(p + res.fields[USR_PASSWD].off,
+                             res.fields[USR_PASSWD].len);
+      result.password = util::percentDecode(result.password.begin(),
+                                            result.password.end());
+    } else {
+      result.hasPassword = false;
+      result.password = A2STR::NIL;
     }
-  }
-  if(dirLast == queryFirst) {
-    result.file = A2STR::NIL;
-  } else {
-    result.file.assign(dirLast, queryFirst);
-  }
-  // dirFirst == authorityLast
-  if(authorityLast == dirLast) {
-    result.dir = "/";
+    result.ipv6LiteralAddress = res.flags & USF_IPV6ADDR;
+    return true;
   } else {
-    result.dir.assign(authorityLast, dirLast);
+    return false;
   }
-  return true;
 }
 
 std::string construct(const UriStruct& us)

+ 488 - 0
src/uri_split.c

@@ -0,0 +1,488 @@
+/* <!-- copyright */
+/*
+ * aria2 - The high speed download utility
+ *
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of portions of this program with the
+ * OpenSSL library under certain conditions as described in each
+ * individual source file, and distribute linked combinations
+ * including the two.
+ * You must obey the GNU General Public License in all respects
+ * for all of the code used other than OpenSSL.  If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so.  If you
+ * do not wish to do so, delete this exception statement from your
+ * version.  If you delete this exception statement from all source
+ * files in the program, then also delete it here.
+ */
+/* copyright --> */
+#include "uri_split.h"
+
+#include <stdlib.h>
+
+typedef enum { /* States of the character scanner in uri_split().  A
+                * URI_BEFORE_x state is the one in which the first
+                * character of component x is examined. */
+  URI_BEFORE_SCHEME,
+  URI_SCHEME,
+  URI_SCHEME_SLASH1, /* expects the first '/' after "scheme:" */
+  URI_SCHEME_SLASH2, /* expects the second '/' */
+  URI_BEFORE_MAYBE_USER,
+  URI_MAYBE_USER, /* text that turns out to be the host if no '@' follows */
+  URI_BEFORE_MAYBE_PASSWD,
+  URI_MAYBE_PASSWD, /* text that turns out to be the port if no '@' follows */
+  URI_BEFORE_HOST,
+  URI_HOST,
+  URI_BEFORE_IPV6HOST, /* just after the opening '[' */
+  URI_IPV6HOST,
+  URI_AFTER_IPV6HOST, /* just after the closing ']' */
+  URI_BEFORE_PORT,
+  URI_PORT,
+  URI_PATH,
+  URI_BEFORE_QUERY,
+  URI_QUERY,
+  URI_BEFORE_FRAGMENT,
+  URI_FRAGMENT
+} uri_split_state;
+
+/* Records the component [first, last) as |field| in |res|: stores its
+   offset relative to |uri| and its length, and marks the field as
+   present in res->field_set.  Does nothing when |first| is NULL,
+   i.e. the component was not found. */
+static void uri_set_field(uri_split_result *res, int field,
+                          const char *first, const char *last, const char *uri)
+{
+  if(!first) {
+    return;
+  }
+  res->fields[field].off = first - uri;
+  res->fields[field].len = last - first;
+  res->field_set |= 1 << field;
+}
+
+/* Returns nonzero if |c| is one of the characters '0' through '9',
+   and 0 otherwise. */
+static int is_digit(char c)
+{
+  return !(c < '0' || '9' < c);
+}
+
+/* Splits |uri|, which must look like
+ *
+ *   scheme://[user[:passwd]@]host[:port][/path][?query][#fragment]
+ *
+ * where host may also be an IPv6 literal enclosed in '[' and ']'.
+ * The string is scanned once, character by character, with the state
+ * machine defined by uri_split_state.  Because "user:passwd@host" and
+ * "host:port" cannot be told apart until '@' (or the end of the
+ * authority) is seen, the text after "//" is first collected as
+ * "maybe user" / "maybe password" and reinterpreted as host / port
+ * when no '@' follows.
+ *
+ * On success the components are stored in |res| (if it is not NULL)
+ * and 0 is returned.  On failure -1 is returned and |res| is left
+ * untouched.  An empty host is always an error.
+ */
+int uri_split(uri_split_result *res, const char *uri)
+{
+  int state = URI_BEFORE_SCHEME;
+  const char *scheme_first = NULL, *scheme_last = NULL,
+    *host_first = NULL, *host_last = NULL,
+    *path_first = NULL, *path_last = NULL,
+    *query_first = NULL, *query_last = NULL,
+    *fragment_first = NULL, *fragment_last = NULL,
+    *user_first = NULL, *user_last = NULL,
+    *passwd_first = NULL, *passwd_last = NULL,
+    *last_atmark = NULL, *last_slash = NULL,
+    *p = uri;
+  /* -1 means "no valid port seen (yet)". */
+  int32_t port = -1;
+  uint8_t flags = 0;
+
+  for(; *p; ++p) {
+    switch(state) {
+    case URI_BEFORE_SCHEME:
+      scheme_first = p;
+      state = URI_SCHEME;
+      break;
+    case URI_SCHEME:
+      if(*p == ':') {
+        scheme_last = p;
+        state = URI_SCHEME_SLASH1;
+      }
+      break;
+    case URI_SCHEME_SLASH1:
+      if(*p == '/') {
+        state = URI_SCHEME_SLASH2;
+      } else {
+        return -1;
+      }
+      break;
+    case URI_SCHEME_SLASH2:
+      if(*p == '/') {
+        state = URI_BEFORE_MAYBE_USER;
+      } else {
+        return -1;
+      }
+      break;
+    case URI_BEFORE_MAYBE_USER:
+      switch(*p) {
+      case '@':
+      case ':':
+      case '/':
+        return -1;
+      case '[':
+        state = URI_BEFORE_IPV6HOST;
+        break;
+      default:
+        user_first = p;
+        state = URI_MAYBE_USER;
+      }
+      break;
+    case URI_MAYBE_USER:
+      switch(*p) {
+      case '@':
+        last_atmark = p;
+        break;
+      case ':':
+        user_last = p;
+        state = URI_BEFORE_MAYBE_PASSWD;
+        break;
+      case '[':
+        /* An IPv6 literal may only start right after "user@". */
+        if(last_atmark == p - 1) {
+          user_last = last_atmark;
+          state = URI_BEFORE_IPV6HOST;
+        } else {
+          return -1;
+        }
+        break;
+      case '/':
+      case '?':
+      case '#':
+        /* It turns out that this is only host or user + host if
+           last_atmark is not NULL. */
+        if(last_atmark) {
+          host_first = last_atmark + 1;
+          host_last = p;
+          if(host_first == host_last) {
+            /* "user@" directly followed by the path, query or
+               fragment: the host is empty.  Reject it here just as
+               it is rejected when the URI ends after '@'. */
+            return -1;
+          }
+          user_last = last_atmark;
+        } else {
+          host_first = user_first;
+          host_last = p;
+          user_first = user_last = NULL;
+        }
+        switch(*p) {
+        case '/':
+          path_first = last_slash = p;
+          state = URI_PATH;
+          break;
+        case '?':
+          state = URI_BEFORE_QUERY;
+          break;
+        case '#':
+          state = URI_BEFORE_FRAGMENT;
+          break;
+        }
+        break;
+      }
+      break;
+    case URI_BEFORE_MAYBE_PASSWD:
+      passwd_first = p;
+      switch(*p) {
+      case '@':
+        /* Empty password. */
+        passwd_last = last_atmark = p;
+        state = URI_BEFORE_HOST;
+        break;
+      case '/':
+        return -1;
+      default:
+        /* Start accumulating a port number in case this turns out
+           to be a port rather than a password. */
+        if(is_digit(*p)) {
+          port = *p - '0';
+        }
+        state = URI_MAYBE_PASSWD;
+      }
+      break;
+    case URI_MAYBE_PASSWD:
+      switch(*p) {
+      case '@':
+        passwd_last = last_atmark = p;
+        /* Passwd confirmed, reset port to -1. */
+        port = -1;
+        state = URI_BEFORE_HOST;
+        break;
+      case '[':
+        return -1;
+      case '/':
+      case '?':
+      case '#':
+        /* This is port not password.  port is in [passwd_first, p) */
+        if(port == -1) {
+          return -1;
+        }
+        if(last_atmark) {
+          host_first = last_atmark + 1;
+          host_last = passwd_first - 1;
+          if(host_first == host_last) {
+            /* "user@:port": the host is empty. */
+            return -1;
+          }
+          user_last = last_atmark;
+        } else {
+          host_first = user_first;
+          host_last = passwd_first - 1;
+          user_first = user_last = NULL;
+        }
+        passwd_first = passwd_last = NULL;
+        switch(*p) {
+        case '/':
+          path_first = last_slash = p;
+          state = URI_PATH;
+          break;
+        case '?':
+          state = URI_BEFORE_QUERY;
+          break;
+        case '#':
+          state = URI_BEFORE_FRAGMENT;
+          break;
+        }
+        break;
+      default:
+        /* Keep accumulating the port while only digits are seen and
+           the value fits in 16 bits; otherwise this can only be a
+           password. */
+        if(port != -1) {
+          if(is_digit(*p)) {
+            port *= 10;
+            port += *p - '0';
+            if(port > UINT16_MAX) {
+              port = -1;
+            }
+          } else {
+            port = -1;
+          }
+        }
+        break;
+      }
+      break;
+    case URI_BEFORE_HOST:
+      switch(*p) {
+      case ':':
+      case '/':
+        return -1;
+      case '[':
+        state = URI_BEFORE_IPV6HOST;
+        break;
+      default:
+        host_first = p;
+        state = URI_HOST;
+        break;
+      }
+      break;
+    case URI_HOST:
+      switch(*p) {
+      case ':':
+        host_last = p;
+        state = URI_BEFORE_PORT;
+        break;
+      case '/':
+        /* Remember this '/' as well, so that USR_BASENAME is reported
+           for a path with a single segment such as "/file". */
+        host_last = path_first = last_slash = p;
+        state = URI_PATH;
+        break;
+      case '?':
+        host_last = p;
+        state = URI_BEFORE_QUERY;
+        break;
+      case '#':
+        host_last = p;
+        state = URI_BEFORE_FRAGMENT;
+        break;
+      }
+      break;
+    case URI_BEFORE_IPV6HOST:
+      if(*p == ']') {
+        /* "[]": empty IPv6 literal. */
+        return -1;
+      }
+      host_first = p;
+      state = URI_IPV6HOST;
+      break;
+    case URI_IPV6HOST:
+      if(*p == ']') {
+        flags |= USF_IPV6ADDR;
+        host_last = p;
+        state = URI_AFTER_IPV6HOST;
+      }
+      break;
+    case URI_AFTER_IPV6HOST:
+      switch(*p) {
+      case ':':
+        state = URI_BEFORE_PORT;
+        break;
+      case '/':
+        /* See URI_HOST: the first '/' of the path must be recorded in
+           last_slash too. */
+        path_first = last_slash = p;
+        state = URI_PATH;
+        break;
+      case '?':
+        state = URI_BEFORE_QUERY;
+        break;
+      case '#':
+        state = URI_BEFORE_FRAGMENT;
+        break;
+      default:
+        return -1;
+      }
+      break;
+    case URI_BEFORE_PORT:
+      if(is_digit(*p)) {
+        port = *p - '0';
+        state = URI_PORT;
+      } else {
+        return -1;
+      }
+      break;
+    case URI_PORT:
+      switch(*p) {
+      case '/':
+        /* See URI_HOST: the first '/' of the path must be recorded in
+           last_slash too. */
+        path_first = last_slash = p;
+        state = URI_PATH;
+        break;
+      case '?':
+        state = URI_BEFORE_QUERY;
+        break;
+      case '#':
+        state = URI_BEFORE_FRAGMENT;
+        break;
+      default:
+        if(is_digit(*p)) {
+          port *= 10;
+          port += *p - '0';
+          if(port > UINT16_MAX) {
+            return -1;
+          }
+        } else {
+          return -1;
+        }
+      }
+      break;
+    case URI_PATH:
+      switch(*p) {
+      case '/':
+        last_slash = p;
+        break;
+      case '?':
+        path_last = p;
+        state = URI_BEFORE_QUERY;
+        break;
+      case '#':
+        path_last = p;
+        state = URI_BEFORE_FRAGMENT;
+        break;
+      }
+      break;
+    case URI_BEFORE_QUERY:
+      query_first = p;
+      if(*p == '#') {
+        /* "?#": empty query. */
+        query_last = p;
+        state = URI_BEFORE_FRAGMENT;
+      } else {
+        state = URI_QUERY;
+      }
+      break;
+    case URI_QUERY:
+      if(*p == '#') {
+        query_last = p;
+        state = URI_BEFORE_FRAGMENT;
+      }
+      break;
+    case URI_BEFORE_FRAGMENT:
+      fragment_first = p;
+      state = URI_FRAGMENT;
+      break;
+    case URI_FRAGMENT:
+      break;
+    }
+  }
+  /* Handle premature states: the string ended, so close whatever
+     component was being scanned, or fail if the URI is incomplete. */
+  switch(state) {
+  case URI_BEFORE_SCHEME:
+  case URI_SCHEME:
+  case URI_SCHEME_SLASH1:
+  case URI_SCHEME_SLASH2:
+    return -1;
+  case URI_BEFORE_MAYBE_USER:
+    return -1;
+  case URI_MAYBE_USER:
+    if(last_atmark) {
+      host_first = last_atmark + 1;
+      host_last = p;
+      if(host_first == host_last) {
+        return -1;
+      }
+      user_last = last_atmark;
+    } else {
+      host_first = user_first;
+      host_last = p;
+      user_first = user_last = NULL;
+    }
+    break;
+  case URI_BEFORE_MAYBE_PASSWD:
+    return -1;
+  case URI_MAYBE_PASSWD:
+    if(port == -1) {
+      return -1;
+    }
+    if(last_atmark) {
+      host_first = last_atmark + 1;
+      host_last = passwd_first - 1;
+      if(host_first == host_last) {
+        /* "user@:port": the host is empty. */
+        return -1;
+      }
+      user_last = last_atmark;
+    } else {
+      host_first = user_first;
+      host_last = passwd_first - 1;
+      user_first = user_last = NULL;
+    }
+    passwd_first = passwd_last = NULL;
+    break;
+  case URI_BEFORE_HOST:
+    return -1;
+  case URI_HOST:
+    host_last = p;
+    break;
+  case URI_BEFORE_IPV6HOST:
+  case URI_IPV6HOST:
+    return -1;
+  case URI_AFTER_IPV6HOST:
+    break;
+  case URI_BEFORE_PORT:
+    return -1;
+  case URI_PORT:
+    if(port == -1) {
+      return -1;
+    }
+    break;
+  case URI_PATH:
+    path_last = p;
+    break;
+  case URI_BEFORE_QUERY:
+    query_first = query_last = p;
+    break;
+  case URI_QUERY:
+    query_last = p;
+    break;
+  case URI_BEFORE_FRAGMENT:
+    fragment_first = fragment_last = p;
+    break;
+  case URI_FRAGMENT:
+    fragment_last = p;
+    break;
+  default:
+    return -1;
+  }
+
+  if(res) {
+    res->field_set = 0;
+    res->port = 0;
+    res->flags = flags;
+
+    uri_set_field(res, USR_SCHEME, scheme_first, scheme_last, uri);
+    uri_set_field(res, USR_HOST, host_first, host_last, uri);
+    uri_set_field(res, USR_PATH, path_first, path_last, uri);
+    uri_set_field(res, USR_QUERY, query_first, query_last, uri);
+    uri_set_field(res, USR_FRAGMENT, fragment_first, fragment_last, uri);
+    uri_set_field(res, USR_USER, user_first, user_last, uri);
+    uri_set_field(res, USR_PASSWD, passwd_first, passwd_last, uri);
+    /* USERINFO spans user and password, up to the last '@'. */
+    if(res->field_set & (1 << USR_USER)) {
+      uri_set_field(res, USR_USERINFO, user_first, last_atmark, uri);
+    }
+    /* BASENAME is the non-empty text after the last '/' of the path. */
+    if(last_slash && last_slash + 1 != path_last) {
+      uri_set_field(res, USR_BASENAME, last_slash + 1, path_last, uri);
+    }
+    if(port != -1) {
+      res->field_set |= 1 << USR_PORT;
+      res->port = port;
+    }
+  }
+
+  return 0;
+}

+ 92 - 0
src/uri_split.h

@@ -0,0 +1,92 @@
+/* <!-- copyright */
+/*
+ * aria2 - The high speed download utility
+ *
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of portions of this program with the
+ * OpenSSL library under certain conditions as described in each
+ * individual source file, and distribute linked combinations
+ * including the two.
+ * You must obey the GNU General Public License in all respects
+ * for all of the code used other than OpenSSL.  If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so.  If you
+ * do not wish to do so, delete this exception statement from your
+ * version.  If you delete this exception statement from all source
+ * files in the program, then also delete it here.
+ */
+/* copyright --> */
+#ifndef D_URI_SPLIT_H
+#define D_URI_SPLIT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <stdint.h>
+
+typedef enum { /* URI components.  Each value indexes
+                * uri_split_result.fields, and bit (1 << value) of
+                * uri_split_result.field_set tells whether the component
+                * is present. */
+  USR_SCHEME,
+  USR_HOST,
+  USR_PORT, /* the value itself is stored in uri_split_result.port */
+  USR_PATH,
+  USR_QUERY,
+  USR_FRAGMENT,
+  USR_USERINFO, /* user and password together, up to the last '@' */
+  USR_USER,
+  USR_PASSWD,
+  USR_BASENAME, /* part of the path after its last '/' */
+  USR_MAX /* number of components; size of uri_split_result.fields */
+} uri_split_field;
+
+typedef enum { /* Bits stored in uri_split_result.flags. */
+  USF_IPV6ADDR = 1 /* the host was given as an IPv6 literal in '[' ']' */
+} uri_split_flag;
+
+/* The structure is based on http-parser by Joyent, Inc and other Node
+   contributors. https://github.com/joyent/http-parser
+
+   It does not copy any text: each component is described by an
+   offset into the URI string passed to uri_split() and a length. */
+typedef struct {
+  uint16_t field_set; /* bit (1 << f) is set if uri_split_field f is present */
+  uint16_t port; /* parsed port number, or 0 if the URI has none */
+
+  struct {
+    uint16_t off; /* offset of the component from the start of the URI */
+    uint16_t len; /* length of the component */
+  } fields[USR_MAX]; /* NOTE(review): off/len are 16 bits wide, so values
+                        above 65535 do not fit; uri_split() does not
+                        check the URI length */
+
+  uint8_t flags; /* combination of uri_split_flag bits */
+} uri_split_result;
+
+/* Splits URI |uri| and stores the results in |res|. To check whether
+ * a particular URI component is available, test |res->field_set|
+ * against 1 shifted left by the corresponding uri_split_field value.
+ * If |res| is NULL, processing is done but the result is not stored.
+ * If the host component of |uri| is an IPv6 numeric address, then
+ * USF_IPV6ADDR & res->flags will be nonzero.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int uri_split(uri_split_result *res, const char *uri);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* D_URI_SPLIT_H */

+ 1 - 0
test/Makefile.am

@@ -71,6 +71,7 @@ aria2c_SOURCES = AllTest.cc\
 	ValueBaseTest.cc\
 	ChunkedDecodingStreamFilterTest.cc\
 	UriTest.cc\
+	UriSplitTest.cc\
 	MockSegment.h\
 	TripletTest.cc\
 	CookieHelperTest.cc\

+ 389 - 0
test/UriSplitTest.cc

@@ -0,0 +1,389 @@
+#include "uri_split.h"
+
+#include <cstring>
+
+#include <cppunit/extensions/HelperMacros.h>
+
+#include "uri_split.h"
+
+namespace aria2 {
+
+class UriSplitTest:public CppUnit::TestFixture { // tests for uri_split()
+
+  CPPUNIT_TEST_SUITE(UriSplitTest);
+  CPPUNIT_TEST(testUriSplit);
+  CPPUNIT_TEST(testUriSplit_fail);
+  CPPUNIT_TEST_SUITE_END();
+
+public:
+  void testUriSplit();
+  void testUriSplit_fail();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION( UriSplitTest );
+
+namespace {
+const char* fieldstr[] = { // field names indexed by uri_split_field value
+  "USR_SCHEME",
+  "USR_HOST",
+  "USR_PORT",
+  "USR_PATH",
+  "USR_QUERY",
+  "USR_FRAGMENT",
+  "USR_USERINFO",
+  "USR_USER",
+  "USR_PASSWD",
+  "USR_BASENAME"
+};
+} // namespace
+
+#define CHECK_FIELD_SET(RES, FLAGS) /* field_set must equal FLAGS */    \
+  for(int i = 0; i < USR_MAX; ++i) {                                    \
+    int mask = 1 << i;                                                  \
+    if((FLAGS) & mask) {                                                \
+      CPPUNIT_ASSERT_MESSAGE(fieldstr[i], RES.field_set & mask);        \
+    } else {                                                            \
+      CPPUNIT_ASSERT_MESSAGE(fieldstr[i], !(RES.field_set & mask));     \
+    }                                                                   \
+  }
+
+namespace {
+std::string mkstr(const uri_split_result& res, int field, const char* base)
+{ // returns the |field| component of |res| as a substring of |base|
+  return std::string(base + res.fields[field].off, res.fields[field].len);
+}
+} // namespace
+
+void UriSplitTest::testUriSplit()
+{
+  uri_split_result res; // zeroed with memset before each case
+  const char* uri;
+  uri = "http://aria2.sf.net/path/"; // scheme, host and path only
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH));
+  CPPUNIT_ASSERT_EQUAL(std::string("http"), mkstr(res, USR_SCHEME, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+
+  uri = "http://user@aria2.sf.net/path/"; // user without password
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_USERINFO) | (1 << USR_USER));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+
+  uri = "http://user:pass@aria2.sf.net/path/"; // user and password
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_USERINFO) | (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user:pass"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+
+  // According to RFC 3986, @ in userinfo is illegal. But many people
+  // use an e-mail address as username, don't understand
+  // percent-encoding and keep getting errors when putting it in a URI
+  // in unencoded form. Because of this, we support @ in username.
+  uri = "http://user@foo.com:pass@aria2.sf.net/path/";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_USERINFO) | (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user@foo.com:pass"),
+                       mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user@foo.com"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+
+  // Port processed in URI_MAYBE_USER -> URI_PORT
+  uri = "https://aria2.sf.net:443/path/";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_PATH));
+  CPPUNIT_ASSERT_EQUAL(std::string("https"), mkstr(res, USR_SCHEME, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // Port processed in URI_PORT
+  uri = "https://user:pass@aria2.sf.net:443/path/";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_PATH) | (1 << USR_USERINFO) | (1 << USR_USER) |
+                  (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user:pass"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // Port processed in URI_MAYBE_PASSWD
+  uri = "https://user@aria2.sf.net:443/path/";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_PATH) | (1 << USR_USERINFO) | (1 << USR_USER));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sf.net"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/path/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // Host only: no port, path, query or fragment
+  uri = "http://aria2";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+
+  uri = "http://aria2:8080"; // host and port, no path
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)8080, res.port);
+
+  uri = "http://user@aria2"; // user and host, no path
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_USERINFO) | (1 << USR_USER));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+
+  uri = "http://user:@aria2"; // empty password: USR_PASSWD set, length 0
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_USERINFO) | (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user:"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string(""), mkstr(res, USR_PASSWD, uri));
+
+  uri = "http://aria2/?foo#bar"; // path, query and fragment
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_QUERY) | (1 << USR_FRAGMENT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"), mkstr(res, USR_FRAGMENT, uri));
+
+  // URI_MAYBE_USER
+  uri = "http://aria2?foo";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_QUERY));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+
+  // URI_MAYBE_USER
+  uri = "http://aria2#bar";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_FRAGMENT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"), mkstr(res, USR_FRAGMENT, uri));
+
+  // URI_MAYBE_PASSWD
+  uri = "https://aria2:443?foo";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_QUERY));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // URI_MAYBE_PASSWD
+  uri = "https://aria2:443#bar";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_FRAGMENT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"), mkstr(res, USR_FRAGMENT, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // URI_PORT
+  uri = "https://user:pass@aria2:443?foo";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_QUERY) | (1 << USR_USERINFO) | (1 << USR_USER) |
+                  (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // URI_PORT
+  uri = "https://user:pass@aria2:443#bar";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT) |
+                  (1 << USR_FRAGMENT) | (1 << USR_USERINFO) | (1 << USR_USER) |
+                  (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"), mkstr(res, USR_FRAGMENT, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+
+  // URI_HOST
+  uri = "http://user:pass@aria2?foo";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_QUERY) | (1 << USR_USERINFO) |
+                  (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+
+  // URI_HOST
+  uri = "http://user:pass@aria2#bar";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_FRAGMENT) | (1 << USR_USERINFO) |
+                  (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"), mkstr(res, USR_FRAGMENT, uri));
+
+  // empty query
+  uri = "http://aria2/?";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_QUERY));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string(""), mkstr(res, USR_QUERY, uri));
+
+  // empty fragment
+  uri = "http://aria2/#";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_FRAGMENT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string(""), mkstr(res, USR_FRAGMENT, uri));
+
+  // empty query and fragment
+  uri = "http://aria2/?#";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_QUERY) | (1 << USR_FRAGMENT));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("/"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string(""), mkstr(res, USR_QUERY, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string(""), mkstr(res, USR_FRAGMENT, uri));
+
+  // IPv6 numeric address
+  uri = "http://[::1]";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST));
+  CPPUNIT_ASSERT_EQUAL(std::string("::1"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT(res.flags & USF_IPV6ADDR);
+
+  uri = "https://[::1]:443"; // IPv6 numeric address with port
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PORT));
+  CPPUNIT_ASSERT_EQUAL(std::string("::1"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL((uint16_t)443, res.port);
+  CPPUNIT_ASSERT(res.flags & USF_IPV6ADDR);
+
+  // URI_MAYBE_USER
+  uri = "https://user@[::1]";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_USERINFO) | (1 << USR_USER));
+  CPPUNIT_ASSERT_EQUAL(std::string("::1"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT(res.flags & USF_IPV6ADDR);
+
+  // URI_BEFORE_HOST
+  uri = "https://user:pass@[::1]";
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) |
+                  (1 << USR_USERINFO) | (1 << USR_USER) | (1 << USR_PASSWD));
+  CPPUNIT_ASSERT_EQUAL(std::string("::1"), mkstr(res, USR_HOST, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user:pass"), mkstr(res, USR_USERINFO, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("user"), mkstr(res, USR_USER, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("pass"), mkstr(res, USR_PASSWD, uri));
+  CPPUNIT_ASSERT(res.flags & USF_IPV6ADDR);
+
+  uri = "http://aria2/f"; // path ends with a file name => USR_BASENAME
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_BASENAME));
+  CPPUNIT_ASSERT_EQUAL(std::string("/f"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("f"), mkstr(res, USR_BASENAME, uri));
+
+  uri = "http://aria2/index.html?foo"; // basename excludes the query
+  memset(&res, 0, sizeof(res));
+  CPPUNIT_ASSERT_EQUAL(0, uri_split(&res, uri));
+  CHECK_FIELD_SET(res, (1 << USR_SCHEME) | (1 << USR_HOST) | (1 << USR_PATH) |
+                  (1 << USR_QUERY) | (1 << USR_BASENAME));
+  CPPUNIT_ASSERT_EQUAL(std::string("/index.html"), mkstr(res, USR_PATH, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("index.html"),
+                       mkstr(res, USR_BASENAME, uri));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"), mkstr(res, USR_QUERY, uri));
+}
+
+void UriSplitTest::testUriSplit_fail()
+{ // every input must be rejected with -1; |res| is NULL (nothing stored)
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, ""));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "h"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http:"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http:a"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http:/"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http:/a"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://:host"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://@user@host"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:65536"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass?"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass@host:65536"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass@host:x"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass@host:80x"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user@"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://[]"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://[::"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user[::1]"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user[::1]x"));
+  CPPUNIT_ASSERT_EQUAL(-1, uri_split(NULL, "http://user:pass[::1]"));
+}
+
+} // namespace aria2

+ 6 - 28
test/UriTest.cc

@@ -302,34 +302,12 @@ void UriTest::testSetUri20()
 void UriTest::testSetUri_zeroUsername()
 {
   UriStruct us;
-  CPPUNIT_ASSERT(parse(us, "ftp://@localhost/download/aria2-1.0.0.tar.bz2"));
-  CPPUNIT_ASSERT_EQUAL(std::string("ftp"), us.protocol);
-  CPPUNIT_ASSERT_EQUAL((uint16_t)21, us.port);
-  CPPUNIT_ASSERT_EQUAL(std::string("localhost"), us.host);
-  CPPUNIT_ASSERT_EQUAL(std::string("/download/"), us.dir);
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2-1.0.0.tar.bz2"), us.file);
-  CPPUNIT_ASSERT_EQUAL(std::string(""), us.username);
-  CPPUNIT_ASSERT_EQUAL(std::string(""), us.password);
+  CPPUNIT_ASSERT(!parse(us, "ftp://@localhost/download/aria2-1.0.0.tar.bz2"));
 
-  CPPUNIT_ASSERT(parse(us, "ftp://:@localhost/download/aria2-1.0.0.tar.bz2"));
-  CPPUNIT_ASSERT_EQUAL(std::string("ftp"), us.protocol);
-  CPPUNIT_ASSERT_EQUAL((uint16_t)21, us.port);
-  CPPUNIT_ASSERT_EQUAL(std::string("localhost"), us.host);
-  CPPUNIT_ASSERT_EQUAL(std::string("/download/"), us.dir);
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2-1.0.0.tar.bz2"), us.file);
-  CPPUNIT_ASSERT_EQUAL(std::string(""), us.username);
-  CPPUNIT_ASSERT_EQUAL(std::string(""), us.password);
-
-  CPPUNIT_ASSERT(parse(us,
-                       "ftp://:pass@localhost/download/aria2-1.0.0.tar.bz2"));
-  CPPUNIT_ASSERT_EQUAL(std::string("ftp"), us.protocol);
-  CPPUNIT_ASSERT_EQUAL((uint16_t)21, us.port);
-  CPPUNIT_ASSERT_EQUAL(std::string("localhost"), us.host);
-  CPPUNIT_ASSERT_EQUAL(std::string("/download/"), us.dir);
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2-1.0.0.tar.bz2"), us.file);
-  CPPUNIT_ASSERT_EQUAL(std::string(""), us.username);
-  CPPUNIT_ASSERT_EQUAL(std::string("pass"), us.password);
+  CPPUNIT_ASSERT(!parse(us, "ftp://:@localhost/download/aria2-1.0.0.tar.bz2"));
 
+  CPPUNIT_ASSERT(!parse(us,
+                        "ftp://:pass@localhost/download/aria2-1.0.0.tar.bz2"));
 }
 
 void UriTest::testSetUri_username()
@@ -354,11 +332,11 @@ void UriTest::testSetUri_usernamePassword()
                        "aria2-1.0.0.tar.bz2"));
   CPPUNIT_ASSERT_EQUAL(std::string("ftp"), us.protocol);
   CPPUNIT_ASSERT_EQUAL((uint16_t)21, us.port);
-  CPPUNIT_ASSERT_EQUAL(std::string("localhost"), us.host);
+  CPPUNIT_ASSERT_EQUAL(std::string("pass%40@localhost"), us.host);
   CPPUNIT_ASSERT_EQUAL(std::string("/download/"), us.dir);
   CPPUNIT_ASSERT_EQUAL(std::string("aria2-1.0.0.tar.bz2"), us.file);
   CPPUNIT_ASSERT_EQUAL(std::string("aria2@user@"), us.username);
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2@pass@"), us.password);
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"), us.password);
 
   // make sure that after new uri is set, username and password are updated.
   CPPUNIT_ASSERT(parse(us, "ftp://localhost/download/aria2-1.0.0.tar.bz2"));