123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494 |
- /* <!-- copyright */
- /*
- * aria2 - The high speed download utility
- *
- * Copyright (C) 2012 Tatsuhiro Tsujikawa
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * In addition, as a special exception, the copyright holders give
- * permission to link the code of portions of this program with the
- * OpenSSL library under certain conditions as described in each
- * individual source file, and distribute linked combinations
- * including the two.
- * You must obey the GNU General Public License in all respects
- * for all of the code used other than OpenSSL. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you
- * do not wish to do so, delete this exception statement from your
- * version. If you delete this exception statement from all source
- * files in the program, then also delete it here.
- */
- /* copyright --> */
- #include "uri_split.h"
- #include <stdlib.h>
/* Parser states used by uri_split(). Each state names the position of
   the character that is about to be examined. */
typedef enum {
  /* Start of input; the first character begins the scheme. */
  URI_BEFORE_SCHEME = 0,
  /* Inside the scheme, waiting for ':'. */
  URI_SCHEME,
  /* Just after "scheme:"; the first '/' is required. */
  URI_SCHEME_SLASH1,
  /* Just after "scheme:/"; the second '/' is required. */
  URI_SCHEME_SLASH2,
  /* First character after "//"; either '[' or the start of user/host. */
  URI_BEFORE_MAYBE_USER,
  /* Inside text that may turn out to be a user name or a host. */
  URI_MAYBE_USER,
  /* Just after ':' following the possible user; password or port begins. */
  URI_BEFORE_MAYBE_PASSWD,
  /* Inside text that may turn out to be a password or a port number. */
  URI_MAYBE_PASSWD,
  /* Just after the '@' that ended a password; host begins. */
  URI_BEFORE_HOST,
  /* Inside a host that is not a bracketed IPv6 literal. */
  URI_HOST,
  /* Just after '['; first character of the IPv6 literal. */
  URI_BEFORE_IPV6HOST,
  /* Inside the IPv6 literal, waiting for ']'. */
  URI_IPV6HOST,
  /* Just after ']'. */
  URI_AFTER_IPV6HOST,
  /* Just after the ':' that follows the host; a digit is required. */
  URI_BEFORE_PORT,
  /* Inside the port digits. */
  URI_PORT,
  /* Inside the path. */
  URI_PATH,
  /* Just after '?'. */
  URI_BEFORE_QUERY,
  /* Inside the query, waiting for '#'. */
  URI_QUERY,
  /* Just after '#'. */
  URI_BEFORE_FRAGMENT,
  /* Inside the fragment; consumes the rest of the input. */
  URI_FRAGMENT
} uri_split_state;
- static void uri_set_field(uri_split_result* res, int field, const char* first,
- const char* last, const char* uri)
- {
- if (first) {
- res->field_set |= 1 << field;
- res->fields[field].off = first - uri;
- res->fields[field].len = last - first;
- }
- }
/* Returns 1 if |c| is one of '0'..'9', 0 otherwise. */
static int is_digit(char c)
{
  /* Anything below '0' wraps to a large unsigned value, so a single
     comparison covers both bounds. */
  return (unsigned int)(c - '0') < 10u;
}
- int uri_split(uri_split_result* res, const char* uri)
- {
- int state = URI_BEFORE_SCHEME;
- const char* scheme_first = NULL, * scheme_last = NULL, * host_first = NULL,
- * host_last = NULL, * path_first = NULL, * path_last = NULL,
- * query_first = NULL, * query_last = NULL,
- * fragment_first = NULL, * fragment_last = NULL,
- * user_first = NULL, * user_last = NULL, * passwd_first = NULL,
- * passwd_last = NULL, * last_atmark = NULL, * last_slash = NULL,
- * p = uri;
- int32_t port = -1;
- uint8_t flags = 0;
- for (; *p; ++p) {
- switch (state) {
- case URI_BEFORE_SCHEME:
- scheme_first = p;
- state = URI_SCHEME;
- break;
- case URI_SCHEME:
- if (*p == ':') {
- scheme_last = p;
- state = URI_SCHEME_SLASH1;
- }
- break;
- case URI_SCHEME_SLASH1:
- if (*p == '/') {
- state = URI_SCHEME_SLASH2;
- }
- else {
- return -1;
- }
- break;
- case URI_SCHEME_SLASH2:
- if (*p == '/') {
- state = URI_BEFORE_MAYBE_USER;
- }
- else {
- return -1;
- }
- break;
- case URI_BEFORE_MAYBE_USER:
- switch (*p) {
- case '@':
- case ':':
- case '/':
- return -1;
- case '[':
- state = URI_BEFORE_IPV6HOST;
- break;
- default:
- user_first = p;
- state = URI_MAYBE_USER;
- }
- break;
- case URI_MAYBE_USER:
- switch (*p) {
- case '@':
- last_atmark = p;
- break;
- case ':':
- user_last = p;
- state = URI_BEFORE_MAYBE_PASSWD;
- break;
- case '[':
- if (last_atmark == p - 1) {
- user_last = last_atmark;
- state = URI_BEFORE_IPV6HOST;
- }
- else {
- return -1;
- }
- break;
- case '/':
- case '?':
- case '#':
- /* It turns out that this is only host or user + host if
- last_atmark is not NULL. */
- if (last_atmark) {
- host_first = last_atmark + 1;
- host_last = p;
- user_last = last_atmark;
- }
- else {
- host_first = user_first;
- host_last = p;
- user_first = user_last = NULL;
- }
- switch (*p) {
- case '/':
- path_first = last_slash = p;
- state = URI_PATH;
- break;
- case '?':
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- state = URI_BEFORE_FRAGMENT;
- break;
- }
- break;
- }
- break;
- case URI_BEFORE_MAYBE_PASSWD:
- passwd_first = p;
- switch (*p) {
- case '@':
- passwd_last = last_atmark = p;
- state = URI_BEFORE_HOST;
- break;
- case '/':
- return -1;
- default:
- /* sums up port number in case of port. */
- if (is_digit(*p)) {
- port = *p - '0';
- }
- state = URI_MAYBE_PASSWD;
- }
- break;
- case URI_MAYBE_PASSWD:
- switch (*p) {
- case '@':
- passwd_last = last_atmark = p;
- /* Passwd confirmed, reset port to -1. */
- port = -1;
- state = URI_BEFORE_HOST;
- break;
- case '[':
- return -1;
- case '/':
- case '?':
- case '#':
- /* This is port not password. port is in [passwd_first, p) */
- if (port == -1) {
- return -1;
- }
- if (last_atmark) {
- host_first = last_atmark + 1;
- host_last = passwd_first - 1;
- user_last = last_atmark;
- }
- else {
- host_first = user_first;
- host_last = passwd_first - 1;
- user_first = user_last = NULL;
- }
- passwd_first = passwd_last = NULL;
- switch (*p) {
- case '/':
- path_first = last_slash = p;
- state = URI_PATH;
- break;
- case '?':
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- state = URI_BEFORE_FRAGMENT;
- break;
- }
- break;
- default:
- if (port != -1) {
- if (is_digit(*p)) {
- port *= 10;
- port += *p - '0';
- if (port > UINT16_MAX) {
- port = -1;
- }
- }
- else {
- port = -1;
- }
- }
- break;
- }
- break;
- case URI_BEFORE_HOST:
- switch (*p) {
- case ':':
- case '/':
- return -1;
- case '[':
- state = URI_BEFORE_IPV6HOST;
- break;
- default:
- host_first = p;
- state = URI_HOST;
- break;
- }
- break;
- case URI_HOST:
- switch (*p) {
- case ':':
- host_last = p;
- state = URI_BEFORE_PORT;
- break;
- case '/':
- host_last = path_first = last_slash = p;
- state = URI_PATH;
- break;
- case '?':
- host_last = p;
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- host_last = p;
- state = URI_BEFORE_FRAGMENT;
- break;
- }
- break;
- case URI_BEFORE_IPV6HOST:
- if (*p == ']') {
- return -1;
- }
- host_first = p;
- state = URI_IPV6HOST;
- break;
- case URI_IPV6HOST:
- if (*p == ']') {
- flags |= USF_IPV6ADDR;
- host_last = p;
- state = URI_AFTER_IPV6HOST;
- }
- break;
- case URI_AFTER_IPV6HOST:
- switch (*p) {
- case ':':
- state = URI_BEFORE_PORT;
- break;
- case '/':
- path_first = last_slash = p;
- state = URI_PATH;
- break;
- case '?':
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- state = URI_BEFORE_FRAGMENT;
- break;
- default:
- return -1;
- }
- break;
- case URI_BEFORE_PORT:
- if (is_digit(*p)) {
- port = *p - '0';
- state = URI_PORT;
- }
- else {
- return -1;
- }
- break;
- case URI_PORT:
- switch (*p) {
- case '/':
- path_first = last_slash = p;
- state = URI_PATH;
- break;
- case '?':
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- state = URI_BEFORE_FRAGMENT;
- break;
- default:
- if (is_digit(*p)) {
- port *= 10;
- port += *p - '0';
- if (port > UINT16_MAX) {
- return -1;
- }
- }
- else {
- return -1;
- }
- }
- break;
- case URI_PATH:
- switch (*p) {
- case '/':
- last_slash = p;
- break;
- case '?':
- path_last = p;
- state = URI_BEFORE_QUERY;
- break;
- case '#':
- path_last = p;
- state = URI_BEFORE_FRAGMENT;
- break;
- }
- break;
- case URI_BEFORE_QUERY:
- query_first = p;
- if (*p == '#') {
- query_last = p;
- state = URI_BEFORE_FRAGMENT;
- }
- else {
- state = URI_QUERY;
- }
- break;
- case URI_QUERY:
- if (*p == '#') {
- query_last = p;
- state = URI_BEFORE_FRAGMENT;
- }
- break;
- case URI_BEFORE_FRAGMENT:
- fragment_first = p;
- state = URI_FRAGMENT;
- break;
- case URI_FRAGMENT:
- break;
- }
- }
- /* Handle premature states */
- switch (state) {
- case URI_BEFORE_SCHEME:
- case URI_SCHEME:
- case URI_SCHEME_SLASH1:
- case URI_SCHEME_SLASH2:
- return -1;
- case URI_BEFORE_MAYBE_USER:
- return -1;
- case URI_MAYBE_USER:
- if (last_atmark) {
- host_first = last_atmark + 1;
- host_last = p;
- if (host_first == host_last) {
- return -1;
- }
- user_last = last_atmark;
- }
- else {
- host_first = user_first;
- host_last = p;
- user_first = user_last = NULL;
- }
- break;
- case URI_BEFORE_MAYBE_PASSWD:
- return -1;
- case URI_MAYBE_PASSWD:
- if (port == -1) {
- return -1;
- }
- if (last_atmark) {
- host_first = last_atmark + 1;
- host_last = passwd_first - 1;
- user_last = last_atmark;
- }
- else {
- host_first = user_first;
- host_last = passwd_first - 1;
- user_first = user_last = NULL;
- }
- passwd_first = passwd_last = NULL;
- break;
- case URI_BEFORE_HOST:
- return -1;
- case URI_HOST:
- host_last = p;
- break;
- case URI_BEFORE_IPV6HOST:
- case URI_IPV6HOST:
- return -1;
- case URI_AFTER_IPV6HOST:
- break;
- case URI_BEFORE_PORT:
- return -1;
- case URI_PORT:
- if (port == -1) {
- return -1;
- }
- break;
- case URI_PATH:
- path_last = p;
- break;
- case URI_BEFORE_QUERY:
- query_first = query_last = p;
- break;
- case URI_QUERY:
- query_last = p;
- break;
- case URI_BEFORE_FRAGMENT:
- fragment_first = fragment_last = p;
- break;
- case URI_FRAGMENT:
- fragment_last = p;
- break;
- default:
- return -1;
- };
- if (res) {
- res->field_set = 0;
- res->port = 0;
- res->flags = flags;
- uri_set_field(res, USR_SCHEME, scheme_first, scheme_last, uri);
- uri_set_field(res, USR_HOST, host_first, host_last, uri);
- uri_set_field(res, USR_PATH, path_first, path_last, uri);
- uri_set_field(res, USR_QUERY, query_first, query_last, uri);
- uri_set_field(res, USR_FRAGMENT, fragment_first, fragment_last, uri);
- uri_set_field(res, USR_USER, user_first, user_last, uri);
- uri_set_field(res, USR_PASSWD, passwd_first, passwd_last, uri);
- if (res->field_set & (1 << USR_USER)) {
- uri_set_field(res, USR_USERINFO, user_first, last_atmark, uri);
- }
- if (last_slash && last_slash + 1 != path_last) {
- uri_set_field(res, USR_BASENAME, last_slash + 1, path_last, uri);
- }
- if (port != -1) {
- res->field_set |= 1 << USR_PORT;
- res->port = port;
- }
- }
- return 0;
- }
|