uri.cc 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2010 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "uri.h"
  36. #include "A2STR.h"
  37. #include "FeatureConfig.h"
  38. #include "util.h"
  39. namespace aria2 {
  40. namespace uri {
  41. UriStruct::UriStruct() : port(0), hasPassword(false), ipv6LiteralAddress(false)
  42. {
  43. }
  44. UriStruct::UriStruct(const UriStruct& c)
  45. : protocol(c.protocol),
  46. host(c.host),
  47. dir(c.dir),
  48. file(c.file),
  49. query(c.query),
  50. username(c.username),
  51. password(c.password),
  52. port(c.port),
  53. hasPassword(c.hasPassword),
  54. ipv6LiteralAddress(c.ipv6LiteralAddress)
  55. {
  56. }
  57. UriStruct::~UriStruct() {}
  58. UriStruct& UriStruct::operator=(const UriStruct& c)
  59. {
  60. if (this != &c) {
  61. protocol = c.protocol;
  62. host = c.host;
  63. dir = c.dir;
  64. file = c.file;
  65. query = c.query;
  66. username = c.username;
  67. password = c.password;
  68. port = c.port;
  69. hasPassword = c.hasPassword;
  70. ipv6LiteralAddress = c.ipv6LiteralAddress;
  71. }
  72. return *this;
  73. }
  74. void UriStruct::swap(UriStruct& other)
  75. {
  76. using std::swap;
  77. if (this != &other) {
  78. swap(protocol, other.protocol);
  79. swap(host, other.host);
  80. swap(dir, other.dir);
  81. swap(file, other.file);
  82. swap(query, other.query);
  83. swap(username, other.username);
  84. swap(password, other.password);
  85. swap(port, other.port);
  86. swap(hasPassword, other.hasPassword);
  87. swap(ipv6LiteralAddress, other.ipv6LiteralAddress);
  88. }
  89. }
  90. void swap(UriStruct& lhs, UriStruct& rhs) { lhs.swap(rhs); }
  91. bool parse(UriStruct& result, const std::string& uri)
  92. {
  93. uri_split_result res;
  94. int rv;
  95. const char* p = uri.c_str();
  96. rv = uri_split(&res, p);
  97. if (rv != 0) {
  98. return false;
  99. }
  100. result.protocol.assign(p + res.fields[USR_SCHEME].off,
  101. res.fields[USR_SCHEME].len);
  102. result.host.assign(p + res.fields[USR_HOST].off, res.fields[USR_HOST].len);
  103. if (res.port == 0) {
  104. uint16_t defPort;
  105. if ((defPort = getDefaultPort(result.protocol)) == 0) {
  106. return false;
  107. }
  108. result.port = defPort;
  109. }
  110. else {
  111. result.port = res.port;
  112. }
  113. if (res.field_set & (1 << USR_PATH)) {
  114. if (res.field_set & (1 << USR_BASENAME)) {
  115. result.dir.assign(p + res.fields[USR_PATH].off,
  116. res.fields[USR_PATH].len -
  117. res.fields[USR_BASENAME].len);
  118. result.file.assign(p + res.fields[USR_BASENAME].off,
  119. res.fields[USR_BASENAME].len);
  120. }
  121. else {
  122. result.dir.assign(p + res.fields[USR_PATH].off, res.fields[USR_PATH].len);
  123. result.file = A2STR::NIL;
  124. }
  125. }
  126. else {
  127. result.dir = "/";
  128. result.file = A2STR::NIL;
  129. }
  130. if (res.field_set & (1 << USR_QUERY)) {
  131. result.query = "?";
  132. result.query.append(p + res.fields[USR_QUERY].off,
  133. res.fields[USR_QUERY].len);
  134. }
  135. else {
  136. result.query = A2STR::NIL;
  137. }
  138. if (res.field_set & (1 << USR_USER)) {
  139. result.username.assign(p + res.fields[USR_USER].off,
  140. res.fields[USR_USER].len);
  141. result.username =
  142. util::percentDecode(result.username.begin(), result.username.end());
  143. }
  144. else {
  145. result.username = A2STR::NIL;
  146. }
  147. if (res.field_set & (1 << USR_PASSWD)) {
  148. result.hasPassword = true;
  149. result.password.assign(p + res.fields[USR_PASSWD].off,
  150. res.fields[USR_PASSWD].len);
  151. result.password =
  152. util::percentDecode(result.password.begin(), result.password.end());
  153. }
  154. else {
  155. result.hasPassword = false;
  156. result.password = A2STR::NIL;
  157. }
  158. result.ipv6LiteralAddress = res.flags & USF_IPV6ADDR;
  159. return true;
  160. }
  161. std::string getFieldString(const uri_split_result& res, int field,
  162. const char* base)
  163. {
  164. if (res.field_set & (1 << field)) {
  165. return std::string(base + res.fields[field].off, res.fields[field].len);
  166. }
  167. return "";
  168. }
  169. std::string construct(const UriStruct& us)
  170. {
  171. std::string res;
  172. res += us.protocol;
  173. res += "://";
  174. if (!us.username.empty()) {
  175. res += util::percentEncode(us.username);
  176. if (us.hasPassword) {
  177. res += ":";
  178. res += util::percentEncode(us.password);
  179. }
  180. res += "@";
  181. }
  182. if (us.ipv6LiteralAddress) {
  183. res += "[";
  184. res += us.host;
  185. res += "]";
  186. }
  187. else {
  188. res += us.host;
  189. }
  190. uint16_t defPort = getDefaultPort(us.protocol);
  191. if (us.port != 0 && defPort != us.port) {
  192. res += fmt(":%u", us.port);
  193. }
  194. res += us.dir;
  195. if (us.dir.empty() || us.dir[us.dir.size() - 1] != '/') {
  196. res += "/";
  197. }
  198. res += us.file;
  199. res += us.query;
  200. return res;
  201. }
  202. namespace {
  203. enum { NPATH_START, NPATH_SLASH, NPATH_SDOT, NPATH_DDOT, NPATH_PATHCOMP };
  204. }
  205. std::string normalizePath(std::string path)
  206. {
  207. auto begin = path.begin(), out = begin;
  208. int state = NPATH_START;
  209. bool startWithSlash = false;
  210. std::vector<int> range;
  211. // 32 is arbitrary
  212. range.reserve(32);
  213. for (auto in = begin, eoi = path.end(); in != eoi; ++in) {
  214. switch (state) {
  215. case NPATH_START:
  216. switch (*in) {
  217. case '.':
  218. state = NPATH_SDOT;
  219. range.push_back(in - begin);
  220. break;
  221. case '/':
  222. startWithSlash = true;
  223. state = NPATH_SLASH;
  224. break;
  225. default:
  226. state = NPATH_PATHCOMP;
  227. range.push_back(in - begin);
  228. break;
  229. }
  230. break;
  231. case NPATH_SLASH:
  232. switch (*in) {
  233. case '.':
  234. state = NPATH_SDOT;
  235. range.push_back(in - begin);
  236. break;
  237. case '/':
  238. // drop duplicate '/'
  239. break;
  240. default:
  241. state = NPATH_PATHCOMP;
  242. range.push_back(in - begin);
  243. break;
  244. }
  245. break;
  246. case NPATH_SDOT:
  247. switch (*in) {
  248. case '.':
  249. state = NPATH_DDOT;
  250. break;
  251. case '/':
  252. // drop path component '.'
  253. state = NPATH_SLASH;
  254. range.pop_back();
  255. break;
  256. default:
  257. state = NPATH_PATHCOMP;
  258. break;
  259. }
  260. break;
  261. case NPATH_DDOT:
  262. switch (*in) {
  263. case '/':
  264. // drop previous path component before '..'
  265. for (int i = 0; i < 3 && !range.empty(); ++i) {
  266. range.pop_back();
  267. }
  268. state = NPATH_SLASH;
  269. break;
  270. default:
  271. state = NPATH_PATHCOMP;
  272. break;
  273. }
  274. break;
  275. case NPATH_PATHCOMP:
  276. if (*in == '/') {
  277. range.push_back(in + 1 - begin);
  278. state = NPATH_SLASH;
  279. }
  280. break;
  281. }
  282. }
  283. switch (state) {
  284. case NPATH_SDOT:
  285. range.pop_back();
  286. break;
  287. case NPATH_DDOT:
  288. for (int i = 0; i < 3 && !range.empty(); ++i) {
  289. range.pop_back();
  290. }
  291. break;
  292. case NPATH_PATHCOMP:
  293. range.push_back(path.end() - begin);
  294. break;
  295. default:
  296. break;
  297. }
  298. if (startWithSlash) {
  299. ++out;
  300. }
  301. for (int i = 0; i < (int)range.size(); i += 2) {
  302. auto a = begin + range[i];
  303. auto b = begin + range[i + 1];
  304. if (a == out) {
  305. out = b;
  306. }
  307. else {
  308. out = std::copy(a, b, out);
  309. }
  310. }
  311. path.erase(out, path.end());
  312. return path;
  313. }
  314. namespace {
  315. std::string joinPath(std::string basePath,
  316. std::string::const_iterator newPathFirst,
  317. std::string::const_iterator newPathLast)
  318. {
  319. if (newPathFirst == newPathLast) {
  320. return basePath;
  321. }
  322. if (basePath.empty() || *newPathFirst == '/') {
  323. return normalizePath(std::string(newPathFirst, newPathLast));
  324. }
  325. if (basePath[basePath.size() - 1] == '/') {
  326. basePath.append(newPathFirst, newPathLast);
  327. return normalizePath(basePath);
  328. }
  329. basePath += "/";
  330. basePath.append(newPathFirst, newPathLast);
  331. return normalizePath(basePath);
  332. }
  333. } // namespace
  334. std::string joinPath(const std::string& basePath, const std::string& newPath)
  335. {
  336. return joinPath(basePath, newPath.begin(), newPath.end());
  337. }
  338. std::string joinUri(const std::string& baseUri, const std::string& uri)
  339. {
  340. UriStruct us;
  341. if (parse(us, uri)) {
  342. return uri;
  343. }
  344. UriStruct bus;
  345. if (!parse(bus, baseUri)) {
  346. return uri;
  347. }
  348. std::string::const_iterator qend;
  349. for (qend = uri.begin(); qend != uri.end(); ++qend) {
  350. if (*qend == '#') {
  351. break;
  352. }
  353. }
  354. std::string::const_iterator end;
  355. for (end = uri.begin(); end != qend; ++end) {
  356. if (*end == '?') {
  357. break;
  358. }
  359. }
  360. std::string newpath = joinPath(bus.dir, uri.begin(), end);
  361. bus.dir.clear();
  362. bus.file.clear();
  363. bus.query.clear();
  364. std::string res = construct(bus);
  365. if (!newpath.empty()) {
  366. // res always ends with '/'. Since bus.dir also starts with '/',
  367. // regardless of uri, newpath always starts with '/'.
  368. res.append(newpath.begin() + 1, newpath.end());
  369. }
  370. res.append(end, qend);
  371. return res;
  372. }
  373. } // namespace uri
  374. } // namespace aria2