uri.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2010 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "uri.h"
  36. #include "A2STR.h"
  37. #include "FeatureConfig.h"
  38. #include "util.h"
  39. namespace aria2 {
  40. namespace uri {
  41. UriStruct::UriStruct()
  42. : port(0), hasPassword(false), ipv6LiteralAddress(false)
  43. {}
  44. UriStruct::UriStruct(const UriStruct& c)
  45. : protocol(c.protocol),
  46. host(c.host),
  47. port(c.port),
  48. dir(c.dir),
  49. file(c.file),
  50. query(c.query),
  51. username(c.username),
  52. password(c.password),
  53. hasPassword(c.hasPassword),
  54. ipv6LiteralAddress(c.ipv6LiteralAddress)
  55. {}
  56. UriStruct::~UriStruct() {}
  57. UriStruct& UriStruct::operator=(const UriStruct& c)
  58. {
  59. if(this != &c) {
  60. protocol = c.protocol;
  61. host = c.host;
  62. port = c.port;
  63. dir = c.dir;
  64. file = c.file;
  65. query = c.query;
  66. username = c.username;
  67. password = c.password;
  68. hasPassword = c.hasPassword;
  69. ipv6LiteralAddress = c.ipv6LiteralAddress;
  70. }
  71. return *this;
  72. }
  73. void UriStruct::swap(UriStruct& other)
  74. {
  75. using std::swap;
  76. if(this != &other) {
  77. swap(protocol, other.protocol);
  78. swap(host, other.host);
  79. swap(port, other.port);
  80. swap(dir, other.dir);
  81. swap(file, other.file);
  82. swap(query, other.query);
  83. swap(username, other.username);
  84. swap(password, other.password);
  85. swap(hasPassword, other.hasPassword);
  86. swap(ipv6LiteralAddress, other.ipv6LiteralAddress);
  87. }
  88. }
  89. void swap(UriStruct& lhs, UriStruct& rhs)
  90. {
  91. lhs.swap(rhs);
  92. }
  93. bool parse(UriStruct& result, const std::string& uri)
  94. {
  95. uri_split_result res;
  96. int rv;
  97. const char* p = uri.c_str();
  98. rv = uri_split(&res, p);
  99. if(rv == 0) {
  100. result.protocol.assign(p + res.fields[USR_SCHEME].off,
  101. res.fields[USR_SCHEME].len);
  102. result.host.assign(p + res.fields[USR_HOST].off, res.fields[USR_HOST].len);
  103. if(res.port == 0) {
  104. uint16_t defPort;
  105. if((defPort = getDefaultPort(result.protocol)) == 0) {
  106. return false;
  107. }
  108. result.port = defPort;
  109. } else {
  110. result.port = res.port;
  111. }
  112. if(res.field_set & (1 << USR_PATH)) {
  113. if(res.field_set & (1 << USR_BASENAME)) {
  114. result.dir.assign(p + res.fields[USR_PATH].off,
  115. res.fields[USR_PATH].len -
  116. res.fields[USR_BASENAME].len);
  117. result.file.assign(p + res.fields[USR_BASENAME].off,
  118. res.fields[USR_BASENAME].len);
  119. } else {
  120. result.dir.assign(p + res.fields[USR_PATH].off,
  121. res.fields[USR_PATH].len);
  122. result.file = A2STR::NIL;
  123. }
  124. } else {
  125. result.dir = "/";
  126. result.file = A2STR::NIL;
  127. }
  128. if(res.field_set & (1 << USR_QUERY)) {
  129. result.query = "?";
  130. result.query.append(p + res.fields[USR_QUERY].off,
  131. res.fields[USR_QUERY].len);
  132. } else {
  133. result.query = A2STR::NIL;
  134. }
  135. if(res.field_set & (1 << USR_USER)) {
  136. result.username.assign(p + res.fields[USR_USER].off,
  137. res.fields[USR_USER].len);
  138. result.username = util::percentDecode(result.username.begin(),
  139. result.username.end());
  140. } else {
  141. result.username = A2STR::NIL;
  142. }
  143. if(res.field_set & (1 << USR_PASSWD)) {
  144. result.hasPassword = true;
  145. result.password.assign(p + res.fields[USR_PASSWD].off,
  146. res.fields[USR_PASSWD].len);
  147. result.password = util::percentDecode(result.password.begin(),
  148. result.password.end());
  149. } else {
  150. result.hasPassword = false;
  151. result.password = A2STR::NIL;
  152. }
  153. result.ipv6LiteralAddress = res.flags & USF_IPV6ADDR;
  154. return true;
  155. } else {
  156. return false;
  157. }
  158. }
  159. std::string getFieldString(const uri_split_result& res, int field,
  160. const char* base)
  161. {
  162. if(res.field_set & (1 << field)) {
  163. return std::string(base + res.fields[field].off, res.fields[field].len);
  164. } else {
  165. return "";
  166. }
  167. }
  168. std::string construct(const UriStruct& us)
  169. {
  170. std::string res;
  171. res += us.protocol;
  172. res += "://";
  173. if(!us.username.empty()) {
  174. res += util::percentEncode(us.username);
  175. if(us.hasPassword) {
  176. res += ":";
  177. res += util::percentEncode(us.password);
  178. }
  179. res += "@";
  180. }
  181. if(us.ipv6LiteralAddress) {
  182. res += "[";
  183. res += us.host;
  184. res += "]";
  185. } else {
  186. res += us.host;
  187. }
  188. uint16_t defPort= getDefaultPort(us.protocol);
  189. if(us.port != 0 && defPort != us.port) {
  190. res += fmt(":%u", us.port);
  191. }
  192. res += us.dir;
  193. if(us.dir.empty() || us.dir[us.dir.size()-1] != '/') {
  194. res += "/";
  195. }
  196. res += us.file;
  197. res += us.query;
  198. return res;
  199. }
  // Scanner states used by normalizePath().
  enum {
    NPATH_START,   // no character consumed yet
    NPATH_SLASH,   // the previous character was '/'
    NPATH_SDOT,    // the current component so far is "."
    NPATH_DDOT,    // the current component so far is ".."
    NPATH_PATHCOMP // inside an ordinary path component
  };

  namespace {
  // Removes up to three trailing offsets from |marks|: the start offset
  // recorded for a ".." component and, when present, the [start, end)
  // pair of the component that precedes it.
  void dropDotDot(std::vector<int>& marks)
  {
    for(int n = 0; n != 3 && !marks.empty(); ++n) {
      marks.pop_back();
    }
  }
  } // namespace

  // Returns |path| with "." components removed, ".." components removed
  // together with the component before them, and runs of '/' collapsed
  // into one. A leading '/' is preserved; a ".." that has no preceding
  // component is simply dropped.
  //
  // The scan records, as offsets into |path|, the [start, end) range of
  // every component that survives (end includes the trailing '/' when
  // there is one). Afterwards the surviving ranges are compacted towards
  // the front of the string in place and the tail is erased.
  std::string normalizePath(std::string path)
  {
    const std::string::iterator first = path.begin();
    const std::string::iterator last = path.end();

    // Flat list of offsets; consecutive entries form [start, end) pairs.
    std::vector<int> marks;
    // 32 is arbitrary
    marks.reserve(32);

    bool startWithSlash = false;
    int state = NPATH_START;

    for(std::string::iterator cur = first; cur != last; ++cur) {
      const char ch = *cur;
      switch(state) {
      case NPATH_START:
      case NPATH_SLASH:
        if(ch == '/') {
          // A '/' at the very beginning marks the path as absolute; any
          // '/' that directly follows another '/' is a duplicate and is
          // dropped.
          if(state == NPATH_START) {
            startWithSlash = true;
            state = NPATH_SLASH;
          }
        }
        else {
          // A new component begins here.
          marks.push_back(static_cast<int>(cur - first));
          state = (ch == '.') ? NPATH_SDOT : NPATH_PATHCOMP;
        }
        break;
      case NPATH_SDOT:
        if(ch == '/') {
          // The component was exactly "."; forget it.
          marks.pop_back();
          state = NPATH_SLASH;
        }
        else if(ch == '.') {
          state = NPATH_DDOT;
        }
        else {
          state = NPATH_PATHCOMP;
        }
        break;
      case NPATH_DDOT:
        if(ch == '/') {
          // The component was exactly ".."; forget it and its parent.
          dropDotDot(marks);
          state = NPATH_SLASH;
        }
        else {
          state = NPATH_PATHCOMP;
        }
        break;
      case NPATH_PATHCOMP:
        if(ch == '/') {
          // Component ends just past this '/'.
          marks.push_back(static_cast<int>(cur + 1 - first));
          state = NPATH_SLASH;
        }
        break;
      }
    }

    // Settle whatever component was still open at end of input.
    if(state == NPATH_PATHCOMP) {
      marks.push_back(static_cast<int>(last - first));
    }
    else if(state == NPATH_SDOT) {
      marks.pop_back();
    }
    else if(state == NPATH_DDOT) {
      dropDotDot(marks);
    }

    // Compact the surviving ranges; the leading '/' (if any) stays put.
    std::string::iterator out = startWithSlash ? first + 1 : first;
    for(std::vector<int>::size_type i = 0; i < marks.size(); i += 2) {
      const std::string::iterator from = first + marks[i];
      const std::string::iterator to = first + marks[i + 1];
      if(from == out) {
        // Already in place; nothing to move.
        out = to;
      }
      else {
        out = std::copy(from, to, out);
      }
    }
    path.erase(out, path.end());
    return path;
  }
  315. namespace {
  316. std::string joinPath(std::string basePath,
  317. std::string::const_iterator newPathFirst,
  318. std::string::const_iterator newPathLast)
  319. {
  320. if(newPathFirst == newPathLast) {
  321. return basePath;
  322. } else if(basePath.empty() || *newPathFirst == '/') {
  323. return normalizePath(std::string(newPathFirst, newPathLast));
  324. } else if(basePath[basePath.size()-1] == '/') {
  325. basePath.append(newPathFirst, newPathLast);
  326. return normalizePath(basePath);
  327. } else {
  328. basePath += "/";
  329. basePath.append(newPathFirst, newPathLast);
  330. return normalizePath(basePath);
  331. }
  332. }
  333. } // namespace
  334. std::string joinPath(const std::string& basePath, const std::string& newPath)
  335. {
  336. return joinPath(basePath, newPath.begin(), newPath.end());
  337. }
  338. std::string joinUri(const std::string& baseUri, const std::string& uri)
  339. {
  340. UriStruct us;
  341. if(parse(us, uri)) {
  342. return uri;
  343. } else {
  344. UriStruct bus;
  345. if(!parse(bus, baseUri)) {
  346. return uri;
  347. }
  348. std::string::const_iterator qend;
  349. for(qend = uri.begin(); qend != uri.end(); ++qend) {
  350. if(*qend == '#') {
  351. break;
  352. }
  353. }
  354. std::string::const_iterator end;
  355. for(end = uri.begin(); end != qend; ++end) {
  356. if(*end == '?') {
  357. break;
  358. }
  359. }
  360. std::string newpath = joinPath(bus.dir, uri.begin(), end);
  361. bus.dir.clear();
  362. bus.file.clear();
  363. bus.query.clear();
  364. std::string res = construct(bus);
  365. if(!newpath.empty()) {
  366. // res always ends with '/'. Since bus.dir also starts with '/',
  367. // regardless of uri, newpath always starts with '/'.
  368. res.append(newpath.begin()+1, newpath.end());
  369. }
  370. res.append(end, qend);
  371. return res;
  372. }
  373. }
  374. } // namespace uri
  375. } // namespace aria2