HttpResponse.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "HttpResponse.h"
  36. #include "Request.h"
  37. #include "Segment.h"
  38. #include "HttpRequest.h"
  39. #include "HttpHeader.h"
  40. #include "Range.h"
  41. #include "LogFactory.h"
  42. #include "Logger.h"
  43. #include "util.h"
  44. #include "message.h"
  45. #include "DlAbortEx.h"
  46. #include "DlRetryEx.h"
  47. #include "fmt.h"
  48. #include "A2STR.h"
  49. #include "CookieStorage.h"
  50. #include "AuthConfigFactory.h"
  51. #include "AuthConfig.h"
  52. #include "ChunkedDecodingStreamFilter.h"
  53. #include "error_code.h"
  54. #include "prefs.h"
  55. #include "Option.h"
  56. #include "Checksum.h"
  57. #include "uri.h"
  58. #include "MetalinkHttpEntry.h"
  59. #include "base64.h"
  60. #include "array_fun.h"
  61. #ifdef ENABLE_MESSAGE_DIGEST
  62. #include "MessageDigest.h"
  63. #endif // ENABLE_MESSAGE_DIGEST
  64. #ifdef HAVE_ZLIB
  65. # include "GZipDecodingStreamFilter.h"
  66. #endif // HAVE_ZLIB
  67. namespace aria2 {
  68. HttpResponse::HttpResponse()
  69. : cuid_(0)
  70. {}
  71. HttpResponse::~HttpResponse() {}
  72. void HttpResponse::validateResponse() const
  73. {
  74. int statusCode = getStatusCode();
  75. if(statusCode >= 400) {
  76. return;
  77. }
  78. if(statusCode == 304) {
  79. if(!httpRequest_->conditionalRequest()) {
  80. throw DL_ABORT_EX2("Got 304 without If-Modified-Since or If-None-Match",
  81. error_code::HTTP_PROTOCOL_ERROR);
  82. }
  83. } else if(statusCode == 301 ||
  84. statusCode == 302 ||
  85. statusCode == 303 ||
  86. statusCode == 307) {
  87. if(!httpHeader_->defined(HttpHeader::LOCATION)) {
  88. throw DL_ABORT_EX2(fmt(EX_LOCATION_HEADER_REQUIRED, statusCode),
  89. error_code::HTTP_PROTOCOL_ERROR);
  90. }
  91. return;
  92. } else if(statusCode == 200 || statusCode == 206) {
  93. if(!httpHeader_->defined(HttpHeader::TRANSFER_ENCODING)) {
  94. // compare the received range against the requested range
  95. Range responseRange = httpHeader_->getRange();
  96. if(!httpRequest_->isRangeSatisfied(responseRange)) {
  97. throw DL_ABORT_EX2
  98. (fmt(EX_INVALID_RANGE_HEADER,
  99. httpRequest_->getStartByte(),
  100. httpRequest_->getEndByte(),
  101. httpRequest_->getEntityLength(),
  102. responseRange.startByte,
  103. responseRange.endByte,
  104. responseRange.entityLength),
  105. error_code::CANNOT_RESUME);
  106. }
  107. }
  108. } else {
  109. throw DL_ABORT_EX2(fmt("Unexpected status %d", statusCode),
  110. error_code::HTTP_PROTOCOL_ERROR);
  111. }
  112. }
  113. std::string HttpResponse::determinFilename() const
  114. {
  115. std::string contentDisposition =
  116. util::getContentDispositionFilename
  117. (httpHeader_->find(HttpHeader::CONTENT_DISPOSITION));
  118. if(contentDisposition.empty()) {
  119. std::string file =
  120. util::percentDecode(httpRequest_->getFile().begin(),
  121. httpRequest_->getFile().end());
  122. if(file.empty()) {
  123. return "index.html";
  124. } else {
  125. return file;
  126. }
  127. } else {
  128. A2_LOG_INFO(fmt(MSG_CONTENT_DISPOSITION_DETECTED,
  129. cuid_,
  130. contentDisposition.c_str()));
  131. return contentDisposition;
  132. }
  133. }
  134. void HttpResponse::retrieveCookie()
  135. {
  136. Time now;
  137. std::pair<std::multimap<int, std::string>::const_iterator,
  138. std::multimap<int, std::string>::const_iterator> r =
  139. httpHeader_->equalRange(HttpHeader::SET_COOKIE);
  140. for(; r.first != r.second; ++r.first) {
  141. httpRequest_->getCookieStorage()->parseAndStore
  142. ((*r.first).second, httpRequest_->getHost(), httpRequest_->getDir(),
  143. now.getTime());
  144. }
  145. }
  146. bool HttpResponse::isRedirect() const
  147. {
  148. int statusCode = getStatusCode();
  149. return (301 == statusCode ||
  150. 302 == statusCode ||
  151. 303 == statusCode ||
  152. 307 == statusCode) &&
  153. httpHeader_->defined(HttpHeader::LOCATION);
  154. }
  155. void HttpResponse::processRedirect()
  156. {
  157. if(httpRequest_->getRequest()->redirectUri
  158. (util::percentEncodeMini(getRedirectURI()))) {
  159. A2_LOG_INFO(fmt(MSG_REDIRECT,
  160. cuid_,
  161. httpRequest_->getRequest()->getCurrentUri().c_str()));
  162. } else {
  163. throw DL_RETRY_EX
  164. (fmt("CUID#%" PRId64 " - Redirect to %s failed. It may not be a valid URI.",
  165. cuid_,
  166. httpRequest_->getRequest()->getCurrentUri().c_str()));
  167. }
  168. }
  169. const std::string& HttpResponse::getRedirectURI() const
  170. {
  171. return httpHeader_->find(HttpHeader::LOCATION);
  172. }
  173. bool HttpResponse::isTransferEncodingSpecified() const
  174. {
  175. return httpHeader_->defined(HttpHeader::TRANSFER_ENCODING);
  176. }
  177. const std::string& HttpResponse::getTransferEncoding() const
  178. {
  179. // TODO See TODO in getTransferEncodingStreamFilter()
  180. return httpHeader_->find(HttpHeader::TRANSFER_ENCODING);
  181. }
  182. std::shared_ptr<StreamFilter> HttpResponse::getTransferEncodingStreamFilter() const
  183. {
  184. std::shared_ptr<StreamFilter> filter;
  185. // TODO Transfer-Encoding header field can contains multiple tokens. We should
  186. // parse the field and retrieve each token.
  187. if(isTransferEncodingSpecified()) {
  188. if(util::strieq(getTransferEncoding(), "chunked")) {
  189. filter.reset(new ChunkedDecodingStreamFilter());
  190. }
  191. }
  192. return filter;
  193. }
  194. bool HttpResponse::isContentEncodingSpecified() const
  195. {
  196. return httpHeader_->defined(HttpHeader::CONTENT_ENCODING);
  197. }
  198. const std::string& HttpResponse::getContentEncoding() const
  199. {
  200. return httpHeader_->find(HttpHeader::CONTENT_ENCODING);
  201. }
  202. std::shared_ptr<StreamFilter> HttpResponse::getContentEncodingStreamFilter() const
  203. {
  204. std::shared_ptr<StreamFilter> filter;
  205. #ifdef HAVE_ZLIB
  206. if(util::strieq(getContentEncoding(), "gzip") ||
  207. util::strieq(getContentEncoding(), "deflate")) {
  208. filter.reset(new GZipDecodingStreamFilter());
  209. }
  210. #endif // HAVE_ZLIB
  211. return filter;
  212. }
  213. int64_t HttpResponse::getContentLength() const
  214. {
  215. if(!httpHeader_) {
  216. return 0;
  217. } else {
  218. return httpHeader_->getRange().getContentLength();
  219. }
  220. }
  221. int64_t HttpResponse::getEntityLength() const
  222. {
  223. if(!httpHeader_) {
  224. return 0;
  225. } else {
  226. return httpHeader_->getRange().entityLength;
  227. }
  228. }
  229. std::string HttpResponse::getContentType() const
  230. {
  231. if(!httpHeader_) {
  232. return A2STR::NIL;
  233. } else {
  234. const std::string& ctype = httpHeader_->find(HttpHeader::CONTENT_TYPE);
  235. std::string::const_iterator i = std::find(ctype.begin(), ctype.end(), ';');
  236. Scip p = util::stripIter(ctype.begin(), i);
  237. return std::string(p.first, p.second);
  238. }
  239. }
  240. void HttpResponse::setHttpHeader(const std::shared_ptr<HttpHeader>& httpHeader)
  241. {
  242. httpHeader_ = httpHeader;
  243. }
  244. void HttpResponse::setHttpRequest(const std::shared_ptr<HttpRequest>& httpRequest)
  245. {
  246. httpRequest_ = httpRequest;
  247. }
  248. int HttpResponse::getStatusCode() const
  249. {
  250. return httpHeader_->getStatusCode();
  251. }
  252. Time HttpResponse::getLastModifiedTime() const
  253. {
  254. return Time::parseHTTPDate(httpHeader_->find(HttpHeader::LAST_MODIFIED));
  255. }
  256. bool HttpResponse::supportsPersistentConnection() const
  257. {
  258. return httpHeader_->isKeepAlive();
  259. }
  260. namespace {
  261. bool parseMetalinkHttpLink(MetalinkHttpEntry& result, const std::string& s)
  262. {
  263. std::string::const_iterator first = std::find(s.begin(), s.end(), '<');
  264. if(first == s.end()) {
  265. return false;
  266. }
  267. std::string::const_iterator last = std::find(first, s.end(), '>');
  268. if(last == s.end()) {
  269. return false;
  270. }
  271. std::pair<std::string::const_iterator,
  272. std::string::const_iterator> p = util::stripIter(first+1, last);
  273. if(p.first == p.second) {
  274. return false;
  275. } else {
  276. result.uri.assign(p.first, p.second);
  277. }
  278. last = std::find(last, s.end(), ';');
  279. if(last != s.end()) {
  280. ++last;
  281. }
  282. bool ok = false;
  283. while(1) {
  284. std::string name, value;
  285. std::pair<std::string::const_iterator, bool> r =
  286. util::nextParam(name, value, last, s.end(), ';');
  287. last = r.first;
  288. if(!r.second) {
  289. break;
  290. }
  291. if(value.empty()) {
  292. if(name == "pref") {
  293. result.pref = true;
  294. }
  295. } else {
  296. if(name == "rel") {
  297. if(value == "duplicate") {
  298. ok = true;
  299. } else {
  300. ok = false;
  301. }
  302. } else if(name == "pri") {
  303. int32_t priValue;
  304. if(util::parseIntNoThrow(priValue, value)) {
  305. if(1 <= priValue && priValue <= 999999) {
  306. result.pri = priValue;
  307. }
  308. }
  309. } else if(name == "geo") {
  310. util::lowercase(value);
  311. result.geo = value;
  312. }
  313. }
  314. }
  315. return ok;
  316. }
  317. } // namespace
  318. // Metalink/HTTP is defined by http://tools.ietf.org/html/rfc6249.
  319. // Link header field is defined by http://tools.ietf.org/html/rfc5988.
  320. void HttpResponse::getMetalinKHttpEntries
  321. (std::vector<MetalinkHttpEntry>& result,
  322. const std::shared_ptr<Option>& option) const
  323. {
  324. std::pair<std::multimap<int, std::string>::const_iterator,
  325. std::multimap<int, std::string>::const_iterator> p =
  326. httpHeader_->equalRange(HttpHeader::LINK);
  327. for(; p.first != p.second; ++p.first) {
  328. MetalinkHttpEntry e;
  329. if(parseMetalinkHttpLink(e, (*p.first).second)) {
  330. result.push_back(e);
  331. }
  332. }
  333. if(!result.empty()) {
  334. std::vector<std::string> locs;
  335. if(option->defined(PREF_METALINK_LOCATION)) {
  336. const std::string& loc = option->get(PREF_METALINK_LOCATION);
  337. util::split(loc.begin(), loc.end(), std::back_inserter(locs), ',', true);
  338. for(std::vector<std::string>::iterator i = locs.begin(), eoi = locs.end();
  339. i != eoi; ++i) {
  340. util::lowercase(*i);
  341. }
  342. }
  343. for(std::vector<MetalinkHttpEntry>::iterator i = result.begin(),
  344. eoi = result.end(); i != eoi; ++i) {
  345. if(std::find(locs.begin(), locs.end(), (*i).geo) != locs.end()) {
  346. (*i).pri -= 999999;
  347. }
  348. }
  349. }
  350. std::sort(result.begin(), result.end());
  351. }
  352. #ifdef ENABLE_MESSAGE_DIGEST
  353. // Digest header field is defined by
  354. // http://tools.ietf.org/html/rfc3230.
  355. void HttpResponse::getDigest(std::vector<Checksum>& result) const
  356. {
  357. using std::swap;
  358. std::pair<std::multimap<int, std::string>::const_iterator,
  359. std::multimap<int, std::string>::const_iterator> p =
  360. httpHeader_->equalRange(HttpHeader::DIGEST);
  361. for(; p.first != p.second; ++p.first) {
  362. const std::string& s = (*p.first).second;
  363. std::string::const_iterator itr = s.begin();
  364. while(1) {
  365. std::string hashType, digest;
  366. std::pair<std::string::const_iterator, bool> r =
  367. util::nextParam(hashType, digest, itr, s.end(), ',');
  368. itr = r.first;
  369. if(!r.second) {
  370. break;
  371. }
  372. util::lowercase(hashType);
  373. digest = base64::decode(digest.begin(), digest.end());
  374. if(!MessageDigest::supports(hashType) ||
  375. MessageDigest::getDigestLength(hashType) != digest.size()) {
  376. continue;
  377. }
  378. result.push_back(Checksum(hashType, digest));
  379. }
  380. }
  381. std::sort(result.begin(), result.end(), HashTypeStronger());
  382. std::vector<Checksum> temp;
  383. for(std::vector<Checksum>::iterator i = result.begin(),
  384. eoi = result.end(); i != eoi;) {
  385. bool ok = true;
  386. std::vector<Checksum>::iterator j = i+1;
  387. for(; j != eoi; ++j) {
  388. if((*i).getHashType() != (*j).getHashType()) {
  389. break;
  390. }
  391. if((*i).getDigest() != (*j).getDigest()) {
  392. ok = false;
  393. }
  394. }
  395. if(ok) {
  396. temp.push_back(*i);
  397. }
  398. i = j;
  399. }
  400. swap(temp, result);
  401. }
  402. #endif // ENABLE_MESSAGE_DIGEST
  403. } // namespace aria2