FileEntry.cc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "FileEntry.h"
  36. #include <cassert>
  37. #include <algorithm>
  38. #include "util.h"
  39. #include "URISelector.h"
  40. #include "Logger.h"
  41. #include "LogFactory.h"
  42. #include "wallclock.h"
  43. #include "a2algo.h"
  44. #include "uri.h"
  45. #include "PeerStat.h"
  46. #include "fmt.h"
  47. #include "ServerStatMan.h"
  48. #include "ServerStat.h"
  49. namespace aria2 {
  50. bool FileEntry::RequestFaster::
  51. operator()(const std::shared_ptr<Request>& lhs,
  52. const std::shared_ptr<Request>& rhs) const
  53. {
  54. if (!lhs->getPeerStat()) {
  55. return false;
  56. }
  57. if (!rhs->getPeerStat()) {
  58. return true;
  59. }
  60. int lspd = lhs->getPeerStat()->getAvgDownloadSpeed();
  61. int rspd = rhs->getPeerStat()->getAvgDownloadSpeed();
  62. return lspd > rspd || (lspd == rspd && lhs.get() < rhs.get());
  63. }
  64. FileEntry::FileEntry(std::string path, int64_t length, int64_t offset,
  65. const std::vector<std::string>& uris)
  66. : length_(length),
  67. offset_(offset),
  68. uris_(uris.begin(), uris.end()),
  69. path_(std::move(path)),
  70. lastFasterReplace_(Timer::zero()),
  71. maxConnectionPerServer_(1),
  72. requested_(true),
  73. uniqueProtocol_(false)
  74. {
  75. }
  76. FileEntry::FileEntry()
  77. : length_(0),
  78. offset_(0),
  79. maxConnectionPerServer_(1),
  80. requested_(false),
  81. uniqueProtocol_(false)
  82. {
  83. }
  84. FileEntry::~FileEntry() {}
  85. FileEntry& FileEntry::operator=(const FileEntry& entry)
  86. {
  87. if (this != &entry) {
  88. path_ = entry.path_;
  89. length_ = entry.length_;
  90. offset_ = entry.offset_;
  91. requested_ = entry.requested_;
  92. }
  93. return *this;
  94. }
  95. bool FileEntry::operator<(const FileEntry& fileEntry) const
  96. {
  97. return offset_ < fileEntry.offset_;
  98. }
  99. bool FileEntry::exists() const { return File(getPath()).exists(); }
  100. int64_t FileEntry::gtoloff(int64_t goff) const
  101. {
  102. assert(offset_ <= goff);
  103. return goff - offset_;
  104. }
  105. std::vector<std::string> FileEntry::getUris() const
  106. {
  107. std::vector<std::string> uris(std::begin(spentUris_), std::end(spentUris_));
  108. uris.insert(std::end(uris), std::begin(uris_), std::end(uris_));
  109. return uris;
  110. }
  111. namespace {
  112. template <typename InputIterator, typename OutputIterator>
  113. OutputIterator enumerateInFlightHosts(InputIterator first, InputIterator last,
  114. OutputIterator out)
  115. {
  116. for (; first != last; ++first) {
  117. uri_split_result us;
  118. if (uri_split(&us, (*first)->getUri().c_str()) == 0) {
  119. *out++ = uri::getFieldString(us, USR_HOST, (*first)->getUri().c_str());
  120. }
  121. }
  122. return out;
  123. }
  124. } // namespace
  125. std::shared_ptr<Request> FileEntry::getRequest(
  126. URISelector* selector, bool uriReuse,
  127. const std::vector<std::pair<size_t, std::string>>& usedHosts,
  128. const std::string& referer, const std::string& method)
  129. {
  130. std::shared_ptr<Request> req;
  131. if (requestPool_.empty()) {
  132. std::vector<std::string> inFlightHosts;
  133. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  134. std::back_inserter(inFlightHosts));
  135. for (int g = 0; g < 2; ++g) {
  136. std::vector<std::string> pending;
  137. std::vector<std::string> ignoreHost;
  138. while (1) {
  139. std::string uri = selector->select(this, usedHosts);
  140. if (uri.empty()) {
  141. break;
  142. }
  143. req = std::make_shared<Request>();
  144. if (req->setUri(uri)) {
  145. if (std::count(inFlightHosts.begin(), inFlightHosts.end(),
  146. req->getHost()) >= maxConnectionPerServer_) {
  147. pending.push_back(uri);
  148. ignoreHost.push_back(req->getHost());
  149. req.reset();
  150. continue;
  151. }
  152. if (referer == "*") {
  153. // Assuming uri has already been percent-encoded.
  154. req->setReferer(uri);
  155. }
  156. else {
  157. req->setReferer(util::percentEncodeMini(referer));
  158. }
  159. req->setMethod(method);
  160. spentUris_.push_back(uri);
  161. inFlightRequests_.insert(req);
  162. break;
  163. }
  164. else {
  165. req.reset();
  166. }
  167. }
  168. uris_.insert(uris_.begin(), pending.begin(), pending.end());
  169. if (g == 0 && uriReuse && !req && uris_.size() == pending.size()) {
  170. // Reuse URIs other than ones in pending
  171. reuseUri(ignoreHost);
  172. }
  173. else {
  174. break;
  175. }
  176. }
  177. }
  178. else {
  179. // Skip Request object if it is still
  180. // sleeping(Request::getWakeTime() < global::wallclock()). If all
  181. // pooled objects are sleeping, return first one. Caller should
  182. // inspect returned object's getWakeTime().
  183. auto i = requestPool_.begin();
  184. auto eoi = requestPool_.end();
  185. for (; i != eoi; ++i) {
  186. if ((*i)->getWakeTime() <= global::wallclock()) {
  187. break;
  188. }
  189. }
  190. if (i == eoi) {
  191. i = requestPool_.begin();
  192. }
  193. req = *i;
  194. requestPool_.erase(i);
  195. inFlightRequests_.insert(req);
  196. A2_LOG_DEBUG(fmt("Picked up from pool: %s", req->getUri().c_str()));
  197. }
  198. return req;
  199. }
  200. namespace {
  201. constexpr auto startupIdleTime = 10_s;
  202. } // namespace
  203. std::shared_ptr<Request>
  204. FileEntry::findFasterRequest(const std::shared_ptr<Request>& base)
  205. {
  206. if (requestPool_.empty() ||
  207. lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  208. return nullptr;
  209. }
  210. const std::shared_ptr<PeerStat>& fastest =
  211. (*requestPool_.begin())->getPeerStat();
  212. if (!fastest) {
  213. return nullptr;
  214. }
  215. const std::shared_ptr<PeerStat>& basestat = base->getPeerStat();
  216. // TODO hard coded value. See PREF_STARTUP_IDLE_TIME
  217. if (!basestat || (basestat->getDownloadStartTime().difference(
  218. global::wallclock()) >= startupIdleTime &&
  219. fastest->getAvgDownloadSpeed() * 0.8 >
  220. basestat->calculateDownloadSpeed())) {
  221. // TODO we should consider that "fastest" is very slow.
  222. std::shared_ptr<Request> fastestRequest = *requestPool_.begin();
  223. requestPool_.erase(requestPool_.begin());
  224. inFlightRequests_.insert(fastestRequest);
  225. lastFasterReplace_ = global::wallclock();
  226. return fastestRequest;
  227. }
  228. return nullptr;
  229. }
  230. std::shared_ptr<Request> FileEntry::findFasterRequest(
  231. const std::shared_ptr<Request>& base,
  232. const std::vector<std::pair<size_t, std::string>>& usedHosts,
  233. const std::shared_ptr<ServerStatMan>& serverStatMan)
  234. {
  235. constexpr int SPEED_THRESHOLD = 20_k;
  236. if (lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  237. return nullptr;
  238. }
  239. std::vector<std::string> inFlightHosts;
  240. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  241. std::back_inserter(inFlightHosts));
  242. const std::shared_ptr<PeerStat>& basestat = base->getPeerStat();
  243. A2_LOG_DEBUG("Search faster server using ServerStat.");
  244. // Use first 10 good URIs to introduce some randomness.
  245. const size_t NUM_URI = 10;
  246. std::vector<std::pair<std::shared_ptr<ServerStat>, std::string>> fastCands;
  247. std::vector<std::string> normCands;
  248. for (std::deque<std::string>::const_iterator i = uris_.begin(),
  249. eoi = uris_.end();
  250. i != eoi && fastCands.size() < NUM_URI; ++i) {
  251. uri_split_result us;
  252. if (uri_split(&us, (*i).c_str()) == -1) {
  253. continue;
  254. }
  255. std::string host = uri::getFieldString(us, USR_HOST, (*i).c_str());
  256. std::string protocol = uri::getFieldString(us, USR_SCHEME, (*i).c_str());
  257. if (std::count(inFlightHosts.begin(), inFlightHosts.end(), host) >=
  258. maxConnectionPerServer_) {
  259. A2_LOG_DEBUG(fmt("%s has already used %d times, not considered.",
  260. (*i).c_str(), maxConnectionPerServer_));
  261. continue;
  262. }
  263. if (findSecond(usedHosts.begin(), usedHosts.end(), host) !=
  264. usedHosts.end()) {
  265. A2_LOG_DEBUG(fmt("%s is in usedHosts, not considered", (*i).c_str()));
  266. continue;
  267. }
  268. std::shared_ptr<ServerStat> ss = serverStatMan->find(host, protocol);
  269. if (ss && ss->isOK()) {
  270. if ((basestat &&
  271. ss->getDownloadSpeed() > basestat->calculateDownloadSpeed() * 1.5) ||
  272. (!basestat && ss->getDownloadSpeed() > SPEED_THRESHOLD)) {
  273. fastCands.push_back(std::make_pair(ss, *i));
  274. }
  275. }
  276. }
  277. if (!fastCands.empty()) {
  278. std::sort(fastCands.begin(), fastCands.end(), ServerStatFaster());
  279. auto fastestRequest = std::make_shared<Request>();
  280. const std::string& uri = fastCands.front().second;
  281. A2_LOG_DEBUG(fmt("Selected %s from fastCands", uri.c_str()));
  282. // Candidate URIs where already parsed when populating fastCands.
  283. (void)fastestRequest->setUri(uri);
  284. fastestRequest->setReferer(base->getReferer());
  285. uris_.erase(std::find(uris_.begin(), uris_.end(), uri));
  286. spentUris_.push_back(uri);
  287. inFlightRequests_.insert(fastestRequest);
  288. lastFasterReplace_ = global::wallclock();
  289. return fastestRequest;
  290. }
  291. A2_LOG_DEBUG("No faster server found.");
  292. return nullptr;
  293. }
  294. void FileEntry::storePool(const std::shared_ptr<Request>& request)
  295. {
  296. const std::shared_ptr<PeerStat>& peerStat = request->getPeerStat();
  297. if (peerStat) {
  298. // We need to calculate average download speed here in order to
  299. // store Request in the right position in the pool.
  300. peerStat->calculateAvgDownloadSpeed();
  301. }
  302. requestPool_.insert(request);
  303. }
  304. void FileEntry::poolRequest(const std::shared_ptr<Request>& request)
  305. {
  306. removeRequest(request);
  307. if (!request->removalRequested()) {
  308. storePool(request);
  309. }
  310. }
  311. bool FileEntry::removeRequest(const std::shared_ptr<Request>& request)
  312. {
  313. return inFlightRequests_.erase(request) == 1;
  314. }
  315. void FileEntry::removeURIWhoseHostnameIs(const std::string& hostname)
  316. {
  317. std::deque<std::string> newURIs;
  318. for (std::deque<std::string>::const_iterator itr = uris_.begin(),
  319. eoi = uris_.end();
  320. itr != eoi; ++itr) {
  321. uri_split_result us;
  322. if (uri_split(&us, (*itr).c_str()) == -1) {
  323. continue;
  324. }
  325. if (us.fields[USR_HOST].len != hostname.size() ||
  326. memcmp((*itr).c_str() + us.fields[USR_HOST].off, hostname.c_str(),
  327. hostname.size()) != 0) {
  328. newURIs.push_back(*itr);
  329. }
  330. }
  331. A2_LOG_DEBUG(fmt("Removed %lu duplicate hostname URIs for path=%s",
  332. static_cast<unsigned long>(uris_.size() - newURIs.size()),
  333. getPath().c_str()));
  334. uris_.swap(newURIs);
  335. }
  336. void FileEntry::removeIdenticalURI(const std::string& uri)
  337. {
  338. uris_.erase(std::remove(uris_.begin(), uris_.end(), uri), uris_.end());
  339. }
  340. void FileEntry::addURIResult(std::string uri, error_code::Value result)
  341. {
  342. uriResults_.push_back(URIResult(uri, result));
  343. }
  344. namespace {
  345. class FindURIResultByResult {
  346. private:
  347. error_code::Value r_;
  348. public:
  349. FindURIResultByResult(error_code::Value r) : r_(r) {}
  350. bool operator()(const URIResult& uriResult) const
  351. {
  352. return uriResult.getResult() == r_;
  353. }
  354. };
  355. } // namespace
  356. void FileEntry::extractURIResult(std::deque<URIResult>& res,
  357. error_code::Value r)
  358. {
  359. auto i = std::stable_partition(uriResults_.begin(), uriResults_.end(),
  360. FindURIResultByResult(r));
  361. std::copy(uriResults_.begin(), i, std::back_inserter(res));
  362. uriResults_.erase(uriResults_.begin(), i);
  363. }
  364. void FileEntry::reuseUri(const std::vector<std::string>& ignore)
  365. {
  366. if (A2_LOG_DEBUG_ENABLED) {
  367. for (const auto& i : ignore) {
  368. A2_LOG_DEBUG(fmt("ignore host=%s", i.c_str()));
  369. }
  370. }
  371. std::deque<std::string> uris = spentUris_;
  372. std::sort(uris.begin(), uris.end());
  373. uris.erase(std::unique(uris.begin(), uris.end()), uris.end());
  374. std::vector<std::string> errorUris(uriResults_.size());
  375. std::transform(uriResults_.begin(), uriResults_.end(), errorUris.begin(),
  376. std::mem_fn(&URIResult::getURI));
  377. std::sort(errorUris.begin(), errorUris.end());
  378. errorUris.erase(std::unique(errorUris.begin(), errorUris.end()),
  379. errorUris.end());
  380. if (A2_LOG_DEBUG_ENABLED) {
  381. for (std::vector<std::string>::const_iterator i = errorUris.begin(),
  382. eoi = errorUris.end();
  383. i != eoi; ++i) {
  384. A2_LOG_DEBUG(fmt("error URI=%s", (*i).c_str()));
  385. }
  386. }
  387. std::vector<std::string> reusableURIs;
  388. std::set_difference(uris.begin(), uris.end(), errorUris.begin(),
  389. errorUris.end(), std::back_inserter(reusableURIs));
  390. auto insertionPoint = reusableURIs.begin();
  391. for (auto i = reusableURIs.begin(), eoi = reusableURIs.end(); i != eoi; ++i) {
  392. uri_split_result us;
  393. if (uri_split(&us, (*i).c_str()) == 0 &&
  394. std::find(ignore.begin(), ignore.end(),
  395. uri::getFieldString(us, USR_HOST, (*i).c_str())) ==
  396. ignore.end()) {
  397. if (i != insertionPoint) {
  398. *insertionPoint = *i;
  399. }
  400. ++insertionPoint;
  401. }
  402. }
  403. reusableURIs.erase(insertionPoint, reusableURIs.end());
  404. size_t ininum = reusableURIs.size();
  405. if (A2_LOG_DEBUG_ENABLED) {
  406. A2_LOG_DEBUG(
  407. fmt("Found %u reusable URIs", static_cast<unsigned int>(ininum)));
  408. for (std::vector<std::string>::const_iterator i = reusableURIs.begin(),
  409. eoi = reusableURIs.end();
  410. i != eoi; ++i) {
  411. A2_LOG_DEBUG(fmt("URI=%s", (*i).c_str()));
  412. }
  413. }
  414. uris_.insert(uris_.end(), reusableURIs.begin(), reusableURIs.end());
  415. }
  416. void FileEntry::releaseRuntimeResource()
  417. {
  418. requestPool_.clear();
  419. inFlightRequests_.clear();
  420. }
  namespace {
  // Pushes the URI of every request in [first, last) onto the FRONT of
  // `uris`, one at a time. As a result the URIs end up in `uris` in the
  // reverse of their iteration order.
  template <typename InputIterator>
  void putBackUri(std::deque<std::string>& uris, InputIterator first,
                  InputIterator last)
  {
    while (first != last) {
      uris.push_front((*first)->getUri());
      ++first;
    }
  }
  } // namespace
  431. void FileEntry::putBackRequest()
  432. {
  433. putBackUri(uris_, requestPool_.begin(), requestPool_.end());
  434. putBackUri(uris_, inFlightRequests_.begin(), inFlightRequests_.end());
  435. }
  namespace {
  // Returns an iterator to the first request in [first, last) whose URI
  // equals `uri` and whose removal has not been requested; returns `last`
  // when there is no such request.
  template <typename InputIterator, typename T>
  InputIterator findRequestByUri(InputIterator first, InputIterator last,
                                 const T& uri)
  {
    while (first != last) {
      const bool usable = !(*first)->removalRequested();
      if (usable && (*first)->getUri() == uri) {
        break;
      }
      ++first;
    }
    return first;
  }
  } // namespace
  449. bool FileEntry::removeUri(const std::string& uri)
  450. {
  451. auto itr = std::find(spentUris_.begin(), spentUris_.end(), uri);
  452. if (itr == spentUris_.end()) {
  453. itr = std::find(uris_.begin(), uris_.end(), uri);
  454. if (itr == uris_.end()) {
  455. return false;
  456. }
  457. uris_.erase(itr);
  458. return true;
  459. }
  460. spentUris_.erase(itr);
  461. std::shared_ptr<Request> req;
  462. auto riter =
  463. findRequestByUri(inFlightRequests_.begin(), inFlightRequests_.end(), uri);
  464. if (riter == inFlightRequests_.end()) {
  465. auto riter =
  466. findRequestByUri(requestPool_.begin(), requestPool_.end(), uri);
  467. if (riter == requestPool_.end()) {
  468. return true;
  469. }
  470. req = *riter;
  471. requestPool_.erase(riter);
  472. }
  473. else {
  474. req = *riter;
  475. }
  476. req->requestRemoval();
  477. return true;
  478. }
  479. std::string FileEntry::getBasename() const { return File(path_).getBasename(); }
  480. std::string FileEntry::getDirname() const { return File(path_).getDirname(); }
  481. size_t FileEntry::setUris(const std::vector<std::string>& uris)
  482. {
  483. uris_.clear();
  484. return addUris(uris.begin(), uris.end());
  485. }
  486. bool FileEntry::addUri(const std::string& uri)
  487. {
  488. std::string peUri = util::percentEncodeMini(uri);
  489. if (uri_split(nullptr, peUri.c_str()) == 0) {
  490. uris_.push_back(peUri);
  491. return true;
  492. }
  493. else {
  494. return false;
  495. }
  496. }
  497. bool FileEntry::insertUri(const std::string& uri, size_t pos)
  498. {
  499. std::string peUri = util::percentEncodeMini(uri);
  500. if (uri_split(nullptr, peUri.c_str()) != 0) {
  501. return false;
  502. }
  503. pos = std::min(pos, uris_.size());
  504. uris_.insert(uris_.begin() + pos, peUri);
  505. return true;
  506. }
  507. void FileEntry::setPath(std::string path) { path_ = std::move(path); }
  508. void FileEntry::setContentType(std::string contentType)
  509. {
  510. contentType_ = std::move(contentType);
  511. }
  512. size_t FileEntry::countInFlightRequest() const
  513. {
  514. return inFlightRequests_.size();
  515. }
  516. size_t FileEntry::countPooledRequest() const { return requestPool_.size(); }
  517. void FileEntry::setOriginalName(std::string originalName)
  518. {
  519. originalName_ = std::move(originalName);
  520. }
  521. void FileEntry::setSuffixPath(std::string suffixPath)
  522. {
  523. suffixPath_ = std::move(suffixPath);
  524. }
  525. bool FileEntry::emptyRequestUri() const
  526. {
  527. return uris_.empty() && inFlightRequests_.empty() && requestPool_.empty();
  528. }
  529. void writeFilePath(std::ostream& o, const std::shared_ptr<FileEntry>& entry,
  530. bool memory)
  531. {
  532. if (entry->getPath().empty()) {
  533. auto uris = entry->getUris();
  534. if (uris.empty()) {
  535. o << "n/a";
  536. }
  537. else {
  538. o << uris.front();
  539. }
  540. return;
  541. }
  542. if (memory) {
  543. o << "[MEMORY]" << File(entry->getPath()).getBasename();
  544. }
  545. else {
  546. o << entry->getPath();
  547. }
  548. }
  549. } // namespace aria2