FileEntry.cc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "FileEntry.h"
  36. #include <cassert>
  37. #include <algorithm>
  38. #include "util.h"
  39. #include "URISelector.h"
  40. #include "Logger.h"
  41. #include "LogFactory.h"
  42. #include "wallclock.h"
  43. #include "a2algo.h"
  44. #include "uri.h"
  45. #include "PeerStat.h"
  46. #include "fmt.h"
  47. #include "ServerStatMan.h"
  48. #include "ServerStat.h"
  49. namespace aria2 {
  50. bool FileEntry::RequestFaster::operator()
  51. (const std::shared_ptr<Request>& lhs,
  52. const std::shared_ptr<Request>& rhs) const
  53. {
  54. if(!lhs->getPeerStat()) {
  55. return false;
  56. }
  57. if(!rhs->getPeerStat()) {
  58. return true;
  59. }
  60. int lspd = lhs->getPeerStat()->getAvgDownloadSpeed();
  61. int rspd = rhs->getPeerStat()->getAvgDownloadSpeed();
  62. return lspd > rspd || (lspd == rspd && lhs.get() < rhs.get());
  63. }
  64. FileEntry::FileEntry(std::string path, int64_t length, int64_t offset,
  65. const std::vector<std::string>& uris)
  66. : length_(length),
  67. offset_(offset),
  68. uris_(uris.begin(), uris.end()),
  69. path_(std::move(path)),
  70. lastFasterReplace_(Timer::zero()),
  71. maxConnectionPerServer_(1),
  72. requested_(true),
  73. uniqueProtocol_(false)
  74. {}
  75. FileEntry::FileEntry()
  76. : length_(0),
  77. offset_(0),
  78. maxConnectionPerServer_(1),
  79. requested_(false),
  80. uniqueProtocol_(false)
  81. {}
  82. FileEntry::~FileEntry() {}
  83. FileEntry& FileEntry::operator=(const FileEntry& entry)
  84. {
  85. if(this != &entry) {
  86. path_ = entry.path_;
  87. length_ = entry.length_;
  88. offset_ = entry.offset_;
  89. requested_ = entry.requested_;
  90. }
  91. return *this;
  92. }
  93. bool FileEntry::operator<(const FileEntry& fileEntry) const
  94. {
  95. return offset_ < fileEntry.offset_;
  96. }
  97. bool FileEntry::exists() const
  98. {
  99. return File(getPath()).exists();
  100. }
  101. int64_t FileEntry::gtoloff(int64_t goff) const
  102. {
  103. assert(offset_ <= goff);
  104. return goff-offset_;
  105. }
  106. std::vector<std::string> FileEntry::getUris() const
  107. {
  108. std::vector<std::string> uris(std::begin(spentUris_), std::end(spentUris_));
  109. uris.insert(std::end(uris), std::begin(uris_), std::end(uris_));
  110. return uris;
  111. }
  112. namespace {
  113. template<typename InputIterator, typename OutputIterator>
  114. OutputIterator enumerateInFlightHosts
  115. (InputIterator first, InputIterator last, OutputIterator out)
  116. {
  117. for(; first != last; ++first) {
  118. uri_split_result us;
  119. if(uri_split(&us, (*first)->getUri().c_str()) == 0) {
  120. *out++ = uri::getFieldString(us, USR_HOST, (*first)->getUri().c_str());
  121. }
  122. }
  123. return out;
  124. }
  125. } // namespace
  126. std::shared_ptr<Request>
  127. FileEntry::getRequest
  128. (URISelector* selector,
  129. bool uriReuse,
  130. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  131. const std::string& referer,
  132. const std::string& method)
  133. {
  134. std::shared_ptr<Request> req;
  135. if(requestPool_.empty()) {
  136. std::vector<std::string> inFlightHosts;
  137. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  138. std::back_inserter(inFlightHosts));
  139. for(int g = 0; g < 2; ++g) {
  140. std::vector<std::string> pending;
  141. std::vector<std::string> ignoreHost;
  142. while(1) {
  143. std::string uri = selector->select(this, usedHosts);
  144. if(uri.empty()) {
  145. break;
  146. }
  147. req = std::make_shared<Request>();
  148. if(req->setUri(uri)) {
  149. if(std::count(inFlightHosts.begin(),
  150. inFlightHosts.end(),req->getHost())
  151. >= maxConnectionPerServer_) {
  152. pending.push_back(uri);
  153. ignoreHost.push_back(req->getHost());
  154. req.reset();
  155. continue;
  156. }
  157. if(referer == "*") {
  158. // Assuming uri has already been percent-encoded.
  159. req->setReferer(uri);
  160. } else {
  161. req->setReferer(util::percentEncodeMini(referer));
  162. }
  163. req->setMethod(method);
  164. spentUris_.push_back(uri);
  165. inFlightRequests_.insert(req);
  166. break;
  167. } else {
  168. req.reset();
  169. }
  170. }
  171. uris_.insert(uris_.begin(), pending.begin(), pending.end());
  172. if(g == 0 && uriReuse && !req && uris_.size() == pending.size()) {
  173. // Reuse URIs other than ones in pending
  174. reuseUri(ignoreHost);
  175. } else {
  176. break;
  177. }
  178. }
  179. } else {
  180. // Skip Request object if it is still
  181. // sleeping(Request::getWakeTime() < global::wallclock()). If all
  182. // pooled objects are sleeping, return first one. Caller should
  183. // inspect returned object's getWakeTime().
  184. auto i = requestPool_.begin();
  185. auto eoi = requestPool_.end();
  186. for(; i != eoi; ++i) {
  187. if((*i)->getWakeTime() <= global::wallclock()) {
  188. break;
  189. }
  190. }
  191. if(i == eoi) {
  192. i = requestPool_.begin();
  193. }
  194. req = *i;
  195. requestPool_.erase(i);
  196. inFlightRequests_.insert(req);
  197. A2_LOG_DEBUG(fmt("Picked up from pool: %s", req->getUri().c_str()));
  198. }
  199. return req;
  200. }
  201. namespace {
  202. constexpr auto startupIdleTime = 10_s;
  203. } // namespace
  204. std::shared_ptr<Request>
  205. FileEntry::findFasterRequest(const std::shared_ptr<Request>& base)
  206. {
  207. if(requestPool_.empty() ||
  208. lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  209. return nullptr;
  210. }
  211. const std::shared_ptr<PeerStat>& fastest =
  212. (*requestPool_.begin())->getPeerStat();
  213. if(!fastest) {
  214. return nullptr;
  215. }
  216. const std::shared_ptr<PeerStat>& basestat = base->getPeerStat();
  217. // TODO hard coded value. See PREF_STARTUP_IDLE_TIME
  218. if(!basestat ||
  219. (basestat->getDownloadStartTime().
  220. difference(global::wallclock()) >= startupIdleTime &&
  221. fastest->getAvgDownloadSpeed()*0.8 > basestat->calculateDownloadSpeed())){
  222. // TODO we should consider that "fastest" is very slow.
  223. std::shared_ptr<Request> fastestRequest = *requestPool_.begin();
  224. requestPool_.erase(requestPool_.begin());
  225. inFlightRequests_.insert(fastestRequest);
  226. lastFasterReplace_ = global::wallclock();
  227. return fastestRequest;
  228. }
  229. return nullptr;
  230. }
  231. std::shared_ptr<Request>
  232. FileEntry::findFasterRequest
  233. (const std::shared_ptr<Request>& base,
  234. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  235. const std::shared_ptr<ServerStatMan>& serverStatMan)
  236. {
  237. constexpr int SPEED_THRESHOLD = 20_k;
  238. if(lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  239. return nullptr;
  240. }
  241. std::vector<std::string> inFlightHosts;
  242. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  243. std::back_inserter(inFlightHosts));
  244. const std::shared_ptr<PeerStat>& basestat = base->getPeerStat();
  245. A2_LOG_DEBUG("Search faster server using ServerStat.");
  246. // Use first 10 good URIs to introduce some randomness.
  247. const size_t NUM_URI = 10;
  248. std::vector<std::pair<std::shared_ptr<ServerStat>, std::string> > fastCands;
  249. std::vector<std::string> normCands;
  250. for(std::deque<std::string>::const_iterator i = uris_.begin(),
  251. eoi = uris_.end(); i != eoi && fastCands.size() < NUM_URI; ++i) {
  252. uri_split_result us;
  253. if(uri_split(&us, (*i).c_str()) == -1) {
  254. continue;
  255. }
  256. std::string host = uri::getFieldString(us, USR_HOST, (*i).c_str());
  257. std::string protocol = uri::getFieldString(us, USR_SCHEME, (*i).c_str());
  258. if(std::count(inFlightHosts.begin(), inFlightHosts.end(), host)
  259. >= maxConnectionPerServer_) {
  260. A2_LOG_DEBUG(fmt("%s has already used %d times, not considered.",
  261. (*i).c_str(),
  262. maxConnectionPerServer_));
  263. continue;
  264. }
  265. if(findSecond(usedHosts.begin(), usedHosts.end(), host) !=
  266. usedHosts.end()) {
  267. A2_LOG_DEBUG(fmt("%s is in usedHosts, not considered", (*i).c_str()));
  268. continue;
  269. }
  270. std::shared_ptr<ServerStat> ss = serverStatMan->find(host, protocol);
  271. if(ss && ss->isOK()) {
  272. if((basestat &&
  273. ss->getDownloadSpeed() > basestat->calculateDownloadSpeed()*1.5) ||
  274. (!basestat && ss->getDownloadSpeed() > SPEED_THRESHOLD)) {
  275. fastCands.push_back(std::make_pair(ss, *i));
  276. }
  277. }
  278. }
  279. if(!fastCands.empty()) {
  280. std::sort(fastCands.begin(), fastCands.end(), ServerStatFaster());
  281. auto fastestRequest = std::make_shared<Request>();
  282. const std::string& uri = fastCands.front().second;
  283. A2_LOG_DEBUG(fmt("Selected %s from fastCands", uri.c_str()));
  284. // Candidate URIs where already parsed when populating fastCands.
  285. (void)fastestRequest->setUri(uri);
  286. fastestRequest->setReferer(base->getReferer());
  287. uris_.erase(std::find(uris_.begin(), uris_.end(), uri));
  288. spentUris_.push_back(uri);
  289. inFlightRequests_.insert(fastestRequest);
  290. lastFasterReplace_ = global::wallclock();
  291. return fastestRequest;
  292. }
  293. A2_LOG_DEBUG("No faster server found.");
  294. return nullptr;
  295. }
  296. void FileEntry::storePool(const std::shared_ptr<Request>& request)
  297. {
  298. const std::shared_ptr<PeerStat>& peerStat = request->getPeerStat();
  299. if(peerStat) {
  300. // We need to calculate average download speed here in order to
  301. // store Request in the right position in the pool.
  302. peerStat->calculateAvgDownloadSpeed();
  303. }
  304. requestPool_.insert(request);
  305. }
  306. void FileEntry::poolRequest(const std::shared_ptr<Request>& request)
  307. {
  308. removeRequest(request);
  309. if(!request->removalRequested()) {
  310. storePool(request);
  311. }
  312. }
  313. bool FileEntry::removeRequest(const std::shared_ptr<Request>& request)
  314. {
  315. return inFlightRequests_.erase(request) == 1;
  316. }
  317. void FileEntry::removeURIWhoseHostnameIs(const std::string& hostname)
  318. {
  319. std::deque<std::string> newURIs;
  320. for(std::deque<std::string>::const_iterator itr = uris_.begin(),
  321. eoi = uris_.end(); itr != eoi; ++itr) {
  322. uri_split_result us;
  323. if(uri_split(&us, (*itr).c_str()) == -1) {
  324. continue;
  325. }
  326. if(us.fields[USR_HOST].len != hostname.size() ||
  327. memcmp((*itr).c_str()+us.fields[USR_HOST].off, hostname.c_str(),
  328. hostname.size()) != 0) {
  329. newURIs.push_back(*itr);
  330. }
  331. }
  332. A2_LOG_DEBUG(fmt("Removed %lu duplicate hostname URIs for path=%s",
  333. static_cast<unsigned long>(uris_.size()-newURIs.size()),
  334. getPath().c_str()));
  335. uris_.swap(newURIs);
  336. }
  337. void FileEntry::removeIdenticalURI(const std::string& uri)
  338. {
  339. uris_.erase(std::remove(uris_.begin(), uris_.end(), uri), uris_.end());
  340. }
  341. void FileEntry::addURIResult(std::string uri, error_code::Value result)
  342. {
  343. uriResults_.push_back(URIResult(uri, result));
  344. }
  345. namespace {
  346. class FindURIResultByResult {
  347. private:
  348. error_code::Value r_;
  349. public:
  350. FindURIResultByResult(error_code::Value r):r_(r) {}
  351. bool operator()(const URIResult& uriResult) const
  352. {
  353. return uriResult.getResult() == r_;
  354. }
  355. };
  356. } // namespace
  357. void FileEntry::extractURIResult
  358. (std::deque<URIResult>& res, error_code::Value r)
  359. {
  360. auto i = std::stable_partition(uriResults_.begin(), uriResults_.end(),
  361. FindURIResultByResult(r));
  362. std::copy(uriResults_.begin(), i, std::back_inserter(res));
  363. uriResults_.erase(uriResults_.begin(), i);
  364. }
  365. void FileEntry::reuseUri(const std::vector<std::string>& ignore)
  366. {
  367. if(A2_LOG_DEBUG_ENABLED) {
  368. for (const auto& i: ignore) {
  369. A2_LOG_DEBUG(fmt("ignore host=%s", i.c_str()));
  370. }
  371. }
  372. std::deque<std::string> uris = spentUris_;
  373. std::sort(uris.begin(), uris.end());
  374. uris.erase(std::unique(uris.begin(), uris.end()), uris.end());
  375. std::vector<std::string> errorUris(uriResults_.size());
  376. std::transform(uriResults_.begin(), uriResults_.end(),
  377. errorUris.begin(), std::mem_fn(&URIResult::getURI));
  378. std::sort(errorUris.begin(), errorUris.end());
  379. errorUris.erase(std::unique(errorUris.begin(), errorUris.end()),
  380. errorUris.end());
  381. if(A2_LOG_DEBUG_ENABLED) {
  382. for(std::vector<std::string>::const_iterator i = errorUris.begin(),
  383. eoi = errorUris.end(); i != eoi; ++i) {
  384. A2_LOG_DEBUG(fmt("error URI=%s", (*i).c_str()));
  385. }
  386. }
  387. std::vector<std::string> reusableURIs;
  388. std::set_difference(uris.begin(), uris.end(),
  389. errorUris.begin(), errorUris.end(),
  390. std::back_inserter(reusableURIs));
  391. auto insertionPoint = reusableURIs.begin();
  392. for(auto i = reusableURIs.begin(),
  393. eoi = reusableURIs.end(); i != eoi; ++i) {
  394. uri_split_result us;
  395. if(uri_split(&us, (*i).c_str()) == 0 &&
  396. std::find(ignore.begin(), ignore.end(),
  397. uri::getFieldString(us, USR_HOST, (*i).c_str()))
  398. == ignore.end()) {
  399. if(i != insertionPoint) {
  400. *insertionPoint = *i;
  401. }
  402. ++insertionPoint;
  403. }
  404. }
  405. reusableURIs.erase(insertionPoint, reusableURIs.end());
  406. size_t ininum = reusableURIs.size();
  407. if(A2_LOG_DEBUG_ENABLED) {
  408. A2_LOG_DEBUG(fmt("Found %u reusable URIs",
  409. static_cast<unsigned int>(ininum)));
  410. for(std::vector<std::string>::const_iterator i = reusableURIs.begin(),
  411. eoi = reusableURIs.end(); i != eoi; ++i) {
  412. A2_LOG_DEBUG(fmt("URI=%s", (*i).c_str()));
  413. }
  414. }
  415. uris_.insert(uris_.end(), reusableURIs.begin(), reusableURIs.end());
  416. }
  417. void FileEntry::releaseRuntimeResource()
  418. {
  419. requestPool_.clear();
  420. inFlightRequests_.clear();
  421. }
  namespace {
  // Pushes the URI of every request in [first, last) to the front of
  // `uris`. Because each URI goes to the front, the URIs end up in
  // `uris` in the reverse of the iteration order.
  template<typename InputIterator>
  void putBackUri
  (std::deque<std::string>& uris,
   InputIterator first,
   InputIterator last)
  {
    while(first != last) {
      uris.push_front((*first)->getUri());
      ++first;
    }
  }
  } // namespace
  434. void FileEntry::putBackRequest()
  435. {
  436. putBackUri(uris_, requestPool_.begin(), requestPool_.end());
  437. putBackUri(uris_, inFlightRequests_.begin(), inFlightRequests_.end());
  438. }
  namespace {
  // Returns an iterator to the first request in [first, last) whose
  // removal has not been requested and whose getUri() equals `uri`.
  // Returns `last` when there is no such request.
  template<typename InputIterator, typename T>
  InputIterator findRequestByUri
  (InputIterator first, InputIterator last, const T& uri)
  {
    while(first != last) {
      const bool alive = !(*first)->removalRequested();
      if(alive && (*first)->getUri() == uri) {
        break;
      }
      ++first;
    }
    return first;
  }
  } // namespace
  452. bool FileEntry::removeUri(const std::string& uri)
  453. {
  454. auto itr = std::find(spentUris_.begin(), spentUris_.end(), uri);
  455. if(itr == spentUris_.end()) {
  456. itr = std::find(uris_.begin(), uris_.end(), uri);
  457. if(itr == uris_.end()) {
  458. return false;
  459. }
  460. uris_.erase(itr);
  461. return true;
  462. }
  463. spentUris_.erase(itr);
  464. std::shared_ptr<Request> req;
  465. auto riter =
  466. findRequestByUri(inFlightRequests_.begin(), inFlightRequests_.end(), uri);
  467. if(riter == inFlightRequests_.end()) {
  468. auto riter = findRequestByUri(requestPool_.begin(),
  469. requestPool_.end(), uri);
  470. if(riter == requestPool_.end()) {
  471. return true;
  472. }
  473. req = *riter;
  474. requestPool_.erase(riter);
  475. } else {
  476. req = *riter;
  477. }
  478. req->requestRemoval();
  479. return true;
  480. }
  481. std::string FileEntry::getBasename() const
  482. {
  483. return File(path_).getBasename();
  484. }
  485. std::string FileEntry::getDirname() const
  486. {
  487. return File(path_).getDirname();
  488. }
  489. size_t FileEntry::setUris(const std::vector<std::string>& uris)
  490. {
  491. uris_.clear();
  492. return addUris(uris.begin(), uris.end());
  493. }
  494. bool FileEntry::addUri(const std::string& uri)
  495. {
  496. std::string peUri = util::percentEncodeMini(uri);
  497. if(uri_split(nullptr, peUri.c_str()) == 0) {
  498. uris_.push_back(peUri);
  499. return true;
  500. } else {
  501. return false;
  502. }
  503. }
  504. bool FileEntry::insertUri(const std::string& uri, size_t pos)
  505. {
  506. std::string peUri = util::percentEncodeMini(uri);
  507. if(uri_split(nullptr, peUri.c_str()) != 0) {
  508. return false;
  509. }
  510. pos = std::min(pos, uris_.size());
  511. uris_.insert(uris_.begin()+pos, peUri);
  512. return true;
  513. }
  514. void FileEntry::setPath(std::string path)
  515. {
  516. path_ = std::move(path);
  517. }
  518. void FileEntry::setContentType(std::string contentType)
  519. {
  520. contentType_ = std::move(contentType);
  521. }
  522. size_t FileEntry::countInFlightRequest() const
  523. {
  524. return inFlightRequests_.size();
  525. }
  526. size_t FileEntry::countPooledRequest() const
  527. {
  528. return requestPool_.size();
  529. }
  530. void FileEntry::setOriginalName(std::string originalName)
  531. {
  532. originalName_ = std::move(originalName);
  533. }
  534. void FileEntry::setSuffixPath(std::string suffixPath)
  535. {
  536. suffixPath_ = std::move(suffixPath);
  537. }
  538. bool FileEntry::emptyRequestUri() const
  539. {
  540. return uris_.empty() && inFlightRequests_.empty() && requestPool_.empty();
  541. }
  542. void writeFilePath
  543. (std::ostream& o,
  544. const std::shared_ptr<FileEntry>& entry,
  545. bool memory)
  546. {
  547. if(entry->getPath().empty()) {
  548. auto uris = entry->getUris();
  549. if(uris.empty()) {
  550. o << "n/a";
  551. } else {
  552. o << uris.front();
  553. }
  554. return;
  555. }
  556. if(memory) {
  557. o << "[MEMORY]" << File(entry->getPath()).getBasename();
  558. } else {
  559. o << entry->getPath();
  560. }
  561. }
  562. } // namespace aria2