FileEntry.cc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "FileEntry.h"
  36. #include <cassert>
  37. #include <algorithm>
  38. #include "util.h"
  39. #include "URISelector.h"
  40. #include "Logger.h"
  41. #include "LogFactory.h"
  42. #include "wallclock.h"
  43. #include "a2algo.h"
  44. #include "uri.h"
  45. #include "PeerStat.h"
  46. #include "fmt.h"
  47. #include "ServerStatMan.h"
  48. #include "ServerStat.h"
  49. namespace aria2 {
  50. bool FileEntry::RequestFaster::operator()
  51. (const SharedHandle<Request>& lhs,
  52. const SharedHandle<Request>& rhs) const
  53. {
  54. if(!lhs->getPeerStat()) {
  55. return false;
  56. }
  57. if(!rhs->getPeerStat()) {
  58. return true;
  59. }
  60. int lspd = lhs->getPeerStat()->getAvgDownloadSpeed();
  61. int rspd = rhs->getPeerStat()->getAvgDownloadSpeed();
  62. return lspd > rspd || (lspd == rspd && lhs.get() < rhs.get());
  63. }
  64. FileEntry::FileEntry
  65. (const std::string& path,
  66. off_t length,
  67. off_t offset,
  68. const std::vector<std::string>& uris)
  69. : path_(path),
  70. uris_(uris.begin(), uris.end()),
  71. length_(length),
  72. offset_(offset),
  73. requested_(true),
  74. uniqueProtocol_(false),
  75. maxConnectionPerServer_(1),
  76. lastFasterReplace_(0)
  77. {}
  78. FileEntry::FileEntry()
  79. : length_(0),
  80. offset_(0),
  81. requested_(false),
  82. uniqueProtocol_(false),
  83. maxConnectionPerServer_(1)
  84. {}
  85. FileEntry::~FileEntry() {}
  86. FileEntry& FileEntry::operator=(const FileEntry& entry)
  87. {
  88. if(this != &entry) {
  89. path_ = entry.path_;
  90. length_ = entry.length_;
  91. offset_ = entry.offset_;
  92. requested_ = entry.requested_;
  93. }
  94. return *this;
  95. }
  96. bool FileEntry::operator<(const FileEntry& fileEntry) const
  97. {
  98. return offset_ < fileEntry.offset_;
  99. }
  100. bool FileEntry::exists() const
  101. {
  102. return File(getPath()).exists();
  103. }
  104. off_t FileEntry::gtoloff(off_t goff) const
  105. {
  106. assert(offset_ <= goff);
  107. return goff-offset_;
  108. }
  109. void FileEntry::getUris(std::vector<std::string>& uris) const
  110. {
  111. uris.insert(uris.end(), spentUris_.begin(), spentUris_.end());
  112. uris.insert(uris.end(), uris_.begin(), uris_.end());
  113. }
  114. namespace {
  115. template<typename InputIterator, typename OutputIterator>
  116. OutputIterator enumerateInFlightHosts
  117. (InputIterator first, InputIterator last, OutputIterator out)
  118. {
  119. for(; first != last; ++first) {
  120. uri::UriStruct us;
  121. if(uri::parse(us, (*first)->getUri())) {
  122. *out++ = us.host;
  123. }
  124. }
  125. return out;
  126. }
  127. } // namespace
  128. SharedHandle<Request>
  129. FileEntry::getRequest
  130. (const SharedHandle<URISelector>& selector,
  131. bool uriReuse,
  132. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  133. const std::string& referer,
  134. const std::string& method)
  135. {
  136. SharedHandle<Request> req;
  137. if(requestPool_.empty()) {
  138. std::vector<std::string> inFlightHosts;
  139. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  140. std::back_inserter(inFlightHosts));
  141. for(int g = 0; g < 2; ++g) {
  142. std::vector<std::string> pending;
  143. std::vector<std::string> ignoreHost;
  144. while(1) {
  145. std::string uri = selector->select(this, usedHosts);
  146. if(uri.empty()) {
  147. break;
  148. }
  149. req.reset(new Request());
  150. if(req->setUri(uri)) {
  151. if(std::count(inFlightHosts.begin(),
  152. inFlightHosts.end(),req->getHost())
  153. >= maxConnectionPerServer_) {
  154. pending.push_back(uri);
  155. ignoreHost.push_back(req->getHost());
  156. req.reset();
  157. continue;
  158. }
  159. req->setReferer(util::percentEncodeMini(referer));
  160. req->setMethod(method);
  161. spentUris_.push_back(uri);
  162. inFlightRequests_.insert(req);
  163. break;
  164. } else {
  165. req.reset();
  166. }
  167. }
  168. uris_.insert(uris_.begin(), pending.begin(), pending.end());
  169. if(g == 0 && uriReuse && !req && uris_.size() == pending.size()) {
  170. // Reuse URIs other than ones in pending
  171. reuseUri(ignoreHost);
  172. } else {
  173. break;
  174. }
  175. }
  176. } else {
  177. // Skip Request object if it is still
  178. // sleeping(Request::getWakeTime() < global::wallclock()). If all
  179. // pooled objects are sleeping, return first one. Caller should
  180. // inspect returned object's getWakeTime().
  181. RequestPool::iterator i = requestPool_.begin();
  182. RequestPool::iterator eoi = requestPool_.end();
  183. for(; i != eoi; ++i) {
  184. if((*i)->getWakeTime() <= global::wallclock()) {
  185. break;
  186. }
  187. }
  188. if(i == eoi) {
  189. i = requestPool_.begin();
  190. }
  191. req = *i;
  192. requestPool_.erase(i);
  193. inFlightRequests_.insert(req);
  194. A2_LOG_DEBUG(fmt("Picked up from pool: %s", req->getUri().c_str()));
  195. }
  196. return req;
  197. }
  198. SharedHandle<Request>
  199. FileEntry::findFasterRequest(const SharedHandle<Request>& base)
  200. {
  201. const int startupIdleTime = 10;
  202. if(requestPool_.empty() ||
  203. lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  204. return SharedHandle<Request>();
  205. }
  206. const SharedHandle<PeerStat>& fastest =
  207. (*requestPool_.begin())->getPeerStat();
  208. if(!fastest) {
  209. return SharedHandle<Request>();
  210. }
  211. const SharedHandle<PeerStat>& basestat = base->getPeerStat();
  212. // TODO hard coded value. See PREF_STARTUP_IDLE_TIME
  213. if(!basestat ||
  214. (basestat->getDownloadStartTime().
  215. difference(global::wallclock()) >= startupIdleTime &&
  216. fastest->getAvgDownloadSpeed()*0.8 > basestat->calculateDownloadSpeed())){
  217. // TODO we should consider that "fastest" is very slow.
  218. SharedHandle<Request> fastestRequest = *requestPool_.begin();
  219. requestPool_.erase(requestPool_.begin());
  220. inFlightRequests_.insert(fastestRequest);
  221. lastFasterReplace_ = global::wallclock();
  222. return fastestRequest;
  223. }
  224. return SharedHandle<Request>();
  225. }
  226. SharedHandle<Request>
  227. FileEntry::findFasterRequest
  228. (const SharedHandle<Request>& base,
  229. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  230. const SharedHandle<ServerStatMan>& serverStatMan)
  231. {
  232. const int startupIdleTime = 10;
  233. const int SPEED_THRESHOLD = 20*1024;
  234. if(lastFasterReplace_.difference(global::wallclock()) < startupIdleTime) {
  235. return SharedHandle<Request>();
  236. }
  237. std::vector<std::string> inFlightHosts;
  238. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  239. std::back_inserter(inFlightHosts));
  240. const SharedHandle<PeerStat>& basestat = base->getPeerStat();
  241. A2_LOG_DEBUG("Search faster server using ServerStat.");
  242. // Use first 10 good URIs to introduce some randomness.
  243. const size_t NUM_URI = 10;
  244. std::vector<std::pair<SharedHandle<ServerStat>, std::string> > fastCands;
  245. std::vector<std::string> normCands;
  246. for(std::deque<std::string>::const_iterator i = uris_.begin(),
  247. eoi = uris_.end(); i != eoi && fastCands.size() < NUM_URI; ++i) {
  248. uri::UriStruct us;
  249. if(!uri::parse(us, *i)) {
  250. continue;
  251. }
  252. if(std::count(inFlightHosts.begin(), inFlightHosts.end(),us.host)
  253. >= maxConnectionPerServer_) {
  254. A2_LOG_DEBUG(fmt("%s has already used %d times, not considered.",
  255. (*i).c_str(),
  256. maxConnectionPerServer_));
  257. continue;
  258. }
  259. if(findSecond(usedHosts.begin(), usedHosts.end(), us.host) !=
  260. usedHosts.end()) {
  261. A2_LOG_DEBUG(fmt("%s is in usedHosts, not considered", (*i).c_str()));
  262. continue;
  263. }
  264. SharedHandle<ServerStat> ss = serverStatMan->find(us.host, us.protocol);
  265. if(ss && ss->isOK()) {
  266. if((basestat &&
  267. ss->getDownloadSpeed() > basestat->calculateDownloadSpeed()*1.5) ||
  268. (!basestat && ss->getDownloadSpeed() > SPEED_THRESHOLD)) {
  269. fastCands.push_back(std::make_pair(ss, *i));
  270. }
  271. }
  272. }
  273. if(!fastCands.empty()) {
  274. std::sort(fastCands.begin(), fastCands.end(), ServerStatFaster());
  275. SharedHandle<Request> fastestRequest(new Request());
  276. const std::string& uri = fastCands.front().second;
  277. A2_LOG_DEBUG(fmt("Selected %s from fastCands", uri.c_str()));
  278. fastestRequest->setUri(uri);
  279. fastestRequest->setReferer(base->getReferer());
  280. uris_.erase(std::find(uris_.begin(), uris_.end(), uri));
  281. spentUris_.push_back(uri);
  282. inFlightRequests_.insert(fastestRequest);
  283. lastFasterReplace_ = global::wallclock();
  284. return fastestRequest;
  285. }
  286. A2_LOG_DEBUG("No faster server found.");
  287. return SharedHandle<Request>();
  288. }
  289. void FileEntry::storePool(const SharedHandle<Request>& request)
  290. {
  291. const SharedHandle<PeerStat>& peerStat = request->getPeerStat();
  292. if(peerStat) {
  293. // We need to calculate average download speed here in order to
  294. // store Request in the right position in the pool.
  295. peerStat->calculateAvgDownloadSpeed();
  296. }
  297. requestPool_.insert(request);
  298. }
  299. void FileEntry::poolRequest(const SharedHandle<Request>& request)
  300. {
  301. removeRequest(request);
  302. if(!request->removalRequested()) {
  303. storePool(request);
  304. }
  305. }
  306. bool FileEntry::removeRequest(const SharedHandle<Request>& request)
  307. {
  308. return inFlightRequests_.erase(request) == 1;
  309. }
  310. void FileEntry::removeURIWhoseHostnameIs(const std::string& hostname)
  311. {
  312. std::deque<std::string> newURIs;
  313. for(std::deque<std::string>::const_iterator itr = uris_.begin(),
  314. eoi = uris_.end(); itr != eoi; ++itr) {
  315. uri::UriStruct us;
  316. if(!uri::parse(us, *itr)) {
  317. continue;
  318. }
  319. if(us.host != hostname) {
  320. newURIs.push_back(*itr);
  321. }
  322. }
  323. A2_LOG_DEBUG(fmt("Removed %lu duplicate hostname URIs for path=%s",
  324. static_cast<unsigned long>(uris_.size()-newURIs.size()),
  325. getPath().c_str()));
  326. uris_.swap(newURIs);
  327. }
  328. void FileEntry::removeIdenticalURI(const std::string& uri)
  329. {
  330. uris_.erase(std::remove(uris_.begin(), uris_.end(), uri), uris_.end());
  331. }
  332. void FileEntry::addURIResult(std::string uri, error_code::Value result)
  333. {
  334. uriResults_.push_back(URIResult(uri, result));
  335. }
  336. namespace {
  337. class FindURIResultByResult {
  338. private:
  339. error_code::Value r_;
  340. public:
  341. FindURIResultByResult(error_code::Value r):r_(r) {}
  342. bool operator()(const URIResult& uriResult) const
  343. {
  344. return uriResult.getResult() == r_;
  345. }
  346. };
  347. } // namespace
  348. void FileEntry::extractURIResult
  349. (std::deque<URIResult>& res, error_code::Value r)
  350. {
  351. std::deque<URIResult>::iterator i =
  352. std::stable_partition(uriResults_.begin(), uriResults_.end(),
  353. FindURIResultByResult(r));
  354. std::copy(uriResults_.begin(), i, std::back_inserter(res));
  355. uriResults_.erase(uriResults_.begin(), i);
  356. }
  357. void FileEntry::reuseUri(const std::vector<std::string>& ignore)
  358. {
  359. if(A2_LOG_DEBUG_ENABLED) {
  360. for(std::vector<std::string>::const_iterator i = ignore.begin(),
  361. eoi = ignore.end(); i != eoi; ++i) {
  362. A2_LOG_DEBUG(fmt("ignore host=%s", (*i).c_str()));
  363. }
  364. }
  365. std::deque<std::string> uris = spentUris_;
  366. std::sort(uris.begin(), uris.end());
  367. uris.erase(std::unique(uris.begin(), uris.end()), uris.end());
  368. std::vector<std::string> errorUris(uriResults_.size());
  369. std::transform(uriResults_.begin(), uriResults_.end(),
  370. errorUris.begin(), std::mem_fun_ref(&URIResult::getURI));
  371. std::sort(errorUris.begin(), errorUris.end());
  372. errorUris.erase(std::unique(errorUris.begin(), errorUris.end()),
  373. errorUris.end());
  374. if(A2_LOG_DEBUG_ENABLED) {
  375. for(std::vector<std::string>::const_iterator i = errorUris.begin(),
  376. eoi = errorUris.end(); i != eoi; ++i) {
  377. A2_LOG_DEBUG(fmt("error URI=%s", (*i).c_str()));
  378. }
  379. }
  380. std::vector<std::string> reusableURIs;
  381. std::set_difference(uris.begin(), uris.end(),
  382. errorUris.begin(), errorUris.end(),
  383. std::back_inserter(reusableURIs));
  384. std::vector<std::string>::iterator insertionPoint = reusableURIs.begin();
  385. for(std::vector<std::string>::iterator i = reusableURIs.begin(),
  386. eoi = reusableURIs.end(); i != eoi; ++i) {
  387. uri::UriStruct us;
  388. if(uri::parse(us, *i) &&
  389. std::find(ignore.begin(), ignore.end(), us.host) == ignore.end()) {
  390. if(i != insertionPoint) {
  391. *insertionPoint = *i;
  392. }
  393. ++insertionPoint;
  394. }
  395. }
  396. reusableURIs.erase(insertionPoint, reusableURIs.end());
  397. size_t ininum = reusableURIs.size();
  398. if(A2_LOG_DEBUG_ENABLED) {
  399. A2_LOG_DEBUG(fmt("Found %u reusable URIs",
  400. static_cast<unsigned int>(ininum)));
  401. for(std::vector<std::string>::const_iterator i = reusableURIs.begin(),
  402. eoi = reusableURIs.end(); i != eoi; ++i) {
  403. A2_LOG_DEBUG(fmt("URI=%s", (*i).c_str()));
  404. }
  405. }
  406. uris_.insert(uris_.end(), reusableURIs.begin(), reusableURIs.end());
  407. }
  408. void FileEntry::releaseRuntimeResource()
  409. {
  410. requestPool_.clear();
  411. inFlightRequests_.clear();
  412. }
  namespace {
  // Pushes the URI of every request in [first, last) onto the front
  // of |uris|.  Because each URI is pushed to the front, the range
  // ends up in reverse order at the head of |uris|.
  template<typename InputIterator>
  void putBackUri
  (std::deque<std::string>& uris,
   InputIterator first,
   InputIterator last)
  {
    while(first != last) {
      uris.push_front((*first)->getUri());
      ++first;
    }
  }
  } // namespace
  425. void FileEntry::putBackRequest()
  426. {
  427. putBackUri(uris_, requestPool_.begin(), requestPool_.end());
  428. putBackUri(uris_, inFlightRequests_.begin(), inFlightRequests_.end());
  429. }
  namespace {
  // Returns the position of the first request in [first, last) whose
  // URI equals |uri| and whose removal has not been requested, or
  // |last| if there is none.
  template<typename InputIterator, typename T>
  InputIterator findRequestByUri
  (InputIterator first, InputIterator last, const T& uri)
  {
    while(first != last) {
      const bool candidate = !(*first)->removalRequested();
      if(candidate && (*first)->getUri() == uri) {
        break;
      }
      ++first;
    }
    return first;
  }
  } // namespace
  443. bool FileEntry::removeUri(const std::string& uri)
  444. {
  445. std::deque<std::string>::iterator itr =
  446. std::find(spentUris_.begin(), spentUris_.end(), uri);
  447. if(itr == spentUris_.end()) {
  448. itr = std::find(uris_.begin(), uris_.end(), uri);
  449. if(itr == uris_.end()) {
  450. return false;
  451. } else {
  452. uris_.erase(itr);
  453. return true;
  454. }
  455. } else {
  456. spentUris_.erase(itr);
  457. SharedHandle<Request> req;
  458. InFlightRequestSet::iterator riter =
  459. findRequestByUri(inFlightRequests_.begin(), inFlightRequests_.end(), uri);
  460. if(riter == inFlightRequests_.end()) {
  461. RequestPool::iterator riter = findRequestByUri(requestPool_.begin(),
  462. requestPool_.end(), uri);
  463. if(riter == requestPool_.end()) {
  464. return true;
  465. } else {
  466. req = *riter;
  467. requestPool_.erase(riter);
  468. }
  469. } else {
  470. req = *riter;
  471. }
  472. req->requestRemoval();
  473. return true;
  474. }
  475. }
  476. std::string FileEntry::getBasename() const
  477. {
  478. return File(path_).getBasename();
  479. }
  480. std::string FileEntry::getDirname() const
  481. {
  482. return File(path_).getDirname();
  483. }
  484. size_t FileEntry::setUris(const std::vector<std::string>& uris)
  485. {
  486. uris_.clear();
  487. return addUris(uris.begin(), uris.end());
  488. }
  489. bool FileEntry::addUri(const std::string& uri)
  490. {
  491. uri::UriStruct us;
  492. std::string peUri = util::percentEncodeMini(uri);
  493. if(uri::parse(us, peUri)) {
  494. uris_.push_back(peUri);
  495. return true;
  496. } else {
  497. return false;
  498. }
  499. }
  500. bool FileEntry::insertUri(const std::string& uri, size_t pos)
  501. {
  502. uri::UriStruct us;
  503. std::string peUri = util::percentEncodeMini(uri);
  504. if(uri::parse(us, peUri)) {
  505. pos = std::min(pos, uris_.size());
  506. uris_.insert(uris_.begin()+pos, peUri);
  507. return true;
  508. } else {
  509. return false;
  510. }
  511. }
  512. void FileEntry::setPath(const std::string& path)
  513. {
  514. path_ = path;
  515. }
  516. void FileEntry::setContentType(const std::string& contentType)
  517. {
  518. contentType_ = contentType;
  519. }
  520. size_t FileEntry::countInFlightRequest() const
  521. {
  522. return inFlightRequests_.size();
  523. }
  524. size_t FileEntry::countPooledRequest() const
  525. {
  526. return requestPool_.size();
  527. }
  528. void FileEntry::setOriginalName(const std::string& originalName)
  529. {
  530. originalName_ = originalName;
  531. }
  532. bool FileEntry::emptyRequestUri() const
  533. {
  534. return uris_.empty() && inFlightRequests_.empty() && requestPool_.empty();
  535. }
  536. void writeFilePath
  537. (std::ostream& o,
  538. const SharedHandle<FileEntry>& entry,
  539. bool memory)
  540. {
  541. if(entry->getPath().empty()) {
  542. std::vector<std::string> uris;
  543. entry->getUris(uris);
  544. if(uris.empty()) {
  545. o << "n/a";
  546. } else {
  547. o << uris.front();
  548. }
  549. } else {
  550. if(memory) {
  551. o << "[MEMORY]" << File(entry->getPath()).getBasename();
  552. } else {
  553. o << entry->getPath();
  554. }
  555. }
  556. }
  557. } // namespace aria2