FileEntry.cc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "FileEntry.h"
  36. #include <cassert>
  37. #include <algorithm>
  38. #include "util.h"
  39. #include "URISelector.h"
  40. #include "Logger.h"
  41. #include "LogFactory.h"
  42. #include "wallclock.h"
  43. #include "a2algo.h"
  44. #include "uri.h"
  45. #include "PeerStat.h"
  46. #include "fmt.h"
  47. #include "ServerStatMan.h"
  48. #include "ServerStat.h"
  49. namespace aria2 {
  50. FileEntry::FileEntry
  51. (const std::string& path,
  52. uint64_t length,
  53. off_t offset,
  54. const std::vector<std::string>& uris)
  55. : path_(path),
  56. uris_(uris.begin(), uris.end()),
  57. length_(length),
  58. offset_(offset),
  59. requested_(true),
  60. uniqueProtocol_(false),
  61. maxConnectionPerServer_(1),
  62. lastFasterReplace_(0)
  63. {}
  64. FileEntry::FileEntry()
  65. : length_(0),
  66. offset_(0),
  67. requested_(false),
  68. uniqueProtocol_(false),
  69. maxConnectionPerServer_(1)
  70. {}
  71. FileEntry::~FileEntry() {}
  72. FileEntry& FileEntry::operator=(const FileEntry& entry)
  73. {
  74. if(this != &entry) {
  75. path_ = entry.path_;
  76. length_ = entry.length_;
  77. offset_ = entry.offset_;
  78. requested_ = entry.requested_;
  79. }
  80. return *this;
  81. }
  82. bool FileEntry::operator<(const FileEntry& fileEntry) const
  83. {
  84. return offset_ < fileEntry.offset_;
  85. }
  86. bool FileEntry::exists() const
  87. {
  88. return File(getPath()).exists();
  89. }
  90. off_t FileEntry::gtoloff(off_t goff) const
  91. {
  92. assert(offset_ <= goff);
  93. return goff-offset_;
  94. }
  95. void FileEntry::getUris(std::vector<std::string>& uris) const
  96. {
  97. uris.insert(uris.end(), spentUris_.begin(), spentUris_.end());
  98. uris.insert(uris.end(), uris_.begin(), uris_.end());
  99. }
  100. namespace {
  101. template<typename InputIterator, typename OutputIterator>
  102. OutputIterator enumerateInFlightHosts
  103. (InputIterator first, InputIterator last, OutputIterator out)
  104. {
  105. for(; first != last; ++first) {
  106. uri::UriStruct us;
  107. if(uri::parse(us, (*first)->getUri())) {
  108. *out++ = us.host;
  109. }
  110. }
  111. return out;
  112. }
  113. } // namespace
  114. SharedHandle<Request>
  115. FileEntry::getRequest
  116. (const SharedHandle<URISelector>& selector,
  117. bool uriReuse,
  118. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  119. const std::string& referer,
  120. const std::string& method)
  121. {
  122. SharedHandle<Request> req;
  123. if(requestPool_.empty()) {
  124. std::vector<std::string> inFlightHosts;
  125. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  126. std::back_inserter(inFlightHosts));
  127. for(int g = 0; g < 2; ++g) {
  128. std::vector<std::string> pending;
  129. std::vector<std::string> ignoreHost;
  130. while(1) {
  131. std::string uri = selector->select(this, usedHosts);
  132. if(uri.empty()) {
  133. break;
  134. }
  135. req.reset(new Request());
  136. if(req->setUri(uri)) {
  137. if(std::count(inFlightHosts.begin(),
  138. inFlightHosts.end(),req->getHost())
  139. >= static_cast<int>(maxConnectionPerServer_)) {
  140. pending.push_back(uri);
  141. ignoreHost.push_back(req->getHost());
  142. req.reset();
  143. continue;
  144. }
  145. req->setReferer(referer);
  146. req->setMethod(method);
  147. spentUris_.push_back(uri);
  148. inFlightRequests_.push_back(req);
  149. break;
  150. } else {
  151. req.reset();
  152. }
  153. }
  154. uris_.insert(uris_.begin(), pending.begin(), pending.end());
  155. if(g == 0 && uriReuse && !req && uris_.size() == pending.size()) {
  156. // Reuse URIs other than ones in pending
  157. reuseUri(ignoreHost);
  158. } else {
  159. break;
  160. }
  161. }
  162. } else {
  163. // Skip Request object if it is still
  164. // sleeping(Request::getWakeTime() < global::wallclock). If all
  165. // pooled objects are sleeping, return first one. Caller should
  166. // inspect returned object's getWakeTime().
  167. std::deque<SharedHandle<Request> >::iterator i = requestPool_.begin();
  168. std::deque<SharedHandle<Request> >::iterator eoi = requestPool_.end();
  169. for(; i != eoi; ++i) {
  170. if((*i)->getWakeTime() <= global::wallclock) {
  171. break;
  172. }
  173. }
  174. if(i == eoi) {
  175. i = requestPool_.begin();
  176. }
  177. req = *i;
  178. requestPool_.erase(i);
  179. inFlightRequests_.push_back(req);
  180. A2_LOG_DEBUG(fmt("Picked up from pool: %s", req->getUri().c_str()));
  181. }
  182. return req;
  183. }
  184. SharedHandle<Request>
  185. FileEntry::findFasterRequest(const SharedHandle<Request>& base)
  186. {
  187. const int startupIdleTime = 10;
  188. if(requestPool_.empty() ||
  189. lastFasterReplace_.difference(global::wallclock) < startupIdleTime) {
  190. return SharedHandle<Request>();
  191. }
  192. const SharedHandle<PeerStat>& fastest = requestPool_.front()->getPeerStat();
  193. if(!fastest) {
  194. return SharedHandle<Request>();
  195. }
  196. const SharedHandle<PeerStat>& basestat = base->getPeerStat();
  197. // TODO hard coded value. See PREF_STARTUP_IDLE_TIME
  198. if(!basestat ||
  199. (basestat->getDownloadStartTime().
  200. difference(global::wallclock) >= startupIdleTime &&
  201. fastest->getAvgDownloadSpeed()*0.8 > basestat->calculateDownloadSpeed())){
  202. // TODO we should consider that "fastest" is very slow.
  203. SharedHandle<Request> fastestRequest = requestPool_.front();
  204. requestPool_.pop_front();
  205. inFlightRequests_.push_back(fastestRequest);
  206. lastFasterReplace_ = global::wallclock;
  207. return fastestRequest;
  208. }
  209. return SharedHandle<Request>();
  210. }
  211. SharedHandle<Request>
  212. FileEntry::findFasterRequest
  213. (const SharedHandle<Request>& base,
  214. const std::vector<std::pair<size_t, std::string> >& usedHosts,
  215. const SharedHandle<ServerStatMan>& serverStatMan)
  216. {
  217. const int startupIdleTime = 10;
  218. const unsigned int SPEED_THRESHOLD = 20*1024;
  219. if(lastFasterReplace_.difference(global::wallclock) < startupIdleTime) {
  220. return SharedHandle<Request>();
  221. }
  222. std::vector<std::string> inFlightHosts;
  223. enumerateInFlightHosts(inFlightRequests_.begin(), inFlightRequests_.end(),
  224. std::back_inserter(inFlightHosts));
  225. const SharedHandle<PeerStat>& basestat = base->getPeerStat();
  226. A2_LOG_DEBUG("Search faster server using ServerStat.");
  227. // Use first 10 good URIs to introduce some randomness.
  228. const size_t NUM_URI = 10;
  229. std::vector<std::pair<SharedHandle<ServerStat>, std::string> > fastCands;
  230. std::vector<std::string> normCands;
  231. for(std::deque<std::string>::const_iterator i = uris_.begin(),
  232. eoi = uris_.end(); i != eoi && fastCands.size() < NUM_URI; ++i) {
  233. uri::UriStruct us;
  234. if(!uri::parse(us, *i)) {
  235. continue;
  236. }
  237. if(std::count(inFlightHosts.begin(), inFlightHosts.end(),us.host)
  238. >= static_cast<int>(maxConnectionPerServer_)) {
  239. A2_LOG_DEBUG(fmt("%s has already used %d times, not considered.",
  240. (*i).c_str(),
  241. static_cast<int>(maxConnectionPerServer_)));
  242. continue;
  243. }
  244. if(findSecond(usedHosts.begin(), usedHosts.end(), us.host) !=
  245. usedHosts.end()) {
  246. A2_LOG_DEBUG(fmt("%s is in usedHosts, not considered", (*i).c_str()));
  247. continue;
  248. }
  249. SharedHandle<ServerStat> ss = serverStatMan->find(us.host, us.protocol);
  250. if(ss && ss->isOK()) {
  251. if((basestat &&
  252. ss->getDownloadSpeed() > basestat->calculateDownloadSpeed()*1.5) ||
  253. (!basestat && ss->getDownloadSpeed() > SPEED_THRESHOLD)) {
  254. fastCands.push_back(std::make_pair(ss, *i));
  255. }
  256. }
  257. }
  258. if(!fastCands.empty()) {
  259. std::sort(fastCands.begin(), fastCands.end(), ServerStatFaster());
  260. SharedHandle<Request> fastestRequest(new Request());
  261. const std::string& uri = fastCands.front().second;
  262. A2_LOG_DEBUG(fmt("Selected %s from fastCands", uri.c_str()));
  263. fastestRequest->setUri(uri);
  264. fastestRequest->setReferer(base->getReferer());
  265. uris_.erase(std::find(uris_.begin(), uris_.end(), uri));
  266. spentUris_.push_back(uri);
  267. inFlightRequests_.push_back(fastestRequest);
  268. lastFasterReplace_ = global::wallclock;
  269. return fastestRequest;
  270. }
  271. A2_LOG_DEBUG("No faster server found.");
  272. return SharedHandle<Request>();
  273. }
  274. namespace {
  275. class RequestFaster {
  276. public:
  277. bool operator()(const SharedHandle<Request>& lhs,
  278. const SharedHandle<Request>& rhs) const
  279. {
  280. if(!lhs->getPeerStat()) {
  281. return false;
  282. }
  283. if(!rhs->getPeerStat()) {
  284. return true;
  285. }
  286. return
  287. lhs->getPeerStat()->getAvgDownloadSpeed() > rhs->getPeerStat()->getAvgDownloadSpeed();
  288. }
  289. };
  290. } // namespace
  291. void FileEntry::storePool(const SharedHandle<Request>& request)
  292. {
  293. const SharedHandle<PeerStat>& peerStat = request->getPeerStat();
  294. if(peerStat) {
  295. // We need to calculate average download speed here in order to
  296. // store Request in the right position in the pool.
  297. peerStat->calculateAvgDownloadSpeed();
  298. }
  299. std::deque<SharedHandle<Request> >::iterator i =
  300. std::lower_bound(requestPool_.begin(), requestPool_.end(), request,
  301. RequestFaster());
  302. requestPool_.insert(i, request);
  303. }
  304. void FileEntry::poolRequest(const SharedHandle<Request>& request)
  305. {
  306. removeRequest(request);
  307. if(!request->removalRequested()) {
  308. storePool(request);
  309. }
  310. }
  311. bool FileEntry::removeRequest(const SharedHandle<Request>& request)
  312. {
  313. for(std::deque<SharedHandle<Request> >::iterator i =
  314. inFlightRequests_.begin(), eoi = inFlightRequests_.end();
  315. i != eoi; ++i) {
  316. if((*i).get() == request.get()) {
  317. inFlightRequests_.erase(i);
  318. return true;
  319. }
  320. }
  321. return false;
  322. }
  323. void FileEntry::removeURIWhoseHostnameIs(const std::string& hostname)
  324. {
  325. std::deque<std::string> newURIs;
  326. for(std::deque<std::string>::const_iterator itr = uris_.begin(),
  327. eoi = uris_.end(); itr != eoi; ++itr) {
  328. uri::UriStruct us;
  329. if(!uri::parse(us, *itr)) {
  330. continue;
  331. }
  332. if(us.host != hostname) {
  333. newURIs.push_back(*itr);
  334. }
  335. }
  336. A2_LOG_DEBUG(fmt("Removed %lu duplicate hostname URIs for path=%s",
  337. static_cast<unsigned long>(uris_.size()-newURIs.size()),
  338. utf8ToNative(getPath()).c_str()));
  339. uris_.swap(newURIs);
  340. }
  341. void FileEntry::removeIdenticalURI(const std::string& uri)
  342. {
  343. uris_.erase(std::remove(uris_.begin(), uris_.end(), uri), uris_.end());
  344. }
  345. void FileEntry::addURIResult(std::string uri, error_code::Value result)
  346. {
  347. uriResults_.push_back(URIResult(uri, result));
  348. }
  349. namespace {
  350. class FindURIResultByResult {
  351. private:
  352. error_code::Value r_;
  353. public:
  354. FindURIResultByResult(error_code::Value r):r_(r) {}
  355. bool operator()(const URIResult& uriResult) const
  356. {
  357. return uriResult.getResult() == r_;
  358. }
  359. };
  360. } // namespace
  361. void FileEntry::extractURIResult
  362. (std::deque<URIResult>& res, error_code::Value r)
  363. {
  364. std::deque<URIResult>::iterator i =
  365. std::stable_partition(uriResults_.begin(), uriResults_.end(),
  366. FindURIResultByResult(r));
  367. std::copy(uriResults_.begin(), i, std::back_inserter(res));
  368. uriResults_.erase(uriResults_.begin(), i);
  369. }
  370. void FileEntry::reuseUri(const std::vector<std::string>& ignore)
  371. {
  372. if(A2_LOG_DEBUG_ENABLED) {
  373. for(std::vector<std::string>::const_iterator i = ignore.begin(),
  374. eoi = ignore.end(); i != eoi; ++i) {
  375. A2_LOG_DEBUG(fmt("ignore host=%s", (*i).c_str()));
  376. }
  377. }
  378. std::deque<std::string> uris = spentUris_;
  379. std::sort(uris.begin(), uris.end());
  380. uris.erase(std::unique(uris.begin(), uris.end()), uris.end());
  381. std::vector<std::string> errorUris(uriResults_.size());
  382. std::transform(uriResults_.begin(), uriResults_.end(),
  383. errorUris.begin(), std::mem_fun_ref(&URIResult::getURI));
  384. std::sort(errorUris.begin(), errorUris.end());
  385. errorUris.erase(std::unique(errorUris.begin(), errorUris.end()),
  386. errorUris.end());
  387. if(A2_LOG_DEBUG_ENABLED) {
  388. for(std::vector<std::string>::const_iterator i = errorUris.begin(),
  389. eoi = errorUris.end(); i != eoi; ++i) {
  390. A2_LOG_DEBUG(fmt("error URI=%s", (*i).c_str()));
  391. }
  392. }
  393. std::vector<std::string> reusableURIs;
  394. std::set_difference(uris.begin(), uris.end(),
  395. errorUris.begin(), errorUris.end(),
  396. std::back_inserter(reusableURIs));
  397. std::vector<std::string>::iterator insertionPoint = reusableURIs.begin();
  398. for(std::vector<std::string>::iterator i = reusableURIs.begin(),
  399. eoi = reusableURIs.end(); i != eoi; ++i) {
  400. uri::UriStruct us;
  401. if(uri::parse(us, *i) &&
  402. std::find(ignore.begin(), ignore.end(), us.host) == ignore.end()) {
  403. if(i != insertionPoint) {
  404. *insertionPoint = *i;
  405. }
  406. ++insertionPoint;
  407. }
  408. }
  409. reusableURIs.erase(insertionPoint, reusableURIs.end());
  410. size_t ininum = reusableURIs.size();
  411. if(A2_LOG_DEBUG_ENABLED) {
  412. A2_LOG_DEBUG(fmt("Found %u reusable URIs",
  413. static_cast<unsigned int>(ininum)));
  414. for(std::vector<std::string>::const_iterator i = reusableURIs.begin(),
  415. eoi = reusableURIs.end(); i != eoi; ++i) {
  416. A2_LOG_DEBUG(fmt("URI=%s", (*i).c_str()));
  417. }
  418. }
  419. uris_.insert(uris_.end(), reusableURIs.begin(), reusableURIs.end());
  420. }
  421. void FileEntry::releaseRuntimeResource()
  422. {
  423. requestPool_.clear();
  424. inFlightRequests_.clear();
  425. }
  namespace {
  // Returns an iterator to the first element of [first, last) that has
  // not been marked for removal and whose getUri() equals |uri|, or
  // |last| when there is no such element.
  template<typename InputIterator, typename T>
  InputIterator findRequestByUri
  (InputIterator first, InputIterator last, const T& uri)
  {
    while(first != last) {
      const bool live = !(*first)->removalRequested();
      if(live && (*first)->getUri() == uri) {
        return first;
      }
      ++first;
    }
    return last;
  }
  } // namespace
  439. bool FileEntry::removeUri(const std::string& uri)
  440. {
  441. std::deque<std::string>::iterator itr =
  442. std::find(spentUris_.begin(), spentUris_.end(), uri);
  443. if(itr == spentUris_.end()) {
  444. itr = std::find(uris_.begin(), uris_.end(), uri);
  445. if(itr == uris_.end()) {
  446. return false;
  447. } else {
  448. uris_.erase(itr);
  449. return true;
  450. }
  451. } else {
  452. spentUris_.erase(itr);
  453. SharedHandle<Request> req;
  454. std::deque<SharedHandle<Request> >::iterator riter =
  455. findRequestByUri(inFlightRequests_.begin(), inFlightRequests_.end(), uri);
  456. if(riter == inFlightRequests_.end()) {
  457. riter = findRequestByUri(requestPool_.begin(), requestPool_.end(), uri);
  458. if(riter == requestPool_.end()) {
  459. return true;
  460. } else {
  461. req = *riter;
  462. requestPool_.erase(riter);
  463. }
  464. } else {
  465. req = *riter;
  466. }
  467. req->requestRemoval();
  468. return true;
  469. }
  470. }
  471. std::string FileEntry::getBasename() const
  472. {
  473. return File(path_).getBasename();
  474. }
  475. std::string FileEntry::getDirname() const
  476. {
  477. return File(path_).getDirname();
  478. }
  479. size_t FileEntry::setUris(const std::vector<std::string>& uris)
  480. {
  481. uris_.clear();
  482. return addUris(uris.begin(), uris.end());
  483. }
  484. bool FileEntry::addUri(const std::string& uri)
  485. {
  486. uri::UriStruct us;
  487. if(uri::parse(us, uri)) {
  488. uris_.push_back(uri);
  489. return true;
  490. } else {
  491. return false;
  492. }
  493. }
  494. bool FileEntry::insertUri(const std::string& uri, size_t pos)
  495. {
  496. uri::UriStruct us;
  497. if(uri::parse(us, uri)) {
  498. pos = std::min(pos, uris_.size());
  499. uris_.insert(uris_.begin()+pos, uri);
  500. return true;
  501. } else {
  502. return false;
  503. }
  504. }
  505. void FileEntry::setPath(const std::string& path)
  506. {
  507. path_ = path;
  508. }
  509. void FileEntry::setContentType(const std::string& contentType)
  510. {
  511. contentType_ = contentType;
  512. }
  513. size_t FileEntry::countInFlightRequest() const
  514. {
  515. return inFlightRequests_.size();
  516. }
  517. size_t FileEntry::countPooledRequest() const
  518. {
  519. return requestPool_.size();
  520. }
  521. void FileEntry::setOriginalName(const std::string& originalName)
  522. {
  523. originalName_ = originalName;
  524. }
  525. bool FileEntry::emptyRequestUri() const
  526. {
  527. return uris_.empty() && inFlightRequests_.empty() && requestPool_.empty();
  528. }
  529. } // namespace aria2