CookieStorage.cc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2013 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "CookieStorage.h"
  36. #include <cstring>
  37. #include <cstdio>
  38. #include <algorithm>
  39. #include "util.h"
  40. #include "LogFactory.h"
  41. #include "Logger.h"
  42. #include "DlAbortEx.h"
  43. #include "fmt.h"
  44. #include "NsCookieParser.h"
  45. #include "File.h"
  46. #include "a2functional.h"
  47. #include "A2STR.h"
  48. #include "message.h"
  49. #include "cookie_helper.h"
  50. #include "BufferedFile.h"
  51. #ifdef HAVE_SQLITE3
  52. #include "Sqlite3CookieParserImpl.h"
  53. #endif // HAVE_SQLITE3
  54. namespace aria2 {
  55. DomainNode::DomainNode(std::string label, DomainNode* parent)
  56. : label_{std::move(label)},
  57. parent_{parent},
  58. lastAccessTime_{0},
  59. lruAccessTime_{0},
  60. inLru_{false}
  61. {
  62. }
  63. void DomainNode::findCookie(std::vector<const Cookie*>& out,
  64. const std::string& requestHost,
  65. const std::string& requestPath, time_t now,
  66. bool secure)
  67. {
  68. if (cookies_) {
  69. for (auto& c : *cookies_) {
  70. if (c->match(requestHost, requestPath, now, secure)) {
  71. c->setLastAccessTime(now);
  72. out.push_back(c.get());
  73. }
  74. }
  75. }
  76. }
  77. bool DomainNode::addCookie(std::unique_ptr<Cookie> cookie, time_t now)
  78. {
  79. using namespace std::placeholders;
  80. setLastAccessTime(now);
  81. if (!cookies_) {
  82. if (cookie->isExpired(now)) {
  83. return false;
  84. }
  85. else {
  86. cookies_ = make_unique<std::deque<std::unique_ptr<Cookie>>>();
  87. cookies_->push_back(std::move(cookie));
  88. return true;
  89. }
  90. }
  91. auto i = std::find_if(
  92. std::begin(*cookies_), std::end(*cookies_),
  93. [&](const std::unique_ptr<Cookie>& c) { return *c == *cookie; });
  94. if (i == std::end(*cookies_)) {
  95. if (cookie->isExpired(now)) {
  96. return false;
  97. }
  98. else {
  99. if (cookies_->size() >= CookieStorage::MAX_COOKIE_PER_DOMAIN) {
  100. cookies_->erase(std::remove_if(std::begin(*cookies_),
  101. std::end(*cookies_),
  102. std::bind(&Cookie::isExpired, _1, now)),
  103. std::end(*cookies_));
  104. if (cookies_->size() >= CookieStorage::MAX_COOKIE_PER_DOMAIN) {
  105. auto m = std::min_element(std::begin(*cookies_), std::end(*cookies_),
  106. [](const std::unique_ptr<Cookie>& lhs,
  107. const std::unique_ptr<Cookie>& rhs) {
  108. return lhs->getLastAccessTime() <
  109. rhs->getLastAccessTime();
  110. });
  111. *m = std::move(cookie);
  112. }
  113. else {
  114. cookies_->push_back(std::move(cookie));
  115. }
  116. }
  117. else {
  118. cookies_->push_back(std::move(cookie));
  119. }
  120. return true;
  121. }
  122. }
  123. else if (cookie->isExpired(now)) {
  124. cookies_->erase(i);
  125. return false;
  126. }
  127. else {
  128. cookie->setCreationTime((*i)->getCreationTime());
  129. *i = std::move(cookie);
  130. return true;
  131. }
  132. }
  133. bool DomainNode::contains(const Cookie& cookie) const
  134. {
  135. if (cookies_) {
  136. for (auto& i : *cookies_) {
  137. if (*i == cookie) {
  138. return true;
  139. }
  140. }
  141. }
  142. return false;
  143. }
  144. bool DomainNode::writeCookie(BufferedFile& fp) const
  145. {
  146. if (cookies_) {
  147. for (const auto& c : *cookies_) {
  148. std::string data = c->toNsCookieFormat();
  149. data += "\n";
  150. if (fp.write(data.data(), data.size()) != data.size()) {
  151. return false;
  152. }
  153. }
  154. }
  155. return true;
  156. }
  157. size_t DomainNode::countCookie() const
  158. {
  159. if (cookies_) {
  160. return cookies_->size();
  161. }
  162. else {
  163. return 0;
  164. }
  165. }
  166. void DomainNode::clearCookie() { cookies_->clear(); }
  167. void DomainNode::setLastAccessTime(time_t lastAccessTime)
  168. {
  169. lastAccessTime_ = lastAccessTime;
  170. }
  171. time_t DomainNode::getLastAccessTime() const { return lastAccessTime_; }
  172. void DomainNode::setLruAccessTime(time_t t) { lruAccessTime_ = t; }
  173. time_t DomainNode::getLruAccessTime() const { return lruAccessTime_; }
  174. bool DomainNode::empty() const { return !cookies_ || cookies_->empty(); }
  175. bool DomainNode::hasNext() const { return !next_.empty(); }
  176. DomainNode* DomainNode::getParent() const { return parent_; }
  177. void DomainNode::removeNode(DomainNode* node) { next_.erase(node->getLabel()); }
  178. DomainNode* DomainNode::findNext(const std::string& label) const
  179. {
  180. auto i = next_.find(label);
  181. if (i == std::end(next_)) {
  182. return nullptr;
  183. }
  184. else {
  185. return (*i).second.get();
  186. }
  187. }
  188. DomainNode* DomainNode::addNext(std::string label,
  189. std::unique_ptr<DomainNode> node)
  190. {
  191. auto& res = next_[std::move(label)] = std::move(node);
  192. return res.get();
  193. }
  194. const std::string& DomainNode::getLabel() const { return label_; }
  195. bool DomainNode::getInLru() const { return inLru_; }
  196. void DomainNode::setInLru(bool f) { inLru_ = f; }
  197. CookieStorage::CookieStorage() : rootNode_{make_unique<DomainNode>("", nullptr)}
  198. {
  199. }
  namespace {
  // Eviction tuning; see CookieStorageTest::testDomainIsFull() in
  // CookieStorageTest.cc.
  //
  // CookieStorage::store() evicts once the LRU tracker holds at least
  // DOMAIN_EVICTION_TRIGGER nodes, and asks for that fraction
  // (DOMAIN_EVICTION_RATE) of the tracked nodes to be evicted.
  constexpr size_t DOMAIN_EVICTION_TRIGGER = 2000;
  constexpr double DOMAIN_EVICTION_RATE = 0.1;
  } // namespace
  205. namespace {
  206. std::vector<std::string> splitDomainLabel(const std::string& domain)
  207. {
  208. auto labels = std::vector<std::string>{};
  209. if (util::isNumericHost(domain)) {
  210. labels.push_back(domain);
  211. }
  212. else {
  213. util::split(std::begin(domain), std::end(domain),
  214. std::back_inserter(labels), '.');
  215. }
  216. return labels;
  217. }
  218. } // namespace
  219. size_t CookieStorage::getLruTrackerSize() const { return lruTracker_.size(); }
  220. void CookieStorage::evictNode(size_t delnum)
  221. {
  222. for (; delnum > 0 && !lruTracker_.empty(); --delnum) {
  223. auto node = (*lruTracker_.begin()).second;
  224. lruTracker_.erase(lruTracker_.begin());
  225. node->setInLru(false);
  226. node->clearCookie();
  227. while (node->empty() && !node->hasNext()) {
  228. auto parent = node->getParent();
  229. parent->removeNode(node);
  230. if (!parent->empty() || parent->hasNext() || parent == rootNode_.get()) {
  231. break;
  232. }
  233. node = parent;
  234. if (node->getInLru()) {
  235. lruTracker_.erase({node->getLruAccessTime(), node});
  236. node->setInLru(false);
  237. }
  238. }
  239. }
  240. }
  241. const DomainNode* CookieStorage::getRootNode() const { return rootNode_.get(); }
  242. bool CookieStorage::store(std::unique_ptr<Cookie> cookie, time_t now)
  243. {
  244. if (lruTracker_.size() >= DOMAIN_EVICTION_TRIGGER) {
  245. auto delnum = size_t(lruTracker_.size() * DOMAIN_EVICTION_RATE);
  246. evictNode(delnum);
  247. }
  248. auto labels = splitDomainLabel(cookie->getDomain());
  249. auto node = rootNode_.get();
  250. for (auto i = labels.rbegin(), eoi = labels.rend(); i != eoi; ++i) {
  251. auto nextNode = node->findNext(*i);
  252. if (nextNode) {
  253. node = nextNode;
  254. }
  255. else {
  256. node = node->addNext(*i, make_unique<DomainNode>(*i, node));
  257. }
  258. }
  259. bool ok = node->addCookie(std::move(cookie), now);
  260. if (ok) {
  261. updateLru(node, now);
  262. }
  263. return ok;
  264. }
  265. void CookieStorage::updateLru(DomainNode* node, time_t now)
  266. {
  267. if (node->getInLru()) {
  268. lruTracker_.erase({node->getLruAccessTime(), node});
  269. }
  270. else {
  271. node->setInLru(true);
  272. }
  273. node->setLruAccessTime(now);
  274. lruTracker_.insert({node->getLruAccessTime(), node});
  275. }
  276. bool CookieStorage::parseAndStore(const std::string& setCookieString,
  277. const std::string& requestHost,
  278. const std::string& defaultPath, time_t now)
  279. {
  280. auto cookie = cookie::parse(setCookieString, requestHost, defaultPath, now);
  281. return cookie && store(std::move(cookie), now);
  282. }
  283. namespace {
  284. struct CookiePathDivider {
  285. const Cookie* cookie_;
  286. int pathDepth_;
  287. CookiePathDivider(const Cookie* cookie) : cookie_(cookie), pathDepth_(0)
  288. {
  289. const std::string& path = cookie_->getPath();
  290. if (!path.empty()) {
  291. for (size_t i = 1, len = path.size(); i < len; ++i) {
  292. if (path[i] == '/' && path[i - 1] != '/') {
  293. ++pathDepth_;
  294. }
  295. }
  296. if (path[path.size() - 1] != '/') {
  297. ++pathDepth_;
  298. }
  299. }
  300. }
  301. };
  302. } // namespace
  303. namespace {
  304. class CookiePathDividerConverter {
  305. public:
  306. CookiePathDivider operator()(const Cookie* cookie) const
  307. {
  308. return CookiePathDivider(cookie);
  309. }
  310. const Cookie* operator()(const CookiePathDivider& cookiePathDivider) const
  311. {
  312. return cookiePathDivider.cookie_;
  313. }
  314. };
  315. } // namespace
  316. namespace {
  317. class OrderByPathDepthDesc : public std::binary_function<Cookie, Cookie, bool> {
  318. public:
  319. bool operator()(const CookiePathDivider& lhs,
  320. const CookiePathDivider& rhs) const
  321. {
  322. // From http://tools.ietf.org/html/rfc6265#section-5.4:
  323. // 2. The user agent SHOULD sort the cookie-list in the following
  324. // order:
  325. //
  326. // * Cookies with longer paths are listed before cookies with
  327. // shorter paths.
  328. //
  329. // * Among cookies that have equal-length path fields, cookies with
  330. // earlier creation-times are listed before cookies with later
  331. // creation-times.
  332. return lhs.pathDepth_ > rhs.pathDepth_ ||
  333. (!(rhs.pathDepth_ > lhs.pathDepth_) &&
  334. lhs.cookie_->getCreationTime() < rhs.cookie_->getCreationTime());
  335. }
  336. };
  337. } // namespace
  338. namespace {
  339. DomainNode* findNode(const std::string& domain, DomainNode* node)
  340. {
  341. auto labels = splitDomainLabel(domain);
  342. for (auto i = labels.rbegin(), eoi = labels.rend(); i != eoi && node; ++i) {
  343. node = node->findNext(*i);
  344. }
  345. return node;
  346. }
  347. } // namespace
  348. bool CookieStorage::contains(const Cookie& cookie) const
  349. {
  350. auto node = findNode(cookie.getDomain(), rootNode_.get());
  351. return node && node->contains(cookie);
  352. }
  353. std::vector<const Cookie*>
  354. CookieStorage::criteriaFind(const std::string& requestHost,
  355. const std::string& requestPath, time_t now,
  356. bool secure)
  357. {
  358. auto res = std::vector<const Cookie*>{};
  359. if (requestPath.empty()) {
  360. return res;
  361. }
  362. auto labels = splitDomainLabel(requestHost);
  363. auto node = rootNode_.get();
  364. for (auto i = labels.rbegin(), eoi = labels.rend(); i != eoi; ++i) {
  365. auto nextNode = node->findNext(*i);
  366. if (!nextNode) {
  367. break;
  368. }
  369. nextNode->setLastAccessTime(now);
  370. if (nextNode->getInLru()) {
  371. updateLru(nextNode, now);
  372. }
  373. nextNode->findCookie(res, requestHost, requestPath, now, secure);
  374. node = nextNode;
  375. }
  376. auto divs = std::vector<CookiePathDivider>{};
  377. std::transform(std::begin(res), std::end(res), std::back_inserter(divs),
  378. CookiePathDividerConverter{});
  379. std::sort(std::begin(divs), std::end(divs), OrderByPathDepthDesc{});
  380. std::transform(std::begin(divs), std::end(divs), std::begin(res),
  381. CookiePathDividerConverter{});
  382. return res;
  383. }
  384. size_t CookieStorage::size() const
  385. {
  386. size_t n = 0;
  387. for (auto& p : lruTracker_) {
  388. n += p.second->countCookie();
  389. }
  390. return n;
  391. }
  392. bool CookieStorage::load(const std::string& filename, time_t now)
  393. {
  394. char header[16]; // "SQLite format 3" plus \0
  395. size_t headlen;
  396. {
  397. BufferedFile fp{filename.c_str(), BufferedFile::READ};
  398. if (!fp) {
  399. A2_LOG_ERROR(fmt("Failed to open cookie file %s", filename.c_str()));
  400. return false;
  401. }
  402. headlen = fp.read(header, sizeof(header));
  403. }
  404. try {
  405. if (headlen == 16 && memcmp(header, "SQLite format 3\0", 16) == 0) {
  406. #ifdef HAVE_SQLITE3
  407. try {
  408. auto cookies = Sqlite3MozCookieParser(filename).parse();
  409. storeCookies(std::make_move_iterator(std::begin(cookies)),
  410. std::make_move_iterator(std::end(cookies)), now);
  411. }
  412. catch (RecoverableException& e) {
  413. A2_LOG_INFO_EX(EX_EXCEPTION_CAUGHT, e);
  414. A2_LOG_INFO("This does not look like Firefox3 cookie file."
  415. " Retrying, assuming it is Chromium cookie file.");
  416. // Try chrome cookie format
  417. auto cookies = Sqlite3ChromiumCookieParser(filename).parse();
  418. storeCookies(std::make_move_iterator(std::begin(cookies)),
  419. std::make_move_iterator(std::end(cookies)), now);
  420. }
  421. #else // !HAVE_SQLITE3
  422. throw DL_ABORT_EX(
  423. "Cannot read SQLite3 database because SQLite3 support is disabled by"
  424. " configuration.");
  425. #endif // !HAVE_SQLITE3
  426. }
  427. else {
  428. auto cookies = NsCookieParser().parse(filename, now);
  429. storeCookies(std::make_move_iterator(std::begin(cookies)),
  430. std::make_move_iterator(std::end(cookies)), now);
  431. }
  432. return true;
  433. }
  434. catch (RecoverableException& e) {
  435. A2_LOG_ERROR(fmt("Failed to load cookies from %s", filename.c_str()));
  436. return false;
  437. }
  438. }
  439. bool CookieStorage::saveNsFormat(const std::string& filename)
  440. {
  441. auto tempfilename = filename;
  442. tempfilename += "__temp";
  443. {
  444. BufferedFile fp{tempfilename.c_str(), BufferedFile::WRITE};
  445. if (!fp) {
  446. A2_LOG_ERROR(fmt("Cannot create cookie file %s", filename.c_str()));
  447. return false;
  448. }
  449. for (auto& p : lruTracker_) {
  450. if (!p.second->writeCookie(fp)) {
  451. A2_LOG_ERROR(fmt("Failed to save cookies to %s", filename.c_str()));
  452. return false;
  453. }
  454. }
  455. if (fp.close() == EOF) {
  456. A2_LOG_ERROR(fmt("Failed to save cookies to %s", filename.c_str()));
  457. return false;
  458. }
  459. }
  460. if (File(tempfilename).renameTo(filename)) {
  461. return true;
  462. }
  463. else {
  464. A2_LOG_ERROR(fmt("Could not rename file %s as %s", tempfilename.c_str(),
  465. filename.c_str()));
  466. return false;
  467. }
  468. }
  469. } // namespace aria2