BencodeParser.cc 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2012 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "BencodeParser.h"
  36. #include <cassert>
  37. #include "StructParserStateMachine.h"
  38. #include "util.h"
  39. namespace aria2 {
  40. namespace bittorrent {
  41. namespace {
  42. enum {
  43. BENCODE_FINISH,
  44. BENCODE_ERROR,
  45. BENCODE_INITIAL,
  46. BENCODE_VALUE,
  47. BENCODE_DICT_KEY,
  48. BENCODE_DICT_VAL,
  49. BENCODE_LIST,
  50. BENCODE_STRING_LEN,
  51. BENCODE_STRING,
  52. BENCODE_NUMBER_SIGN,
  53. BENCODE_NUMBER,
  54. BENCODE_FLOAT_NUMBER_IGNORE,
  55. };
  56. } // namespace
  57. BencodeParser::BencodeParser(StructParserStateMachine* psm)
  58. : psm_(psm),
  59. currentState_(BENCODE_INITIAL),
  60. strLength_(0),
  61. numberSign_(1),
  62. number_(0),
  63. numConsumed_(0),
  64. lastError_(0)
  65. {
  66. stateStack_.push(BENCODE_FINISH);
  67. }
  68. BencodeParser::~BencodeParser() {}
  69. ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
  70. {
  71. size_t i;
  72. if (currentState_ == BENCODE_FINISH) {
  73. return 0;
  74. }
  75. else if (currentState_ == BENCODE_ERROR) {
  76. return lastError_;
  77. }
  78. for (i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
  79. char c = data[i];
  80. switch (currentState_) {
  81. case BENCODE_LIST:
  82. if (c == 'e') {
  83. onListEnd();
  84. break;
  85. }
  86. else {
  87. int rv = pushState(currentState_);
  88. if (rv < 0) {
  89. return rv;
  90. }
  91. currentState_ = BENCODE_VALUE;
  92. runBeginCallback(STRUCT_ARRAY_DATA_T);
  93. }
  94. // Fall through
  95. case BENCODE_INITIAL:
  96. case BENCODE_VALUE:
  97. switch (c) {
  98. case 'd': {
  99. currentState_ = BENCODE_DICT_KEY;
  100. runBeginCallback(STRUCT_DICT_T);
  101. break;
  102. }
  103. case 'l':
  104. currentState_ = BENCODE_LIST;
  105. runBeginCallback(STRUCT_ARRAY_T);
  106. break;
  107. case 'i':
  108. number_ = 0;
  109. numberSign_ = 1;
  110. numConsumed_ = 0;
  111. currentState_ = BENCODE_NUMBER_SIGN;
  112. runBeginCallback(STRUCT_NUMBER_T);
  113. break;
  114. default:
  115. if (util::isDigit(c)) {
  116. strLength_ = c - '0';
  117. numConsumed_ = 1;
  118. currentState_ = BENCODE_STRING_LEN;
  119. runBeginCallback(STRUCT_STRING_T);
  120. break;
  121. }
  122. else {
  123. currentState_ = BENCODE_ERROR;
  124. return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
  125. }
  126. }
  127. break;
  128. case BENCODE_DICT_KEY: {
  129. if (c == 'e') {
  130. onDictEnd();
  131. break;
  132. }
  133. int rv = pushState(currentState_);
  134. if (rv < 0) {
  135. return rv;
  136. }
  137. strLength_ = 0;
  138. numConsumed_ = 0;
  139. runBeginCallback(STRUCT_DICT_KEY_T);
  140. currentState_ = BENCODE_STRING_LEN;
  141. // Fall through
  142. }
  143. case BENCODE_STRING_LEN: {
  144. size_t j;
  145. for (j = i; j < size && in(data[j], '0', '9'); ++j) {
  146. if ((INT64_MAX - (data[j] - '0')) / 10 < strLength_) {
  147. currentState_ = BENCODE_ERROR;
  148. return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
  149. }
  150. strLength_ *= 10;
  151. strLength_ += data[j] - '0';
  152. }
  153. numConsumed_ += j - i;
  154. if (j != size) {
  155. if (data[j] != ':' || numConsumed_ == 0) {
  156. currentState_ = BENCODE_ERROR;
  157. return lastError_ = ERR_INVALID_STRING_LENGTH;
  158. }
  159. i = j;
  160. currentState_ = BENCODE_STRING;
  161. if (strLength_ == 0) {
  162. runCharactersCallback(nullptr, 0);
  163. onStringEnd();
  164. }
  165. }
  166. else {
  167. i = j - 1;
  168. }
  169. break;
  170. }
  171. case BENCODE_STRING: {
  172. size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
  173. runCharactersCallback(&data[i], nread);
  174. strLength_ -= nread;
  175. i += nread - 1;
  176. if (strLength_ == 0) {
  177. onStringEnd();
  178. }
  179. break;
  180. }
  181. case BENCODE_NUMBER_SIGN: {
  182. switch (c) {
  183. case '+':
  184. numberSign_ = 1;
  185. currentState_ = BENCODE_NUMBER;
  186. break;
  187. case '-':
  188. numberSign_ = -1;
  189. currentState_ = BENCODE_NUMBER;
  190. break;
  191. default:
  192. if (util::isDigit(c)) {
  193. number_ = c - '0';
  194. numConsumed_ = 1;
  195. currentState_ = BENCODE_NUMBER;
  196. }
  197. }
  198. break;
  199. }
  200. case BENCODE_NUMBER: {
  201. size_t j;
  202. for (j = i; j < size && in(data[j], '0', '9'); ++j) {
  203. if ((INT64_MAX - (data[j] - '0')) / 10 < number_) {
  204. currentState_ = BENCODE_ERROR;
  205. return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
  206. }
  207. number_ *= 10;
  208. number_ += data[j] - '0';
  209. }
  210. numConsumed_ += j - i;
  211. if (j != size) {
  212. if (numConsumed_ == 0) {
  213. currentState_ = BENCODE_ERROR;
  214. return lastError_ = ERR_INVALID_NUMBER;
  215. }
  216. auto c = data[j];
  217. if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
  218. // some torrent generator adds floating point number in
  219. // scientific notation (e.g., -1.134E+3) in integer field.
  220. // In this case, just skip these bytes until we find 'e'.
  221. number_ = 0;
  222. numConsumed_ = 0;
  223. currentState_ = BENCODE_FLOAT_NUMBER_IGNORE;
  224. i = j;
  225. break;
  226. }
  227. if (c != 'e') {
  228. currentState_ = BENCODE_ERROR;
  229. return lastError_ = ERR_INVALID_NUMBER;
  230. }
  231. i = j;
  232. onNumberEnd();
  233. }
  234. else {
  235. i = j - 1;
  236. }
  237. break;
  238. }
  239. case BENCODE_FLOAT_NUMBER_IGNORE: {
  240. auto c = data[i];
  241. if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
  242. continue;
  243. }
  244. if (c != 'e') {
  245. currentState_ = BENCODE_ERROR;
  246. return lastError_ = ERR_INVALID_FLOAT_NUMBER;
  247. }
  248. onNumberEnd();
  249. break;
  250. }
  251. }
  252. }
  253. return i;
  254. }
  255. ssize_t BencodeParser::parseFinal(const char* data, size_t len)
  256. {
  257. ssize_t rv;
  258. rv = parseUpdate(data, len);
  259. if (rv >= 0) {
  260. if (currentState_ != BENCODE_FINISH && currentState_ != BENCODE_INITIAL) {
  261. rv = ERR_PREMATURE_DATA;
  262. }
  263. }
  264. return rv;
  265. }
  266. void BencodeParser::reset()
  267. {
  268. psm_->reset();
  269. currentState_ = BENCODE_INITIAL;
  270. lastError_ = 0;
  271. while (!stateStack_.empty()) {
  272. stateStack_.pop();
  273. }
  274. stateStack_.push(BENCODE_FINISH);
  275. }
  276. void BencodeParser::onStringEnd()
  277. {
  278. runEndCallback(stateTop() == BENCODE_DICT_KEY ? STRUCT_DICT_KEY_T
  279. : STRUCT_STRING_T);
  280. onValueEnd();
  281. }
  282. void BencodeParser::onNumberEnd()
  283. {
  284. runNumberCallback(numberSign_ * number_);
  285. runEndCallback(STRUCT_NUMBER_T);
  286. onValueEnd();
  287. }
  288. void BencodeParser::onDictEnd()
  289. {
  290. runEndCallback(STRUCT_DICT_T);
  291. onValueEnd();
  292. }
  293. void BencodeParser::onListEnd()
  294. {
  295. runEndCallback(STRUCT_ARRAY_T);
  296. onValueEnd();
  297. }
  298. void BencodeParser::onValueEnd()
  299. {
  300. switch (stateTop()) {
  301. case BENCODE_DICT_KEY:
  302. popState();
  303. pushState(BENCODE_DICT_VAL);
  304. currentState_ = BENCODE_VALUE;
  305. runBeginCallback(STRUCT_DICT_DATA_T);
  306. break;
  307. case BENCODE_DICT_VAL:
  308. runEndCallback(STRUCT_DICT_DATA_T);
  309. popState();
  310. currentState_ = BENCODE_DICT_KEY;
  311. break;
  312. case BENCODE_LIST:
  313. runEndCallback(STRUCT_ARRAY_DATA_T);
  314. popState();
  315. currentState_ = BENCODE_LIST;
  316. break;
  317. default:
  318. assert(stateTop() == BENCODE_FINISH);
  319. currentState_ = stateTop();
  320. break;
  321. }
  322. }
  323. int BencodeParser::pushState(int state)
  324. {
  325. if (stateStack_.size() >= 50) {
  326. return ERR_STRUCTURE_TOO_DEEP;
  327. }
  328. else {
  329. stateStack_.push(state);
  330. return 0;
  331. }
  332. }
  333. int BencodeParser::stateTop() const { return stateStack_.top(); }
  334. int BencodeParser::popState()
  335. {
  336. int state = stateStack_.top();
  337. stateStack_.pop();
  338. return state;
  339. }
  340. void BencodeParser::runBeginCallback(int elementType)
  341. {
  342. // switch(elementType) {
  343. // case STRUCT_DICT_T:
  344. // std::cout << "object start" << std::endl;
  345. // break;
  346. // case STRUCT_DICT_KEY_T:
  347. // std::cout << "object key start" << std::endl;
  348. // break;
  349. // case STRUCT_DICT_DATA_T:
  350. // std::cout << "object data start" << std::endl;
  351. // break;
  352. // case STRUCT_ARRAY_T:
  353. // std::cout << "array start" << std::endl;
  354. // break;
  355. // case STRUCT_ARRAY_DATA_T:
  356. // std::cout << "array data start" << std::endl;
  357. // break;
  358. // case STRUCT_STRING_T:
  359. // std::cout << "string start" << std::endl;
  360. // break;
  361. // case STRUCT_NUMBER_T:
  362. // std::cout << "number start" << std::endl;
  363. // break;
  364. // case STRUCT_BOOL_T:
  365. // std::cout << "bool start" << std::endl;
  366. // break;
  367. // case STRUCT_NULL_T:
  368. // std::cout << "null start" << std::endl;
  369. // break;
  370. // default:
  371. // break;
  372. // };
  373. psm_->beginElement(elementType);
  374. }
  375. void BencodeParser::runEndCallback(int elementType)
  376. {
  377. // switch(elementType) {
  378. // case STRUCT_DICT_T:
  379. // std::cout << "object end" << std::endl;
  380. // break;
  381. // case STRUCT_DICT_KEY_T:
  382. // std::cout << "object key end" << std::endl;
  383. // break;
  384. // case STRUCT_DICT_DATA_T:
  385. // std::cout << "object data end" << std::endl;
  386. // break;
  387. // case STRUCT_ARRAY_T:
  388. // std::cout << "array end" << std::endl;
  389. // break;
  390. // case STRUCT_ARRAY_DATA_T:
  391. // std::cout << "array data end" << std::endl;
  392. // break;
  393. // case STRUCT_STRING_T:
  394. // std::cout << "string end" << std::endl;
  395. // break;
  396. // case STRUCT_NUMBER_T:
  397. // std::cout << "number end" << std::endl;
  398. // break;
  399. // case STRUCT_BOOL_T:
  400. // std::cout << "bool end" << std::endl;
  401. // break;
  402. // case STRUCT_NULL_T:
  403. // std::cout << "null end" << std::endl;
  404. // break;
  405. // default:
  406. // break;
  407. // };
  408. psm_->endElement(elementType);
  409. }
  410. void BencodeParser::runCharactersCallback(const char* data, size_t len)
  411. {
  412. psm_->charactersCallback(data, len);
  413. }
  414. void BencodeParser::runNumberCallback(int64_t number)
  415. {
  416. psm_->numberCallback(number, 0, 0);
  417. }
  418. } // namespace bittorrent
  419. } // namespace aria2