| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451 |
- /* <!-- copyright */
- /*
- * aria2 - The high speed download utility
- *
- * Copyright (C) 2012 Tatsuhiro Tsujikawa
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * In addition, as a special exception, the copyright holders give
- * permission to link the code of portions of this program with the
- * OpenSSL library under certain conditions as described in each
- * individual source file, and distribute linked combinations
- * including the two.
- * You must obey the GNU General Public License in all respects
- * for all of the code used other than OpenSSL. If you modify
- * file(s) with this exception, you may extend this exception to your
- * version of the file(s), but you are not obligated to do so. If you
- * do not wish to do so, delete this exception statement from your
- * version. If you delete this exception statement from all source
- * files in the program, then also delete it here.
- */
- /* copyright --> */
- #include "BencodeParser.h"
- #include <cassert>
- #include "StructParserStateMachine.h"
- #include "util.h"
- namespace aria2 {
- namespace bittorrent {
namespace {
// States of the incremental bencode parser.  A value of this enum is
// held in currentState_ (what the next input byte is matched against)
// and values are also pushed on stateStack_ to remember the enclosing
// container while a nested value is being parsed.
enum {
  // The top-level value is complete.  Also the sentinel stored at the
  // bottom of the state stack.
  BENCODE_FINISH = 0,
  // A parse error has been recorded in lastError_.
  BENCODE_ERROR,
  // Nothing has been parsed yet.
  BENCODE_INITIAL,
  // Expecting the first byte of a value.
  BENCODE_VALUE,
  // Inside a dict: expecting a key or the closing 'e'.
  BENCODE_DICT_KEY,
  // Stack marker: the value being parsed belongs to a dict entry.
  BENCODE_DICT_VAL,
  // Inside a list: expecting an element or the closing 'e'.
  BENCODE_LIST,
  // Reading the decimal length prefix of a string.
  BENCODE_STRING_LEN,
  // Reading the bytes of a string.
  BENCODE_STRING,
  // Right after 'i': expecting a sign or the first digit.
  BENCODE_NUMBER_SIGN,
  // Reading the digits of an integer.
  BENCODE_NUMBER,
  // Skipping the rest of a floating point style number up to 'e'.
  BENCODE_FLOAT_NUMBER_IGNORE,
};
} // namespace
- BencodeParser::BencodeParser(StructParserStateMachine* psm)
- : psm_(psm),
- currentState_(BENCODE_INITIAL),
- strLength_(0),
- numberSign_(1),
- number_(0),
- numConsumed_(0),
- lastError_(0)
- {
- stateStack_.push(BENCODE_FINISH);
- }
- BencodeParser::~BencodeParser() {}
- ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
- {
- size_t i;
- if (currentState_ == BENCODE_FINISH) {
- return 0;
- }
- else if (currentState_ == BENCODE_ERROR) {
- return lastError_;
- }
- for (i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
- char c = data[i];
- switch (currentState_) {
- case BENCODE_LIST:
- if (c == 'e') {
- onListEnd();
- break;
- }
- else {
- int rv = pushState(currentState_);
- if (rv < 0) {
- return rv;
- }
- currentState_ = BENCODE_VALUE;
- runBeginCallback(STRUCT_ARRAY_DATA_T);
- }
- // Fall through
- case BENCODE_INITIAL:
- case BENCODE_VALUE:
- switch (c) {
- case 'd': {
- currentState_ = BENCODE_DICT_KEY;
- runBeginCallback(STRUCT_DICT_T);
- break;
- }
- case 'l':
- currentState_ = BENCODE_LIST;
- runBeginCallback(STRUCT_ARRAY_T);
- break;
- case 'i':
- number_ = 0;
- numberSign_ = 1;
- numConsumed_ = 0;
- currentState_ = BENCODE_NUMBER_SIGN;
- runBeginCallback(STRUCT_NUMBER_T);
- break;
- default:
- if (util::isDigit(c)) {
- strLength_ = c - '0';
- numConsumed_ = 1;
- currentState_ = BENCODE_STRING_LEN;
- runBeginCallback(STRUCT_STRING_T);
- break;
- }
- else {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
- }
- }
- break;
- case BENCODE_DICT_KEY: {
- if (c == 'e') {
- onDictEnd();
- break;
- }
- int rv = pushState(currentState_);
- if (rv < 0) {
- return rv;
- }
- strLength_ = 0;
- numConsumed_ = 0;
- runBeginCallback(STRUCT_DICT_KEY_T);
- currentState_ = BENCODE_STRING_LEN;
- // Fall through
- }
- case BENCODE_STRING_LEN: {
- size_t j;
- for (j = i; j < size && in(data[j], '0', '9'); ++j) {
- if ((INT64_MAX - (data[j] - '0')) / 10 < strLength_) {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
- }
- strLength_ *= 10;
- strLength_ += data[j] - '0';
- }
- numConsumed_ += j - i;
- if (j != size) {
- if (data[j] != ':' || numConsumed_ == 0) {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_INVALID_STRING_LENGTH;
- }
- i = j;
- currentState_ = BENCODE_STRING;
- if (strLength_ == 0) {
- runCharactersCallback(nullptr, 0);
- onStringEnd();
- }
- }
- else {
- i = j - 1;
- }
- break;
- }
- case BENCODE_STRING: {
- size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
- runCharactersCallback(&data[i], nread);
- strLength_ -= nread;
- i += nread - 1;
- if (strLength_ == 0) {
- onStringEnd();
- }
- break;
- }
- case BENCODE_NUMBER_SIGN: {
- switch (c) {
- case '+':
- numberSign_ = 1;
- currentState_ = BENCODE_NUMBER;
- break;
- case '-':
- numberSign_ = -1;
- currentState_ = BENCODE_NUMBER;
- break;
- default:
- if (util::isDigit(c)) {
- number_ = c - '0';
- numConsumed_ = 1;
- currentState_ = BENCODE_NUMBER;
- }
- }
- break;
- }
- case BENCODE_NUMBER: {
- size_t j;
- for (j = i; j < size && in(data[j], '0', '9'); ++j) {
- if ((INT64_MAX - (data[j] - '0')) / 10 < number_) {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
- }
- number_ *= 10;
- number_ += data[j] - '0';
- }
- numConsumed_ += j - i;
- if (j != size) {
- if (numConsumed_ == 0) {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_INVALID_NUMBER;
- }
- auto c = data[j];
- if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
- // some torrent generator adds floating point number in
- // scientific notation (e.g., -1.134E+3) in integer field.
- // In this case, just skip these bytes until we find 'e'.
- number_ = 0;
- numConsumed_ = 0;
- currentState_ = BENCODE_FLOAT_NUMBER_IGNORE;
- i = j;
- break;
- }
- if (c != 'e') {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_INVALID_NUMBER;
- }
- i = j;
- onNumberEnd();
- }
- else {
- i = j - 1;
- }
- break;
- }
- case BENCODE_FLOAT_NUMBER_IGNORE: {
- auto c = data[i];
- if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
- continue;
- }
- if (c != 'e') {
- currentState_ = BENCODE_ERROR;
- return lastError_ = ERR_INVALID_FLOAT_NUMBER;
- }
- onNumberEnd();
- break;
- }
- }
- }
- return i;
- }
- ssize_t BencodeParser::parseFinal(const char* data, size_t len)
- {
- ssize_t rv;
- rv = parseUpdate(data, len);
- if (rv >= 0) {
- if (currentState_ != BENCODE_FINISH && currentState_ != BENCODE_INITIAL) {
- rv = ERR_PREMATURE_DATA;
- }
- }
- return rv;
- }
- void BencodeParser::reset()
- {
- psm_->reset();
- currentState_ = BENCODE_INITIAL;
- lastError_ = 0;
- while (!stateStack_.empty()) {
- stateStack_.pop();
- }
- stateStack_.push(BENCODE_FINISH);
- }
- void BencodeParser::onStringEnd()
- {
- runEndCallback(stateTop() == BENCODE_DICT_KEY ? STRUCT_DICT_KEY_T
- : STRUCT_STRING_T);
- onValueEnd();
- }
- void BencodeParser::onNumberEnd()
- {
- runNumberCallback(numberSign_ * number_);
- runEndCallback(STRUCT_NUMBER_T);
- onValueEnd();
- }
- void BencodeParser::onDictEnd()
- {
- runEndCallback(STRUCT_DICT_T);
- onValueEnd();
- }
- void BencodeParser::onListEnd()
- {
- runEndCallback(STRUCT_ARRAY_T);
- onValueEnd();
- }
- void BencodeParser::onValueEnd()
- {
- switch (stateTop()) {
- case BENCODE_DICT_KEY:
- popState();
- pushState(BENCODE_DICT_VAL);
- currentState_ = BENCODE_VALUE;
- runBeginCallback(STRUCT_DICT_DATA_T);
- break;
- case BENCODE_DICT_VAL:
- runEndCallback(STRUCT_DICT_DATA_T);
- popState();
- currentState_ = BENCODE_DICT_KEY;
- break;
- case BENCODE_LIST:
- runEndCallback(STRUCT_ARRAY_DATA_T);
- popState();
- currentState_ = BENCODE_LIST;
- break;
- default:
- assert(stateTop() == BENCODE_FINISH);
- currentState_ = stateTop();
- break;
- }
- }
- int BencodeParser::pushState(int state)
- {
- if (stateStack_.size() >= 50) {
- return ERR_STRUCTURE_TOO_DEEP;
- }
- else {
- stateStack_.push(state);
- return 0;
- }
- }
- int BencodeParser::stateTop() const { return stateStack_.top(); }
- int BencodeParser::popState()
- {
- int state = stateStack_.top();
- stateStack_.pop();
- return state;
- }
- void BencodeParser::runBeginCallback(int elementType)
- {
- // switch(elementType) {
- // case STRUCT_DICT_T:
- // std::cout << "object start" << std::endl;
- // break;
- // case STRUCT_DICT_KEY_T:
- // std::cout << "object key start" << std::endl;
- // break;
- // case STRUCT_DICT_DATA_T:
- // std::cout << "object data start" << std::endl;
- // break;
- // case STRUCT_ARRAY_T:
- // std::cout << "array start" << std::endl;
- // break;
- // case STRUCT_ARRAY_DATA_T:
- // std::cout << "array data start" << std::endl;
- // break;
- // case STRUCT_STRING_T:
- // std::cout << "string start" << std::endl;
- // break;
- // case STRUCT_NUMBER_T:
- // std::cout << "number start" << std::endl;
- // break;
- // case STRUCT_BOOL_T:
- // std::cout << "bool start" << std::endl;
- // break;
- // case STRUCT_NULL_T:
- // std::cout << "null start" << std::endl;
- // break;
- // default:
- // break;
- // };
- psm_->beginElement(elementType);
- }
- void BencodeParser::runEndCallback(int elementType)
- {
- // switch(elementType) {
- // case STRUCT_DICT_T:
- // std::cout << "object end" << std::endl;
- // break;
- // case STRUCT_DICT_KEY_T:
- // std::cout << "object key end" << std::endl;
- // break;
- // case STRUCT_DICT_DATA_T:
- // std::cout << "object data end" << std::endl;
- // break;
- // case STRUCT_ARRAY_T:
- // std::cout << "array end" << std::endl;
- // break;
- // case STRUCT_ARRAY_DATA_T:
- // std::cout << "array data end" << std::endl;
- // break;
- // case STRUCT_STRING_T:
- // std::cout << "string end" << std::endl;
- // break;
- // case STRUCT_NUMBER_T:
- // std::cout << "number end" << std::endl;
- // break;
- // case STRUCT_BOOL_T:
- // std::cout << "bool end" << std::endl;
- // break;
- // case STRUCT_NULL_T:
- // std::cout << "null end" << std::endl;
- // break;
- // default:
- // break;
- // };
- psm_->endElement(elementType);
- }
- void BencodeParser::runCharactersCallback(const char* data, size_t len)
- {
- psm_->charactersCallback(data, len);
- }
- void BencodeParser::runNumberCallback(int64_t number)
- {
- psm_->numberCallback(number, 0, 0);
- }
- } // namespace bittorrent
- } // namespace aria2
|