XML2SAXMetalinkProcessor.cc 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "XML2SAXMetalinkProcessor.h"
  36. #include <cassert>
  37. #include "BinaryStream.h"
  38. #include "MetalinkParserStateMachine.h"
  39. #include "Metalinker.h"
  40. #include "MetalinkEntry.h"
  41. #include "util.h"
  42. #include "message.h"
  43. #include "DlAbortEx.h"
  44. #include "A2STR.h"
  45. #include "error_code.h"
  46. namespace aria2 {
  47. namespace {
  48. class SessionData {
  49. public:
  50. SharedHandle<MetalinkParserStateMachine> stm_;
  51. std::deque<std::string> charactersStack_;
  52. SessionData(const SharedHandle<MetalinkParserStateMachine>& stm):stm_(stm) {}
  53. };
  54. } // namespace
  55. namespace {
  56. void mlStartElement
  57. (void* userData,
  58. const xmlChar* srcLocalname,
  59. const xmlChar* srcPrefix,
  60. const xmlChar* srcNsUri,
  61. int numNamespaces,
  62. const xmlChar **namespaces,
  63. int numAttrs,
  64. int numDefaulted,
  65. const xmlChar **attrs)
  66. {
  67. SessionData* sd = reinterpret_cast<SessionData*>(userData);
  68. std::vector<XmlAttr> xmlAttrs;
  69. size_t index = 0;
  70. for(int attrIndex = 0; attrIndex < numAttrs; ++attrIndex, index += 5) {
  71. XmlAttr xmlAttr;
  72. assert(attrs[index]);
  73. xmlAttr.localname = reinterpret_cast<const char*>(attrs[index]);
  74. if(attrs[index+1]) {
  75. xmlAttr.prefix = reinterpret_cast<const char*>(attrs[index+1]);
  76. }
  77. if(attrs[index+2]) {
  78. xmlAttr.nsUri = reinterpret_cast<const char*>(attrs[index+2]);
  79. }
  80. const char* valueBegin = reinterpret_cast<const char*>(attrs[index+3]);
  81. const char* valueEnd = reinterpret_cast<const char*>(attrs[index+4]);
  82. xmlAttr.value = std::string(valueBegin, valueEnd);
  83. xmlAttrs.push_back(xmlAttr);
  84. }
  85. assert(srcLocalname);
  86. std::string localname = reinterpret_cast<const char*>(srcLocalname);
  87. std::string prefix;
  88. std::string nsUri;
  89. if(srcPrefix) {
  90. prefix = reinterpret_cast<const char*>(srcPrefix);
  91. }
  92. if(srcNsUri) {
  93. nsUri = reinterpret_cast<const char*>(srcNsUri);
  94. }
  95. sd->stm_->beginElement(localname, prefix, nsUri, xmlAttrs);
  96. if(sd->stm_->needsCharactersBuffering()) {
  97. sd->charactersStack_.push_front(A2STR::NIL);
  98. }
  99. }
  100. } // namespace
  101. namespace {
  102. void mlEndElement
  103. (void* userData,
  104. const xmlChar* srcLocalname,
  105. const xmlChar* srcPrefix,
  106. const xmlChar* srcNsUri)
  107. {
  108. SessionData* sd = reinterpret_cast<SessionData*>(userData);
  109. std::string characters;
  110. if(sd->stm_->needsCharactersBuffering()) {
  111. characters = sd->charactersStack_.front();
  112. sd->charactersStack_.pop_front();
  113. }
  114. std::string localname = reinterpret_cast<const char*>(srcLocalname);
  115. std::string prefix;
  116. std::string nsUri;
  117. if(srcPrefix) {
  118. prefix = reinterpret_cast<const char*>(srcPrefix);
  119. }
  120. if(srcNsUri) {
  121. nsUri = reinterpret_cast<const char*>(srcNsUri);
  122. }
  123. sd->stm_->endElement(localname, prefix, nsUri, characters);
  124. }
  125. } // namespace
  126. namespace {
  127. void mlCharacters(void* userData, const xmlChar* ch, int len)
  128. {
  129. SessionData* sd = reinterpret_cast<SessionData*>(userData);
  130. if(sd->stm_->needsCharactersBuffering()) {
  131. sd->charactersStack_.front() += std::string(&ch[0], &ch[len]);
  132. }
  133. }
  134. } // namespace
  135. namespace {
  136. xmlSAXHandler mySAXHandler =
  137. {
  138. 0, // internalSubsetSAXFunc
  139. 0, // isStandaloneSAXFunc
  140. 0, // hasInternalSubsetSAXFunc
  141. 0, // hasExternalSubsetSAXFunc
  142. 0, // resolveEntitySAXFunc
  143. 0, // getEntitySAXFunc
  144. 0, // entityDeclSAXFunc
  145. 0, // notationDeclSAXFunc
  146. 0, // attributeDeclSAXFunc
  147. 0, // elementDeclSAXFunc
  148. 0, // unparsedEntityDeclSAXFunc
  149. 0, // setDocumentLocatorSAXFunc
  150. 0, // startDocumentSAXFunc
  151. 0, // endDocumentSAXFunc
  152. 0, // startElementSAXFunc
  153. 0, // endElementSAXFunc
  154. 0, // referenceSAXFunc
  155. &mlCharacters, // charactersSAXFunc
  156. 0, // ignorableWhitespaceSAXFunc
  157. 0, // processingInstructionSAXFunc
  158. 0, // commentSAXFunc
  159. 0, // warningSAXFunc
  160. 0, // errorSAXFunc
  161. 0, // fatalErrorSAXFunc
  162. 0, // getParameterEntitySAXFunc
  163. 0, // cdataBlockSAXFunc
  164. 0, // externalSubsetSAXFunc
  165. XML_SAX2_MAGIC, // unsigned int initialized
  166. 0, // void * _private
  167. &mlStartElement, // startElementNsSAX2Func
  168. &mlEndElement, // endElementNsSAX2Func
  169. 0, // xmlStructuredErrorFunc
  170. };
  171. } // namespace
  172. MetalinkProcessor::MetalinkProcessor() {}
  173. MetalinkProcessor::~MetalinkProcessor() {}
  174. SharedHandle<Metalinker>
  175. MetalinkProcessor::parseFile(const std::string& filename)
  176. {
  177. stm_.reset(new MetalinkParserStateMachine());
  178. SharedHandle<SessionData> sessionData(new SessionData(stm_));
  179. // Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin
  180. // when "/dev/stdin" is passed as filename while 2.7.7 does. So we
  181. // convert DEV_STDIN to "-" for compatibility.
  182. std::string nfilename;
  183. if(filename == DEV_STDIN) {
  184. nfilename = "-";
  185. } else {
  186. nfilename = filename;
  187. }
  188. int retval = xmlSAXUserParseFile(&mySAXHandler, sessionData.get(),
  189. nfilename.c_str());
  190. if(retval != 0) {
  191. throw DL_ABORT_EX2(MSG_CANNOT_PARSE_METALINK,
  192. error_code::METALINK_PARSE_ERROR);
  193. }
  194. if(!stm_->finished()) {
  195. throw DL_ABORT_EX2(MSG_CANNOT_PARSE_METALINK,
  196. error_code::METALINK_PARSE_ERROR);
  197. }
  198. if(!stm_->getErrors().empty()) {
  199. throw DL_ABORT_EX2(stm_->getErrorString(),
  200. error_code::METALINK_PARSE_ERROR);
  201. }
  202. return stm_->getResult();
  203. }
  204. SharedHandle<Metalinker>
  205. MetalinkProcessor::parseFromBinaryStream(const SharedHandle<BinaryStream>& binaryStream)
  206. {
  207. stm_.reset(new MetalinkParserStateMachine());
  208. size_t bufSize = 4096;
  209. unsigned char buf[bufSize];
  210. ssize_t res = binaryStream->readData(buf, 4, 0);
  211. if(res != 4) {
  212. throw DL_ABORT_EX2("Too small data for parsing XML.",
  213. error_code::METALINK_PARSE_ERROR);
  214. }
  215. SharedHandle<SessionData> sessionData(new SessionData(stm_));
  216. xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt
  217. (&mySAXHandler, sessionData.get(),
  218. reinterpret_cast<const char*>(buf), res, 0);
  219. auto_delete<xmlParserCtxtPtr> deleter(ctx, xmlFreeParserCtxt);
  220. off_t readOffset = res;
  221. while(1) {
  222. ssize_t res = binaryStream->readData(buf, bufSize, readOffset);
  223. if(res == 0) {
  224. break;
  225. }
  226. if(xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), res, 0) != 0) {
  227. throw DL_ABORT_EX2(MSG_CANNOT_PARSE_METALINK,
  228. error_code::METALINK_PARSE_ERROR);
  229. }
  230. readOffset += res;
  231. }
  232. xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), 0, 1);
  233. if(!stm_->finished()) {
  234. throw DL_ABORT_EX2(MSG_CANNOT_PARSE_METALINK,
  235. error_code::METALINK_PARSE_ERROR);
  236. }
  237. if(!stm_->getErrors().empty()) {
  238. throw DL_ABORT_EX2(stm_->getErrorString(),
  239. error_code::METALINK_PARSE_ERROR);
  240. }
  241. return stm_->getResult();
  242. }
  243. } // namespace aria2