Xml2MetalinkProcessor.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. /* <!-- copyright */
  2. /*
  3. * aria2 - The high speed download utility
  4. *
  5. * Copyright (C) 2006 Tatsuhiro Tsujikawa
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. *
  21. * In addition, as a special exception, the copyright holders give
  22. * permission to link the code of portions of this program with the
  23. * OpenSSL library under certain conditions as described in each
  24. * individual source file, and distribute linked combinations
  25. * including the two.
  26. * You must obey the GNU General Public License in all respects
  27. * for all of the code used other than OpenSSL. If you modify
  28. * file(s) with this exception, you may extend this exception to your
  29. * version of the file(s), but you are not obligated to do so. If you
  30. * do not wish to do so, delete this exception statement from your
  31. * version. If you delete this exception statement from all source
  32. * files in the program, then also delete it here.
  33. */
  34. /* copyright --> */
  35. #include "Xml2MetalinkProcessor.h"
  36. #include "DlAbortEx.h"
  37. #include "Util.h"
  38. #include "BinaryStream.h"
  39. #include <libxml/parser.h>
  40. #include <libxml/xpath.h>
  41. #include <libxml/xpathInternals.h>
  42. Xml2MetalinkProcessor::Xml2MetalinkProcessor():doc(0), context(0) {}
  43. Xml2MetalinkProcessor::~Xml2MetalinkProcessor() {
  44. release();
  45. }
  46. void Xml2MetalinkProcessor::release() {
  47. if(context) {
  48. xmlXPathFreeContext(context);
  49. context = 0;
  50. }
  51. if(doc) {
  52. xmlFreeDoc(doc);
  53. doc = 0;
  54. }
  55. }
  56. MetalinkerHandle Xml2MetalinkProcessor::parseFile(const string& filename) {
  57. release();
  58. doc = xmlParseFile(filename.c_str());
  59. if(!doc) {
  60. throw new DlAbortEx("Cannot parse metalink file %s", filename.c_str());
  61. }
  62. return processDoc(doc);
  63. }
  64. MetalinkerHandle Xml2MetalinkProcessor::parseFromBinaryStream(const BinaryStreamHandle& binaryStream) {
  65. release();
  66. int32_t bufSize = 4096;
  67. unsigned char buf[bufSize];
  68. int32_t res = binaryStream->readData(buf, 4, 0);
  69. if(res != 4) {
  70. throw new DlAbortEx("Too small data for metalink parsing.");
  71. }
  72. xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt(0, 0, (const char*)buf, res, 0);
  73. try {
  74. int64_t readOffset = res;
  75. while(1) {
  76. int32_t res = binaryStream->readData(buf, bufSize, readOffset);
  77. if(res == 0) {
  78. break;
  79. }
  80. if(xmlParseChunk(ctx, (const char*)buf, res, 0) != 0) {
  81. throw new DlAbortEx("Cannot parse metalink file");
  82. }
  83. readOffset += res;
  84. }
  85. xmlParseChunk(ctx, (const char*)buf, 0, 1);
  86. doc = ctx->myDoc;
  87. xmlFreeParserCtxt(ctx);
  88. } catch(Exception* e) {
  89. xmlFreeParserCtxt(ctx);
  90. throw;
  91. }
  92. if(!doc) {
  93. throw new DlAbortEx("Cannot parse metalink file");
  94. }
  95. return processDoc(doc);
  96. }
  97. MetalinkerHandle Xml2MetalinkProcessor::processDoc(xmlDocPtr doc)
  98. {
  99. context = xmlXPathNewContext(doc);
  100. if(!context) {
  101. throw new DlAbortEx("Cannot create new xpath context");
  102. }
  103. string defaultNamespace = "http://www.metalinker.org/";
  104. if(xmlXPathRegisterNs(context, (xmlChar*)"m",
  105. (xmlChar*)defaultNamespace.c_str()) != 0) {
  106. throw new DlAbortEx("Cannot register namespace %s",
  107. defaultNamespace.c_str());
  108. }
  109. string xpath = "/m:metalink/m:files/m:file";
  110. MetalinkerHandle metalinker(new Metalinker());
  111. for(uint32_t index = 1; 1; index++) {
  112. MetalinkEntryHandle entry = getEntry(xpath+"["+Util::uitos(index)+"]");
  113. if(!entry.get()) {
  114. break;
  115. } else {
  116. metalinker->entries.push_back(entry);
  117. }
  118. }
  119. return metalinker;
  120. }
  121. MetalinkEntryHandle Xml2MetalinkProcessor::getEntry(const string& xpath) {
  122. xmlXPathObjectPtr result = xpathEvaluation(xpath);
  123. if(!result) {
  124. return 0;
  125. }
  126. xmlNodeSetPtr nodeSet = result->nodesetval;
  127. xmlNodePtr node = nodeSet->nodeTab[0];
  128. string filename = Util::trim(xmlAttribute(node, "name"));
  129. xmlXPathFreeObject(result);
  130. MetalinkEntryHandle entry(new MetalinkEntry());
  131. FileEntryHandle fileEntry = new FileEntry(filename, 0, 0);
  132. string sizeStr = Util::trim(xpathContent(xpath+"/m:size"));
  133. if(sizeStr == "") {
  134. fileEntry->setLength(0);
  135. } else {
  136. fileEntry->setLength(strtoll(sizeStr.c_str(), 0, 10));
  137. }
  138. entry->file = fileEntry;
  139. entry->version = Util::trim(xpathContent(xpath+"/m:version"));
  140. entry->language = Util::trim(xpathContent(xpath+"/m:language"));
  141. entry->os = Util::trim(xpathContent(xpath+"/m:os"));
  142. #ifdef ENABLE_MESSAGE_DIGEST
  143. xmlXPathObjectPtr hashPathObj = xpathEvaluation(xpath+"/m:verification/m:hash");
  144. if(hashPathObj) {
  145. xmlNodeSetPtr nodeSet = hashPathObj->nodesetval;
  146. for(int32_t i = 0; i < nodeSet->nodeNr; ++i) {
  147. xmlNodePtr node = nodeSet->nodeTab[i];
  148. string algo = Util::trim(xmlAttribute(node, "type"));
  149. if(MessageDigestContext::supports(algo)) {
  150. entry->checksum = new Checksum(algo, Util::trim(xmlContent(node)));
  151. break;
  152. }
  153. }
  154. }
  155. xmlXPathFreeObject(hashPathObj);
  156. string piecesPath = xpath+"/m:verification/m:pieces";
  157. xmlXPathObjectPtr pieceHashPathObj = xpathEvaluation(piecesPath);
  158. if(pieceHashPathObj) {
  159. xmlNodeSetPtr nodeSet = pieceHashPathObj->nodesetval;
  160. for(int32_t i = 0; i < nodeSet->nodeNr; ++i) {
  161. xmlNodePtr node = nodeSet->nodeTab[i];
  162. string algo = Util::trim(xmlAttribute(node, "type"));
  163. if(MessageDigestContext::supports(algo)) {
  164. entry->chunkChecksum = getPieceHash(piecesPath+"[@type=\""+algo+"\"]",
  165. entry->getLength());
  166. break;
  167. }
  168. }
  169. }
  170. xmlXPathFreeObject(pieceHashPathObj);
  171. #endif // ENABLE_MESSAGE_DIGEST
  172. string resourcesPath = xpath+"/m:resources[@maxconnections]";
  173. xmlXPathObjectPtr resourcesPathObj = xpathEvaluation(resourcesPath);
  174. if(resourcesPathObj) {
  175. xmlNodeSetPtr nodeSet = resourcesPathObj->nodesetval;
  176. xmlNodePtr node = nodeSet->nodeTab[0];
  177. int32_t maxConnections = strtol(Util::trim(xmlAttribute(node, "maxconnections")).c_str(), 0, 10);
  178. entry->maxConnections = maxConnections;
  179. }
  180. xmlXPathFreeObject(resourcesPathObj);
  181. for(uint32_t index = 1; 1; index++) {
  182. MetalinkResourceHandle resource(getResource(xpath+"/m:resources/m:url["+Util::uitos(index)+"]"));
  183. if(!resource.get()) {
  184. break;
  185. } else {
  186. entry->resources.push_back(resource);
  187. }
  188. }
  189. return entry;
  190. }
  191. #ifdef ENABLE_MESSAGE_DIGEST
  192. ChunkChecksumHandle Xml2MetalinkProcessor::getPieceHash(const string& xpath,
  193. int64_t totalSize)
  194. {
  195. xmlXPathObjectPtr result = xpathEvaluation(xpath);
  196. if(!result) {
  197. return 0;
  198. }
  199. xmlNodeSetPtr nodeSet = result->nodesetval;
  200. xmlNodePtr node = nodeSet->nodeTab[0];
  201. int64_t checksumLength = STRTOLL(Util::trim(xmlAttribute(node, "length")).c_str());
  202. string algoString = Util::trim(xmlAttribute(node, "type"));
  203. xmlXPathFreeObject(result);
  204. if(!MessageDigestContext::supports(algoString)) {
  205. // unknown checksum type
  206. return 0;
  207. }
  208. Strings checksums;
  209. uint64_t numPiece = (totalSize+checksumLength-1)/checksumLength;
  210. for(uint64_t i = 0; i < numPiece; ++i) {
  211. string pieceHash = Util::trim(xpathContent(xpath+"/m:hash[@piece=\""+Util::ullitos(i)+"\"]"));
  212. if(pieceHash == "") {
  213. throw new DlAbortEx("Piece hash missing. index=%s", Util::ullitos(i).c_str());
  214. }
  215. checksums.push_back(pieceHash);
  216. }
  217. return new ChunkChecksum(algoString, checksums, checksumLength);
  218. }
  219. #endif // ENABLE_MESSAGE_DIGEST
  220. MetalinkResourceHandle Xml2MetalinkProcessor::getResource(const string& xpath) {
  221. xmlXPathObjectPtr result = xpathEvaluation(xpath);
  222. if(!result) {
  223. return 0;
  224. }
  225. MetalinkResourceHandle resource(new MetalinkResource());
  226. xmlNodeSetPtr nodeSet = result->nodesetval;
  227. xmlNodePtr node = nodeSet->nodeTab[0];
  228. string type = Util::trim(xmlAttribute(node, "type"));
  229. if(type == "ftp") {
  230. resource->type = MetalinkResource::TYPE_FTP;
  231. } else if(type == "http") {
  232. resource->type = MetalinkResource::TYPE_HTTP;
  233. } else if(type == "https") {
  234. resource->type = MetalinkResource::TYPE_HTTPS;
  235. } else if(type == "bittorrent") {
  236. resource->type = MetalinkResource::TYPE_BITTORRENT;
  237. } else {
  238. resource->type = MetalinkResource::TYPE_NOT_SUPPORTED;
  239. }
  240. string pref = Util::trim(xmlAttribute(node, "preference"));
  241. if(pref.empty()) {
  242. resource->preference = 100;
  243. } else {
  244. resource->preference = STRTOLL(pref.c_str());
  245. }
  246. resource->location = Util::toUpper(Util::trim(xmlAttribute(node, "location")));
  247. resource->url = Util::trim(xmlContent(node));
  248. {
  249. string cnn = Util::trim(xmlAttribute(node, "maxconnections"));
  250. if(!cnn.empty()) {
  251. resource->maxConnections = strtol(cnn.c_str(), 0, 10);
  252. }
  253. }
  254. xmlXPathFreeObject(result);
  255. return resource;
  256. }
  257. xmlXPathObjectPtr Xml2MetalinkProcessor::xpathEvaluation(const string& xpath) {
  258. xmlXPathObjectPtr result = xmlXPathEvalExpression((xmlChar*)xpath.c_str(),
  259. context);
  260. if(!result) {
  261. throw new DlAbortEx("Cannot evaluate xpath %s", xpath.c_str());
  262. }
  263. if(xmlXPathNodeSetIsEmpty(result->nodesetval)) {
  264. xmlXPathFreeObject(result);
  265. return 0;
  266. }
  267. return result;
  268. }
  269. string Xml2MetalinkProcessor::xmlAttribute(xmlNodePtr node, const string& attrName) {
  270. xmlChar* temp = xmlGetNoNsProp(node, (xmlChar*)attrName.c_str());
  271. if(!temp) {
  272. return "";
  273. } else {
  274. string attr = (char*)temp;
  275. xmlFree(temp);
  276. return attr;
  277. }
  278. }
  279. string Xml2MetalinkProcessor::xmlContent(xmlNodePtr node) {
  280. xmlChar* temp = xmlNodeGetContent(node);
  281. if(!temp) {
  282. return "";
  283. } else {
  284. string content = (char*)temp;
  285. xmlFree(temp);
  286. return content;
  287. }
  288. }
  289. string Xml2MetalinkProcessor::xpathContent(const string& xpath) {
  290. xmlXPathObjectPtr result = xpathEvaluation(xpath);
  291. if(!result) {
  292. return "";
  293. }
  294. xmlNodeSetPtr nodeSet = result->nodesetval;
  295. xmlNodePtr node = nodeSet->nodeTab[0]->children;
  296. string content = (char*)node->content;
  297. xmlXPathFreeObject(result);
  298. return content;
  299. }
  300. bool Xml2MetalinkProcessor::xpathExists(const string& xpath) {
  301. xmlXPathObjectPtr result = xpathEvaluation(xpath);
  302. bool retval = true;
  303. if(!result) {
  304. retval = false;
  305. }
  306. xmlXPathFreeObject(result);
  307. return retval;
  308. }