瀏覽代碼

Rewritten Xml2XmlParser

It is now a push parser, plus a utility function for file parsing.
Tatsuhiro Tsujikawa 13 年之前
父節點
當前提交
70685bd233
共有 4 個文件被更改,包括 132 次插入和 71 次删除
  1. 76 63
      src/Xml2XmlParser.cc
  2. 36 5
      src/Xml2XmlParser.h
  3. 19 2
      src/metalink_helper.cc
  4. 1 1
      src/rpc_helper.cc

+ 76 - 63
src/Xml2XmlParser.cc

@@ -2,7 +2,7 @@
 /*
  * aria2 - The high speed download utility
  *
- * Copyright (C) 2011 Tatsuhiro Tsujikawa
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -36,28 +36,17 @@
 
 #include <cassert>
 #include <cstring>
-#include <deque>
-
-#include <libxml/parser.h>
 
 #include "a2io.h"
-#include "BinaryStream.h"
 #include "ParserStateMachine.h"
 #include "A2STR.h"
 #include "a2functional.h"
 #include "XmlAttr.h"
+#include "util.h"
 
 namespace aria2 {
 
-namespace {
-struct SessionData {
-  std::deque<std::string> charactersStack_;
-  ParserStateMachine* psm_;
-  SessionData(ParserStateMachine* psm)
-    : psm_(psm)
-  {}
-};
-} // namespace
+namespace xml {
 
 namespace {
 void mlStartElement
@@ -88,13 +77,13 @@ void mlStartElement
     xmlAttr.valueLength = pattrs[i+4]-xmlAttr.value;
     xmlAttrs.push_back(xmlAttr);
   }
-  sd->psm_->beginElement
+  sd->psm->beginElement
     (reinterpret_cast<const char*>(localname),
      reinterpret_cast<const char*>(prefix),
      reinterpret_cast<const char*>(nsUri),
      xmlAttrs);
-  if(sd->psm_->needsCharactersBuffering()) {
-    sd->charactersStack_.push_front(A2STR::NIL);
+  if(sd->psm->needsCharactersBuffering()) {
+    sd->charactersStack.push_front(A2STR::NIL);
   }
 }
 } // namespace
@@ -108,11 +97,11 @@ void mlEndElement
 {
   SessionData* sd = reinterpret_cast<SessionData*>(userData);
   std::string characters;
-  if(sd->psm_->needsCharactersBuffering()) {
-    characters = sd->charactersStack_.front();
-    sd->charactersStack_.pop_front();
+  if(sd->psm->needsCharactersBuffering()) {
+    characters = sd->charactersStack.front();
+    sd->charactersStack.pop_front();
   }
-  sd->psm_->endElement
+  sd->psm->endElement
     (reinterpret_cast<const char*>(localname),
      reinterpret_cast<const char*>(prefix),
      reinterpret_cast<const char*>(nsUri),
@@ -124,8 +113,8 @@ namespace {
+// Appends the len bytes starting at ch to the character buffer at the
+// front of the session's stack, but only while the state machine asks
+// for character buffering. userData is cast back to the SessionData*.
+// NOTE(review): assumes the stack is non-empty here, i.e. that
+// mlStartElement pushed an entry for the enclosing element -- confirm.
 void mlCharacters(void* userData, const xmlChar* ch, int len)
 {
   SessionData* sd = reinterpret_cast<SessionData*>(userData);
-  if(sd->psm_->needsCharactersBuffering()) {
-    sd->charactersStack_.front().append(&ch[0], &ch[len]);
+  if(sd->psm->needsCharactersBuffering()) {
+    sd->charactersStack.front().append(&ch[0], &ch[len]);
   }
 }
 } // namespace
@@ -169,61 +158,85 @@ xmlSAXHandler mySAXHandler =
 } // namespace
 
+// Creates the libxml2 push parser context with mySAXHandler as the SAX
+// handler, &sessionData_ as the callbacks' user data and no initial
+// chunk. sessionData_ is declared before ctx_ in the class, so it is
+// constructed before its address is handed to libxml2.
+// NOTE(review): the result of xmlCreatePushParserCtxt() is not checked
+// for null here.
 XmlParser::XmlParser(ParserStateMachine* psm)
-  : psm_(psm)
+  : psm_(psm),
+    sessionData_(psm),
+    ctx_(xmlCreatePushParserCtxt(&mySAXHandler, &sessionData_, 0, 0, 0)),
+    lastError_(0)
 {}
 
-XmlParser::~XmlParser() {}
+// Frees the libxml2 parser context created in the constructor. psm_ is
+// not deleted here.
+XmlParser::~XmlParser()
+{
+  xmlFreeParserCtxt(ctx_);
+}
+
+// Feeds one more chunk (size bytes at data) to the push parser without
+// terminating the document. Returns size on success or a negative
+// XmlError. Errors are sticky: once lastError_ is set, every later call
+// returns it without touching libxml2.
+ssize_t XmlParser::parseUpdate(const char* data, size_t size)
+{
+  if(lastError_ != 0) {
+    return lastError_;
+  }
+  const int terminate = 0;
+  if(xmlParseChunk(ctx_, data, size, terminate) != 0) {
+    lastError_ = ERR_XML_PARSE;
+    return lastError_;
+  }
+  return size;
+}
+
+// Feeds the last chunk (size bytes at data, possibly none) and tells
+// libxml2 that the document ends here. Returns size on success or a
+// negative XmlError. Errors are sticky: once lastError_ is set, every
+// later call returns it without touching libxml2.
+ssize_t XmlParser::parseFinal(const char* data, size_t size)
+{
+  if(lastError_ != 0) {
+    return lastError_;
+  }
+  const int terminate = 1;
+  if(xmlParseChunk(ctx_, data, size, terminate) != 0) {
+    lastError_ = ERR_XML_PARSE;
+    return lastError_;
+  }
+  return size;
+}
 
-bool XmlParser::parseFile(const char* filename)
+// Clears the buffered character data and resets the libxml2 push
+// context with no initial chunk. Returns 0 on success; on failure
+// stores ERR_RESET in lastError_ and returns it.
+// NOTE(review): lastError_ is not cleared on success, so an earlier
+// parse error still short-circuits parseUpdate()/parseFinal() after a
+// successful reset -- confirm this is intended.
+int XmlParser::reset()
 {
-  SessionData sessionData(psm_);
-  // Old libxml2(at least 2.7.6, Ubuntu 10.04LTS) does not read stdin
-  // when "/dev/stdin" is passed as filename while 2.7.7 does. So we
-  // convert DEV_STDIN to "-" for compatibility.
-  const char* nfilename;
-  if(strcmp(filename, DEV_STDIN) == 0) {
-    nfilename = "-";
+  // TODO psm must be reset
+  sessionData_.reset();
+  int rv = xmlCtxtResetPush(ctx_, 0, 0, 0, 0);
+  if(rv != 0) {
+    return lastError_ = ERR_RESET;
   } else {
-    nfilename = filename;
+    return 0;
   }
-  int r = xmlSAXUserParseFile(&mySAXHandler, &sessionData, nfilename);
-  return r == 0 && psm_->finished();
 }
 
-bool XmlParser::parseBinaryStream(BinaryStream* bs)
+// Parses the XML document stored in filename (or standard input when
+// filename equals DEV_STDIN), feeding it to psm through a push parser
+// in 4096-byte chunks. Returns true only if the file could be opened,
+// every read succeeded and libxml2 accepted all chunks including the
+// final one. psm is not deleted.
+// NOTE(review): unlike the previous member function this does not
+// consult psm->finished(); callers presumably check the state machine
+// themselves -- confirm.
+bool parseFile(const std::string& filename, ParserStateMachine* psm)
 {
-  const size_t bufSize = 4096;
-  unsigned char buf[bufSize];
-  ssize_t res = bs->readData(buf, 4, 0);
-  if(res != 4) {
-    return false;
+  int fd;
+  if(filename == DEV_STDIN) {
+    fd = STDIN_FILENO;
+  } else {
+    // Retry only while the open is interrupted by a signal. The error
+    // code lives in errno, not in fd; comparing fd with EINTR made any
+    // failed open (e.g. a missing file) loop forever.
+    while((fd = a2open(utf8ToWChar(filename).c_str(),
+                       O_BINARY | O_RDONLY, OPEN_MODE)) == -1 &&
+          errno == EINTR);
+    if(fd == -1) {
+      return false;
+    }
   }
-  SessionData sessionData(psm_);
-  xmlParserCtxtPtr ctx = xmlCreatePushParserCtxt
-    (&mySAXHandler, &sessionData,
-     reinterpret_cast<const char*>(buf), res, 0);
-  auto_delete<xmlParserCtxtPtr> deleter(ctx, xmlFreeParserCtxt);
-  off_t readOffset = res;
-  while(1) {
-    ssize_t res = bs->readData(buf, bufSize, readOffset);
-    if(res == 0) {
+  XmlParser ps(psm);
+  char buf[4096];
+  ssize_t nread;
+  bool retval = true;
+  while((nread = read(fd, buf, sizeof(buf))) > 0) {
+    if(ps.parseUpdate(buf, nread) < 0) {
+      retval = false;
       break;
     }
-    if(xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), res, 0) != 0) {
-      // TODO we need this? Just break is not suffice?
-      return false;
+  }
+  if(nread < 0) {
+    // read() failed: a partially read document must not count as parsed.
+    retval = false;
+  } else if(nread == 0 && retval) {
+    if(ps.parseFinal(0, 0) < 0) {
+      retval = false;
     }
-    readOffset += res;
   }
-  xmlParseChunk(ctx, reinterpret_cast<const char*>(buf), 0, 1);
-  return psm_->finished();
+  // Release the descriptor we opened; standard input is left open.
+  if(fd != STDIN_FILENO) {
+    close(fd);
+  }
+  return retval;
 }
 
-bool XmlParser::parseMemory(const char* xml, size_t len)
-{
-  SessionData sessionData(psm_);
-  int r = xmlSAXUserParseMemory(&mySAXHandler, &sessionData, xml, len);
-  return r == 0 && psm_->finished();
-}
+} // namespace xml
 
 } // namespace aria2

+ 36 - 5
src/Xml2XmlParser.h

@@ -2,7 +2,7 @@
 /*
  * aria2 - The high speed download utility
  *
- * Copyright (C) 2011 Tatsuhiro Tsujikawa
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,25 +37,56 @@
 
 #include "common.h"
 
+#include <sys/types.h>
+
 #include <cstdlib>
+#include <string>
+#include <deque>
+
+#include <libxml/parser.h>
 
 namespace aria2 {
 
-class BinaryStream;
 class ParserStateMachine;
 
+namespace xml {
+
+// Negative status codes returned by XmlParser. They are kept negative
+// because parseUpdate()/parseFinal() return the consumed size on success.
+enum XmlError {
+  // xmlParseChunk() reported a failure.
+  ERR_XML_PARSE = -1,
+  // xmlCtxtResetPush() reported a failure.
+  ERR_RESET = -2
+};
+
+// State handed to the libxml2 SAX callbacks through their userData
+// pointer.
+struct SessionData {
+  // Character buffers for open elements; callbacks push to, append to
+  // and pop from the front.
+  std::deque<std::string> charactersStack;
+  // State machine receiving the parse events; not deleted by this struct.
+  ParserStateMachine* psm;
+  SessionData(ParserStateMachine* stateMachine)
+    : psm(stateMachine)
+  {}
+  // Discards all buffered character data; psm is left untouched.
+  void reset()
+  {
+    charactersStack.clear();
+  }
+};
+
 class XmlParser {
 public:
   // This object does not delete psm.
   XmlParser(ParserStateMachine* psm);
   ~XmlParser();
-  bool parseFile(const char* filename);
-  bool parseBinaryStream(BinaryStream* binaryStream);
-  bool parseMemory(const char* xml, size_t size);
+  // Feeds size bytes at data to the parser. Returns size on success or
+  // a negative XmlError; after an error every later call returns that
+  // same error.
+  ssize_t parseUpdate(const char* data, size_t size);
+  // Same as parseUpdate(), but also marks the end of the document.
+  // data may be 0 with size 0 when there is nothing left to feed.
+  ssize_t parseFinal(const char* data, size_t size);
+  // Clears buffered character data and resets the libxml2 context.
+  // Returns 0 on success or ERR_RESET.
+  int reset();
 private:
+  // Not copyable: ctx_ is freed in the destructor and refers to this
+  // object's sessionData_, so a copy would double-free the context and
+  // alias the session data. Declared but intentionally not defined.
+  XmlParser(const XmlParser&);
+  XmlParser& operator=(const XmlParser&);
+
   ParserStateMachine* psm_;
+  // Must stay declared before ctx_: its address is passed to libxml2
+  // when ctx_ is initialized.
+  SessionData sessionData_;
+  xmlParserCtxtPtr ctx_;
+  // 0 while no error has occurred, otherwise a negative XmlError.
+  int lastError_;
 };
 
+bool parseFile(const std::string& filename, ParserStateMachine* psm);
+
+} // namespace xml
+
 } // namespace aria2
 
 #endif // D_XML2_XML_PARSER_H

+ 19 - 2
src/metalink_helper.cc

@@ -125,7 +125,7 @@ SharedHandle<Metalinker> parseFile
 {
   MetalinkParserStateMachine psm;
   psm.setBaseUri(baseUri);
-  if(!XmlParser(&psm).parseFile(filename.c_str())) {
+  if(!xml::parseFile(filename, &psm)) {
     throw DL_ABORT_EX2("Could not parse Metalink XML document.",
                        error_code::METALINK_PARSE_ERROR);
   }
@@ -142,7 +142,24 @@ SharedHandle<Metalinker> parseBinaryStream
 {
   MetalinkParserStateMachine psm;
   psm.setBaseUri(baseUri);
-  if(!XmlParser(&psm).parseBinaryStream(bs)) {
+  xml::XmlParser ps(&psm);
+  unsigned char buf[4096];
+  ssize_t nread;
+  off_t offread = 0;
+  bool retval = true;
+  while((nread = bs->readData(buf, sizeof(buf), offread)) > 0) {
+    if(ps.parseUpdate(reinterpret_cast<const char*>(buf), nread) < 0) {
+      retval = false;
+      break;
+    }
+    offread += nread;
+  }
+  if(nread == 0 && retval) {
+    if(ps.parseFinal(0, 0) < 0) {
+      retval = false;
+    }
+  }
+  if(!retval) {
     throw DL_ABORT_EX2("Could not parse Metalink XML document.",
                        error_code::METALINK_PARSE_ERROR);
   }

+ 1 - 1
src/rpc_helper.cc

@@ -53,7 +53,7 @@ namespace rpc {
 RpcRequest xmlParseMemory(const char* xml, size_t size)
 {
   XmlRpcRequestParserStateMachine psm;
-  if(!XmlParser(&psm).parseMemory(xml, size)) {
+  if(xml::XmlParser(&psm).parseFinal(xml, size) < 0) {
     throw DL_ABORT_EX(MSG_CANNOT_PARSE_XML_RPC_REQUEST);
   }
   SharedHandle<List> params;