ソースを参照

Rewritten ExpatXmlParser

Moved the elements common to both Xml2XmlParser and ExpatXmlParser
into XmlParser.
Tatsuhiro Tsujikawa 13 年 前
コミット
f6b2c3c080
7 ファイル変更186 行追加149 行削除
  1. 51 82
      src/ExpatXmlParser.cc
  2. 15 9
      src/ExpatXmlParser.h
  3. 1 1
      src/Makefile.am
  4. 1 32
      src/Xml2XmlParser.cc
  5. 2 25
      src/Xml2XmlParser.h
  6. 75 0
      src/XmlParser.cc
  7. 41 0
      src/XmlParser.h

+ 51 - 82
src/ExpatXmlParser.cc

@@ -36,13 +36,8 @@
 
 #include <cstdio>
 #include <cstring>
-#include <deque>
-
-#include <expat.h>
 
 #include "a2io.h"
-#include "BinaryStream.h"
-#include "BufferedFile.h"
 #include "ParserStateMachine.h"
 #include "A2STR.h"
 #include "a2functional.h"
@@ -50,15 +45,7 @@
 
 namespace aria2 {
 
-namespace {
-struct SessionData {
-  std::deque<std::string> charactersStack_;
-  ParserStateMachine* psm_;
-  SessionData(ParserStateMachine* psm)
-    : psm_(psm)
-  {}
-};
-} // namespace
+namespace xml {
 
 namespace {
 void splitNsName(const char** localname, const char** nsUri, const char* src)
@@ -101,14 +88,14 @@ void mlStartElement(void* userData, const char* nsName, const char** attrs)
   const char* prefix = 0;
   const char* nsUri = 0;
   splitNsName(&localname, &nsUri, nsName);
-  sd->psm_->beginElement(localname, prefix, nsUri, xmlAttrs);
+  sd->psm->beginElement(localname, prefix, nsUri, xmlAttrs);
   delete [] nsUri;
   for(std::vector<XmlAttr>::iterator i = xmlAttrs.begin(),
         eoi = xmlAttrs.end(); i != eoi; ++i) {
     delete [] (*i).nsUri;
   }
-  if(sd->psm_->needsCharactersBuffering()) {
-    sd->charactersStack_.push_front(A2STR::NIL);
+  if(sd->psm->needsCharactersBuffering()) {
+    sd->charactersStack.push_front(A2STR::NIL);
   }
 }
 } // namespace
@@ -122,11 +109,11 @@ void mlEndElement(void* userData, const char* nsName)
   splitNsName(&localname, &nsUri, nsName);
   SessionData* sd = reinterpret_cast<SessionData*>(userData);
   std::string characters;
-  if(sd->psm_->needsCharactersBuffering()) {
-    characters = sd->charactersStack_.front();
-    sd->charactersStack_.pop_front();
+  if(sd->psm->needsCharactersBuffering()) {
+    characters = sd->charactersStack.front();
+    sd->charactersStack.pop_front();
   }
-  sd->psm_->endElement(localname, prefix, nsUri, characters);
+  sd->psm->endElement(localname, prefix, nsUri, characters);
   delete [] nsUri;
 }
 } // namespace
@@ -135,92 +122,74 @@ namespace {
+// Expat character-data callback (registered with
+// XML_SetCharacterDataHandler in setupParser).  userData is the
+// SessionData installed with XML_SetUserData.  When the state machine
+// asks for character buffering, the len bytes starting at ch are
+// appended to the string at the front of charactersStack; otherwise
+// the data is ignored.
 void mlCharacters(void* userData, const char* ch, int len)
 {
   SessionData* sd = reinterpret_cast<SessionData*>(userData);
-  if(sd->psm_->needsCharactersBuffering()) {
-    sd->charactersStack_.front().append(&ch[0], &ch[len]);
+  if(sd->psm->needsCharactersBuffering()) {
+    sd->charactersStack.front().append(&ch[0], &ch[len]);
   }
 }
 } // namespace
 
-XmlParser::XmlParser(ParserStateMachine* psm)
-  : psm_(psm)
-{}
-
-XmlParser::~XmlParser() {}
-
 namespace {
-XML_Parser createParser(SessionData* sd)
+// Installs sd as the user data of parser and registers the
+// element start/end and character-data callbacks of this file on it.
+// The parser itself is created and freed by the caller.
+void setupParser(XML_Parser parser, SessionData *sd)
 {
-  XML_Parser parser = XML_ParserCreateNS(0, static_cast<const XML_Char>('\t'));
   XML_SetUserData(parser, sd);
   XML_SetElementHandler(parser, &mlStartElement, &mlEndElement);
   XML_SetCharacterDataHandler(parser, &mlCharacters);
-  return parser;
 }
 } // namespace
 
-bool XmlParser::parseFile(const char* filename)
+// Stores psm, builds the session data around it, creates a
+// namespace-aware Expat parser ('\t' is the separator character
+// passed to XML_ParserCreateNS) and registers the callbacks.
+// sessionData_ is initialized from psm_, which works because psm_ is
+// declared before sessionData_ in the class.
+// NOTE(review): the result of XML_ParserCreateNS is not checked for
+// NULL -- confirm whether allocation failure must be handled here.
+XmlParser::XmlParser(ParserStateMachine* psm)
+  : psm_(psm),
+    sessionData_(psm_),
+    ctx_(XML_ParserCreateNS(0, static_cast<const XML_Char>('\t'))),
+    lastError_(0)
 {
-  if(strcmp(filename, DEV_STDIN) == 0) {
-    BufferedFile fp(stdin);
-    return parseFile(fp);
-  } else {
-    BufferedFile fp(filename, BufferedFile::READ);
-    return parseFile(fp);
-  }
+  setupParser(ctx_, &sessionData_);
+}
+
+// Releases the Expat parser created in the constructor.  psm_ is not
+// deleted.
+XmlParser::~XmlParser()
+{
+  XML_ParserFree(ctx_);
 }
 
-bool XmlParser::parseFile(BufferedFile& fp)
+// Feeds the size bytes at data to Expat as a non-final chunk (the
+// last argument of XML_Parse is 0).  Returns size on success.  On a
+// parse error ERR_XML_PARSE is stored in lastError_ and returned;
+// while lastError_ is non-zero, every call returns it immediately
+// without parsing.
+ssize_t XmlParser::parseUpdate(const char* data, size_t size)
 {
-  if(!fp) {
-    return false;
+  if(lastError_ != 0) {
+    return lastError_;
   }
-  char buf[4096];
-  SessionData sessionData(psm_);
-  XML_Parser parser = createParser(&sessionData);
-  auto_delete<XML_Parser> deleter(parser, XML_ParserFree);
-  while(1) {
-    size_t res = fp.read(buf, sizeof(buf));
-    if(XML_Parse(parser, buf, res, 0) == XML_STATUS_ERROR) {
-      return false;
-    }
-    if(res < sizeof(buf)) {
-      break;
-    }
+  XML_Status rv = XML_Parse(ctx_, data, size, 0);
+  if(rv == XML_STATUS_ERROR) {
+    return lastError_ = ERR_XML_PARSE;
+  } else {
+    return size;
   }
-  return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
 }
-         
-bool XmlParser::parseBinaryStream(BinaryStream* bs)
+
+// Feeds the size bytes at data to Expat as the final chunk of the
+// document (the last argument of XML_Parse is 1).  Returns size on
+// success.  On a parse error ERR_XML_PARSE is stored in lastError_
+// and returned; if lastError_ is already non-zero it is returned
+// without parsing.
+ssize_t XmlParser::parseFinal(const char* data, size_t size)
 {
-  const ssize_t bufSize = 4096;
-  unsigned char buf[bufSize];
-  SessionData sessionData(psm_);
-  XML_Parser parser = createParser(&sessionData);
-  auto_delete<XML_Parser> deleter(parser, XML_ParserFree);
-  int64_t readOffset = 0;
-  while(1) {
-    ssize_t res = bs->readData(buf, bufSize, readOffset);
-    if(res == 0) {
-      break;
-    }
-    if(XML_Parse(parser, reinterpret_cast<const char*>(buf), res, 0) ==
-       XML_STATUS_ERROR) {
-      return false;
-    }
-    readOffset += res;
+  if(lastError_ != 0) {
+    return lastError_;
+  }
+  XML_Status rv = XML_Parse(ctx_, data, size, 1);
+  if(rv == XML_STATUS_ERROR) {
+    return lastError_ = ERR_XML_PARSE;
+  } else {
+    return size;
   }
-  return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
 }
 
-bool XmlParser::parseMemory(const char* xml, size_t size)
+// Prepares the parser for a new document: resets the state machine
+// and the buffered character data, then resets the Expat parser.
+// XML_ParserReset clears the registered handlers, so they are
+// installed again through setupParser.  Returns 0 on success, in
+// which case any error left by an earlier parse is cleared so that
+// parseUpdate()/parseFinal() accept input again.  Returns ERR_RESET
+// (also stored in lastError_) if Expat cannot reset the parser.
+int XmlParser::reset()
 {
-  SessionData sessionData(psm_);
-  XML_Parser parser = createParser(&sessionData);
-  auto_delete<XML_Parser> deleter(parser, XML_ParserFree);
-  if(XML_Parse(parser, xml, size, 0) == XML_STATUS_ERROR) {
-    return false;
+  psm_->reset();
+  sessionData_.reset();
+  XML_Bool rv = XML_ParserReset(ctx_, 0);
+  if(rv == XML_FALSE) {
+    return lastError_ = ERR_RESET;
+  } else {
+    setupParser(ctx_, &sessionData_);
+    // Without this, an error from a previous document would make
+    // every parse call on the freshly reset parser fail.
+    lastError_ = 0;
+    return 0;
   }
-  return XML_Parse(parser, 0, 0, 1) != XML_STATUS_ERROR && psm_->finished();
 }
 
+} // namespace xml
+
 } // namespace aria2

+ 15 - 9
src/ExpatXmlParser.h

@@ -2,7 +2,7 @@
 /*
  * aria2 - The high speed download utility
  *
- * Copyright (C) 2011 Tatsuhiro Tsujikawa
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -37,28 +37,34 @@
 
 #include "common.h"
 
+#include <sys/types.h>
 #include <cstdlib>
 
+#include <expat.h>
+
+#include "XmlParser.h"
+
 namespace aria2 {
 
-class BinaryStream;
-class ParserStateMachine;
-class BufferedFile;
+namespace xml {
 
+// Push-style XML parser backed by Expat.  Input is supplied chunk by
+// chunk and parse events are forwarded to the ParserStateMachine
+// given at construction.
+// NOTE(review): the class holds a raw XML_Parser that is freed in the
+// destructor but declares no copy constructor/assignment -- confirm
+// that instances are never copied.
 class XmlParser {
 public:
   // This object does not delete psm.
   XmlParser(ParserStateMachine* psm);
   ~XmlParser();
-  bool parseFile(const char* filename);
-  bool parseBinaryStream(BinaryStream* binaryStream);
-  bool parseMemory(const char* xml, size_t size);
+  // Parses a non-final chunk.  Returns size on success, or a negative
+  // XmlError value on failure.
+  ssize_t parseUpdate(const char* data, size_t size);
+  // Parses the final chunk of the document.  Returns size on success,
+  // or a negative XmlError value on failure.
+  ssize_t parseFinal(const char* data, size_t size);
+  // Makes the parser ready for a new document.  Returns 0 on success,
+  // or ERR_RESET on failure.
+  int reset();
 private:
-  bool parseFile(BufferedFile& fp);
-
   ParserStateMachine* psm_;
+  // State handed to the Expat callbacks as user data.
+  SessionData sessionData_;
+  // Expat parser handle; created in the constructor, freed in the
+  // destructor.
+  XML_Parser ctx_;
+  // 0 while no error has occurred, otherwise the XmlError value that
+  // the parse functions keep returning.
+  int lastError_;
 };
 
+} // namespace xml
+
 } // namespace aria2
 
 #endif // D_EXPAT_XML_PARSER_H

+ 1 - 1
src/Makefile.am

@@ -253,7 +253,7 @@ endif # !ENABLE_WEBSOCKET
 
 if HAVE_SOME_XMLLIB
 SRCS += XmlAttr.cc XmlAttr.h\
-	XmlParser.h\
+	XmlParser.cc XmlParser.h\
 	ParserStateMachine.h
 endif # HAVE_SOME_XMLLIB
 

+ 1 - 32
src/Xml2XmlParser.cc

@@ -32,7 +32,7 @@
  * files in the program, then also delete it here.
  */
 /* copyright --> */
-#include "XmlParser.h"
+#include "Xml2XmlParser.h"
 
 #include <cassert>
 #include <cstring>
@@ -42,7 +42,6 @@
 #include "A2STR.h"
 #include "a2functional.h"
 #include "XmlAttr.h"
-#include "util.h"
 
 namespace aria2 {
 
@@ -207,36 +206,6 @@ int XmlParser::reset()
   }
 }
 
-bool parseFile(const std::string& filename, ParserStateMachine* psm)
-{
-  int fd;
-  if(filename == DEV_STDIN) {
-    fd = STDIN_FILENO;
-  } else {
-    while((fd = a2open(utf8ToWChar(filename).c_str(),
-                       O_BINARY | O_RDONLY, OPEN_MODE)) == -1 && fd != EINTR);
-    if(fd == -1) {
-      return false;
-    }
-  }
-  XmlParser ps(psm);
-  char buf[4096];
-  ssize_t nread;
-  bool retval = true;
-  while((nread = read(fd, buf, sizeof(buf))) > 0) {
-    if(ps.parseUpdate(buf, nread) < 0) {
-      retval = false;
-      break;
-    }
-  }
-  if(nread == 0 && retval) {
-    if(ps.parseFinal(0, 0) < 0) {
-      retval = false;
-    }
-  }
-  return retval;
-}
-
 } // namespace xml
 
 } // namespace aria2

+ 2 - 25
src/Xml2XmlParser.h

@@ -39,35 +39,14 @@
 
 #include <sys/types.h>
 
-#include <cstdlib>
-#include <string>
-#include <deque>
-
 #include <libxml/parser.h>
 
-namespace aria2 {
+#include "XmlParser.h"
 
-class ParserStateMachine;
+namespace aria2 {
 
 namespace xml {
 
-enum XmlError {
-  ERR_XML_PARSE = -1,
-  ERR_RESET = -2
-};
-
-struct SessionData {
-  std::deque<std::string> charactersStack;
-  ParserStateMachine* psm;
-  SessionData(ParserStateMachine* psm)
-    : psm(psm)
-  {}
-  void reset()
-  {
-    charactersStack.clear();
-  }
-};
-
 class XmlParser {
 public:
   // This object does not delete psm.
@@ -83,8 +62,6 @@ private:
   int lastError_;
 };
 
-bool parseFile(const std::string& filename, ParserStateMachine* psm);
-
 } // namespace xml
 
 } // namespace aria2

+ 75 - 0
src/XmlParser.cc

@@ -0,0 +1,75 @@
+/* <!-- copyright */
+/*
+ * aria2 - The high speed download utility
+ *
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * In addition, as a special exception, the copyright holders give
+ * permission to link the code of portions of this program with the
+ * OpenSSL library under certain conditions as described in each
+ * individual source file, and distribute linked combinations
+ * including the two.
+ * You must obey the GNU General Public License in all respects
+ * for all of the code used other than OpenSSL.  If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so.  If you
+ * do not wish to do so, delete this exception statement from your
+ * version.  If you delete this exception statement from all source
+ * files in the program, then also delete it here.
+ */
+/* copyright --> */
+#include "XmlParser.h"
+
+#include <cerrno>
+
+#include "a2io.h"
+#include "util.h"
+
+namespace aria2 {
+
+namespace xml {
+
+// Reads the XML document stored in filename (or standard input when
+// filename equals DEV_STDIN) and feeds it in 4096-byte chunks to an
+// XmlParser driven by psm.  Returns true when the whole input was
+// read and parsed without error.  Returns false if the file cannot be
+// opened, a read fails, or the parser reports an error.  A descriptor
+// opened here is closed before returning; standard input is left
+// open.
+bool parseFile(const std::string& filename, ParserStateMachine* psm)
+{
+  int fd;
+  bool ownsFd = false;
+  if(filename == DEV_STDIN) {
+    fd = STDIN_FILENO;
+  } else {
+    // Retry only when the open call was interrupted by a signal.  The
+    // reason for a failure is reported in errno, not in the returned
+    // -1, so comparing fd with EINTR would loop forever on any error.
+    while((fd = a2open(utf8ToWChar(filename).c_str(),
+                       O_BINARY | O_RDONLY, OPEN_MODE)) == -1 &&
+          errno == EINTR);
+    if(fd == -1) {
+      return false;
+    }
+    ownsFd = true;
+  }
+  XmlParser ps(psm);
+  char buf[4096];
+  ssize_t nread;
+  bool retval = true;
+  while((nread = read(fd, buf, sizeof(buf))) > 0) {
+    if(ps.parseUpdate(buf, nread) < 0) {
+      retval = false;
+      break;
+    }
+  }
+  if(nread < 0) {
+    // The read failed, so the document is incomplete.
+    retval = false;
+  } else if(nread == 0 && retval) {
+    // End of input reached: tell the parser the document is complete.
+    if(ps.parseFinal(0, 0) < 0) {
+      retval = false;
+    }
+  }
+  if(ownsFd) {
+    close(fd);
+  }
+  return retval;
+}
+
+} // namespace xml
+
+} // namespace aria2

+ 41 - 0
src/XmlParser.h

@@ -37,10 +37,51 @@
 
 #include "common.h"
 
+#include <cstdlib>
+#include <string>
+#include <deque>
+
+namespace aria2 {
+
+class ParserStateMachine;
+
+namespace xml {
+
+// Error codes returned by the XmlParser member functions.  Both
+// values are negative, so callers can detect failure with `< 0`.
+enum XmlError {
+  // The underlying XML library reported a parse error.
+  ERR_XML_PARSE = -1,
+  // The underlying parser could not be reset.
+  ERR_RESET = -2
+};
+
+// State shared between an XmlParser and the callbacks of the
+// underlying XML library.
+struct SessionData {
+  // Character data collected for elements that are currently open; the
+  // front entry belongs to the most recently opened element.
+  std::deque<std::string> charactersStack;
+  // State machine that receives the parse events.  The pointer is only
+  // stored; this struct never deletes it.
+  ParserStateMachine* psm;
+  SessionData(ParserStateMachine* psm)
+    : psm(psm)
+  {}
+  // Discards all buffered character data; psm is left unchanged.
+  void reset()
+  {
+    charactersStack.clear();
+  }
+};
+
+} // namespace xml
+
+} // namespace aria2
+
 #ifdef HAVE_LIBXML2
 # include "Xml2XmlParser.h"
 #elif HAVE_LIBEXPAT
 # include "ExpatXmlParser.h"
 #endif
 
+namespace aria2 {
+
+namespace xml {
+
+bool parseFile(const std::string& filename, ParserStateMachine* psm);
+
+} // namespace xml
+
+} // namespace aria2
+
 #endif // D_XML_PARSER_H