Преглед изворни кода

Rewritten HTTP header parser

Tatsuhiro Tsujikawa пре 13 година
родитељ
комит
5fc1b1ad29

+ 7 - 13
src/HttpConnection.cc

@@ -64,7 +64,7 @@ namespace aria2 {
 HttpRequestEntry::HttpRequestEntry
 (const SharedHandle<HttpRequest>& httpRequest)
   : httpRequest_(httpRequest),
-    proc_(new HttpHeaderProcessor())
+    proc_(new HttpHeaderProcessor(HttpHeaderProcessor::CLIENT_PARSER))
 {}
 
 HttpRequestEntry::~HttpRequestEntry() {}
@@ -130,34 +130,28 @@ SharedHandle<HttpResponse> HttpConnection::receiveResponse()
     throw DL_ABORT_EX(EX_NO_HTTP_REQUEST_ENTRY_FOUND);
   }
   HttpRequestEntryHandle entry = outstandingHttpRequests_.front();
-  HttpHeaderProcessorHandle proc = entry->getHttpHeaderProcessor();
+  const SharedHandle<HttpHeaderProcessor>& proc =
+    entry->getHttpHeaderProcessor();
   if(socketRecvBuffer_->bufferEmpty()) {
     if(socketRecvBuffer_->recv() == 0 &&
        !socket_->wantRead() && !socket_->wantWrite()) {
       throw DL_RETRY_EX(EX_GOT_EOF);
     }
   }
-  proc->update(socketRecvBuffer_->getBuffer(),
-               socketRecvBuffer_->getBufferLength());
   SharedHandle<HttpResponse> httpResponse;
-  size_t shiftBufferLength;
-  if(proc->eoh()) {
-    SharedHandle<HttpHeader> httpHeader = proc->getHttpResponseHeader();
-    size_t putbackDataLength = proc->getPutBackDataLength();
+  if(proc->parse(socketRecvBuffer_->getBuffer(),
+                 socketRecvBuffer_->getBufferLength())) {
+    const SharedHandle<HttpHeader>& httpHeader = proc->getResult();
     A2_LOG_INFO(fmt(MSG_RECEIVE_RESPONSE,
                     cuid_,
                     proc->getHeaderString().c_str()));
-    assert(socketRecvBuffer_->getBufferLength() >= putbackDataLength);
-    shiftBufferLength = socketRecvBuffer_->getBufferLength()-putbackDataLength;
     httpResponse.reset(new HttpResponse());
     httpResponse->setCuid(cuid_);
     httpResponse->setHttpHeader(httpHeader);
     httpResponse->setHttpRequest(entry->getHttpRequest());
     outstandingHttpRequests_.pop_front();
-  } else {
-    shiftBufferLength = socketRecvBuffer_->getBufferLength();
   }
-  socketRecvBuffer_->shiftBuffer(shiftBufferLength);
+  socketRecvBuffer_->shiftBuffer(proc->getLastBytesProcessed());
   return httpResponse;
 }
 

+ 10 - 47
src/HttpHeader.cc

@@ -216,53 +216,6 @@ void HttpHeader::setRequestPath(const std::string& requestPath)
   requestPath_ = requestPath;
 }
 
-void HttpHeader::fill
-(std::string::const_iterator first,
- std::string::const_iterator last)
-{
-  std::string name;
-  std::string value;
-  while(first != last) {
-    std::string::const_iterator j = first;
-    while(j != last && *j != '\r' && *j != '\n') {
-      ++j;
-    }
-    if(first != j) {
-      std::string::const_iterator sep = std::find(first, j, ':');
-      if(sep == j) {
-        // multiline header?
-        if(*first == ' ' || *first == '\t') {
-          std::pair<std::string::const_iterator,
-                    std::string::const_iterator> p = util::stripIter(first, j);
-          if(!name.empty() && p.first != p.second) {
-            if(!value.empty()) {
-              value += " ";
-            }
-            value.append(p.first, p.second);
-          }
-        }
-      } else {
-        if(!name.empty()) {
-          put(name, value);
-        }
-        std::pair<std::string::const_iterator,
-                  std::string::const_iterator> p = util::stripIter(first, sep);
-        name.assign(p.first, p.second);
-        util::lowercase(name);
-        p = util::stripIter(sep+1, j);
-        value.assign(p.first, p.second);
-      }
-    }
-    while(j != last && (*j == '\r' || *j == '\n')) {
-      ++j;
-    }
-    first = j;
-  }
-  if(!name.empty()) {
-    put(name, value);
-  }
-}
-
 void HttpHeader::clearField()
 {
   table_.clear();
@@ -293,6 +246,16 @@ const std::string& HttpHeader::getRequestPath() const
   return requestPath_;
 }
 
+const std::string& HttpHeader::getReasonPhrase() const
+{
+  return reasonPhrase_;
+}
+
+void HttpHeader::setReasonPhrase(const std::string& reasonPhrase)
+{
+  reasonPhrase_ = reasonPhrase;
+}
+
 bool HttpHeader::fieldContains(const std::string& name,
                                const std::string& value)
 {

+ 7 - 6
src/HttpHeader.h

@@ -54,6 +54,9 @@ private:
   // HTTP status code, e.g. 200
   int statusCode_;
 
+  // The reason-phrase for the response
+  std::string reasonPhrase_;
+
   // HTTP version, e.g. HTTP/1.1
   std::string version_;
 
@@ -83,6 +86,10 @@ public:
 
   void setStatusCode(int code);
 
+  const std::string& getReasonPhrase() const;
+
+  void setReasonPhrase(const std::string& reasonPhrase);
+
   const std::string& getVersion() const;
 
   void setVersion(const std::string& version);
@@ -113,12 +120,6 @@ public:
     requestPath_.assign(first, last);
   }
 
-  // Parses header fields in [first, last). Field name is stored in
-  // lowercase.
-  void fill
-  (std::string::const_iterator first,
-   std::string::const_iterator last);
-
   // Clears table_. responseStatus_ and version_ are unchanged.
   void clearField();
 

+ 295 - 93
src/HttpHeaderProcessor.cc

@@ -2,7 +2,7 @@
 /*
  * aria2 - The high speed download utility
  *
- * Copyright (C) 2006 Tatsuhiro Tsujikawa
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -46,128 +46,330 @@
 
 namespace aria2 {
 
-HttpHeaderProcessor::HttpHeaderProcessor():
-  limit_(21/*lines*/*8190/*per line*/) {}
-// The above values come from Apache's documentation
-// http://httpd.apache.org/docs/2.2/en/mod/core.html: See
-// LimitRequestFieldSize and LimitRequestLine directive.  Also the
-// page states that the number of request fields rarely exceeds 20.
-// aria2 uses this class in both client and server side.
+namespace {
+enum { // parser states kept in HttpHeaderProcessor::state_ between parse() calls
+  // Server mode: states for the Request-Line (method, path, HTTP-version)
+  PREV_METHOD,
+  METHOD,
+  PREV_PATH,
+  PATH,
+  PREV_REQ_VERSION,
+  REQ_VERSION,
+  // Client mode: states for the Status-Line (HTTP-version, status-code, reason-phrase)
+  PREV_RES_VERSION,
+  RES_VERSION,
+  PREV_STATUS_CODE,
+  STATUS_CODE,
+  PREV_REASON_PHRASE,
+  REASON_PHRASE,
+  // name/value header fields (shared by both modes)
+  PREV_EOL, // CR was seen; only LF is accepted next
+  PREV_FIELD_NAME, // at the start of a line: new field, continuation line or blank line
+  FIELD_NAME,
+  PREV_FIELD_VALUE,
+  FIELD_VALUE,
+  // End of header
+  PREV_EOH, // CR of the terminating blank line was seen; only LF is accepted next
+  HEADERS_COMPLETE // terminal state: parse() returns true
+};
+} // namespace
+
+HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode) // mode selects Status-Line (client) or Request-Line (server) parsing
+  : mode_(mode),
+    state_(mode == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD), // same initial state that clear() restores
+    lastBytesProcessed_(0),
+    result_(new HttpHeader()) // filled in incrementally by parse()
+{}
 
 HttpHeaderProcessor::~HttpHeaderProcessor() {}
 
-void HttpHeaderProcessor::update(const unsigned char* data, size_t length)
+namespace {
+size_t getToken(std::string& buf, // scanned bytes are appended to buf, not assigned
+                const unsigned char* data, size_t length, size_t off) // scanning starts at data[off]
 {
-  checkHeaderLimit(length);
-  buf_.append(&data[0], &data[length]);
+  size_t j;
+  for(j = off; j < length && !util::isLws(data[j]) && !util::isCRLF(data[j]);
+      ++j); // empty body: j stops at the first SP/HTAB/CR/LF, or at length
+  buf.append(&data[off], &data[j]);
+  return j-1; // index of the last byte consumed; parse()'s loop ++i then resumes at j
 }
+} // namespace
 
-void HttpHeaderProcessor::update(const std::string& data)
+namespace {
+size_t getFieldNameToken(std::string& buf, // scanned bytes are appended to buf
+                         const unsigned char* data, size_t length, size_t off) // scanning starts at data[off]
 {
-  checkHeaderLimit(data.size());
-  buf_ += data;
+  size_t j;
+  for(j = off; j < length && data[j] != ':' &&
+        !util::isLws(data[j]) && !util::isCRLF(data[j]); ++j); // like getToken, but ':' also ends the token
+  buf.append(&data[off], &data[j]);
+  return j-1; // last byte consumed; equals off-1 when data[off] is ':' and nothing was consumed
 }
+} // namespace
 
-void HttpHeaderProcessor::checkHeaderLimit(size_t incomingLength)
+namespace {
+size_t getText(std::string& buf, // scanned bytes are appended to buf
+               const unsigned char* data, size_t length, size_t off) // scanning starts at data[off]
 {
-  if(buf_.size()+incomingLength > limit_) {
-    throw DL_ABORT_EX2("Too large http header",
-                       error_code::HTTP_PROTOCOL_ERROR);
-  }
+  size_t j;
+  for(j = off; j < length && !util::isCRLF(data[j]); ++j); // only CR or LF ends the text; SP/HTAB are kept
+  buf.append(&data[off], &data[j]);
+  return j-1; // index of the last byte consumed; parse()'s loop ++i then resumes at j
 }
+} // namespace
 
-bool HttpHeaderProcessor::eoh() const
+bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length) // incremental: state_, buf_ and lastFieldName_ persist across calls
 {
-  if(buf_.find("\r\n\r\n") == std::string::npos &&
-     buf_.find("\n\n") == std::string::npos) {
-    return false;
-  } else {
-    return true;
+  size_t i; // index into data; after the loop, the number of bytes consumed
+  lastBytesProcessed_ = 0; // stays 0 if an exception is thrown below
+  for(i = 0; i < length; ++i) { // throws DL_ABORT_EX on a malformed or oversized header
+    unsigned char c = data[i];
+    switch(state_) {
+    case PREV_METHOD:
+      if(util::isLws(c) || util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: missing method");
+      } else {
+        i = getToken(buf_, data, length, i); // helper returns the last consumed index
+        state_ = METHOD;
+      }
+      break;
+    case METHOD:
+      if(util::isLws(c)) {
+        result_->setMethod(buf_);
+        buf_.clear();
+        state_ = PREV_PATH;
+      } else if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: missing request-target");
+      } else {
+        i = getToken(buf_, data, length, i); // token continued from a previous parse() call
+      }
+      break;
+    case PREV_PATH:
+      if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: missing request-target");
+      } else if(!util::isLws(c)) {
+        i = getToken(buf_, data, length, i);
+        state_ = PATH;
+      }
+      break;
+    case PATH:
+      if(util::isLws(c)) {
+        result_->setRequestPath(buf_);
+        buf_.clear();
+        state_ = PREV_REQ_VERSION;
+      } else if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
+      } else {
+        i = getToken(buf_, data, length, i);
+      }
+      break;
+    case PREV_REQ_VERSION:
+      if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
+      } else if(!util::isLws(c)) {
+        i = getToken(buf_, data, length, i);
+        state_ = REQ_VERSION;
+      }
+      break;
+    case REQ_VERSION:
+      if(util::isCRLF(c)) {
+        result_->setVersion(buf_);
+        buf_.clear();
+        if(c == '\n') { // a bare LF is accepted as a line terminator
+          state_ = PREV_FIELD_NAME;
+        } else {
+          state_ = PREV_EOL;
+        }
+      } else if(util::isLws(c)) {
+        throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version");
+      } else {
+        i = getToken(buf_, data, length, i);
+      }
+      break;
+    case PREV_RES_VERSION:
+      if(util::isLws(c) || util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version");
+      } else {
+        i = getToken(buf_, data, length, i);
+        state_ = RES_VERSION;
+      }
+      break;
+    case RES_VERSION: // as in METHOD/PATH, the token may continue from a previous parse() call
+      if(util::isLws(c)) {
+        result_->setVersion(buf_);
+        buf_.clear();
+        state_ = PREV_STATUS_CODE;
+      } else if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Status-Line: missing status-code");
+      } else { i = getToken(buf_, data, length, i); } // without this the rest of a split version token was dropped
+      break;
+    case PREV_STATUS_CODE:
+      if(util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad Status-Line: missing status-code");
+      } else if(!util::isLws(c)) {
+        state_ = STATUS_CODE;
+        i = getToken(buf_, data, length, i);
+      }
+      break;
+    case STATUS_CODE:
+      if(util::isLws(c) || util::isCRLF(c)) {
+        int statusCode = -1;
+        if(buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) { // exactly three digits
+          statusCode = (buf_[0]-'0')*100 + (buf_[1]-'0')*10 + (buf_[2]-'0');
+        }
+        if(statusCode >= 100) {
+          result_->setStatusCode(statusCode);
+          buf_.clear();
+        } else {
+          throw DL_ABORT_EX("Bad status code: bad status-code");
+        }
+        if(c == '\r') {
+          state_ = PREV_EOL;
+        } else if(c == '\n') {
+          state_ = PREV_FIELD_NAME;
+        } else {
+          state_ = PREV_REASON_PHRASE;
+        }
+      } else {
+        i = getToken(buf_, data, length, i);
+      }
+      break;
+    case PREV_REASON_PHRASE:
+      if(util::isCRLF(c)) {
+        // The reason-phrase is completely optional.
+        if(c == '\n') {
+          state_ = PREV_FIELD_NAME;
+        } else {
+          state_ = PREV_EOL;
+        }
+      } else if(!util::isLws(c)) {
+        state_ = REASON_PHRASE;
+        i = getText(buf_, data, length, i); // text may contain spaces; only CR/LF ends it
+      }
+      break;
+    case REASON_PHRASE:
+      if(util::isCRLF(c)) {
+        result_->setReasonPhrase(buf_);
+        buf_.clear();
+        if(c == '\n') {
+          state_ = PREV_FIELD_NAME;
+        } else {
+          state_ = PREV_EOL;
+        }
+      } else {
+        i = getText(buf_, data, length, i);
+      }
+      break;
+    case PREV_EOL:
+      if(c == '\n') {
+        state_ = PREV_FIELD_NAME;
+      } else {
+        throw DL_ABORT_EX("Bad HTTP header: missing LF");
+      }
+      break;
+    case PREV_FIELD_NAME:
+      if(util::isLws(c)) {
+        // Evil Multi-line header field
+        state_ = FIELD_VALUE; // keep appending to buf_ for the field named in lastFieldName_
+      } else {
+        if(!lastFieldName_.empty()) { // a new line starts, so the pending field is complete: store it
+          util::lowercase(lastFieldName_);
+          result_->put(lastFieldName_, util::strip(buf_));
+          lastFieldName_.clear();
+          buf_.clear();
+        }
+        if(c == '\n') { // blank line terminated by a bare LF
+          state_ = HEADERS_COMPLETE;
+        } else if(c == '\r') {
+          state_ = PREV_EOH;
+        } else {
+          state_ = FIELD_NAME;
+          i = getFieldNameToken(lastFieldName_, data, length, i);
+        }
+      }
+      break;
+    case FIELD_NAME:
+      if(util::isLws(c) || util::isCRLF(c)) {
+        throw DL_ABORT_EX("Bad HTTP header: missing ':'");
+      } else if(c == ':') {
+        state_ = PREV_FIELD_VALUE;
+      } else {
+        i = getFieldNameToken(lastFieldName_, data, length, i);
+      }
+      break;
+    case PREV_FIELD_VALUE:
+      if(c == '\r') {
+        state_ = PREV_EOL;
+      } else if(c == '\n') {
+        state_ = PREV_FIELD_NAME;
+      } else if(!util::isLws(c)) { // leading SP/HTAB before the value is skipped
+        state_ = FIELD_VALUE;
+        i = getText(buf_, data, length, i);
+      }
+      break;
+    case FIELD_VALUE:
+      if(c == '\r') {
+        state_ = PREV_EOL;
+      } else if(c == '\n') {
+        state_ = PREV_FIELD_NAME;
+      } else {
+        i = getText(buf_, data, length, i);
+      }
+      break;
+    case PREV_EOH:
+      if(c == '\n') {
+        state_ = HEADERS_COMPLETE;
+      } else {
+        throw DL_ABORT_EX("Bad HTTP header: "
+                          "missing LF at the end of the header");
+      }
+      break;
+    case HEADERS_COMPLETE:
+      goto fin; // stop without consuming data[i]: bytes after the header are not counted as processed
+    }
   }
+ fin:
+  // See Apache's documentation
+  // http://httpd.apache.org/docs/2.2/en/mod/core.html about size
+  // limit of HTTP headers. The page states that the number of request
+  // fields rarely exceeds 20.
+  if(lastFieldName_.size() > 1024 || buf_.size() > 8192) { // limits apply to the token currently being accumulated
+    throw DL_ABORT_EX("Too large HTTP header");
+  }
+  lastBytesProcessed_ = i;
+  headers_.append(&data[0], &data[i]); // raw header text, returned by getHeaderString()
+  return state_ == HEADERS_COMPLETE; // true once the terminating blank line has been seen
 }
 
-size_t HttpHeaderProcessor::getPutBackDataLength() const
+bool HttpHeaderProcessor::parse(const std::string& data)
 {
-  std::string::size_type delimpos = std::string::npos;
-  if((delimpos = buf_.find("\r\n\r\n")) != std::string::npos) {
-    return buf_.size()-(delimpos+4);
-  } else if((delimpos = buf_.find("\n\n")) != std::string::npos) {
-    return buf_.size()-(delimpos+2);
-  } else {
-    return 0;
-  }
+  return parse(reinterpret_cast<const unsigned char*>(data.c_str()),
+               data.size());
 }
 
-void HttpHeaderProcessor::clear()
+size_t HttpHeaderProcessor::getLastBytesProcessed() const
 {
-  buf_.erase();
+  return lastBytesProcessed_;
 }
 
-SharedHandle<HttpHeader> HttpHeaderProcessor::getHttpResponseHeader()
+void HttpHeaderProcessor::clear()
 {
-  std::string::size_type delimpos = std::string::npos;
-  if(((delimpos = buf_.find("\r\n")) == std::string::npos &&
-      (delimpos = buf_.find("\n")) == std::string::npos) ||
-     delimpos < 12) {
-    throw DL_RETRY_EX(EX_NO_STATUS_HEADER);
-  }
-  int32_t statusCode;
-  if(!util::parseIntNoThrow(statusCode,
-                            std::string(buf_.begin()+9, buf_.begin()+12))) {
-    throw DL_RETRY_EX("Status code could not be parsed as integer.");
-  }
-  HttpHeaderHandle httpHeader(new HttpHeader());
-  httpHeader->setVersion(buf_.begin(), buf_.begin()+8);
-  httpHeader->setStatusCode(statusCode);
-  // TODO 1st line(HTTP/1.1 200...) is also send to HttpHeader, but it should
-  // not.
-  if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos &&
-     (delimpos = buf_.find("\n\n")) == std::string::npos) {
-    delimpos = buf_.size();
-  }
-  httpHeader->fill(buf_.begin(), buf_.begin()+delimpos);
-  return httpHeader;
+  state_ = (mode_ == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD);
+  lastBytesProcessed_ = 0;
+  buf_.clear();
+  lastFieldName_.clear();
+  result_.reset(new HttpHeader());
+  headers_.clear();
 }
 
-SharedHandle<HttpHeader> HttpHeaderProcessor::getHttpRequestHeader()
+const SharedHandle<HttpHeader>& HttpHeaderProcessor::getResult() const
 {
-  // The minimum case of the first line is:
-  // GET / HTTP/1.x
-  // At least 14bytes before \r\n or \n.
-  std::string::size_type delimpos = std::string::npos;
-  if(((delimpos = buf_.find("\r\n")) == std::string::npos &&
-      (delimpos = buf_.find("\n")) == std::string::npos) ||
-     delimpos < 14) {
-    throw DL_RETRY_EX(EX_NO_STATUS_HEADER);
-  }
-  std::vector<Scip> firstLine;
-  util::splitIter(buf_.begin(), buf_.begin()+delimpos,
-                  std::back_inserter(firstLine), ' ', true);
-  if(firstLine.size() != 3) {
-    throw DL_ABORT_EX2("Malformed HTTP request header.",
-                       error_code::HTTP_PROTOCOL_ERROR);
-  }
-  SharedHandle<HttpHeader> httpHeader(new HttpHeader());
-  httpHeader->setMethod(firstLine[0].first, firstLine[0].second);
-  httpHeader->setRequestPath(firstLine[1].first, firstLine[1].second);
-  httpHeader->setVersion(firstLine[2].first, firstLine[2].second);
-  if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos &&
-     (delimpos = buf_.find("\n\n")) == std::string::npos) {
-    delimpos = buf_.size();
-  }
-  httpHeader->fill(buf_.begin(), buf_.begin()+delimpos);
-  return httpHeader;
+  return result_;
 }
 
 std::string HttpHeaderProcessor::getHeaderString() const
 {
-  std::string::size_type delimpos = std::string::npos;
-  if((delimpos = buf_.find("\r\n\r\n")) == std::string::npos &&
-     (delimpos = buf_.find("\n\n")) == std::string::npos) {
-    return buf_;
-  } else {
-    return buf_.substr(0, delimpos);
-  }
+  return headers_;
 }
 
 } // namespace aria2

+ 27 - 27
src/HttpHeaderProcessor.h

@@ -2,7 +2,7 @@
 /*
  * aria2 - The high speed download utility
  *
- * Copyright (C) 2006 Tatsuhiro Tsujikawa
+ * Copyright (C) 2012 Tatsuhiro Tsujikawa
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -47,51 +47,51 @@ namespace aria2 {
 class HttpHeader;
 
 class HttpHeaderProcessor {
-private:
-  std::string buf_;
-  size_t limit_;
-
-  void checkHeaderLimit(size_t incomingLength);
-
 public:
-  HttpHeaderProcessor();
+  enum ParserMode {
+    CLIENT_PARSER,
+    SERVER_PARSER
+  };
 
-  ~HttpHeaderProcessor();
-
-  void update(const unsigned char* data, size_t length);
-
-  void update(const std::string& data);
+  HttpHeaderProcessor(ParserMode mode);
 
+  ~HttpHeaderProcessor();
   /**
-   * Returns true if end of header is reached.
+   * Parses incoming data. Returns true if end of header is reached.
+   * This function stops processing data when end of header is
+   * reached.
    */
-  bool eoh() const;
+  bool parse(const unsigned char* data, size_t length);
+  bool parse(const std::string& data);
 
   /**
-   * Retruns the number of bytes beyond the end of header.
+   * Returns the number of bytes processed in the last invocation of
+   * parse().
    */
-  size_t getPutBackDataLength() const;
+  size_t getLastBytesProcessed() const;
 
   /**
    * Processes the received header as a http response header and returns
    * HttpHeader object.
    */
-  SharedHandle<HttpHeader> getHttpResponseHeader();
-
-  SharedHandle<HttpHeader> getHttpRequestHeader();
+  const SharedHandle<HttpHeader>& getResult() const;
 
   std::string getHeaderString() const;
 
+  /**
+   * Resets the internal state so the parser is ready for the next header.
+   */
   void clear();
-
-  void setHeaderLimit(size_t limit)
-  {
-    limit_ = limit;
-  }
+private:
+  ParserMode mode_;
+  int state_;
+  size_t lastBytesProcessed_;
+  std::string buf_;
+  std::string lastFieldName_;
+  SharedHandle<HttpHeader> result_;
+  std::string headers_;
 };
 
-typedef SharedHandle<HttpHeaderProcessor> HttpHeaderProcessorHandle;
-
 } // namespace aria2
 
 #endif // D_HTTP_HEADER_PROCESSOR_H

+ 9 - 11
src/HttpServer.cc

@@ -60,7 +60,8 @@ HttpServer::HttpServer
    socketRecvBuffer_(new SocketRecvBuffer(socket_)),
    socketBuffer_(socket),
    e_(e),
-   headerProcessor_(new HttpHeaderProcessor()),
+   headerProcessor_(new HttpHeaderProcessor
+                    (HttpHeaderProcessor::SERVER_PARSER)),
    keepAlive_(true),
    gzip_(false),
    acceptsPersistentConnection_(true),
@@ -129,15 +130,13 @@ SharedHandle<HttpHeader> HttpServer::receiveRequest()
       throw DL_ABORT_EX(EX_EOF_FROM_PEER);
     }
   }
-  headerProcessor_->update(socketRecvBuffer_->getBuffer(),
-                           socketRecvBuffer_->getBufferLength());
-  if(headerProcessor_->eoh()) {
-    SharedHandle<HttpHeader> header = headerProcessor_->getHttpRequestHeader();
-    size_t putbackDataLength = headerProcessor_->getPutBackDataLength();
+  SharedHandle<HttpHeader> header;
+  if(headerProcessor_->parse(socketRecvBuffer_->getBuffer(),
+                             socketRecvBuffer_->getBufferLength())) {
+    header = headerProcessor_->getResult();
     A2_LOG_INFO(fmt("HTTP Server received request\n%s",
                     headerProcessor_->getHeaderString().c_str()));
-    socketRecvBuffer_->shiftBuffer
-      (socketRecvBuffer_->getBufferLength()-putbackDataLength);
+    socketRecvBuffer_->shiftBuffer(headerProcessor_->getLastBytesProcessed());
     lastRequestHeader_ = header;
     lastBody_.clear();
     lastBody_.str("");
@@ -175,11 +174,10 @@ SharedHandle<HttpHeader> HttpServer::receiveRequest()
         break;
       }
     }
-    return header;
   } else {
-    socketRecvBuffer_->clearBuffer();
-    return SharedHandle<HttpHeader>();
+    socketRecvBuffer_->shiftBuffer(headerProcessor_->getLastBytesProcessed());
   }
+  return header;
 }
 
 bool HttpServer::receiveBody()

+ 3 - 4
src/LpdMessageReceiver.cc

@@ -87,13 +87,12 @@ SharedHandle<LpdMessage> LpdMessageReceiver::receiveMessage()
     if(length == 0) {
       return msg;
     }
-    HttpHeaderProcessor proc;
-    proc.update(buf, length);
-    if(!proc.eoh()) {
+    HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER);
+    if(!proc.parse(buf, length)) {
       msg.reset(new LpdMessage());
       return msg;
     }
-    SharedHandle<HttpHeader> header = proc.getHttpRequestHeader();
+    const SharedHandle<HttpHeader>& header = proc.getResult();
     static const std::string A2_INFOHASH = "infohash";
     static const std::string A2_PORT = "port";
     const std::string& infoHashString = header->find(A2_INFOHASH);

+ 9 - 0
src/util.cc

@@ -313,6 +313,15 @@ bool inRFC2616HttpToken(const char c)
     std::find(vbegin(chars), vend(chars), c) != vend(chars);
 }
 
+bool isLws(const char c) // true if c is a space or a horizontal tab
+{
+  return c == ' ' || c == '\t';
+}
+bool isCRLF(const char c) // true if c is a single CR or LF byte (not the two-byte CRLF sequence)
+{
+  return c == '\r' || c == '\n';
+}
+
 namespace {
 bool isUtf8Tail(unsigned char ch)
 {

+ 4 - 0
src/util.h

@@ -370,6 +370,10 @@ bool isHexDigit(const char c);
 
 bool isHexDigit(const std::string& s);
 
+bool isLws(const char c);
+
+bool isCRLF(const char c);
+
 template<typename InputIterator>
 bool isLowercase(InputIterator first, InputIterator last)
 {

+ 103 - 92
test/HttpHeaderProcessorTest.cc

@@ -13,12 +13,12 @@ namespace aria2 {
 class HttpHeaderProcessorTest:public CppUnit::TestFixture {
 
   CPPUNIT_TEST_SUITE(HttpHeaderProcessorTest);
-  CPPUNIT_TEST(testUpdate1);
-  CPPUNIT_TEST(testUpdate2);
-  CPPUNIT_TEST(testGetPutBackDataLength);
-  CPPUNIT_TEST(testGetPutBackDataLength_nullChar);
+  CPPUNIT_TEST(testParse1);
+  CPPUNIT_TEST(testParse2);
+  CPPUNIT_TEST(testParse3);
+  CPPUNIT_TEST(testGetLastBytesProcessed);
+  CPPUNIT_TEST(testGetLastBytesProcessed_nullChar);
   CPPUNIT_TEST(testGetHttpResponseHeader);
-  CPPUNIT_TEST(testGetHttpResponseHeader_empty);
   CPPUNIT_TEST(testGetHttpResponseHeader_statusOnly);
   CPPUNIT_TEST(testGetHttpResponseHeader_insufficientStatusLength);
   CPPUNIT_TEST(testBeyondLimit);
@@ -27,12 +27,12 @@ class HttpHeaderProcessorTest:public CppUnit::TestFixture {
   CPPUNIT_TEST_SUITE_END();
   
 public:
-  void testUpdate1();
-  void testUpdate2();
-  void testGetPutBackDataLength();
-  void testGetPutBackDataLength_nullChar();
+  void testParse1();
+  void testParse2();
+  void testParse3();
+  void testGetLastBytesProcessed();
+  void testGetLastBytesProcessed_nullChar();
   void testGetHttpResponseHeader();
-  void testGetHttpResponseHeader_empty();
   void testGetHttpResponseHeader_statusOnly();
   void testGetHttpResponseHeader_insufficientStatusLength();
   void testBeyondLimit();
@@ -43,60 +43,84 @@ public:
 
 CPPUNIT_TEST_SUITE_REGISTRATION( HttpHeaderProcessorTest );
 
-void HttpHeaderProcessorTest::testUpdate1()
+void HttpHeaderProcessorTest::testParse1()
 {
-  HttpHeaderProcessor proc;
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
   std::string hd1 = "HTTP/1.1 200 OK\r\n";
-  proc.update(hd1);
-  CPPUNIT_ASSERT(!proc.eoh());
-  proc.update("\r\n");
-  CPPUNIT_ASSERT(proc.eoh());
+  CPPUNIT_ASSERT(!proc.parse(hd1));
+  CPPUNIT_ASSERT(proc.parse("\r\n"));
 }
 
-void HttpHeaderProcessorTest::testUpdate2()
+void HttpHeaderProcessorTest::testParse2()
 {
-  HttpHeaderProcessor proc;
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
   std::string hd1 = "HTTP/1.1 200 OK\n";
-  proc.update(hd1);
-  CPPUNIT_ASSERT(!proc.eoh());
-  proc.update("\n");
-  CPPUNIT_ASSERT(proc.eoh());
+  CPPUNIT_ASSERT(!proc.parse(hd1));
+  CPPUNIT_ASSERT(proc.parse("\n"));
 }
 
-void HttpHeaderProcessorTest::testGetPutBackDataLength()
+void HttpHeaderProcessorTest::testParse3()
 {
-  HttpHeaderProcessor proc;
-  std::string hd1 = "HTTP/1.1 200 OK\r\n"
+  HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER);
+  std::string s =
+    "GET / HTTP/1.1\r\n"
+    "Host: aria2.sourceforge.net\r\n"
+    "Connection: close \r\n" // trailing white space (BWS)
+    "Multi-Line: text1\r\n" // Multi-line header
+    "  text2\r\n"
+    "  text3\r\n"
+    "Duplicate: foo\r\n"
+    "Duplicate: bar\r\n"
+    "\r\n";
+  CPPUNIT_ASSERT(proc.parse(s));
+  SharedHandle<HttpHeader> h = proc.getResult();
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sourceforge.net"),
+                       h->find("host"));
+  CPPUNIT_ASSERT_EQUAL(std::string("close"),
+                       h->find("connection"));
+  CPPUNIT_ASSERT_EQUAL(std::string("text1 text2 text3"),
+                       h->find("multi-line"));
+  CPPUNIT_ASSERT_EQUAL(std::string("foo"),
+                       h->findAll("duplicate")[0]);
+  CPPUNIT_ASSERT_EQUAL(std::string("bar"),
+                       h->findAll("duplicate")[1]);
+}
+
+void HttpHeaderProcessorTest::testGetLastBytesProcessed()
+{
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
+  std::string hd1 =
+    "HTTP/1.1 200 OK\r\n"
     "\r\nputbackme";
-  proc.update(hd1);
-  CPPUNIT_ASSERT(proc.eoh());
-  CPPUNIT_ASSERT_EQUAL((size_t)9, proc.getPutBackDataLength());
+  CPPUNIT_ASSERT(proc.parse(hd1));
+  CPPUNIT_ASSERT_EQUAL((size_t)19, proc.getLastBytesProcessed());
 
   proc.clear();
 
-  std::string hd2 = "HTTP/1.1 200 OK\n"
+  std::string hd2 =
+    "HTTP/1.1 200 OK\n"
     "\nputbackme";
-  proc.update(hd2);
-  CPPUNIT_ASSERT(proc.eoh());
-  CPPUNIT_ASSERT_EQUAL((size_t)9, proc.getPutBackDataLength());
+  CPPUNIT_ASSERT(proc.parse(hd2));
+  CPPUNIT_ASSERT_EQUAL((size_t)17, proc.getLastBytesProcessed());
 }
 
-void HttpHeaderProcessorTest::testGetPutBackDataLength_nullChar()
+void HttpHeaderProcessorTest::testGetLastBytesProcessed_nullChar()
 {
-  HttpHeaderProcessor proc;
-  const char* x = "HTTP/1.1 200 OK\r\n"
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
+  const char x[] =
+    "HTTP/1.1 200 OK\r\n"
     "foo: foo\0bar\r\n"
     "\r\nputbackme";
-  std::string hd1(&x[0], &x[42]);
-  proc.update(hd1);
-  CPPUNIT_ASSERT(proc.eoh());
-  CPPUNIT_ASSERT_EQUAL((size_t)9, proc.getPutBackDataLength());
+  std::string hd1(&x[0], &x[sizeof(x)-1]);
+  CPPUNIT_ASSERT(proc.parse(hd1));
+  CPPUNIT_ASSERT_EQUAL((size_t)33, proc.getLastBytesProcessed());
 }
 
 void HttpHeaderProcessorTest::testGetHttpResponseHeader()
 {
-  HttpHeaderProcessor proc;
-  std::string hd = "HTTP/1.1 200 OK\r\n"
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
+  std::string hd =
+    "HTTP/1.1 404 Not Found\r\n"
     "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n"
     "Server: Apache/2.2.3 (Debian)\r\n"
     "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n"
@@ -108,10 +132,11 @@ void HttpHeaderProcessorTest::testGetHttpResponseHeader()
     "\r\n"
     "Entity: body";
 
-  proc.update(hd);
+  CPPUNIT_ASSERT(proc.parse(hd));
 
-  SharedHandle<HttpHeader> header = proc.getHttpResponseHeader();
-  CPPUNIT_ASSERT_EQUAL(200, header->getStatusCode());
+  SharedHandle<HttpHeader> header = proc.getResult();
+  CPPUNIT_ASSERT_EQUAL(404, header->getStatusCode());
+  CPPUNIT_ASSERT_EQUAL(std::string("Not Found"), header->getReasonPhrase());
   CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1"), header->getVersion());
   CPPUNIT_ASSERT_EQUAL(std::string("Mon, 25 Jun 2007 16:04:59 GMT"),
                        header->find("date"));
@@ -124,66 +149,50 @@ void HttpHeaderProcessorTest::testGetHttpResponseHeader()
   CPPUNIT_ASSERT(!header->defined("entity"));
 }
 
-void HttpHeaderProcessorTest::testGetHttpResponseHeader_empty()
-{
-  HttpHeaderProcessor proc;
-
-  try {
-    proc.getHttpResponseHeader();
-    CPPUNIT_FAIL("Exception must be thrown.");
-  } catch(DlRetryEx& ex) {
-    std::cout << ex.stackTrace() << std::endl;
-  }
-  
-}
-
 void HttpHeaderProcessorTest::testGetHttpResponseHeader_statusOnly()
 {
-  HttpHeaderProcessor proc;
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
 
   std::string hd = "HTTP/1.1 200\r\n\r\n";
-  proc.update(hd);
-  SharedHandle<HttpHeader> header = proc.getHttpResponseHeader();
+  CPPUNIT_ASSERT(proc.parse(hd));
+  SharedHandle<HttpHeader> header = proc.getResult();
   CPPUNIT_ASSERT_EQUAL(200, header->getStatusCode());
 }
 
 void HttpHeaderProcessorTest::testGetHttpResponseHeader_insufficientStatusLength()
 {
-  HttpHeaderProcessor proc;
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
 
   std::string hd = "HTTP/1.1 20\r\n\r\n";
-  proc.update(hd);  
   try {
-    proc.getHttpResponseHeader();
+    proc.parse(hd);
     CPPUNIT_FAIL("Exception must be thrown.");
-  } catch(DlRetryEx& ex) {
-    std::cout << ex.stackTrace() << std::endl;
+  } catch(DlAbortEx& ex) {
+    // Success
   }
-  
 }
 
 void HttpHeaderProcessorTest::testBeyondLimit()
 {
-  HttpHeaderProcessor proc;
-  proc.setHeaderLimit(20);
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
 
   std::string hd1 = "HTTP/1.1 200 OK\r\n";
-  std::string hd2 = "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n";
+  std::string hd2 = std::string(1025, 'A');
 
-  proc.update(hd1);
-  
+  proc.parse(hd1);
   try {
-    proc.update(hd2);
+    proc.parse(hd2);
     CPPUNIT_FAIL("Exception must be thrown.");
   } catch(DlAbortEx& ex) {
-    std::cout << ex.stackTrace() << std::endl;
+    // Success
   }
 }
 
 void HttpHeaderProcessorTest::testGetHeaderString()
 {
-  HttpHeaderProcessor proc;
-  std::string hd = "HTTP/1.1 200 OK\r\n"
+  HttpHeaderProcessor proc(HttpHeaderProcessor::CLIENT_PARSER);
+  std::string hd =
+    "HTTP/1.1 200 OK\r\n"
     "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n"
     "Server: Apache/2.2.3 (Debian)\r\n"
     "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n"
@@ -194,33 +203,35 @@ void HttpHeaderProcessorTest::testGetHeaderString()
     "Content-Type: text/html; charset=UTF-8\r\n"
     "\r\nputbackme";
 
-  proc.update(hd);
-
-  CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1 200 OK\r\n"
-                                   "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n"
-                                   "Server: Apache/2.2.3 (Debian)\r\n"
-                                   "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n"
-                                   "ETag: \"594065-23e3-50825cc0\"\r\n"
-                                   "Accept-Ranges: bytes\r\n"
-                                   "Content-Length: 9187\r\n"
-                                   "Connection: close\r\n"
-                                   "Content-Type: text/html; charset=UTF-8"),
-                       proc.getHeaderString());
+  CPPUNIT_ASSERT(proc.parse(hd));
+
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("HTTP/1.1 200 OK\r\n"
+                 "Date: Mon, 25 Jun 2007 16:04:59 GMT\r\n"
+                 "Server: Apache/2.2.3 (Debian)\r\n"
+                 "Last-Modified: Tue, 12 Jun 2007 14:28:43 GMT\r\n"
+                 "ETag: \"594065-23e3-50825cc0\"\r\n"
+                 "Accept-Ranges: bytes\r\n"
+                 "Content-Length: 9187\r\n"
+                 "Connection: close\r\n"
+                 "Content-Type: text/html; charset=UTF-8\r\n"
+                 "\r\n"),
+     proc.getHeaderString());
 }
 
 void HttpHeaderProcessorTest::testGetHttpRequestHeader()
 {
-  HttpHeaderProcessor proc;
-  std::string request = "GET /index.html HTTP/1.1\r\n"
+  HttpHeaderProcessor proc(HttpHeaderProcessor::SERVER_PARSER);
+  std::string request =
+    "GET /index.html HTTP/1.1\r\n"
     "Host: host\r\n"
     "Connection: close\r\n"
     "\r\n"
     "Entity: body";
 
-  proc.update(request);
+  CPPUNIT_ASSERT(proc.parse(request));
 
-  SharedHandle<HttpHeader> httpHeader = proc.getHttpRequestHeader();
-  CPPUNIT_ASSERT(httpHeader);
+  SharedHandle<HttpHeader> httpHeader = proc.getResult();
   CPPUNIT_ASSERT_EQUAL(std::string("GET"), httpHeader->getMethod());
   CPPUNIT_ASSERT_EQUAL(std::string("/index.html"),httpHeader->getRequestPath());
   CPPUNIT_ASSERT_EQUAL(std::string("HTTP/1.1"), httpHeader->getVersion());

+ 0 - 27
test/HttpHeaderTest.cc

@@ -13,7 +13,6 @@ class HttpHeaderTest:public CppUnit::TestFixture {
   CPPUNIT_TEST(testGetRange);
   CPPUNIT_TEST(testFindAll);
   CPPUNIT_TEST(testClearField);
-  CPPUNIT_TEST(testFill);
   CPPUNIT_TEST(testFieldContains);
   CPPUNIT_TEST_SUITE_END();
   
@@ -21,11 +20,9 @@ public:
   void testGetRange();
   void testFindAll();
   void testClearField();
-  void testFill();
   void testFieldContains();
 };
 
-
 CPPUNIT_TEST_SUITE_REGISTRATION( HttpHeaderTest );
 
 void HttpHeaderTest::testGetRange()
@@ -153,30 +150,6 @@ void HttpHeaderTest::testClearField()
   CPPUNIT_ASSERT_EQUAL(std::string(HttpHeader::HTTP_1_1), h.getVersion());
 }
 
-void HttpHeaderTest::testFill()
-{
-  std::string s =
-    "Host: aria2.sourceforge.net\r\n"
-    "Connection: close \r\n" // trailing white space
-    "Multi-Line: text1\r\n"
-    "  text2\r\n"
-    "  text3\r\n"
-    "Duplicate: foo\r\n"
-    "Duplicate: bar\r\n";
-  HttpHeader h;
-  h.fill(s.begin(), s.end());
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2.sourceforge.net"),
-                       h.find("host"));
-  CPPUNIT_ASSERT_EQUAL(std::string("close"),
-                       h.find("connection"));
-  CPPUNIT_ASSERT_EQUAL(std::string("text1 text2 text3"),
-                       h.find("multi-line"));
-  CPPUNIT_ASSERT_EQUAL(std::string("foo"),
-                       h.findAll("duplicate")[0]);
-  CPPUNIT_ASSERT_EQUAL(std::string("bar"),
-                       h.findAll("duplicate")[1]);
-}
-
 void HttpHeaderTest::testFieldContains()
 {
   HttpHeader h;