Explorar o código

2010-02-18 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>

	Support RFC2231 "Parameter Value Character Set and Language
	Information" in Content-Disposition header.
	* src/HttpResponse.cc
	* src/util.cc
	* src/util.h
	* test/UtilTest.cc
Tatsuhiro Tsujikawa hai 15 anos
pai
achega
2da32876a2
Modificáronse 5 ficheiros con 164 adicións e 39 borrados
  1. 9 0
      ChangeLog
  2. 1 1
      src/HttpResponse.cc
  3. 93 29
      src/util.cc
  4. 7 4
      src/util.h
  5. 54 5
      test/UtilTest.cc

+ 9 - 0
ChangeLog

@@ -1,3 +1,12 @@
+2010-02-18  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
+
+	Support RFC2231 "Parameter Value Character Set and Language
+	Information" in Content-Disposition header.
+	* src/HttpResponse.cc
+	* src/util.cc
+	* src/util.h
+	* test/UtilTest.cc
+
 2010-02-16  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
 
 	Print CXXFLAGS

+ 1 - 1
src/HttpResponse.cc

@@ -110,7 +110,7 @@ std::string HttpResponse::determinFilename() const
   } else {
     logger->info(MSG_CONTENT_DISPOSITION_DETECTED,
                  cuid, contentDisposition.c_str());
-    return util::urldecode(contentDisposition);
+    return contentDisposition;
   }
 }
 

+ 93 - 29
src/util.cc

@@ -115,7 +115,7 @@ void split(std::pair<std::string, std::string>& hp, const std::string& src, char
   hp.second = A2STR::NIL;
   std::string::size_type p = src.find(delim);
   if(p == std::string::npos) {
-    hp.first = src;
+    hp.first = trim(src);
     hp.second = A2STR::NIL;
   } else {
     hp.first = trim(src.substr(0, p));
@@ -130,7 +130,7 @@ std::pair<std::string, std::string> split(const std::string& src, const std::str
   hp.second = A2STR::NIL;
   std::string::size_type p = src.find_first_of(delims);
   if(p == std::string::npos) {
-    hp.first = src;
+    hp.first = trim(src);
     hp.second = A2STR::NIL;
   } else {
     hp.first = trim(src.substr(0, p));
@@ -610,36 +610,100 @@ void parsePrioritizePieceRange
   result.insert(result.end(), indexes.begin(), indexes.end());
 }
 
-std::string getContentDispositionFilename(const std::string& header) {
-  static const std::string keyName = "filename=";
-  std::string::size_type attributesp = header.find(keyName);
-  if(attributesp == std::string::npos) {
-    return A2STR::NIL;
-  }
-  std::string::size_type filenamesp = attributesp+keyName.size();
-  std::string::size_type filenameep;
-  if(filenamesp == header.size()) {
-    return A2STR::NIL;
-  }
-  
-  if(header[filenamesp] == '\'' || header[filenamesp] == '"') {
-    char quoteChar = header[filenamesp];
-    filenameep = header.find(quoteChar, filenamesp+1);
-  } else {
-    filenameep = header.find(';', filenamesp);
+static std::string trimBasename(const std::string& src)
+{
+  static const std::string TRIMMED("\r\n\t '\"");
+  std::string fn = File(trim(src, TRIMMED)).getBasename();
+  if(fn == ".." || fn == A2STR::DOT_C) {
+    fn = A2STR::NIL;
   }
-  if(filenameep == std::string::npos) {
-    filenameep = header.size();
+  return fn;
+}
+
+std::string iso8859ToUtf8(const std::string& src)
+{
+  std::string dest;
+  for(std::string::const_iterator itr = src.begin(); itr != src.end(); ++itr) {
+    unsigned char c = *itr;
+    if(0xa0 <= c && c <= 0xff) {
+      if(c <= 0xbf) {
+        dest += 0xc2;
+      } else {
+        dest += 0xc3;
+      }
+      dest += c&(~0x40);
+    } else {
+      dest += c;
+    }
   }
-  static const std::string TRIMMED("\r\n '\"");
-  std::string fn =
-    File(trim(header.substr
-              (filenamesp, filenameep-filenamesp), TRIMMED)).getBasename();
-  if(fn == ".." || fn == A2STR::DOT_C) {
-    return A2STR::NIL;
-  } else {
-    return fn;
+  return dest;
+}
+
+std::string getContentDispositionFilename(const std::string& header)
+{
+  std::string filename;
+  std::vector<std::string> params;
+  split(header, std::back_inserter(params), A2STR::SEMICOLON_C, true);
+  for(std::vector<std::string>::iterator i = params.begin();
+      i != params.end(); ++i) {
+    std::string& param = *i;
+    static const std::string keyName = "filename";
+    if(!startsWith(param, keyName)) {
+      continue;
+    }
+    std::string::iterator markeritr = param.begin()+keyName.size();
+    for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
+    if(markeritr == param.end()) {
+      continue;
+    }
+    if(*markeritr == '=') {
+      std::pair<std::string, std::string> paramPair;
+      split(paramPair, param, '=');
+      std::string value = paramPair.second;
+      if(value.empty()) {
+        continue;
+      }
+      std::string::iterator filenameLast;
+      if(*value.begin() == '\'' || *value.begin() == '"') {
+        char qc = *value.begin();
+        for(filenameLast = value.begin()+1;
+            filenameLast != value.end() && *filenameLast != qc;
+            ++filenameLast);
+      } else {
+        filenameLast = value.end();
+      }
+      value = trimBasename(std::string(value.begin(), filenameLast));
+      if(value.empty()) {
+        continue;
+      }
+      filename = urldecode(value);
+      // continue because there is a chance we can find filename*=...
+    } else if(*markeritr == '*') {
+      // See RFC2231 Section4 and draft-reschke-rfc2231-in-http.
+      // Please note that this function doesn't do charset conversion
+      // except that if iso-8859-1 is specified, it is converted to
+      // utf-8.
+      std::pair<std::string, std::string> paramPair;
+      split(paramPair, param, '=');
+      std::string value = paramPair.second;
+      std::vector<std::string> extValues;
+      split(value, std::back_inserter(extValues), "'", false, true);
+      if(extValues.size() != 3) {
+        continue;
+      }
+      value = trimBasename(extValues[2]);
+      if(value.empty()) {
+        continue;
+      }
+      value = urldecode(value);
+      if(extValues[0] == "iso-8859-1") {
+        value = iso8859ToUtf8(value);
+      }
+      filename = value;
+      break;
+    }
   }
+  return filename;
 }
 
 std::string randomAlpha(size_t length, const RandomizerHandle& randomizer) {

+ 7 - 4
src/util.h

@@ -212,7 +212,9 @@ void parsePrioritizePieceRange
  size_t pieceLength,
  uint64_t defaultSize = 1048576 /* 1MiB */);
 
-// this function temporarily put here
+// Converts ISO/IEC 8859-1 string src to utf-8.
+std::string iso8859ToUtf8(const std::string& src);
+
 std::string getContentDispositionFilename(const std::string& header);
 
 std::string randomAlpha(size_t length,
@@ -317,7 +319,8 @@ std::map<size_t, std::string> createIndexPathMap(std::istream& i);
  */
 template<typename OutputIterator>
 OutputIterator split(const std::string& src, OutputIterator out,
-                     const std::string& delims, bool doTrim = false)
+                     const std::string& delims, bool doTrim = false,
+                     bool allowEmpty = false)
 {
   std::string::size_type p = 0;
   while(1) {
@@ -327,7 +330,7 @@ OutputIterator split(const std::string& src, OutputIterator out,
       if(doTrim) {
         term = util::trim(term);
       }
-      if(!term.empty()) {
+      if(allowEmpty || !term.empty()) {
         *out = term;
         ++out;
       }
@@ -338,7 +341,7 @@ OutputIterator split(const std::string& src, OutputIterator out,
       term = util::trim(term);
     }
     p = np+1;
-    if(!term.empty()) {
+    if(allowEmpty || !term.empty()) {
       *out = term;
       ++out;
     }

+ 54 - 5
test/UtilTest.cc

@@ -154,11 +154,11 @@ void UtilTest::testSplit() {
 }
 
 void UtilTest::testSplit_many() {
-  std::deque<std::string> v1;
+  std::vector<std::string> v1;
   util::split("name1=value1; name2=value2; name3=value3",std::back_inserter(v1),
               ";", true);
-  CPPUNIT_ASSERT_EQUAL(3, (int)v1.size());
-  std::deque<std::string>::iterator itr = v1.begin();
+  CPPUNIT_ASSERT_EQUAL((size_t)3, v1.size());
+  std::vector<std::string>::iterator itr = v1.begin();
   CPPUNIT_ASSERT_EQUAL(std::string("name1=value1"), *itr++);
   CPPUNIT_ASSERT_EQUAL(std::string("name2=value2"), *itr++);
   CPPUNIT_ASSERT_EQUAL(std::string("name3=value3"), *itr++);
@@ -167,11 +167,28 @@ void UtilTest::testSplit_many() {
 
   util::split("name1=value1; name2=value2; name3=value3",std::back_inserter(v1),
               ";", false);
-  CPPUNIT_ASSERT_EQUAL(3, (int)v1.size());
+  CPPUNIT_ASSERT_EQUAL((size_t)3, v1.size());
   itr = v1.begin();
   CPPUNIT_ASSERT_EQUAL(std::string("name1=value1"), *itr++);
   CPPUNIT_ASSERT_EQUAL(std::string(" name2=value2"), *itr++);
   CPPUNIT_ASSERT_EQUAL(std::string(" name3=value3"), *itr++);
+
+  v1.clear();
+
+  util::split("k=v", std::back_inserter(v1), ";", false, true);
+  CPPUNIT_ASSERT_EQUAL((size_t)1, v1.size());
+  CPPUNIT_ASSERT_EQUAL(std::string("k=v"), v1[0]);
+
+  v1.clear();
+
+  util::split(" ", std::back_inserter(v1), ";", true, true);
+  CPPUNIT_ASSERT_EQUAL((size_t)1, v1.size());
+  CPPUNIT_ASSERT_EQUAL(std::string(""), v1[0]);
+
+  v1.clear();
+
+  util::split(" ", std::back_inserter(v1), ";", true);
+  CPPUNIT_ASSERT_EQUAL((size_t)0, v1.size());
 }
 
 void UtilTest::testEndsWith() {
@@ -276,7 +293,8 @@ void UtilTest::testGetContentDispositionFilename() {
   CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"), util::getContentDispositionFilename(h8));
 
   std::string h9 = "attachment; filename=\"aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT\"";
-  CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2; creation-date=20 Jun 2007 00:00:00 GMT"), util::getContentDispositionFilename(h9));
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2.tar.bz2"),
+                       util::getContentDispositionFilename(h9));
 
   std::string h10 = "attachment; filename=";
   CPPUNIT_ASSERT_EQUAL(std::string(""), util::getContentDispositionFilename(h10));
@@ -295,6 +313,37 @@ void UtilTest::testGetContentDispositionFilename() {
   std::string currentDir = "attachment; filename=.";
   CPPUNIT_ASSERT_EQUAL(std::string(),
                        util::getContentDispositionFilename(currentDir));
+  // RFC2231 Section4
+  std::string extparam1 = "attachment; filename * = UTF-8'ja'filename";
+  CPPUNIT_ASSERT_EQUAL(std::string("filename"),
+                       util::getContentDispositionFilename(extparam1));
+  std::string extparam2 = "filename*=''aria2";
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
+                       util::getContentDispositionFilename(extparam2));
+  std::string extparam3 = "filename*='''";
+  CPPUNIT_ASSERT_EQUAL(std::string(""),
+                       util::getContentDispositionFilename(extparam3));
+  std::string extparam4 = "filename*='aria2";
+  CPPUNIT_ASSERT_EQUAL(std::string(""),
+                       util::getContentDispositionFilename(extparam4));
+  std::string extparam5 = "filename*='''aria2";
+  CPPUNIT_ASSERT_EQUAL(std::string(""),
+                       util::getContentDispositionFilename(extparam5));
+  std::string extparam6 = "filename*";
+  CPPUNIT_ASSERT_EQUAL(std::string(""),
+                       util::getContentDispositionFilename(extparam6));
+  std::string extparam7 = "filename*=UTF-8''aria2;filename=hello%20world";
+  CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
+                       util::getContentDispositionFilename(extparam7));
+  std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world";
+  CPPUNIT_ASSERT_EQUAL(std::string("hello world"),
+                       util::getContentDispositionFilename(extparam8));
+  std::string extparam9 = "filename*=iso-8859-1''%A3";
+  std::string extparam9ans;
+  extparam9ans += 0xc2;
+  extparam9ans += 0xa3;
+  CPPUNIT_ASSERT_EQUAL(extparam9ans,
+                       util::getContentDispositionFilename(extparam9));
 }
 
 class Printer {