瀏覽代碼

2010-02-19 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>

	Added unit tests for util::getContentDispositionFilename() from
	http://greenbytes.de/tech/tc2231/. Fixed the function so that the
	added tests pass.
	* src/util.cc
	* test/UtilTest.cc
Tatsuhiro Tsujikawa 15 年之前
父節點
當前提交
780aaf9c80
共有 3 個文件被更改,包括 162 次插入 和 45 次删除
  1. 8 0
      ChangeLog
  2. 112 41
      src/util.cc
  3. 42 4
      test/UtilTest.cc

+ 8 - 0
ChangeLog

@@ -1,3 +1,11 @@
+2010-02-19  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
+
+	Added unit tests for util::getContentDispositionFilename() from
+	http://greenbytes.de/tech/tc2231/. Fixed the function so that the
+	added tests pass.
+	* src/util.cc
+	* test/UtilTest.cc
+
 2010-02-18  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
 
 	Removed setlocale() for LC_CTYPE. It may affect isxdigit in

+ 112 - 41
src/util.cc

@@ -201,6 +201,21 @@ std::string replace(const std::string& target, const std::string& oldstr, const
   return result;
 }
 
+bool isAlpha(const char c)
+{
+  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
+}
+
+bool isDigit(const char c)
+{
+  return '0' <= c && c <= '9';
+}
+
+bool isHexDigit(const char c)
+{
+  return isDigit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
+}
+
 bool inRFC3986ReservedChars(const char c)
 {
   static const char reserved[] = {
@@ -214,15 +229,34 @@ bool inRFC3986ReservedChars(const char c)
 bool inRFC3986UnreservedChars(const char c)
 {
   static const char unreserved[] = { '-', '.', '_', '~' };
-  return
-    // ALPHA
-    ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
-    // DIGIT
-    ('0' <= c && c <= '9') ||
+  return isAlpha(c) || isDigit(c) ||
     std::find(&unreserved[0], &unreserved[arrayLength(unreserved)], c) !=
     &unreserved[arrayLength(unreserved)];
 }
 
+bool inRFC2978MIMECharset(const char c)
+{
+  static const char chars[] = {
+    '!', '#', '$', '%', '&',
+    '\'', '+', '-', '^', '_',
+    '`', '{', '}', '~'
+  };
+  return isAlpha(c) || isDigit(c) ||
+    std::find(&chars[0], &chars[arrayLength(chars)], c) !=
+    &chars[arrayLength(chars)];
+}
+
+bool inRFC2616HttpToken(const char c)
+{
+  static const char chars[] = {
+    '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.',
+    '^', '_', '`', '|', '~'
+  };
+  return isAlpha(c) || isDigit(c) ||
+    std::find(&chars[0], &chars[arrayLength(chars)], c) !=
+    &chars[arrayLength(chars)];
+}
+
 std::string urlencode(const unsigned char* target, size_t len) {
   std::string dest;
   for(size_t i = 0; i < len; ++i) {
@@ -244,9 +278,7 @@ std::string urlencode(const std::string& target)
 std::string torrentUrlencode(const unsigned char* target, size_t len) {
   std::string dest;
   for(size_t i = 0; i < len; ++i) {
-    if(('0' <= target[i] && target[i] <= '9') ||
-       ('A' <= target[i] && target[i] <= 'Z') ||
-       ('a' <= target[i] && target[i] <= 'z')) {
+    if(isAlpha(target[i]) || isDigit(target[i])) {
       dest += target[i];
     } else {
       dest.append(StringFormat("%%%02X", target[i]).str());
@@ -267,7 +299,7 @@ std::string urldecode(const std::string& target) {
       itr != target.end(); ++itr) {
     if(*itr == '%') {
       if(itr+1 != target.end() && itr+2 != target.end() &&
-         isxdigit(*(itr+1)) && isxdigit(*(itr+2))) {
+         isHexDigit(*(itr+1)) && isHexDigit(*(itr+2))) {
         result += parseInt(std::string(itr+1, itr+3), 16);
         itr += 2;
       } else {
@@ -614,12 +646,16 @@ static std::string trimBasename(const std::string& src)
 {
   static const std::string TRIMMED("\r\n\t '\"");
   std::string fn = File(trim(src, TRIMMED)).getBasename();
+  std::string::iterator enditer = std::remove(fn.begin(), fn.end(), '\\');
+  fn = std::string(fn.begin(), enditer);
   if(fn == ".." || fn == A2STR::DOT_C) {
     fn = A2STR::NIL;
   }
   return fn;
 }
 
+// Converts ISO/IEC 8859-1 string to UTF-8 string.  If there is a
+// character not in ISO/IEC 8859-1, returns empty string.
 std::string iso8859ToUtf8(const std::string& src)
 {
   std::string dest;
@@ -632,6 +668,8 @@ std::string iso8859ToUtf8(const std::string& src)
         dest += 0xc3;
       }
       dest += c&(~0x40);
+    } else if(0x80 <= c && c <= 0x9f) {
+      return A2STR::NIL;
     } else {
       dest += c;
     }
@@ -648,41 +686,20 @@ std::string getContentDispositionFilename(const std::string& header)
       i != params.end(); ++i) {
     std::string& param = *i;
     static const std::string keyName = "filename";
-    if(!startsWith(param, keyName)) {
+    if(!startsWith(toLower(param), keyName) || param.size() == keyName.size()) {
       continue;
     }
     std::string::iterator markeritr = param.begin()+keyName.size();
-    for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
-    if(markeritr == param.end()) {
-      continue;
-    }
-    if(*markeritr == '=') {
-      std::pair<std::string, std::string> paramPair;
-      split(paramPair, param, '=');
-      std::string value = paramPair.second;
-      if(value.empty()) {
-        continue;
-      }
-      std::string::iterator filenameLast;
-      if(*value.begin() == '\'' || *value.begin() == '"') {
-        char qc = *value.begin();
-        for(filenameLast = value.begin()+1;
-            filenameLast != value.end() && *filenameLast != qc;
-            ++filenameLast);
-      } else {
-        filenameLast = value.end();
-      }
-      value = trimBasename(std::string(value.begin(), filenameLast));
-      if(value.empty()) {
-        continue;
-      }
-      filename = urldecode(value);
-      // continue because there is a chance we can find filename*=...
-    } else if(*markeritr == '*') {
+    if(*markeritr == '*') {
       // See RFC2231 Section4 and draft-reschke-rfc2231-in-http.
       // Please note that this function doesn't do charset conversion
       // except that if iso-8859-1 is specified, it is converted to
       // utf-8.
+      ++markeritr;
+      for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
+      if(markeritr == param.end() || *markeritr != '=') {
+        continue;
+      }
       std::pair<std::string, std::string> paramPair;
       split(paramPair, param, '=');
       std::string value = paramPair.second;
@@ -691,16 +708,70 @@ std::string getContentDispositionFilename(const std::string& header)
       if(extValues.size() != 3) {
         continue;
       }
-      value = trimBasename(extValues[2]);
-      if(value.empty()) {
+      bool bad = false;
+      const std::string& charset = extValues[0];
+      for(std::string::const_iterator j = charset.begin(); j != charset.end();
+          ++j) {
+        // Since we first split parameter by ', we can safely assume
+        // that ' is not included in charset.
+        if(!inRFC2978MIMECharset(*j)) {
+          bad = true;
+          break;
+        }
+      }
+      if(bad) {
         continue;
       }
-      value = urldecode(value);
-      if(extValues[0] == "iso-8859-1") {
+      bad = false;
+      value = extValues[2];
+      for(std::string::const_iterator j = value.begin(); j != value.end(); ++j){
+        if(*j == '%') {
+          if(j+1 != value.end() && isHexDigit(*(j+1)) &&
+             j+2 != value.end() && isHexDigit(*(j+2))) {
+            j += 2;
+          } else {
+            bad = true;
+            break;
+          }
+        } else {
+          if(*j == '*' || *j == '\'' || !inRFC2616HttpToken(*j)) {
+            bad = true;
+            break;
+          }
+        }
+      }
+      if(bad) {
+        continue;
+      }
+      value = trimBasename(urldecode(value));
+      if(toLower(extValues[0]) == "iso-8859-1") {
         value = iso8859ToUtf8(value);
       }
       filename = value;
       break;
+    } else {
+      for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
+      if(markeritr == param.end() || *markeritr != '=') {
+        continue;
+      }
+      std::pair<std::string, std::string> paramPair;
+      split(paramPair, param, '=');
+      std::string value = paramPair.second;
+      if(value.empty()) {
+        continue;
+      }
+      std::string::iterator filenameLast;
+      if(*value.begin() == '\'' || *value.begin() == '"') {
+        char qc = *value.begin();
+        for(filenameLast = value.begin()+1;
+            filenameLast != value.end() && *filenameLast != qc;
+            ++filenameLast);
+      } else {
+        filenameLast = value.end();
+      }
+      value = trimBasename(urldecode(std::string(value.begin(), filenameLast)));
+      filename = value;
+      // continue because there is a chance we can find filename*=...
     }
   }
   return filename;

+ 42 - 4
test/UtilTest.cc

@@ -314,9 +314,6 @@ void UtilTest::testGetContentDispositionFilename() {
   CPPUNIT_ASSERT_EQUAL(std::string(),
                        util::getContentDispositionFilename(currentDir));
   // RFC2231 Section4
-  std::string extparam1 = "attachment; filename * = UTF-8'ja'filename";
-  CPPUNIT_ASSERT_EQUAL(std::string("filename"),
-                       util::getContentDispositionFilename(extparam1));
   std::string extparam2 = "filename*=''aria2";
   CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
                        util::getContentDispositionFilename(extparam2));
@@ -338,12 +335,53 @@ void UtilTest::testGetContentDispositionFilename() {
   std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world";
   CPPUNIT_ASSERT_EQUAL(std::string("hello world"),
                        util::getContentDispositionFilename(extparam8));
-  std::string extparam9 = "filename*=iso-8859-1''%A3";
+  std::string extparam9 = "filename*=ISO-8859-1''%A3";
   std::string extparam9ans;
   extparam9ans += 0xc2;
   extparam9ans += 0xa3;
   CPPUNIT_ASSERT_EQUAL(extparam9ans,
                        util::getContentDispositionFilename(extparam9));
+
+  // Tests from http://greenbytes.de/tech/tc2231/
+  // attwithasciifnescapedchar
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo.html"),
+     util::getContentDispositionFilename("filename=\"f\\oo.html\""));
+  // attwithasciifilenameucase
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo.html"),
+     util::getContentDispositionFilename("FILENAME=\"foo.html\""));
+  // attwithisofn2231iso
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo-ä.html"),
+     util::getContentDispositionFilename("filename*=iso-8859-1''foo-%E4.html"));
+  // attwithfn2231utf8
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo-ä-€.html"),
+     util::getContentDispositionFilename
+     ("filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html"));
+  // attwithfn2231utf8-bad
+  CPPUNIT_ASSERT_EQUAL
+    (std::string(""),
+     util::getContentDispositionFilename
+     ("filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html"));
+  // attwithfn2231ws1
+  CPPUNIT_ASSERT_EQUAL
+    (std::string(""),
+     util::getContentDispositionFilename("filename *=UTF-8''foo-%c3%a4.html"));
+  // attwithfn2231ws2
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo-ä.html"),
+     util::getContentDispositionFilename("filename*= UTF-8''foo-%c3%a4.html"));
+  // attwithfn2231ws3
+  CPPUNIT_ASSERT_EQUAL
+    (std::string("foo-ä.html"),
+     util::getContentDispositionFilename("filename* =UTF-8''foo-%c3%a4.html"));
+  // attwithfn2231quot
+  CPPUNIT_ASSERT_EQUAL
+    (std::string(""),
+     util::getContentDispositionFilename
+     ("filename*=\"UTF-8''foo-%c3%a4.html\""));
 }
 
 class Printer {