16 years ago · 780aaf9c80
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
 
				+2010-02-19  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
			
 
				+
			
 
				+	Added unit tests for util::getContentDispositionFilename() from
			
 
				+	http://greenbytes.de/tech/tc2231/. Fixed the function so that added
			
 
				+	tests pass.
			
 
				+	* src/util.cc
			
 
				+	* test/UtilTest.cc
			
 
				+
			
 
				 2010-02-18  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
			
 
				 
			
 
				 	Removed setlocale() for LC_CTYPE. It may affect isxdigit in
			
--- a/src/util.cc
+++ b/src/util.cc
@@ -201,6 +201,21 @@ std::string replace(const std::string& target, const std::string& oldstr, const
 
				   return result;
			
 
				 }
			
 
				 
			
 
				+bool isAlpha(const char c)
			
 
				+{
			
 
				+  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
			
 
				+}
			
 
				+
			
 
				+bool isDigit(const char c)
			
 
				+{
			
 
				+  return '0' <= c && c <= '9';
			
 
				+}
			
 
				+
			
 
				+bool isHexDigit(const char c)
			
 
				+{
			
 
				+  return isDigit(c) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
			
 
				+}
			
 
				+
			
 
				 bool inRFC3986ReservedChars(const char c)
			
 
				 {
			
 
				   static const char reserved[] = {
			
@@ -214,15 +229,34 @@ bool inRFC3986ReservedChars(const char c)
 
				 bool inRFC3986UnreservedChars(const char c)
			
 
				 {
			
 
				   static const char unreserved[] = { '-', '.', '_', '~' };
			
 
				-  return
			
 
				-    // ALPHA
			
 
				-    ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
			
 
				-    // DIGIT
			
 
				-    ('0' <= c && c <= '9') ||
			
 
				+  return isAlpha(c) || isDigit(c) ||
			
 
				     std::find(&unreserved[0], &unreserved[arrayLength(unreserved)], c) !=
			
 
				     &unreserved[arrayLength(unreserved)];
			
 
				 }
			
 
				 
			
 
				+bool inRFC2978MIMECharset(const char c)
			
 
				+{
			
 
				+  static const char chars[] = {
			
 
				+    '!', '#', '$', '%', '&',
			
 
				+    '\'', '+', '-', '^', '_',
			
 
				+    '`', '{', '}', '~'
			
 
				+  };
			
 
				+  return isAlpha(c) || isDigit(c) ||
			
 
				+    std::find(&chars[0], &chars[arrayLength(chars)], c) !=
			
 
				+    &chars[arrayLength(chars)];
			
 
				+}
			
 
				+
			
 
				+bool inRFC2616HttpToken(const char c)
			
 
				+{
			
 
				+  static const char chars[] = {
			
 
				+    '!', '#', '$', '%', '&', '\'', '*', '+', '-', '.',
			
 
				+    '^', '_', '`', '|', '~'
			
 
				+  };
			
 
				+  return isAlpha(c) || isDigit(c) ||
			
 
				+    std::find(&chars[0], &chars[arrayLength(chars)], c) !=
			
 
				+    &chars[arrayLength(chars)];
			
 
				+}
			
 
				+
			
 
				 std::string urlencode(const unsigned char* target, size_t len) {
			
 
				   std::string dest;
			
 
				   for(size_t i = 0; i < len; ++i) {
			
@@ -244,9 +278,7 @@ std::string urlencode(const std::string& target)
 
				 std::string torrentUrlencode(const unsigned char* target, size_t len) {
			
 
				   std::string dest;
			
 
				   for(size_t i = 0; i < len; ++i) {
			
 
				-    if(('0' <= target[i] && target[i] <= '9') ||
			
 
				-       ('A' <= target[i] && target[i] <= 'Z') ||
			
 
				-       ('a' <= target[i] && target[i] <= 'z')) {
			
 
				+    if(isAlpha(target[i]) || isDigit(target[i])) {
			
 
				       dest += target[i];
			
 
				     } else {
			
 
				       dest.append(StringFormat("%%%02X", target[i]).str());
			
@@ -267,7 +299,7 @@ std::string urldecode(const std::string& target) {
 
				       itr != target.end(); ++itr) {
			
 
				     if(*itr == '%') {
			
 
				       if(itr+1 != target.end() && itr+2 != target.end() &&
			
 
				-         isxdigit(*(itr+1)) && isxdigit(*(itr+2))) {
			
 
				+         isHexDigit(*(itr+1)) && isHexDigit(*(itr+2))) {
			
 
				         result += parseInt(std::string(itr+1, itr+3), 16);
			
 
				         itr += 2;
			
 
				       } else {
			
@@ -614,12 +646,16 @@ static std::string trimBasename(const std::string& src)
 
				 {
			
 
				   static const std::string TRIMMED("\r\n\t '\"");
			
 
				   std::string fn = File(trim(src, TRIMMED)).getBasename();
			
 
				+  std::string::iterator enditer = std::remove(fn.begin(), fn.end(), '\\');
			
 
				+  fn = std::string(fn.begin(), enditer);
			
 
				   if(fn == ".." || fn == A2STR::DOT_C) {
			
 
				     fn = A2STR::NIL;
			
 
				   }
			
 
				   return fn;
			
 
				 }
			
 
				 
			
 
				+// Converts ISO/IEC 8859-1 string to UTF-8 string.  If there is a
			
 
				+// character not in ISO/IEC 8859-1, returns empty string.
			
 
				 std::string iso8859ToUtf8(const std::string& src)
			
 
				 {
			
 
				   std::string dest;
			
@@ -632,6 +668,8 @@ std::string iso8859ToUtf8(const std::string& src)
 
				         dest += 0xc3;
			
 
				       }
			
 
				       dest += c&(~0x40);
			
 
				+    } else if(0x80 <= c && c <= 0x9f) {
			
 
				+      return A2STR::NIL;
			
 
				     } else {
			
 
				       dest += c;
			
 
				     }
			
@@ -648,41 +686,20 @@ std::string getContentDispositionFilename(const std::string& header)
 
				       i != params.end(); ++i) {
			
 
				     std::string& param = *i;
			
 
				     static const std::string keyName = "filename";
			
 
				-    if(!startsWith(param, keyName)) {
			
 
				+    if(!startsWith(toLower(param), keyName) || param.size() == keyName.size()) {
			
 
				       continue;
			
 
				     }
			
 
				     std::string::iterator markeritr = param.begin()+keyName.size();
			
 
				-    for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
			
 
				-    if(markeritr == param.end()) {
			
 
				-      continue;
			
 
				-    }
			
 
				-    if(*markeritr == '=') {
			
 
				-      std::pair<std::string, std::string> paramPair;
			
 
				-      split(paramPair, param, '=');
			
 
				-      std::string value = paramPair.second;
			
 
				-      if(value.empty()) {
			
 
				-        continue;
			
 
				-      }
			
 
				-      std::string::iterator filenameLast;
			
 
				-      if(*value.begin() == '\'' || *value.begin() == '"') {
			
 
				-        char qc = *value.begin();
			
 
				-        for(filenameLast = value.begin()+1;
			
 
				-            filenameLast != value.end() && *filenameLast != qc;
			
 
				-            ++filenameLast);
			
 
				-      } else {
			
 
				-        filenameLast = value.end();
			
 
				-      }
			
 
				-      value = trimBasename(std::string(value.begin(), filenameLast));
			
 
				-      if(value.empty()) {
			
 
				-        continue;
			
 
				-      }
			
 
				-      filename = urldecode(value);
			
 
				-      // continue because there is a chance we can find filename*=...
			
 
				-    } else if(*markeritr == '*') {
			
 
				+    if(*markeritr == '*') {
			
 
				       // See RFC2231 Section4 and draft-reschke-rfc2231-in-http.
			
 
				       // Please note that this function doesn't do charset conversion
			
 
				       // except that if iso-8859-1 is specified, it is converted to
			
 
				       // utf-8.
			
 
				+      ++markeritr;
			
 
				+      for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
			
 
				+      if(markeritr == param.end() || *markeritr != '=') {
			
 
				+        continue;
			
 
				+      }
			
 
				       std::pair<std::string, std::string> paramPair;
			
 
				       split(paramPair, param, '=');
			
 
				       std::string value = paramPair.second;
			
@@ -691,16 +708,70 @@ std::string getContentDispositionFilename(const std::string& header)
 
				       if(extValues.size() != 3) {
			
 
				         continue;
			
 
				       }
			
 
				-      value = trimBasename(extValues[2]);
			
 
				-      if(value.empty()) {
			
 
				+      bool bad = false;
			
 
				+      const std::string& charset = extValues[0];
			
 
				+      for(std::string::const_iterator j = charset.begin(); j != charset.end();
			
 
				+          ++j) {
			
 
				+        // Since we first split parameter by ', we can safely assume
			
 
				+        // that ' is not included in charset.
			
 
				+        if(!inRFC2978MIMECharset(*j)) {
			
 
				+          bad = true;
			
 
				+          break;
			
 
				+        }
			
 
				+      }
			
 
				+      if(bad) {
			
 
				         continue;
			
 
				       }
			
 
				-      value = urldecode(value);
			
 
				-      if(extValues[0] == "iso-8859-1") {
			
 
				+      bad = false;
			
 
				+      value = extValues[2];
			
 
				+      for(std::string::const_iterator j = value.begin(); j != value.end(); ++j){
			
 
				+        if(*j == '%') {
			
 
				+          if(j+1 != value.end() && isHexDigit(*(j+1)) &&
			
 
				+             j+2 != value.end() && isHexDigit(*(j+2))) {
			
 
				+            j += 2;
			
 
				+          } else {
			
 
				+            bad = true;
			
 
				+            break;
			
 
				+          }
			
 
				+        } else {
			
 
				+          if(*j == '*' || *j == '\'' || !inRFC2616HttpToken(*j)) {
			
 
				+            bad = true;
			
 
				+            break;
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+      if(bad) {
			
 
				+        continue;
			
 
				+      }
			
 
				+      value = trimBasename(urldecode(value));
			
 
				+      if(toLower(extValues[0]) == "iso-8859-1") {
			
 
				         value = iso8859ToUtf8(value);
			
 
				       }
			
 
				       filename = value;
			
 
				       break;
			
 
				+    } else {
			
 
				+      for(; markeritr != param.end() && *markeritr == ' '; ++markeritr);
			
 
				+      if(markeritr == param.end() || *markeritr != '=') {
			
 
				+        continue;
			
 
				+      }
			
 
				+      std::pair<std::string, std::string> paramPair;
			
 
				+      split(paramPair, param, '=');
			
 
				+      std::string value = paramPair.second;
			
 
				+      if(value.empty()) {
			
 
				+        continue;
			
 
				+      }
			
 
				+      std::string::iterator filenameLast;
			
 
				+      if(*value.begin() == '\'' || *value.begin() == '"') {
			
 
				+        char qc = *value.begin();
			
 
				+        for(filenameLast = value.begin()+1;
			
 
				+            filenameLast != value.end() && *filenameLast != qc;
			
 
				+            ++filenameLast);
			
 
				+      } else {
			
 
				+        filenameLast = value.end();
			
 
				+      }
			
 
				+      value = trimBasename(urldecode(std::string(value.begin(), filenameLast)));
			
 
				+      filename = value;
			
 
				+      // continue because there is a chance we can find filename*=...
			
 
				     }
			
 
				   }
			
 
				   return filename;
			
--- a/test/UtilTest.cc
+++ b/test/UtilTest.cc
@@ -314,9 +314,6 @@ void UtilTest::testGetContentDispositionFilename() {
 
				   CPPUNIT_ASSERT_EQUAL(std::string(),
			
 
				                        util::getContentDispositionFilename(currentDir));
			
 
				   // RFC2231 Section4
			
 
				-  std::string extparam1 = "attachment; filename * = UTF-8'ja'filename";
			
 
				-  CPPUNIT_ASSERT_EQUAL(std::string("filename"),
			
 
				-                       util::getContentDispositionFilename(extparam1));
			
 
				   std::string extparam2 = "filename*=''aria2";
			
 
				   CPPUNIT_ASSERT_EQUAL(std::string("aria2"),
			
 
				                        util::getContentDispositionFilename(extparam2));
			
@@ -338,12 +335,53 @@ void UtilTest::testGetContentDispositionFilename() {
 
				   std::string extparam8 = "filename=aria2;filename*=UTF-8''hello%20world";
			
 
				   CPPUNIT_ASSERT_EQUAL(std::string("hello world"),
			
 
				                        util::getContentDispositionFilename(extparam8));
			
 
				-  std::string extparam9 = "filename*=iso-8859-1''%A3";
			
 
				+  std::string extparam9 = "filename*=ISO-8859-1''%A3";
			
 
				   std::string extparam9ans;
			
 
				   extparam9ans += 0xc2;
			
 
				   extparam9ans += 0xa3;
			
 
				   CPPUNIT_ASSERT_EQUAL(extparam9ans,
			
 
				                        util::getContentDispositionFilename(extparam9));
			
 
				+
			
 
				+  // Tests from http://greenbytes.de/tech/tc2231/
			
 
				+  // attwithasciifnescapedchar
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo.html"),
			
 
				+     util::getContentDispositionFilename("filename=\"f\\oo.html\""));
			
 
				+  // attwithasciifilenameucase
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo.html"),
			
 
				+     util::getContentDispositionFilename("FILENAME=\"foo.html\""));
			
 
				+  // attwithisofn2231iso
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo-ä.html"),
			
 
				+     util::getContentDispositionFilename("filename*=iso-8859-1''foo-%E4.html"));
			
 
				+  // attwithfn2231utf8
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo-ä-€.html"),
			
 
				+     util::getContentDispositionFilename
			
 
				+     ("filename*=UTF-8''foo-%c3%a4-%e2%82%ac.html"));
			
 
				+  // attwithfn2231utf8-bad
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string(""),
			
 
				+     util::getContentDispositionFilename
			
 
				+     ("filename*=iso-8859-1''foo-%c3%a4-%e2%82%ac.html"));
			
 
				+  // attwithfn2231ws1
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string(""),
			
 
				+     util::getContentDispositionFilename("filename *=UTF-8''foo-%c3%a4.html"));
			
 
				+  // attwithfn2231ws2
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo-ä.html"),
			
 
				+     util::getContentDispositionFilename("filename*= UTF-8''foo-%c3%a4.html"));
			
 
				+  // attwithfn2231ws3
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string("foo-ä.html"),
			
 
				+     util::getContentDispositionFilename("filename* =UTF-8''foo-%c3%a4.html"));
			
 
				+  // attwithfn2231quot
			
 
				+  CPPUNIT_ASSERT_EQUAL
			
 
				+    (std::string(""),
			
 
				+     util::getContentDispositionFilename
			
 
				+     ("filename*=\"UTF-8''foo-%c3%a4.html\""));
			
 
				 }
			
 
				 
			
 
				 class Printer {