Bläddra i källkod

Rewrite URI path component normalization

Tatsuhiro Tsujikawa 12 år sedan
förälder
incheckning
126a4bde61
3 ändrade filer med 191 tillägg och 9 borttagningar
  1. 147 9
      src/uri.cc
  2. 8 0
      src/uri.h
  3. 36 0
      test/UriTest.cc

+ 147 - 9
src/uri.cc

@@ -209,6 +209,148 @@ std::string construct(const UriStruct& us)
   return res;
 }
 
+enum { // scanner states used by normalizePath()
+  NPATH_START, // no character consumed yet
+  NPATH_SLASH, // previous character was '/'
+  NPATH_SDOT, // current component is "." so far
+  NPATH_DDOT, // current component is ".." so far
+  NPATH_PATHCOMP // inside an ordinary path component
+};
+
+std::string normalizePath(std::string path) // rewrites its by-value copy and returns it
+{
+  std::string::iterator begin = path.begin(), out = begin; // out: write position for kept spans
+  int state = NPATH_START;
+  bool startWithSlash = false; // set when the first character is '/'
+  std::vector<int> range; // offsets from begin; consecutive pairs are [start, end) of spans to keep
+  // Initial capacity only; 32 is arbitrary.
+  range.reserve(32);
+  for(std::string::iterator in = begin, eoi = path.end(); in != eoi; ++in) {
+    switch(state) {
+    case NPATH_START:
+      switch(*in) {
+      case '.':
+        state = NPATH_SDOT;
+        range.push_back(in-begin); // tentative span start; popped again if this is "." or ".."
+        break;
+      case '/':
+        startWithSlash = true;
+        state = NPATH_SLASH;
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        range.push_back(in-begin); // span start of an ordinary component
+        break;
+      }
+      break;
+    case NPATH_SLASH:
+      switch(*in) {
+      case '.':
+        state = NPATH_SDOT;
+        range.push_back(in-begin); // tentative span start, as in NPATH_START
+        break;
+      case '/':
+        // Drop duplicate '/': no offset is recorded for it.
+        break;
+      default:
+        state = NPATH_PATHCOMP;
+        range.push_back(in-begin); // span start of an ordinary component
+        break;
+      }
+      break;
+    case NPATH_SDOT:
+      switch(*in) {
+      case '.':
+        state = NPATH_DDOT;
+        break;
+      case '/':
+        // Drop path component '.': discard its tentative span start.
+        state = NPATH_SLASH;
+        range.pop_back();
+        break;
+      default:
+        state = NPATH_PATHCOMP; // e.g. ".a" is an ordinary component; its start is already recorded
+        break;
+      }
+      break;
+    case NPATH_DDOT:
+      switch(*in) {
+      case '/':
+        // Drop the previous path component before '..'.
+        for(int i = 0; i < 3 && !range.empty(); ++i) { // start of "..", then end and start of the previous span if any
+          range.pop_back();
+        }
+        state = NPATH_SLASH;
+        break;
+      default:
+        state = NPATH_PATHCOMP; // e.g. "..a" or "..." is an ordinary component
+        break;
+      }
+      break;
+    case NPATH_PATHCOMP:
+      if(*in == '/') {
+        range.push_back(in+1-begin); // span end just past the '/', so the separator stays with its component
+        state = NPATH_SLASH;
+      }
+      break;
+    }
+  }
+  switch(state) { // settle whatever component was still open at end of input
+  case NPATH_SDOT:
+    range.pop_back(); // trailing "." is dropped
+    break;
+  case NPATH_DDOT:
+    for(int i = 0; i < 3 && !range.empty(); ++i) { // trailing ".." removes itself and the previous span
+      range.pop_back();
+    }
+    break;
+  case NPATH_PATHCOMP:
+    range.push_back(path.end()-begin); // close the final span at end of string
+    break;
+  default:
+    break;
+  }
+  if(startWithSlash) {
+    ++out; // the leading '/' stays where it is
+  }
+  for(int i = 0; i < (int)range.size(); i += 2) { // compact the kept spans toward the front
+    std::string::iterator a = begin+range[i];
+    std::string::iterator b = begin+range[i+1];
+    if(a == out) { // span is already in position; nothing to move
+      out = b;
+    } else {
+      out = std::copy(a, b, out);
+    }
+  }
+  path.erase(out, path.end()); // cut off whatever remains past the last kept span
+  return path;
+}
+
+namespace { // file-local helper: joins basePath with the range [newPathFirst, newPathLast)
+std::string joinPath(std::string basePath,
+                     std::string::const_iterator newPathFirst,
+                     std::string::const_iterator newPathLast)
+{
+  if(newPathFirst == newPathLast) {
+    return basePath; // empty new path: basePath is returned as is, without normalization
+  } else if(basePath.empty() || *newPathFirst == '/') {
+    return normalizePath(std::string(newPathFirst, newPathLast)); // new path alone is used; basePath is ignored
+  } else if(basePath[basePath.size()-1] == '/') {
+    basePath.append(newPathFirst, newPathLast); // basePath already ends with '/'
+    return normalizePath(basePath);
+  } else {
+    basePath += "/"; // insert the missing separator before appending
+    basePath.append(newPathFirst, newPathLast);
+    return normalizePath(basePath);
+  }
+}
+} // namespace
+
+std::string joinPath(const std::string& basePath, const std::string& newPath) // whole-string overload
+{
+  return joinPath(basePath, newPath.begin(), newPath.end()); // forwards to the iterator-range joinPath
+}
+
 std::string joinUri(const std::string& baseUri, const std::string& uri)
 {
   UriStruct us;
@@ -219,11 +361,6 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
     if(!parse(bus, baseUri)) {
       return uri;
     }
-    std::vector<std::string> parts;
-    if(uri.empty() || uri[0] != '/') {
-      util::split(bus.dir.begin(), bus.dir.end(), std::back_inserter(parts),
-                  '/');
-    }
     std::string::const_iterator qend;
     for(qend = uri.begin(); qend != uri.end(); ++qend) {
       if(*qend == '#') {
@@ -236,14 +373,15 @@ std::string joinUri(const std::string& baseUri, const std::string& uri)
         break;
       }
     }
-    util::split(uri.begin(), end, std::back_inserter(parts), '/');
+    std::string newpath = joinPath(bus.dir, uri.begin(), end);
     bus.dir.clear();
     bus.file.clear();
     bus.query.clear();
     std::string res = construct(bus);
-    res += util::joinPath(parts.begin(), parts.end());
-    if((uri.begin() == end || *(end-1) == '/') && *(res.end()-1) != '/') {
-      res += "/";
+    if(!newpath.empty()) {
+      // res always ends with '/'. Since bus.dir also starts with '/',
+      // regardless of uri, newpath always starts with '/'.
+      res.append(newpath.begin()+1, newpath.end());
     }
     res.append(end, qend);
     return res;

+ 8 - 0
src/uri.h

@@ -82,6 +82,14 @@ std::string construct(const UriStruct& us);
 
 std::string joinUri(const std::string& baseUri, const std::string& uri);
 
+std::string joinPath(const std::string& basePath, const std::string& newPath); // a newPath starting with '/' replaces basePath
+
+// Normalizes path so that 1) it contains no successive '/' and 2) the
+// path components '.' and '..' are resolved. If there are not enough
+// path components to resolve a '..', that '..' is discarded. The
+// result starts with '/' only if path starts with '/'.
+std::string normalizePath(std::string path);
+
 } // namespace uri
 
 } // namespace aria2

+ 36 - 0
test/UriTest.cc

@@ -36,6 +36,7 @@ class UriTest:public CppUnit::TestFixture {
   CPPUNIT_TEST(testConstruct);
   CPPUNIT_TEST(testSwap);
   CPPUNIT_TEST(testJoinUri);
+  CPPUNIT_TEST(testJoinPath);
   CPPUNIT_TEST_SUITE_END();
 
 public:
@@ -66,6 +67,7 @@ public:
   void testConstruct();
   void testSwap();
   void testJoinUri();
+  void testJoinPath();
 };
 
 
@@ -527,6 +529,40 @@ void UriTest::testJoinUri()
                                "/file#a?q=x"));
 }
 
+void UriTest::testJoinPath() // checks joinPath results for absolute, relative, '.', '..' and '//' inputs
+{
+  CPPUNIT_ASSERT_EQUAL(std::string("/b"), joinPath("/a", "/b")); // new path starting with '/' replaces the base
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo"),
+                       joinPath("/alpha", "bravo")); // separator is inserted between base and new path
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo"),
+                       joinPath("/a", "/alpha/../bravo"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/charlie/"),
+                       joinPath("/a", "/alpha/bravo/../charlie/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha////bravo//")); // successive '/' are collapsed
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/././bravo/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/bravo/./"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/bravo/"),
+                       joinPath("/a", "/alpha/bravo/.")); // trailing '.' is dropped
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
+                       joinPath("/a", "/alpha/bravo/..")); // trailing '..' removes the previous component
+  CPPUNIT_ASSERT_EQUAL(std::string("/alpha/"),
+                       joinPath("/", "../alpha/")); // '..' with nothing left to remove is discarded
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
+                       joinPath("/alpha", "../bravo/"));
+  CPPUNIT_ASSERT_EQUAL(std::string("/bravo/"),
+                       joinPath("/alpha", "../../bravo/"));
+  // If neither path starts with '/', the resulting path does not
+  // start with '/' either.
+  CPPUNIT_ASSERT_EQUAL(std::string("alpha/bravo"),
+                       joinPath("alpha", "bravo"));
+  CPPUNIT_ASSERT_EQUAL(std::string("bravo/"),
+                       joinPath("alpha", "../../bravo/"));
+
+}
+
 } // namespace uri
 
 } // namespace aria2