Browse Source

2010-04-02 Tatsuhiro Tsujikawa <t-tujikawa@users.sourceforge.net>

	Don't send "Accept-Encoding: deflate, gzip" by default. This is
	because some servers respond with "Content-Encoding: gzip" for files
	that are themselves gzipped, and aria2 inflates them. This is a
	problem if the user doesn't want the file inflated. Apparently this
	is a server configuration error, but I cannot do anything about it,
	so turn this off.  Added --http-accept-gzip option. If true is given
	to this option, aria2 sends the 'Accept-Encoding: deflate, gzip'
	request header and inflates the response if the remote server responds
	with 'Content-Encoding: gzip' or 'Content-Encoding: deflate'.  This
	means we removed the tgz extension hack that kept files with tgz
	extensions from being inflated.
	* doc/aria2c.1.txt
	* src/HttpRequest.cc
	* src/HttpRequest.h
	* src/HttpRequestCommand.cc
	* src/HttpResponseCommand.cc
	* src/OptionHandlerFactory.cc
	* src/prefs.cc
	* src/prefs.h
	* src/usage_text.h
	* test/HttpRequestTest.cc
Tatsuhiro Tsujikawa 15 years ago
parent
commit
6996f07f5f

+ 24 - 0
ChangeLog

@@ -1,3 +1,27 @@
+2010-04-02  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
+
+	Don't send "Accept-Encoding: deflate, gzip" by default. This is
+	because some servers respond with "Content-Encoding: gzip" for files
+	that are themselves gzipped, and aria2 inflates them. This is a
+	problem if the user doesn't want the file inflated. Apparently this
+	is a server configuration error, but I cannot do anything about it,
+	so turn this off.  Added --http-accept-gzip option. If true is given
+	to this option, aria2 sends the 'Accept-Encoding: deflate, gzip'
+	request header and inflates the response if the remote server responds
+	with 'Content-Encoding: gzip' or 'Content-Encoding: deflate'.  This
+	means we removed the tgz extension hack that kept files with tgz
+	extensions from being inflated.
+	* doc/aria2c.1.txt
+	* src/HttpRequest.cc
+	* src/HttpRequest.h
+	* src/HttpRequestCommand.cc
+	* src/HttpResponseCommand.cc
+	* src/OptionHandlerFactory.cc
+	* src/prefs.cc
+	* src/prefs.h
+	* src/usage_text.h
+	* test/HttpRequestTest.cc
+
 2010-04-02  Tatsuhiro Tsujikawa  <t-tujikawa@users.sourceforge.net>
 
 	Made aria2 not send

+ 24 - 2
doc/aria2c.1

@@ -2,12 +2,12 @@
 .\"     Title: aria2c
 .\"    Author: [FIXME: author] [see http://docbook.sf.net/el/author]
 .\" Generator: DocBook XSL Stylesheets v1.75.2 <http://docbook.sf.net/>
-.\"      Date: 03/30/2010
+.\"      Date: 04/02/2010
 .\"    Manual: Aria2 Manual
 .\"    Source: Aria2 1.9.1a
 .\"  Language: English
 .\"
-.TH "ARIA2C" "1" "03/30/2010" "Aria2 1\&.9\&.1a" "Aria2 Manual"
+.TH "ARIA2C" "1" "04/02/2010" "Aria2 1\&.9\&.1a" "Aria2 Manual"
 .\" -----------------------------------------------------------------
 .\" * Define some portability stuff
 .\" -----------------------------------------------------------------
@@ -350,6 +350,28 @@ option\&. Default:
 \fItrue\fR
 .RE
 .PP
+\fB\-\-http\-accept\-gzip\fR[=\fItrue\fR|\fIfalse\fR]
+.RS 4
+Send "Accept\-Encoding: deflate, gzip" request header and inflate response if remote server responds with "Content\-Encoding: gzip" or "Content\-Encoding: deflate"\&. Default:
+\fIfalse\fR
+.RE
+.if n \{\
+.sp
+.\}
+.RS 4
+.it 1 an-trap
+.nr an-no-space-flag 1
+.nr an-break-flag 1
+.br
+.ps +1
+\fBNote\fR
+.ps -1
+.br
+.sp
+Some servers respond with "Content\-Encoding: gzip" for files that are themselves gzipped\&. aria2 inflates them anyway because of the response header\&.
+.sp .5v
+.RE
+.PP
 \fB\-\-http\-auth\-challenge\fR[=\fItrue\fR|\fIfalse\fR]
 .RS 4
 Send HTTP authorization header only when it is requested by the server\&. If

+ 23 - 1
doc/aria2c.1.html

@@ -973,6 +973,28 @@ aria2c -o myfile.zip "http://mirror1/file.zip" "http://mirror2/file.zip"</td>
 </p>
 </dd>
 <dt class="hdlist1">
+<strong>--http-accept-gzip</strong>[=<em>true</em>|<em>false</em>]
+</dt>
+<dd>
+<p>
+  Send "Accept-Encoding: deflate, gzip" request header and inflate
+  response if remote server responds with "Content-Encoding: gzip" or
+  "Content-Encoding: deflate".  Default: <em>false</em>
+</p>
+</dd>
+</dl></div>
+<div class="admonitionblock">
+<table><tr>
+<td class="icon">
+<div class="title">Note</div>
+</td>
+<td class="content">Some servers respond with "Content-Encoding: gzip" for files
+that are themselves gzipped. aria2 inflates them anyway because of the
+response header.</td>
+</tr></table>
+</div>
+<div class="dlist"><dl>
+<dt class="hdlist1">
 <strong>--http-auth-challenge</strong>[=<em>true</em>|<em>false</em>]
 </dt>
 <dd>
@@ -4084,7 +4106,7 @@ files in the program, then also delete it here.</p></div>
 <div id="footnotes"><hr /></div>
 <div id="footer">
 <div id="footer-text">
-Last updated 2010-03-30 23:43:40 JST
+Last updated 2010-04-02 23:04:52 JST
 </div>
 </div>
 </body>

+ 12 - 0
doc/aria2c.1.txt

@@ -244,6 +244,18 @@ HTTP Specific Options
   Verify the peer using certificates specified in *--ca-certificate* option.
   Default: 'true'
 
+*--http-accept-gzip*[='true'|'false']::
+
+  Send "Accept-Encoding: deflate, gzip" request header and inflate
+  response if remote server responds with "Content-Encoding: gzip" or
+  "Content-Encoding: deflate".  Default: 'false'
+
+[NOTE]
+
+Some servers respond with "Content-Encoding: gzip" for files that
+are themselves gzipped. aria2 inflates them anyway because of the
+response header.
+
 *--http-auth-challenge*[='true'|'false']::
   Send HTTP authorization header only when it is requested by the
   server. If 'false' is set, then authorization header is always sent

+ 5 - 2
src/HttpRequest.cc

@@ -57,7 +57,8 @@ const std::string HttpRequest::USER_AGENT("aria2");
 
 HttpRequest::HttpRequest():_contentEncodingEnabled(true),
                            userAgent(USER_AGENT),
-                           _noCache(true)
+                           _noCache(true),
+                           _acceptGzip(false)
 {}
 
 void HttpRequest::setSegment(const SharedHandle<Segment>& segment)
@@ -169,7 +170,9 @@ std::string HttpRequest::createRequest()
   if(_contentEncodingEnabled) {
     std::string acceptableEncodings;
 #ifdef HAVE_LIBZ
-    acceptableEncodings += "deflate, gzip";
+    if(_acceptGzip) {
+      acceptableEncodings += "deflate, gzip";
+    }
 #endif // HAVE_LIBZ
     if(!acceptableEncodings.empty()) {
       builtinHds.push_back

+ 17 - 0
src/HttpRequest.h

@@ -85,6 +85,8 @@ private:
 
   bool _noCache;
 
+  bool _acceptGzip;
+
   std::pair<std::string, std::string> getProxyAuthString() const;
 public:
   HttpRequest();
@@ -255,6 +257,21 @@ public:
   {
     _noCache = false;
   }
+
+  void enableAcceptGZip()
+  {
+    _acceptGzip = true;
+  }
+
+  void disableAcceptGZip()
+  {
+    _acceptGzip = false;
+  }
+
+  bool acceptGZip() const
+  {
+    return _acceptGzip;
+  }
 };
 
 } // namespace aria2

+ 5 - 0
src/HttpRequestCommand.cc

@@ -96,6 +96,11 @@ createHttpRequest(const SharedHandle<Request>& req,
   httpRequest->setProxyRequest(proxyRequest);
   httpRequest->addAcceptType(rg->getAcceptTypes().begin(),
                              rg->getAcceptTypes().end());
+  if(option->getAsBool(PREF_HTTP_ACCEPT_GZIP)) {
+    httpRequest->enableAcceptGZip();
+  } else {
+    httpRequest->disableAcceptGZip();
+  }
   if(option->getAsBool(PREF_HTTP_NO_CACHE)) {
     httpRequest->enableNoCache();
   } else {

+ 10 - 16
src/HttpResponseCommand.cc

@@ -192,25 +192,19 @@ void HttpResponseCommand::updateLastModifiedTime(const Time& lastModified)
   }
 }
 
-static bool fileIsGzipped(const SharedHandle<HttpResponse>& httpResponse)
-{
-  std::string filename =
-    util::toLower(httpResponse->getHttpRequest()->getRequest()->getFile());
-  return util::endsWith(filename, ".gz") || util::endsWith(filename, ".tgz");
-}
-
 bool HttpResponseCommand::shouldInflateContentEncoding
 (const SharedHandle<HttpResponse>& httpResponse)
 {
-  // Basically, on the fly inflation cannot be made with segment download,
-  // because in each segment we don't know where the date should be written.
-  // So turn off segmented downloading.
-  // Meanwhile, Some server returns content-encoding: gzip for .tgz files.
-  // I think those files should not be inflated by clients, because it is the
-  // original format of those files. So I made filename ending ".gz" or ".tgz"
-  // (case-insensitive) not inflated.
-  return httpResponse->isContentEncodingSpecified() &&
-    !fileIsGzipped(httpResponse);
+  // Basically, on the fly inflation cannot be made with segment
+  // download, because in each segment we don't know where the data
+  // should be written.  So turn off segmented downloading.
+  // Meanwhile, some servers return content-encoding: gzip for .tgz
+  // files.  I think those files should not be inflated by clients,
+  // because it is the original format of those files. Current
+  // implementation just inflates these files nonetheless.
+  const std::string& ce = httpResponse->getContentEncoding();
+  return httpResponse->getHttpRequest()->acceptGZip() &&
+    (ce == "gzip" || ce == "deflate");
 }
 
 bool HttpResponseCommand::handleDefaultEncoding

+ 9 - 0
src/OptionHandlerFactory.cc

@@ -739,6 +739,15 @@ OptionHandlers OptionHandlerFactory::createOptionHandlers()
     op->addTag(TAG_HTTP);
     handlers.push_back(op);
   }
+  {
+    SharedHandle<OptionHandler> op(new BooleanOptionHandler
+                                   (PREF_HTTP_ACCEPT_GZIP,
+                                    TEXT_HTTP_ACCEPT_GZIP,
+                                    V_FALSE,
+                                    OptionHandler::OPT_ARG));
+    op->addTag(TAG_HTTP);
+    handlers.push_back(op);
+  }
   {
     SharedHandle<OptionHandler> op(new BooleanOptionHandler
                                    (PREF_HTTP_AUTH_CHALLENGE,

+ 2 - 0
src/prefs.cc

@@ -184,6 +184,8 @@ const std::string PREF_REMOVE_CONTROL_FILE("remove-control-file");
 const std::string PREF_ALWAYS_RESUME("always-resume");
 // value: 1*digit
 const std::string PREF_MAX_RESUME_FAILURE_TRIES("max-resume-failure-tries");
+// value: true | false
+const std::string PREF_HTTP_ACCEPT_GZIP("http-accept-gzip");
 
 /**
  * FTP related preferences

+ 2 - 0
src/prefs.h

@@ -188,6 +188,8 @@ extern const std::string PREF_REMOVE_CONTROL_FILE;
 extern const std::string PREF_ALWAYS_RESUME;
 // value: 1*digit
 extern const std::string PREF_MAX_RESUME_FAILURE_TRIES;
+// value: true | false
+extern const std::string PREF_HTTP_ACCEPT_GZIP;
 
 /**
  * FTP related preferences

+ 5 - 0
src/usage_text.h

@@ -667,3 +667,8 @@
     "                              instead.")
 #define TEXT_DHT_MESSAGE_TIMEOUT                \
   _(" --dht-message-timeout=SEC    Set timeout in seconds.")
+#define TEXT_HTTP_ACCEPT_GZIP                   \
+  _(" --http-accept-gzip[=true|false] Send 'Accept: deflate, gzip' request header\n" \
+    "                              and inflate response if remote server responds\n" \
+    "                              with 'Content-Encoding: gzip' or\n"  \
+    "                              'Content-Encoding: deflate'.")

+ 15 - 5
test/HttpRequestTest.cc

@@ -719,20 +719,30 @@ void HttpRequestTest::testEnableAcceptEncoding()
   acceptEncodings += "deflate, gzip";
 #endif // HAVE_LIBZ
   
-  std::string expectedText =
+  std::string expectedTextHead =
     "GET /archives/aria2-1.0.0.tar.bz2 HTTP/1.1\r\n"
     "User-Agent: aria2\r\n"
     "Accept: */*\r\n";
-  if(!acceptEncodings.empty()) {
-    expectedText += "Accept-Encoding: "+acceptEncodings+"\r\n";
-  }
-  expectedText +=
+  std::string expectedTextTail =
     "Host: localhost\r\n"
     "Pragma: no-cache\r\n"
     "Cache-Control: no-cache\r\n"
     "Connection: close\r\n"
     "\r\n";
 
+  std::string expectedText = expectedTextHead;
+  expectedText += expectedTextTail;
+  CPPUNIT_ASSERT_EQUAL(expectedText, httpRequest.createRequest());
+
+  expectedText = expectedTextHead;
+  if(!acceptEncodings.empty()) {
+    expectedText += "Accept-Encoding: ";
+    expectedText += acceptEncodings;
+    expectedText += "\r\n";
+  }
+  expectedText += expectedTextTail;
+
+  httpRequest.enableAcceptGZip();
   CPPUNIT_ASSERT_EQUAL(expectedText, httpRequest.createRequest());
 }