Announce

PukiWiki contents have been moved into SONOTS Plugin (20070703)

wget

wget をハックしてみた時のメモ

Goal: make wget apply url_unescape to the filenames of downloaded files.

....

It turns out there was already a --restrict-file-names=nocontrol option! What a waste of time....

Only in wget-1.10.2_url_unescape/src: Makefile
Only in wget-1.10.2_url_unescape/src: config.h
diff -u wget-1.10.2/src/http.c wget-1.10.2_url_unescape/src/http.c
--- wget-1.10.2/src/http.c	2005-08-08 15:54:16.000000000 -0700
+++ wget-1.10.2_url_unescape/src/http.c	2006-01-29 00:04:13.311754100 -0800
@@ -1211,7 +1211,7 @@
     /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
        know the local filename so we can save to it. */
     assert (*hs->local_file != NULL);
-
+  url_unescape(*hs->local_file);
   /* Initialize certain elements of struct http_stat.  */
   hs->len = 0;
   hs->contlen = -1;
diff -u wget-1.10.2/src/url.c wget-1.10.2_url_unescape/src/url.c
--- wget-1.10.2/src/url.c	2005-07-01 10:16:06.000000000 -0700
+++ wget-1.10.2_url_unescape/src/url.c	2006-01-29 00:08:56.929576500 -0800
@@ -160,7 +160,7 @@
    The transformation is done in place.  If you need the original
    string intact, make a copy before calling this function.  */
 
-static void
+void
 url_unescape (char *s)
 {
   char *t = s;			/* t - tortoise */
@@ -1508,7 +1508,7 @@
 
      The exception is the case when file does exist and is a
      directory (see `mkalldirs' for explanation).  */
-
+  url_unescape(fname);
   if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)
       && !(file_exists_p (fname) && !file_non_directory_p (fname)))
     return fname;
diff -u wget-1.10.2/src/url.h wget-1.10.2_url_unescape/src/url.h
--- wget-1.10.2/src/url.h	2003-09-21 15:47:14.000000000 -0700
+++ wget-1.10.2_url_unescape/src/url.h	2006-01-29 00:08:09.341147700 -0800
@@ -74,6 +74,7 @@
 /* Function declarations */
 
 char *url_escape PARAMS ((const char *));
+void url_unescape (char *s);
 
 struct url *url_parse PARAMS ((const char *, int *));
 const char *url_error PARAMS ((int));
diff -u wget-1.10.2/src/utils.c wget-1.10.2_url_unescape/src/utils.c
--- wget-1.10.2/src/utils.c	2005-06-27 07:12:20.000000000 -0700
+++ wget-1.10.2_url_unescape/src/utils.c	2006-01-29 00:08:42.438739700 -0800
@@ -106,6 +106,7 @@
 #include "wget.h"
 #include "utils.h"
 #include "hash.h"
+#include "url.h"
 
 #ifndef errno
 extern int errno;
@@ -605,7 +606,10 @@
 	 of intermediate directories to fail, as the initial path components
 	 are not necessarily directories!  */
       if (!file_exists_p (dir))
-	ret = mkdir (dir, 0777);
+	      {
+  url_unescape(dir);
+  ret = mkdir (dir, 0777);
+        }
       else
 	ret = 0;
       if (quit)
Only in wget-1.10.2_url_unescape/src: wget.exe