1 From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
2 From: Petr Stodulka <pstodulk@redhat.com>
3 Date: Tue, 24 Nov 2015 17:56:11 +0100
4 Subject: [PATCH] print correctly non-ascii filenames
5
6 ---
7 extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
8 unzpriv.h | 7 ++
9 2 files changed, 233 insertions(+), 63 deletions(-)
10
11 diff --git a/extract.c b/extract.c
12 index 0ee4e93..741b7e0 100644
13 --- a/extract.c
14 +++ b/extract.c
15 @@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
16 } /* end function set_deferred_symlink() */
17 #endif /* SYMLINKS */
18
19 +/*
20 + * If Unicode is supported, assume we have what we need to do the
21 + * printability check in fnfilter() using wide characters, avoiding MBCS issues.
22 + */
23
24 -
25 +#ifndef UZ_FNFILTER_REPLACECHAR
26 + /* A convenient choice for the replacement of unprintable char codes is
27 + * the "single char wildcard", as this character is quite unlikely to
28 + * appear in filenames by itself. The following default definition
29 + * sets the replacement char to a question mark as the most common
30 + * "single char wildcard"; this setting should be overridden in the
31 + * appropriate system-specific configuration header when needed.
32 + */
33 +# define UZ_FNFILTER_REPLACECHAR '?'
34 +#endif
35
36 /*************************/
37 /* Function fnfilter() */ /* here instead of in list.c for SFX */
38 @@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
39 extent size;
40 {
41 #ifndef NATIVE /* ASCII: filter ANSI escape codes, etc. */
42 - ZCONST uch *r=(ZCONST uch *)raw;
43 + ZCONST uch *r; // =(ZCONST uch *)raw;
44 uch *s=space;
45 uch *slim=NULL;
46 uch *se=NULL;
47 int have_overflow = FALSE;
48
49 - if (size > 0) {
50 - slim = space + size
51 -#ifdef _MBCS
52 - - (MB_CUR_MAX - 1)
53 -#endif
54 - - 4;
55 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
56 +/* If Unicode support is enabled, and we have multi-byte characters,
57 + * then do the isprint() checks by first converting to wide characters
58 + * and checking those. This avoids our having to parse multi-byte
59 + * characters for ourselves. After the wide-char replacements have been
60 + * made, the wide string is converted back to the local character set.
61 + */
62 + wchar_t *wstring; /* wchar_t version of raw */
63 + size_t wslen; /* length of wstring */
64 + wchar_t *wostring; /* wchar_t version of output string */
65 + size_t woslen; /* length of wostring */
66 + char *newraw; /* new raw */
67 +
68 + /* 2012-11-06 SMS.
69 + * Changed to check the value returned by mbstowcs(), and bypass the
70 + * Unicode processing if it fails. This seems to fix a problem
71 + * reported in the SourceForge forum, but it's not clear that we
72 + * should be doing any Unicode processing without some evidence that
73 + * the name actually is Unicode. (Check bit 11 in the flags before
74 + * coming here?)
75 + * http://sourceforge.net/p/infozip/bugs/40/
76 + */
77 +
78 + if (MB_CUR_MAX <= 1)
79 + {
80 + /* There's no point to converting multi-byte chars if there are
81 + * no multi-byte chars.
82 + */
83 + wslen = (size_t)-1;
84 }
85 - while (*r) {
86 - if (size > 0 && s >= slim && se == NULL) {
87 - se = s;
88 + else
89 + {
90 + /* Get Unicode wide character count (for storage allocation). */
91 + wslen = mbstowcs( NULL, raw, 0);
92 + }
93 +
94 + if (wslen != (size_t)-1)
95 + {
96 + /* Apparently valid Unicode. Allocate wide-char storage. */
97 + wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
98 + if (wstring == NULL) {
99 + strcpy( (char *)space, raw);
100 + return (char *)space;
101 }
102 -#ifdef QDOS
103 - if (qlflag & 2) {
104 - if (*r == '/' || *r == '.') {
105 + wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
106 + if (wostring == NULL) {
107 + free(wstring);
108 + strcpy( (char *)space, raw);
109 + return (char *)space;
110 + }
111 +
112 + /* Convert the multi-byte Unicode to wide chars. */
113 + wslen = mbstowcs(wstring, raw, wslen + 1);
114 +
115 + /* Filter the wide-character string. */
116 + fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
117 +
118 + /* Convert filtered wide chars back to multi-byte. */
119 + woslen = wcstombs( NULL, wostring, 0);
120 + if ((newraw = malloc(woslen + 1)) == NULL) {
121 + free(wstring);
122 + free(wostring);
123 + strcpy( (char *)space, raw);
124 + return (char *)space;
125 + }
126 + woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
127 +
128 + if (size > 0) {
129 + slim = space + size - 4;
130 + }
131 + r = (ZCONST uch *)newraw;
132 + while (*r) {
133 + if (size > 0 && s >= slim && se == NULL) {
134 + se = s;
135 + }
136 +# ifdef QDOS
137 + if (qlflag & 2) {
138 + if (*r == '/' || *r == '.') {
139 + if (se != NULL && (s > (space + (size-3)))) {
140 + have_overflow = TRUE;
141 + break;
142 + }
143 + ++r;
144 + *s++ = '_';
145 + continue;
146 + }
147 + } else
148 +# endif
149 + {
150 if (se != NULL && (s > (space + (size-3)))) {
151 have_overflow = TRUE;
152 break;
153 }
154 - ++r;
155 - *s++ = '_';
156 - continue;
157 + *s++ = *r++;
158 }
159 - } else
160 + }
161 + if (have_overflow) {
162 + strcpy((char *)se, "...");
163 + } else {
164 + *s = '\0';
165 + }
166 +
167 + free(wstring);
168 + free(wostring);
169 + free(newraw);
170 + }
171 + else
172 +# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
173 + {
174 + /* No Unicode support, or apparently invalid Unicode. */
175 + r = (ZCONST uch *)raw;
176 +
177 + if (size > 0) {
178 + slim = space + size
179 +#ifdef _MBCS
180 + - (MB_CUR_MAX - 1)
181 +#endif
182 + - 4;
183 + }
184 + while (*r) {
185 + if (size > 0 && s >= slim && se == NULL) {
186 + se = s;
187 + }
188 +#ifdef QDOS
189 + if (qlflag & 2) {
190 + if (*r == '/' || *r == '.') {
191 + if (se != NULL && (s > (space + (size-3)))) {
192 + have_overflow = TRUE;
193 + break;
194 + }
195 + ++r;
196 + *s++ = '_';
197 + continue;
198 + }
199 + } else
200 #endif
201 #ifdef HAVE_WORKING_ISPRINT
202 -# ifndef UZ_FNFILTER_REPLACECHAR
203 - /* A convenient choice for the replacement of unprintable char codes is
204 - * the "single char wildcard", as this character is quite unlikely to
205 - * appear in filenames by itself. The following default definition
206 - * sets the replacement char to a question mark as the most common
207 - * "single char wildcard"; this setting should be overridden in the
208 - * appropiate system-specific configuration header when needed.
209 - */
210 -# define UZ_FNFILTER_REPLACECHAR '?'
211 -# endif
212 - if (!isprint(*r)) {
213 + if (!isprint(*r)) {
214 + if (*r < 32) {
215 + /* ASCII control codes are escaped as "^{letter}". */
216 + if (se != NULL && (s > (space + (size-4)))) {
217 + have_overflow = TRUE;
218 + break;
219 + }
220 + *s++ = '^', *s++ = (uch)(64 + *r++);
221 + } else {
222 + /* Other unprintable codes are replaced by the
223 + * placeholder character. */
224 + if (se != NULL && (s > (space + (size-3)))) {
225 + have_overflow = TRUE;
226 + break;
227 + }
228 + *s++ = UZ_FNFILTER_REPLACECHAR;
229 + INCSTR(r);
230 + }
231 +#else /* !HAVE_WORKING_ISPRINT */
232 if (*r < 32) {
233 /* ASCII control codes are escaped as "^{letter}". */
234 if (se != NULL && (s > (space + (size-4)))) {
235 @@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
236 break;
237 }
238 *s++ = '^', *s++ = (uch)(64 + *r++);
239 +#endif /* ?HAVE_WORKING_ISPRINT */
240 } else {
241 - /* Other unprintable codes are replaced by the
242 - * placeholder character. */
243 +#ifdef _MBCS
244 + unsigned i = CLEN(r);
245 + if (se != NULL && (s > (space + (size-i-2)))) {
246 + have_overflow = TRUE;
247 + break;
248 + }
249 + for (; i > 0; i--)
250 + *s++ = *r++;
251 +#else
252 if (se != NULL && (s > (space + (size-3)))) {
253 have_overflow = TRUE;
254 break;
255 }
256 - *s++ = UZ_FNFILTER_REPLACECHAR;
257 - INCSTR(r);
258 - }
259 -#else /* !HAVE_WORKING_ISPRINT */
260 - if (*r < 32) {
261 - /* ASCII control codes are escaped as "^{letter}". */
262 - if (se != NULL && (s > (space + (size-4)))) {
263 - have_overflow = TRUE;
264 - break;
265 - }
266 - *s++ = '^', *s++ = (uch)(64 + *r++);
267 -#endif /* ?HAVE_WORKING_ISPRINT */
268 - } else {
269 -#ifdef _MBCS
270 - unsigned i = CLEN(r);
271 - if (se != NULL && (s > (space + (size-i-2)))) {
272 - have_overflow = TRUE;
273 - break;
274 - }
275 - for (; i > 0; i--)
276 *s++ = *r++;
277 -#else
278 - if (se != NULL && (s > (space + (size-3)))) {
279 - have_overflow = TRUE;
280 - break;
281 - }
282 - *s++ = *r++;
283 #endif
284 - }
285 - }
286 - if (have_overflow) {
287 - strcpy((char *)se, "...");
288 - } else {
289 - *s = '\0';
290 + }
291 + }
292 + if (have_overflow) {
293 + strcpy((char *)se, "...");
294 + } else {
295 + *s = '\0';
296 + }
297 }
298
299 #ifdef WINDLL
300 @@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size) /* convert name to safely printable form */
301 } /* end function fnfilter() */
302
303
304 +#if defined( UNICODE_SUPPORT) && defined( _MBCS)
305 +
306 +/****************************/
307 +/* Function fnfilter[w]() */ /* (Here instead of in list.c for SFX.) */
308 +/****************************/
309 +
310 +/* fnfilterw() - Convert wide name to safely printable form. */
311 +
312 +/* dst needs room for 2*wcslen(src)+1 wide chars (control codes become 2). */
313 +
314 +wchar_t *fnfilterw( src, dst, siz)
315 + ZCONST wchar_t *src; /* NUL-terminated wide-char source name (only read). */
316 + wchar_t *dst; /* Output buffer; up to 2 wide chars are written per source char. */
317 + extent siz; /* Caller-supplied size of dst; not used (!), so no bounds check. */
318 +{
319 + wchar_t *dsx = dst; /* Remember the start of dst for the return value. */
320 +
321 + /* Filter the wide chars, one source char per iteration. */
322 + while (*src)
323 + {
324 + if (iswprint( *src))
325 + {
326 + /* Printable code. Copy it unchanged. */
327 + *dst++ = *src;
328 + }
329 + else
330 + {
331 + /* Unprintable code. Substitute something printable for it. */
332 + if (*src < 32)
333 + {
334 + /* Replace control code c (< 32) with '^' followed by 64 + c. */
335 + *dst++ = (wchar_t)'^';
336 + *dst++ = (wchar_t)(64 + *src);
337 + }
338 + else
339 + {
340 + /* Replace other unprintable code with UZ_FNFILTER_REPLACECHAR. */
341 + *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
342 + }
343 + }
344 + src++;
345 + }
346 + *dst = (wchar_t)0; /* NUL-terminate the destination string. */
347 + return dsx; /* Start of the filtered string (the dst passed in). */
348 +} /* fnfilterw(). */
349 +
350 +#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
351
352
353 #ifdef SET_DIR_ATTRIB
354 diff --git a/unzpriv.h b/unzpriv.h
355 index 22d3923..e48a652 100644
356 --- a/unzpriv.h
357 +++ b/unzpriv.h
358 @@ -1212,6 +1212,7 @@
359 # ifdef UNICODE_WCHAR
360 # if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
361 # include <wchar.h>
362 +# include <wctype.h>
363 # endif
364 # endif
365 # ifndef _MBCS /* no need to include <locale.h> twice, see below */
366 @@ -2410,6 +2411,12 @@ int memflush OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
367 char *fnfilter OF((ZCONST char *raw, uch *space,
368 extent size));
369
370 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
371 +wchar_t *fnfilterw OF((ZCONST wchar_t *src, wchar_t *dst,
372 + extent siz));
373 +#endif
374 +
375 +
376 /*---------------------------------------------------------------------------
377 Decompression functions:
378 ---------------------------------------------------------------------------*/
379 --
380 2.4.3
|