summaryrefslogtreecommitdiff
path: root/unzip/unzip-6.0-alt-iconv-utf8-print.patch
blob: 0b0153ba54424fa095d9d2509c4d04eb4fc16383 (plain)
    1 From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
    2 From: Petr Stodulka <pstodulk@redhat.com>
    3 Date: Tue, 24 Nov 2015 17:56:11 +0100
    4 Subject: [PATCH] correctly print non-ASCII filenames
    5 
    6 ---
    7  extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
    8  unzpriv.h |   7 ++
    9  2 files changed, 233 insertions(+), 63 deletions(-)
   10 
   11 diff --git a/extract.c b/extract.c
   12 index 0ee4e93..741b7e0 100644
   13 --- a/extract.c
   14 +++ b/extract.c
   15 @@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
   16  } /* end function set_deferred_symlink() */
   17  #endif /* SYMLINKS */
   18  
   19 +/*
   20 + * If Unicode is supported, assume we have what we need to do the
   21 + * fnfilter() printability check using wide characters, avoiding MBCS issues.
   22 + */
   23  
   24 -
   25 +#ifndef UZ_FNFILTER_REPLACECHAR
   26 +        /* A convenient choice for the replacement of unprintable char codes is
   27 +         * the "single char wildcard", as this character is quite unlikely to
   28 +         * appear in filenames by itself.  The following default definition
   29 +         * sets the replacement char to a question mark as the most common
   30 +         * "single char wildcard"; this setting should be overridden in the
   30 +         * appropriate system-specific configuration header when needed.
   32 +         */
   33 +# define UZ_FNFILTER_REPLACECHAR      '?'
   34 +#endif
   35  
   36  /*************************/
   37  /*  Function fnfilter()  */        /* here instead of in list.c for SFX */
   38 @@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
   39      extent size;
   40  {
   41  #ifndef NATIVE   /* ASCII:  filter ANSI escape codes, etc. */
   42 -    ZCONST uch *r=(ZCONST uch *)raw;
   43 +    ZCONST uch *r; /* assigned below, once the code path is chosen */
   44      uch *s=space;
   45      uch *slim=NULL;
   46      uch *se=NULL;
   47      int have_overflow = FALSE;
   48  
   49 -    if (size > 0) {
   50 -        slim = space + size
   51 -#ifdef _MBCS
   52 -                     - (MB_CUR_MAX - 1)
   53 -#endif
   54 -                     - 4;
   55 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
   56 +/* If Unicode support is enabled, and we have multi-byte characters,
   57 + * then do the isprint() checks by first converting to wide characters
   58 + * and checking those.  This avoids our having to parse multi-byte
   59 + * characters for ourselves.  After the wide-char replacements have been
   60 + * made, the wide string is converted back to the local character set.
   61 + */
   62 +    wchar_t *wstring;    /* wchar_t version of raw */
   63 +    size_t wslen;        /* length of wstring */
   64 +    wchar_t *wostring;   /* wchar_t version of output string */
   65 +    size_t woslen;       /* length of wostring */
   66 +    char *newraw;        /* new raw */
   67 +
   68 +    /* 2012-11-06 SMS.
   69 +     * Changed to check the value returned by mbstowcs(), and bypass the
   70 +     * Unicode processing if it fails.  This seems to fix a problem
   71 +     * reported in the SourceForge forum, but it's not clear that we
   72 +     * should be doing any Unicode processing without some evidence that
   73 +     * the name actually is Unicode.  (Check bit 11 in the flags before
   74 +     * coming here?)
   75 +     * http://sourceforge.net/p/infozip/bugs/40/
   76 +     */
   77 +
   78 +    if (MB_CUR_MAX <= 1)
   79 +    {
   80 +        /* There's no point to converting multi-byte chars if there are
   81 +         * no multi-byte chars.
   82 +         */
   83 +        wslen = (size_t)-1;
   84      }
   85 -    while (*r) {
   86 -        if (size > 0 && s >= slim && se == NULL) {
   87 -            se = s;
   88 +    else
   89 +    {
   90 +        /* Get Unicode wide character count (for storage allocation). */
   91 +        wslen = mbstowcs( NULL, raw, 0);
   92 +    }
   93 +
   94 +    if (wslen != (size_t)-1)
   95 +    {
   96 +        /* Apparently valid Unicode.  Allocate wide-char storage. */
   97 +        wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
   98 +        if (wstring == NULL) {
   99 +            strcpy( (char *)space, raw);
  100 +            return (char *)space;
  101          }
  102 -#ifdef QDOS
  103 -        if (qlflag & 2) {
  104 -            if (*r == '/' || *r == '.') {
  105 +        wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
  106 +        if (wostring == NULL) {
  107 +            free(wstring);
  108 +            strcpy( (char *)space, raw);
  109 +            return (char *)space;
  110 +        }
  111 +
  112 +        /* Convert the multi-byte Unicode to wide chars. */
  113 +        wslen = mbstowcs(wstring, raw, wslen + 1);
  114 +
  115 +        /* Filter the wide-character string. */
  116 +        fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
  117 +
  118 +        /* Convert filtered wide chars back to multi-byte. */
  119 +        woslen = wcstombs( NULL, wostring, 0);
  120 +        if ((newraw = malloc(woslen + 1)) == NULL) {
  121 +            free(wstring);
  122 +            free(wostring);
  123 +            strcpy( (char *)space, raw);
  124 +            return (char *)space;
  125 +        }
  126 +        woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
  127 +
  128 +        if (size > 0) {
  129 +            slim = space + size - 4;
  130 +        }
  131 +        r = (ZCONST uch *)newraw;
  132 +        while (*r) {
  133 +            if (size > 0 && s >= slim && se == NULL) {
  134 +                se = s;
  135 +            }
  136 +#  ifdef QDOS
  137 +            if (qlflag & 2) {
  138 +                if (*r == '/' || *r == '.') {
  139 +                    if (se != NULL && (s > (space + (size-3)))) {
  140 +                        have_overflow = TRUE;
  141 +                        break;
  142 +                    }
  143 +                    ++r;
  144 +                    *s++ = '_';
  145 +                    continue;
  146 +                }
  147 +            } else
  148 +#  endif
  149 +            {
  150                  if (se != NULL && (s > (space + (size-3)))) {
  151                      have_overflow = TRUE;
  152                      break;
  153                  }
  154 -                ++r;
  155 -                *s++ = '_';
  156 -                continue;
  157 +                *s++ = *r++;
  158              }
  159 -        } else
  160 +        }
  161 +        if (have_overflow) {
  162 +            strcpy((char *)se, "...");
  163 +        } else {
  164 +            *s = '\0';
  165 +        }
  166 +
  167 +        free(wstring);
  168 +        free(wostring);
  169 +        free(newraw);
  170 +    }
  171 +    else
  172 +# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
  173 +    {
  174 +        /* No Unicode support, or apparently invalid Unicode. */
  175 +        r = (ZCONST uch *)raw;
  176 +
  177 +        if (size > 0) {
  178 +            slim = space + size
  179 +#ifdef _MBCS
  180 +                         - (MB_CUR_MAX - 1)
  181 +#endif
  182 +                         - 4;
  183 +        }
  184 +        while (*r) {
  185 +            if (size > 0 && s >= slim && se == NULL) {
  186 +                se = s;
  187 +            }
  188 +#ifdef QDOS
  189 +            if (qlflag & 2) {
  190 +                if (*r == '/' || *r == '.') {
  191 +                    if (se != NULL && (s > (space + (size-3)))) {
  192 +                        have_overflow = TRUE;
  193 +                        break;
  194 +                    }
  195 +                    ++r;
  196 +                    *s++ = '_';
  197 +                    continue;
  198 +                }
  199 +            } else
  200  #endif
  201  #ifdef HAVE_WORKING_ISPRINT
  202 -# ifndef UZ_FNFILTER_REPLACECHAR
  203 -    /* A convenient choice for the replacement of unprintable char codes is
  204 -     * the "single char wildcard", as this character is quite unlikely to
  205 -     * appear in filenames by itself.  The following default definition
  206 -     * sets the replacement char to a question mark as the most common
  207 -     * "single char wildcard"; this setting should be overridden in the
  208 -     * appropiate system-specific configuration header when needed.
  209 -     */
  210 -#   define UZ_FNFILTER_REPLACECHAR      '?'
  211 -# endif
  212 -        if (!isprint(*r)) {
  213 +            if (!isprint(*r)) {
  214 +                if (*r < 32) {
  215 +                    /* ASCII control codes are escaped as "^{letter}". */
  216 +                    if (se != NULL && (s > (space + (size-4)))) {
  217 +                        have_overflow = TRUE;
  218 +                        break;
  219 +                    }
  220 +                    *s++ = '^', *s++ = (uch)(64 + *r++);
  221 +                } else {
  222 +                    /* Other unprintable codes are replaced by the
  223 +                     * placeholder character. */
  224 +                    if (se != NULL && (s > (space + (size-3)))) {
  225 +                        have_overflow = TRUE;
  226 +                        break;
  227 +                    }
  228 +                    *s++ = UZ_FNFILTER_REPLACECHAR;
  229 +                    INCSTR(r);
  230 +                }
  231 +#else /* !HAVE_WORKING_ISPRINT */
  232              if (*r < 32) {
  233                  /* ASCII control codes are escaped as "^{letter}". */
  234                  if (se != NULL && (s > (space + (size-4)))) {
  235 @@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
  236                      break;
  237                  }
  238                  *s++ = '^', *s++ = (uch)(64 + *r++);
  239 +#endif /* ?HAVE_WORKING_ISPRINT */
  240              } else {
  241 -                /* Other unprintable codes are replaced by the
  242 -                 * placeholder character. */
  243 +#ifdef _MBCS
  244 +                unsigned i = CLEN(r);
  245 +                if (se != NULL && (s > (space + (size-i-2)))) {
  246 +                    have_overflow = TRUE;
  247 +                    break;
  248 +                }
  249 +                for (; i > 0; i--)
  250 +                    *s++ = *r++;
  251 +#else
  252                  if (se != NULL && (s > (space + (size-3)))) {
  253                      have_overflow = TRUE;
  254                      break;
  255                  }
  256 -                *s++ = UZ_FNFILTER_REPLACECHAR;
  257 -                INCSTR(r);
  258 -            }
  259 -#else /* !HAVE_WORKING_ISPRINT */
  260 -        if (*r < 32) {
  261 -            /* ASCII control codes are escaped as "^{letter}". */
  262 -            if (se != NULL && (s > (space + (size-4)))) {
  263 -                have_overflow = TRUE;
  264 -                break;
  265 -            }
  266 -            *s++ = '^', *s++ = (uch)(64 + *r++);
  267 -#endif /* ?HAVE_WORKING_ISPRINT */
  268 -        } else {
  269 -#ifdef _MBCS
  270 -            unsigned i = CLEN(r);
  271 -            if (se != NULL && (s > (space + (size-i-2)))) {
  272 -                have_overflow = TRUE;
  273 -                break;
  274 -            }
  275 -            for (; i > 0; i--)
  276                  *s++ = *r++;
  277 -#else
  278 -            if (se != NULL && (s > (space + (size-3)))) {
  279 -                have_overflow = TRUE;
  280 -                break;
  281 -            }
  282 -            *s++ = *r++;
  283  #endif
  284 -         }
  285 -    }
  286 -    if (have_overflow) {
  287 -        strcpy((char *)se, "...");
  288 -    } else {
  289 -        *s = '\0';
  290 +             }
  291 +        }
  292 +        if (have_overflow) {
  293 +            strcpy((char *)se, "...");
  294 +        } else {
  295 +            *s = '\0';
  296 +        }
  297      }
  298  
  299  #ifdef WINDLL
  300 @@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
  301  } /* end function fnfilter() */
  302  
  303  
  304 +#if defined( UNICODE_SUPPORT) && defined( _MBCS)
  305 +
  306 +/****************************/
  307 +/*  Function fnfilter[w]()  */  /* (Here instead of in list.c for SFX.) */
  308 +/****************************/
  309 +
  310 +/* Control codes become "^{letter}"; other unprintables, the placeholder. */
  311 +
  312 +/* fnfilterw() - Convert wide-character name to safely printable form. */
  313 +
  314 +wchar_t *fnfilterw( src, dst, siz)
  315 +    ZCONST wchar_t *src;        /* Pointer to source char (string). */
  316 +    wchar_t *dst;               /* Pointer to destination char (string). */
  317 +    extent siz;                 /* Not used (!). */
  318 +{
  319 +    wchar_t *dsx = dst;
  320 +
  321 +    /* Filter the wide chars. */
  322 +    while (*src)
  323 +    {
  324 +        if (iswprint( *src))
  325 +        {
  326 +            /* Printable code.  Copy it. */
  327 +            *dst++ = *src;
  328 +        }
  329 +        else
  330 +        {
  331 +            /* Unprintable code.  Substitute something printable for it. */
  332 +            if (*src < 32)
  333 +            {
  334 +                /* Replace ASCII control code with "^{letter}". */
  335 +                *dst++ = (wchar_t)'^';
  336 +                *dst++ = (wchar_t)(64 + *src);
  337 +            }
  338 +            else
  339 +            {
  340 +                /* Replace other unprintable code with the placeholder. */
  341 +                *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
  342 +            }
  343 +        }
  344 +        src++;
  345 +    }
  346 +    *dst = (wchar_t)0;  /* NUL-terminate the destination string. */
  347 +    return dsx;
  348 +} /* fnfilterw(). */
  349 +
  350 +#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
  351  
  352  
  353  #ifdef SET_DIR_ATTRIB
  354 diff --git a/unzpriv.h b/unzpriv.h
  355 index 22d3923..e48a652 100644
  356 --- a/unzpriv.h
  357 +++ b/unzpriv.h
  358 @@ -1212,6 +1212,7 @@
  359  # ifdef UNICODE_WCHAR
  360  #  if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
  361  #   include <wchar.h>
  362 +#   include <wctype.h>
  363  #  endif
  364  # endif
  365  # ifndef _MBCS  /* no need to include <locale.h> twice, see below */
  366 @@ -2410,6 +2411,12 @@ int    memflush                  OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
  367  char  *fnfilter                  OF((ZCONST char *raw, uch *space,
  368                                       extent size));
  369  
  370 +# if defined( UNICODE_SUPPORT) && defined( _MBCS)
  371 +wchar_t *fnfilterw               OF((ZCONST wchar_t *src, wchar_t *dst,
  372 +                                     extent siz));
  373 +#endif
  374 +
  375 +
  376  /*---------------------------------------------------------------------------
  377      Decompression functions:
  378    ---------------------------------------------------------------------------*/
  379 -- 
  380 2.4.3

Generated by cgit