summaryrefslogtreecommitdiff
path: root/glibc/glibc-c-utf8-locale-2.patch
blob: 7064b8e810e41f055a3a4a90eac8e8fc4a8342cb (plain)
    1 commit 466f2be6c08070e9113ae2fdc7acd5d8828cba50
    2 Author: Carlos O'Donell <carlos@redhat.com>
    3 Date:   Wed Sep 1 15:19:19 2021 -0400
    4 
    5     Add generic C.UTF-8 locale (Bug 17318)
    6     
    7     We add a new C.UTF-8 locale. This locale is not builtin to glibc, but
    8     is provided as a distinct locale. The locale provides full support for
    9     UTF-8 and this includes full code point sorting via STRCMP-based
   10     collation (strcmp or wcscmp).
   11     
   12     The collation uses a new keyword 'codepoint_collation' which drops all
   13     collation rules and generates an empty zero-rules collation to enable
   14     STRCMP usage in collation. This ensures that we get full code point
   15     sorting for C.UTF-8 with a minimal 1406 bytes of overhead (LC_COLLATE
   16     structure information and ASCII collating tables).
   17     
   18     The new locale is added to SUPPORTED. Minimal test data for specific
   19     code points (minus those not supported by collate-test) is provided in
   20     C.UTF-8.in, and this verifies code point sorting is working reasonably
   21     across the range. The locale was tested manually with the full set of
   22     code points without failure.
   23     
   24     The locale is harmonized with locales already shipping in various
   25     downstream distributions. A new tst-iconv9 test is added which verifies
   26     the C.UTF-8 locale is generally usable.
   27     
   28     Testing for fnmatch, regexec, and regcomp is provided by extending
   29     bug-regex1, bug-regex19, bug-regex4, bug-regex6, transbug, tst-fnmatch,
   30     tst-regcomp-truncated, and tst-regex to use C.UTF-8.
   31     
   32     Tested on x86_64 and i686 without regression.
   33     
   34     Reviewed-by: Florian Weimer <fweimer@redhat.com>
   35 
   36 diff --git a/iconv/Makefile b/iconv/Makefile
   37 index 07d77c9ecaafba1f..9993f2d3f3cd7498 100644
   38 --- a/iconv/Makefile
   39 +++ b/iconv/Makefile
   40 @@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
   41  CFLAGS-linereader.c += -DNO_TRANSLITERATION
   42  CFLAGS-simple-hash.c += -I../locale
   43  
   44 -tests	= tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
   45 -	  tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt
   46 +tests = \
   47 +	tst-iconv1 \
   48 +	tst-iconv2 \
   49 +	tst-iconv3 \
   50 +	tst-iconv4 \
   51 +	tst-iconv5 \
   52 +	tst-iconv6 \
   53 +	tst-iconv7 \
   54 +	tst-iconv8 \
   55 +	tst-iconv9 \
   56 +	tst-iconv-mt \
   57 +	tst-iconv-opt \
   58 +	# tests
   59  
   60  others		= iconv_prog iconvconfig
   61  install-others-programs	= $(inst_bindir)/iconv
   62 @@ -83,10 +94,15 @@ endif
   63  include ../Rules
   64  
   65  ifeq ($(run-built-tests),yes)
   66 -LOCALES := en_US.UTF-8
   67 +# We have to generate locales (list sorted alphabetically)
   68 +LOCALES := \
   69 +	C.UTF-8 \
   70 +	en_US.UTF-8 \
   71 +	# LOCALES
   72  include ../gen-locales.mk
   73  
   74  $(objpfx)tst-iconv-opt.out: $(gen-locales)
   75 +$(objpfx)tst-iconv9.out: $(gen-locales)
   76  endif
   77  
   78  $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
   79 diff --git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c
   80 new file mode 100644
   81 index 0000000000000000..c46b1833d87b8e55
   82 --- /dev/null
   83 +++ b/iconv/tst-iconv9.c
   84 @@ -0,0 +1,87 @@
   85 +/* Verify that using C.UTF-8 works.
   86 +
   87 +   Copyright (C) 2021 Free Software Foundation, Inc.
   88 +   This file is part of the GNU C Library.
   89 +
   90 +   The GNU C Library is free software; you can redistribute it and/or
   91 +   modify it under the terms of the GNU Lesser General Public
   92 +   License as published by the Free Software Foundation; either
   93 +   version 2.1 of the License, or (at your option) any later version.
   94 +
   95 +   The GNU C Library is distributed in the hope that it will be useful,
   96 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
   97 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   98 +   Lesser General Public License for more details.
   99 +
  100 +   You should have received a copy of the GNU Lesser General Public
  101 +   License along with the GNU C Library; if not, see
  102 +   <https://www.gnu.org/licenses/>.  */
  103 +
  104 +#include <iconv.h>
  105 +#include <stddef.h>
  106 +#include <stdio.h>
  107 +#include <string.h>
  108 +#include <support/support.h>
  109 +#include <support/check.h>
  110 +
  111 +/* This test does two things:
  112 +   (1) Verify that we have likely included translit_combining in C.UTF-8.
  113 +   (2) Verify default_missing is '?' as expected.  */
  114 +
  115 +/* ISO-8859-1 encoding of "für".  */
  116 +char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 };
  117 +/* ASCII transliteration is "fur" with C.UTF-8 translit_combining.  */
  118 +char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 };
  119 +
  120 +/* First 3-byte UTF-8 code point.  */
  121 +char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 };
  122 +/* There is no ASCII transliteration for SAMARITAN LETTER ALAF
  123 +   so we get default_missing used which is '?'.  */
  124 +char default_missing_exp[] = { 0x3f, 0x0 };
  125 +
  126 +static int
  127 +do_test (void)
  128 +{
  129 +  char ascii_out[5];
  130 +  iconv_t cd;
  131 +  char *inbuf;
  132 +  char *outbuf;
  133 +  size_t inbytes;
  134 +  size_t outbytes;
  135 +  size_t n;
  136 +
  137 +  /* The C.UTF-8 locale should include translit_combining, which provides
  138 +     the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which
  139 +     is not provided by locale/C-translit.h.in.  */
  140 +  xsetlocale (LC_ALL, "C.UTF-8");
  141 +
  142 +  /* From ISO-8859-1 to ASCII.  */
  143 +  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1");
  144 +  TEST_VERIFY (cd != (iconv_t) -1);
  145 +  inbuf = iso88591_in;
  146 +  inbytes = 3;
  147 +  outbuf = ascii_out;
  148 +  outbytes = 3;
  149 +  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
  150 +  TEST_VERIFY (n != -1);
  151 +  *outbuf = '\0';
  152 +  TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3);
  153 +  TEST_VERIFY (iconv_close (cd) == 0);
  154 +
  155 +  /* From UTF-8 to ASCII.  */
  156 +  cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8");
  157 +  TEST_VERIFY (cd != (iconv_t) -1);
  158 +  inbuf = utf8_in;
  159 +  inbytes = 3;
  160 +  outbuf = ascii_out;
  161 +  outbytes = 3;
  162 +  n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
  163 +  TEST_VERIFY (n != -1);
  164 +  *outbuf = '\0';
  165 +  TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1);
  166 +  TEST_VERIFY (iconv_close (cd) == 0);
  167 +
  168 +  return 0;
  169 +}
  170 +
  171 +#include <support/test-driver.c>
  172 diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in
  173 new file mode 100644
  174 index 0000000000000000..c31dcc2aa045ee61
  175 --- /dev/null
  176 +++ b/localedata/C.UTF-8.in
  177 @@ -0,0 +1,157 @@
  178 + ; <U1>
  179 + ; <U2>
  180 + ; <U3>
  181 + ; <U4>
  182 + ; <U5>
  183 + ; <U6>
  184 + ; <U7>
  185 + ; <U8>
  186 + ; <UE>
  187 + ; <UF>
  188 + ; <U10>
  189 + ; <U11>
  190 + ; <U12>
  191 + ; <U13>
  192 + ; <U14>
  193 + ; <U15>
  194 + ; <U16>
  195 + ; <U17>
  196 + ; <U18>
  197 + ; <U19>
  198 + ; <U1A>
  199 + ; <U1B>
  200 + ; <U1C>
  201 + ; <U1D>
  202 + ; <U1E>
  203 + ; <U1F>
  204 +! ; <U21>
  205 +" ; <U22>
  206 +# ; <U23>
  207 +$ ; <U24>
  208 +% ; <U25>
  209 +& ; <U26>
  210 +' ; <U27>
  211 +) ; <U29>
  212 +* ; <U2A>
  213 ++ ; <U2B>
  214 +, ; <U2C>
  215 +- ; <U2D>
  216 +. ; <U2E>
  217 +/ ; <U2F>
  218 +0 ; <U30>
  219 +1 ; <U31>
  220 +2 ; <U32>
  221 +3 ; <U33>
  222 +4 ; <U34>
  223 +5 ; <U35>
  224 +6 ; <U36>
  225 +7 ; <U37>
  226 +8 ; <U38>
  227 +9 ; <U39>
  228 +< ; <U3C>
  229 += ; <U3D>
  230 +> ; <U3E>
  231 +? ; <U3F>
  232 +@ ; <U40>
  233 +A ; <U41>
  234 +B ; <U42>
  235 +C ; <U43>
  236 +D ; <U44>
  237 +E ; <U45>
  238 +F ; <U46>
  239 +G ; <U47>
  240 +H ; <U48>
  241 +I ; <U49>
  242 +J ; <U4A>
  243 +K ; <U4B>
  244 +L ; <U4C>
  245 +M ; <U4D>
  246 +N ; <U4E>
  247 +O ; <U4F>
  248 +P ; <U50>
  249 +Q ; <U51>
  250 +R ; <U52>
  251 +S ; <U53>
  252 +T ; <U54>
  253 +U ; <U55>
  254 +V ; <U56>
  255 +W ; <U57>
  256 +X ; <U58>
  257 +Y ; <U59>
  258 +Z ; <U5A>
  259 +[ ; <U5B>
  260 +\ ; <U5C>
  261 +] ; <U5D>
  262 +^ ; <U5E>
  263 +_ ; <U5F>
  264 +` ; <U60>
  265 +a ; <U61>
  266 +b ; <U62>
  267 +c ; <U63>
  268 +d ; <U64>
  269 +e ; <U65>
  270 +f ; <U66>
  271 +g ; <U67>
  272 +h ; <U68>
  273 +i ; <U69>
  274 +j ; <U6A>
  275 +k ; <U6B>
  276 +l ; <U6C>
  277 +m ; <U6D>
  278 +n ; <U6E>
  279 +o ; <U6F>
  280 +p ; <U70>
  281 +q ; <U71>
  282 +r ; <U72>
  283 +s ; <U73>
  284 +t ; <U74>
  285 +u ; <U75>
  286 +v ; <U76>
  287 +w ; <U77>
  288 +x ; <U78>
  289 +y ; <U79>
  290 +z ; <U7A>
  291 +{ ; <U7B>
  292 +| ; <U7C>
  293 +} ; <U7D>
  294 +~ ; <U7E>
  295 + ; <U7F>
  296 +€ ; <U80>
  297 +ÿ ; <UFF>
  298 +Ā ; <U100>
  299 +࿿ ; <UFFF>
  300 +က ; <U1000>
  301 +� ; <UFFFD>
  302 +￿ ; <UFFFF>
  303 +𐀀 ; <U10000>
  304 +🿿 ; <U1FFFF>
  305 +𠀀 ; <U20000>
  306 +𯿿 ; <U2FFFF>
  307 +𰀀 ; <U30000>
  308 +𿿾 ; <U3FFFE>
  309 +񀀀 ; <U40000>
  310 +񏿿 ; <U4FFFF>
  311 +񐀀 ; <U50000>
  312 +񟿿 ; <U5FFFF>
  313 +񠀀 ; <U60000>
  314 +񯿿 ; <U6FFFF>
  315 +񰀀 ; <U70000>
  316 +񿿿 ; <U7FFFF>
  317 +򀀀 ; <U80000>
  318 +򏿿 ; <U8FFFF>
  319 +򐀀 ; <U90000>
  320 +򟿿 ; <U9FFFF>
  321 +򠀀 ; <UA0000>
  322 +򯿿 ; <UAFFFF>
  323 +򰀀 ; <UB0000>
  324 +򿿿 ; <UBFFFF>
  325 +󀀁 ; <UC0001>
  326 +󏿌 ; <UCFFCC>
  327 +󐀎 ; <UD000E>
  328 +󟿿 ; <UDFFFF>
  329 +󠀁 ; <UE0001>
  330 +󯿿 ; <UEFFFF>
  331 +󰀁 ; <UF0001>
  332 +󿿿 ; <UFFFFF>
  333 +􀀁 ; <U100001>
  334 +􏿿 ; <U10FFFF>
  335 diff --git a/localedata/Makefile b/localedata/Makefile
  336 index 0341528b0407ae3b..c9dd5a954e8194cc 100644
  337 --- a/localedata/Makefile
  338 +++ b/localedata/Makefile
  339 @@ -47,6 +47,7 @@ test-input := \
  340  	bg_BG.UTF-8 \
  341  	br_FR.UTF-8 \
  342  	bs_BA.UTF-8 \
  343 +	C.UTF-8 \
  344  	ckb_IQ.UTF-8 \
  345  	cmn_TW.UTF-8 \
  346  	crh_UA.UTF-8 \
  347 @@ -206,6 +207,7 @@ LOCALES := \
  348  	bg_BG.UTF-8 \
  349  	br_FR.UTF-8 \
  350  	bs_BA.UTF-8 \
  351 +	C.UTF-8 \
  352  	ckb_IQ.UTF-8 \
  353  	cmn_TW.UTF-8 \
  354  	crh_UA.UTF-8 \
  355 diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
  356 index 34f7a7c3fe2b6526..546ce6cea16a8fdb 100644
  357 --- a/localedata/SUPPORTED
  358 +++ b/localedata/SUPPORTED
  359 @@ -79,6 +79,7 @@ brx_IN/UTF-8 \
  360  bs_BA.UTF-8/UTF-8 \
  361  bs_BA/ISO-8859-2 \
  362  byn_ER/UTF-8 \
  363 +C.UTF-8/UTF-8 \
  364  ca_AD.UTF-8/UTF-8 \
  365  ca_AD/ISO-8859-15 \
  366  ca_ES.UTF-8/UTF-8 \
  367 diff --git a/localedata/locales/C b/localedata/locales/C
  368 new file mode 100644
  369 index 0000000000000000..ca801c79cf7e953e
  370 --- /dev/null
  371 +++ b/localedata/locales/C
  372 @@ -0,0 +1,194 @@
  373 +escape_char /
  374 +comment_char %
  375 +% Locale for C locale in UTF-8
  376 +
  377 +LC_IDENTIFICATION
  378 +title      "C locale"
  379 +source     ""
  380 +address    ""
  381 +contact    ""
  382 +email      "bug-glibc-locales@gnu.org"
  383 +tel        ""
  384 +fax        ""
  385 +language   ""
  386 +territory  ""
  387 +revision   "2.0"
  388 +date       "2020-06-28"
  389 +category  "i18n:2012";LC_IDENTIFICATION
  390 +category  "i18n:2012";LC_CTYPE
  391 +category  "i18n:2012";LC_COLLATE
  392 +category  "i18n:2012";LC_TIME
  393 +category  "i18n:2012";LC_NUMERIC
  394 +category  "i18n:2012";LC_MONETARY
  395 +category  "i18n:2012";LC_MESSAGES
  396 +category  "i18n:2012";LC_PAPER
  397 +category  "i18n:2012";LC_NAME
  398 +category  "i18n:2012";LC_ADDRESS
  399 +category  "i18n:2012";LC_TELEPHONE
  400 +category  "i18n:2012";LC_MEASUREMENT
  401 +END LC_IDENTIFICATION
  402 +
  403 +LC_CTYPE
  404 +% Include only the i18n character type classes without any of the
  405 +% transliteration that i18n uses by default.
  406 +copy "i18n_ctype"
  407 +
  408 +% Include the neutral transliterations.  The builtin C and
  409 +% POSIX locales have +1600 transliterations that are built into
  410 +% the locales, and these are a superset of those.
  411 +translit_start
  412 +include "translit_neutral";""
  413 +% We must use '?' for default_missing because the transliteration
  414 +% framework includes it directly into the output and so it must
  415 +% be compatible with ASCII if that is the target character set.
  416 +default_missing <U003F>
  417 +translit_end
  418 +
  419 +% Include the transliterations that can convert combined characters.
  420 +% These are generally expected by users.
  421 +translit_start
  422 +include "translit_combining";""
  423 +translit_end
  424 +
  425 +END LC_CTYPE
  426 +
  427 +LC_COLLATE
  428 +% The keyword 'codepoint_collation' in any part of any LC_COLLATE
  429 +% immediately discards all collation information and causes the
  430 +% locale to use strcmp/wcscmp for collation comparison.  This is
  431 +% exactly what is needed for C (ASCII) or C.UTF-8.
  432 +codepoint_collation
  433 +END LC_COLLATE
  434 +
  435 +LC_MONETARY
  436 +
  437 +% This is the 14652 i18n fdcc-set definition for the LC_MONETARY
  438 +% category (except for the int_curr_symbol and currency_symbol, they are
  439 +% empty in the 14652 i18n fdcc-set definition and also empty in
  440 +% glibc/locale/C-monetary.c.).
  441 +int_curr_symbol     ""
  442 +currency_symbol     ""
  443 +mon_decimal_point   "."
  444 +mon_thousands_sep   ""
  445 +mon_grouping        -1
  446 +positive_sign       ""
  447 +negative_sign       "-"
  448 +int_frac_digits     -1
  449 +frac_digits         -1
  450 +p_cs_precedes       -1
  451 +int_p_sep_by_space  -1
  452 +p_sep_by_space      -1
  453 +n_cs_precedes       -1
  454 +int_n_sep_by_space  -1
  455 +n_sep_by_space      -1
  456 +p_sign_posn         -1
  457 +n_sign_posn         -1
  458 +%
  459 +END LC_MONETARY
  460 +
  461 +LC_NUMERIC
  462 +% This is the POSIX Locale definition for
  463 +% the LC_NUMERIC category.
  464 +%
  465 +decimal_point   "."
  466 +thousands_sep   ""
  467 +grouping        -1
  468 +END LC_NUMERIC
  469 +
  470 +LC_TIME
  471 +% This is the POSIX Locale definition for the LC_TIME category with the
  472 +% exception that time is per ISO 8601 and 24-hour.
  473 +%
  474 +% Abbreviated weekday names (%a)
  475 +abday       "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat"
  476 +
  477 +% Full weekday names (%A)
  478 +day         "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/
  479 +            "Friday";"Saturday"
  480 +
  481 +% Abbreviated month names (%b)
  482 +abmon       "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/
  483 +            "Oct";"Nov";"Dec"
  484 +
  485 +% Full month names (%B)
  486 +mon         "January";"February";"March";"April";"May";"June";"July";/
  487 +            "August";"September";"October";"November";"December"
  488 +
  489 +% Week description, consists of three fields:
  490 +% 1. Number of days in a week.
  491 +% 2. Gregorian date that is a first weekday (19971130 for Sunday, 19971201 for Monday).
  492 +% 3. The weekday number to be contained in the first week of the year.
  493 +%
  494 +% ISO 8601 conforming applications should use the values 7, 19971201 (a
  495 +% Monday), and 4 (Thursday), respectively.
  496 +week    7;19971201;4
  497 +first_weekday	1
  498 +first_workday	2
  499 +
  500 +% Appropriate date and time representation (%c)
  501 +d_t_fmt "%a %b %e %H:%M:%S %Y"
  502 +
  503 +% Appropriate date representation (%x)
  504 +d_fmt   "%m/%d/%y"
  505 +
  506 +% Appropriate time representation (%X)
  507 +t_fmt   "%H:%M:%S"
  508 +
  509 +% Appropriate AM/PM time representation (%r)
  510 +t_fmt_ampm "%I:%M:%S %p"
  511 +
  512 +% Equivalent of AM/PM (%p)
  513 +am_pm	"AM";"PM"
  514 +
  515 +% Appropriate date representation (date(1))
  516 +date_fmt	"%a %b %e %H:%M:%S %Z %Y"
  517 +END LC_TIME
  518 +
  519 +LC_MESSAGES
  520 +% This is the POSIX Locale definition for
  521 +% the LC_MESSAGES category.
  522 +%
  523 +yesexpr "^[yY]"
  524 +noexpr  "^[nN]"
  525 +yesstr  "Yes"
  526 +nostr   "No"
  527 +END LC_MESSAGES
  528 +
  529 +LC_PAPER
  530 +% This is the ISO/IEC 14652 "i18n" definition for
  531 +% the LC_PAPER category.
  532 +% (A4 paper, this is also used in the built in C/POSIX
  533 +% locale in glibc/locale/C-paper.c)
  534 +height   297
  535 +width    210
  536 +END LC_PAPER
  537 +
  538 +LC_NAME
  539 +% This is the ISO/IEC 14652 "i18n" definition for
  540 +% the LC_NAME category.
  541 +% (also used in the built in C/POSIX locale in glibc/locale/C-name.c)
  542 +name_fmt    "%p%t%g%t%m%t%f"
  543 +END LC_NAME
  544 +
  545 +LC_ADDRESS
  546 +% This is the ISO/IEC 14652 "i18n" definition for
  547 +% the LC_ADDRESS category.
  548 +% (also used in the built in C/POSIX locale in glibc/locale/C-address.c)
  549 +postal_fmt    "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
  550 +END LC_ADDRESS
  551 +
  552 +LC_TELEPHONE
  553 +% This is the ISO/IEC 14652 "i18n" definition for
  554 +% the LC_TELEPHONE category.
  555 +% "+%c %a %l"
  556 +tel_int_fmt    "+%c %a %l"
  557 +% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c)
  558 +END LC_TELEPHONE
  559 +
  560 +LC_MEASUREMENT
  561 +% This is the ISO/IEC 14652 "i18n" definition for
  562 +% the LC_MEASUREMENT category.
  563 +% (same as in the built in C/POSIX locale in glibc/locale/C-measurement.c)
  564 +%metric
  565 +measurement    1
  566 +END LC_MEASUREMENT
  567 diff --git a/posix/Makefile b/posix/Makefile
  568 index 059efb3cd2706cbe..a5229777eeb0e067 100644
  569 --- a/posix/Makefile
  570 +++ b/posix/Makefile
  571 @@ -190,9 +190,19 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test
  572  	$(evaluate-test)
  573  endif
  574  
  575 -LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \
  576 -	   en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \
  577 -	   cs_CZ.ISO-8859-2
  578 +LOCALES := \
  579 +	cs_CZ.ISO-8859-2 \
  580 +	cs_CZ.UTF-8 \
  581 +	C.UTF-8 \
  582 +	da_DK.ISO-8859-1 \
  583 +	de_DE.ISO-8859-1 \
  584 +	de_DE.UTF-8 \
  585 +	en_US.UTF-8 \
  586 +	es_US.ISO-8859-1 \
  587 +	es_US.UTF-8 \
  588 +	ja_JP.EUC-JP \
  589 +	tr_TR.UTF-8 \
  590 +	# LOCALES
  591  include ../gen-locales.mk
  592  
  593  $(objpfx)bug-regex1.out: $(gen-locales)
  594 diff --git a/posix/bug-regex1.c b/posix/bug-regex1.c
  595 index 38eb543951862492..7e9f4ec430a95631 100644
  596 --- a/posix/bug-regex1.c
  597 +++ b/posix/bug-regex1.c
  598 @@ -41,6 +41,26 @@ main (void)
  599  	puts (" -> OK");
  600      }
  601  
  602 +  puts ("in C.UTF-8 locale");
  603 +  setlocale (LC_ALL, "C.UTF-8");
  604 +  s = re_compile_pattern ("[an\371]*n", 7, &regex);
  605 +  if (s != NULL)
  606 +    {
  607 +      puts ("re_compile_pattern return non-NULL value");
  608 +      result = 1;
  609 +    }
  610 +  else
  611 +    {
  612 +      match = re_match (&regex, "an", 2, 0, &regs);
  613 +      if (match != 2)
  614 +	{
  615 +	  printf ("re_match returned %d, expected 2\n", match);
  616 +	  result = 1;
  617 +	}
  618 +      else
  619 +	puts (" -> OK");
  620 +    }
  621 +
  622    puts ("in de_DE.ISO-8859-1 locale");
  623    setlocale (LC_ALL, "de_DE.ISO-8859-1");
  624    s = re_compile_pattern ("[an]*n", 7, &regex);
  625 diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
  626 index b3fee0a7302c3263..e00ff60a14f994bf 100644
  627 --- a/posix/bug-regex19.c
  628 +++ b/posix/bug-regex19.c
  629 @@ -25,6 +25,7 @@
  630  #include <string.h>
  631  #include <locale.h>
  632  #include <libc-diag.h>
  633 +#include <support/support.h>
  634  
  635  #define BRE RE_SYNTAX_POSIX_BASIC
  636  #define ERE RE_SYNTAX_POSIX_EXTENDED
  637 @@ -407,8 +408,8 @@ do_mb_tests (const struct test_s *test)
  638    return 0;
  639  }
  640  
  641 -int
  642 -main (void)
  643 +static int
  644 +do_test (void)
  645  {
  646    size_t i;
  647    int ret = 0;
  648 @@ -417,20 +418,17 @@ main (void)
  649  
  650    for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
  651      {
  652 -      if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
  653 -	{
  654 -	  puts ("setlocale de_DE.ISO-8859-1 failed");
  655 -	  ret = 1;
  656 -	}
  657 +      xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
  658        ret |= do_one_test (&tests[i], "");
  659 -      if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
  660 -	{
  661 -	  puts ("setlocale de_DE.UTF-8 failed");
  662 -	  ret = 1;
  663 -	}
  664 +      xsetlocale (LC_ALL, "de_DE.UTF-8");
  665 +      ret |= do_one_test (&tests[i], "UTF-8 ");
  666 +      ret |= do_mb_tests (&tests[i]);
  667 +      xsetlocale (LC_ALL, "C.UTF-8");
  668        ret |= do_one_test (&tests[i], "UTF-8 ");
  669        ret |= do_mb_tests (&tests[i]);
  670      }
  671  
  672    return ret;
  673  }
  674 +
  675 +#include <support/test-driver.c>
  676 diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c
  677 index 8d5ae11567889301..6475833c525176b2 100644
  678 --- a/posix/bug-regex4.c
  679 +++ b/posix/bug-regex4.c
  680 @@ -32,8 +32,33 @@ main (void)
  681  
  682    memset (&regex, '\0', sizeof (regex));
  683  
  684 +  printf ("INFO: Checking C.\n");
  685    setlocale (LC_ALL, "C");
  686  
  687 +  s = re_compile_pattern ("ab[cde]", 7, &regex);
  688 +  if (s != NULL)
  689 +    {
  690 +      puts ("re_compile_pattern returned non-NULL value");
  691 +      result = 1;
  692 +    }
  693 +  else
  694 +    {
  695 +      match[0] = re_search_2 (&regex, "xyabez", 6, "", 0, 1, 5, NULL, 6);
  696 +      match[1] = re_search_2 (&regex, NULL, 0, "abc", 3, 0, 3, NULL, 3);
  697 +      match[2] = re_search_2 (&regex, "xya", 3, "bd", 2, 2, 3, NULL, 5);
  698 +      if (match[0] != 2 || match[1] != 0 || match[2] != 2)
  699 +	{
  700 +	  printf ("re_search_2 returned %d,%d,%d, expected 2,0,2\n",
  701 +		  match[0], match[1], match[2]);
  702 +	  result = 1;
  703 +	}
  704 +      else
  705 +	puts (" -> OK");
  706 +    }
  707 +
  708 +  printf ("INFO: Checking C.UTF-8.\n");
  709 +  setlocale (LC_ALL, "C.UTF-8");
  710 +
  711    s = re_compile_pattern ("ab[cde]", 7, &regex);
  712    if (s != NULL)
  713      {
  714 diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c
  715 index 2bdf2126a49ee99b..0929b69b83c91e5e 100644
  716 --- a/posix/bug-regex6.c
  717 +++ b/posix/bug-regex6.c
  718 @@ -30,7 +30,7 @@ main (int argc, char *argv[])
  719    regex_t re;
  720    regmatch_t mat[10];
  721    int i, j, ret = 0;
  722 -  const char *locales[] = { "C", "de_DE.UTF-8" };
  723 +  const char *locales[] = { "C", "C.UTF-8", "de_DE.UTF-8" };
  724    const char *string = "http://www.regex.com/pattern/matching.html#intro";
  725    regmatch_t expect[10] = {
  726      { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 },
  727 diff --git a/posix/transbug.c b/posix/transbug.c
  728 index d0983b4d44d04fd2..b240177cf72326ff 100644
  729 --- a/posix/transbug.c
  730 +++ b/posix/transbug.c
  731 @@ -116,16 +116,32 @@ do_test (void)
  732    static const char lower[] = "[[:lower:]]+";
  733    static const char upper[] = "[[:upper:]]+";
  734    struct re_registers regs[4];
  735 +  int result = 0;
  736  
  737 +#define CHECK(exp) \
  738 +  if (exp) { puts (#exp); result = 1; }
  739 +
  740 +  printf ("INFO: Checking C.\n");
  741    setlocale (LC_ALL, "C");
  742  
  743    (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
  744  
  745 -  int result;
  746 -#define CHECK(exp) \
  747 -  if (exp) { puts (#exp); result = 1; }
  748 +  result |= run_test (lower, regs);
  749 +  result |= run_test (upper, &regs[2]);
  750 +  if (! result)
  751 +    {
  752 +      CHECK (regs[0].start[0] != regs[2].start[0]);
  753 +      CHECK (regs[0].end[0] != regs[2].end[0]);
  754 +      CHECK (regs[1].start[0] != regs[3].start[0]);
  755 +      CHECK (regs[1].end[0] != regs[3].end[0]);
  756 +    }
  757 +
  758 +  printf ("INFO: Checking C.UTF-8.\n");
  759 +  setlocale (LC_ALL, "C.UTF-8");
  760 +
  761 +  (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
  762  
  763 -  result = run_test (lower, regs);
  764 +  result |= run_test (lower, regs);
  765    result |= run_test (upper, &regs[2]);
  766    if (! result)
  767      {
  768 diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input
  769 index 67aac5aadafd8aeb..6ff5318032e0afb2 100644
  770 --- a/posix/tst-fnmatch.input
  771 +++ b/posix/tst-fnmatch.input
  772 @@ -472,6 +472,397 @@ C		"\\"			"[Z-\\]]"	       0
  773  C		"]"			"[Z-\\]]"	       0
  774  C		"-"			"[Z-\\]]"	       NOMATCH
  775  
  776 +# B.6 004(C)
  777 +C.UTF-8		 "!#%+,-./01234567889"	"!#%+,-./01234567889"  0
  778 +C.UTF-8		 ":;=@ABCDEFGHIJKLMNO"	":;=@ABCDEFGHIJKLMNO"  0
  779 +C.UTF-8		 "PQRSTUVWXYZ]abcdefg"	"PQRSTUVWXYZ]abcdefg"  0
  780 +C.UTF-8		 "hijklmnopqrstuvwxyz"	"hijklmnopqrstuvwxyz"  0
  781 +C.UTF-8		 "^_{}~"		"^_{}~"		       0
  782 +
  783 +# B.6 005(C)
  784 +C.UTF-8		 "\"$&'()"		"\\\"\\$\\&\\'\\(\\)"  0
  785 +C.UTF-8		 "*?[\\`|"		"\\*\\?\\[\\\\\\`\\|"  0
  786 +C.UTF-8		 "<>"			"\\<\\>"	       0
  787 +
  788 +# B.6 006(C)
  789 +C.UTF-8		 "?*["			"[?*[][?*[][?*[]"      0
  790 +C.UTF-8		 "a/b"			"?/b"		       0
  791 +
  792 +# B.6 007(C)
  793 +C.UTF-8		 "a/b"			"a?b"		       0
  794 +C.UTF-8		 "a/b"			"a/?"		       0
  795 +C.UTF-8		 "aa/b"			"?/b"		       NOMATCH
  796 +C.UTF-8		 "aa/b"			"a?b"		       NOMATCH
  797 +C.UTF-8		 "a/bb"			"a/?"		       NOMATCH
  798 +
  799 +# B.6 009(C)
  800 +C.UTF-8		 "abc"			"[abc]"		       NOMATCH
  801 +C.UTF-8		 "x"			"[abc]"		       NOMATCH
  802 +C.UTF-8		 "a"			"[abc]"		       0
  803 +C.UTF-8		 "["			"[[abc]"	       0
  804 +C.UTF-8		 "a"			"[][abc]"	       0
  805 +C.UTF-8		 "a]"			"[]a]]"		       0
  806 +
  807 +# B.6 010(C)
  808 +C.UTF-8		 "xyz"			"[!abc]"	       NOMATCH
  809 +C.UTF-8		 "x"			"[!abc]"	       0
  810 +C.UTF-8		 "a"			"[!abc]"	       NOMATCH
  811 +
  812 +# B.6 011(C)
  813 +C.UTF-8		 "]"			"[][abc]"	       0
  814 +C.UTF-8		 "abc]"			"[][abc]"	       NOMATCH
  815 +C.UTF-8		 "[]abc"		"[][]abc"	       NOMATCH
  816 +C.UTF-8		 "]"			"[!]]"		       NOMATCH
  817 +C.UTF-8		 "aa]"			"[!]a]"		       NOMATCH
  818 +C.UTF-8		 "]"			"[!a]"		       0
  819 +C.UTF-8		 "]]"			"[!a]]"		       0
  820 +
  821 +# B.6 012(C)
  822 +C.UTF-8		 "a"			"[[.a.]]"	       0
  823 +C.UTF-8		 "-"			"[[.-.]]"	       0
  824 +C.UTF-8		 "-"			"[[.-.][.].]]"	       0
  825 +C.UTF-8		 "-"			"[[.].][.-.]]"	       0
  826 +C.UTF-8		 "-"			"[[.-.][=u=]]"	       0
  827 +C.UTF-8		 "-"			"[[.-.][:alpha:]]"     0
  828 +C.UTF-8		 "a"			"[![.a.]]"	       NOMATCH
  829 +
  830 +# B.6 013(C)
  831 +C.UTF-8		 "a"			"[[.b.]]"	       NOMATCH
  832 +C.UTF-8		 "a"			"[[.b.][.c.]]"	       NOMATCH
  833 +C.UTF-8		 "a"			"[[.b.][=b=]]"	       NOMATCH
  834 +
  835 +
  836 +# B.6 015(C)
  837 +C.UTF-8		 "a"			"[[=a=]]"	       0
  838 +C.UTF-8		 "b"			"[[=a=]b]"	       0
  839 +C.UTF-8		 "b"			"[[=a=][=b=]]"	       0
  840 +C.UTF-8		 "a"			"[[=a=][=b=]]"	       0
  841 +C.UTF-8		 "a"			"[[=a=][.b.]]"	       0
  842 +C.UTF-8		 "a"			"[[=a=][:digit:]]"     0
  843 +
  844 +# B.6 016(C)
  845 +C.UTF-8		 "="			"[[=a=]b]"	       NOMATCH
  846 +C.UTF-8		 "]"			"[[=a=]b]"	       NOMATCH
  847 +C.UTF-8		 "a"			"[[=b=][=c=]]"	       NOMATCH
  848 +C.UTF-8		 "a"			"[[=b=][.].]]"	       NOMATCH
  849 +C.UTF-8		 "a"			"[[=b=][:digit:]]"     NOMATCH
  850 +
  851 +# B.6 017(C)
  852 +C.UTF-8		 "a"			"[[:alnum:]]"	       0
  853 +C.UTF-8		 "a"			"[![:alnum:]]"	       NOMATCH
  854 +C.UTF-8		 "-"			"[[:alnum:]]"	       NOMATCH
  855 +C.UTF-8		 "a]a"			"[[:alnum:]]a"	       NOMATCH
  856 +C.UTF-8		 "-"			"[[:alnum:]-]"	       0
  857 +C.UTF-8		 "aa"			"[[:alnum:]]a"	       0
  858 +C.UTF-8		 "-"			"[![:alnum:]]"	       0
  859 +C.UTF-8		 "]"			"[!][:alnum:]]"	       NOMATCH
  860 +C.UTF-8		 "["			"[![:alnum:][]"	       NOMATCH
  861 +C.UTF-8		 "a"			"[[:alnum:]]"	       0
  862 +C.UTF-8		 "b"			"[[:alnum:]]"	       0
  863 +C.UTF-8		 "c"			"[[:alnum:]]"	       0
  864 +C.UTF-8		 "d"			"[[:alnum:]]"	       0
  865 +C.UTF-8		 "e"			"[[:alnum:]]"	       0
  866 +C.UTF-8		 "f"			"[[:alnum:]]"	       0
  867 +C.UTF-8		 "g"			"[[:alnum:]]"	       0
  868 +C.UTF-8		 "h"			"[[:alnum:]]"	       0
  869 +C.UTF-8		 "i"			"[[:alnum:]]"	       0
  870 +C.UTF-8		 "j"			"[[:alnum:]]"	       0
  871 +C.UTF-8		 "k"			"[[:alnum:]]"	       0
  872 +C.UTF-8		 "l"			"[[:alnum:]]"	       0
  873 +C.UTF-8		 "m"			"[[:alnum:]]"	       0
  874 +C.UTF-8		 "n"			"[[:alnum:]]"	       0
  875 +C.UTF-8		 "o"			"[[:alnum:]]"	       0
  876 +C.UTF-8		 "p"			"[[:alnum:]]"	       0
  877 +C.UTF-8		 "q"			"[[:alnum:]]"	       0
  878 +C.UTF-8		 "r"			"[[:alnum:]]"	       0
  879 +C.UTF-8		 "s"			"[[:alnum:]]"	       0
  880 +C.UTF-8		 "t"			"[[:alnum:]]"	       0
  881 +C.UTF-8		 "u"			"[[:alnum:]]"	       0
  882 +C.UTF-8		 "v"			"[[:alnum:]]"	       0
  883 +C.UTF-8		 "w"			"[[:alnum:]]"	       0
  884 +C.UTF-8		 "x"			"[[:alnum:]]"	       0
  885 +C.UTF-8		 "y"			"[[:alnum:]]"	       0
  886 +C.UTF-8		 "z"			"[[:alnum:]]"	       0
  887 +C.UTF-8		 "A"			"[[:alnum:]]"	       0
  888 +C.UTF-8		 "B"			"[[:alnum:]]"	       0
  889 +C.UTF-8		 "C"			"[[:alnum:]]"	       0
  890 +C.UTF-8		 "D"			"[[:alnum:]]"	       0
  891 +C.UTF-8		 "E"			"[[:alnum:]]"	       0
  892 +C.UTF-8		 "F"			"[[:alnum:]]"	       0
  893 +C.UTF-8		 "G"			"[[:alnum:]]"	       0
  894 +C.UTF-8		 "H"			"[[:alnum:]]"	       0
  895 +C.UTF-8		 "I"			"[[:alnum:]]"	       0
  896 +C.UTF-8		 "J"			"[[:alnum:]]"	       0
  897 +C.UTF-8		 "K"			"[[:alnum:]]"	       0
  898 +C.UTF-8		 "L"			"[[:alnum:]]"	       0
  899 +C.UTF-8		 "M"			"[[:alnum:]]"	       0
  900 +C.UTF-8		 "N"			"[[:alnum:]]"	       0
  901 +C.UTF-8		 "O"			"[[:alnum:]]"	       0
  902 +C.UTF-8		 "P"			"[[:alnum:]]"	       0
  903 +C.UTF-8		 "Q"			"[[:alnum:]]"	       0
  904 +C.UTF-8		 "R"			"[[:alnum:]]"	       0
  905 +C.UTF-8		 "S"			"[[:alnum:]]"	       0
  906 +C.UTF-8		 "T"			"[[:alnum:]]"	       0
  907 +C.UTF-8		 "U"			"[[:alnum:]]"	       0
  908 +C.UTF-8		 "V"			"[[:alnum:]]"	       0
  909 +C.UTF-8		 "W"			"[[:alnum:]]"	       0
  910 +C.UTF-8		 "X"			"[[:alnum:]]"	       0
  911 +C.UTF-8		 "Y"			"[[:alnum:]]"	       0
  912 +C.UTF-8		 "Z"			"[[:alnum:]]"	       0
  913 +C.UTF-8		 "0"			"[[:alnum:]]"	       0
  914 +C.UTF-8		 "1"			"[[:alnum:]]"	       0
  915 +C.UTF-8		 "2"			"[[:alnum:]]"	       0
  916 +C.UTF-8		 "3"			"[[:alnum:]]"	       0
  917 +C.UTF-8		 "4"			"[[:alnum:]]"	       0
  918 +C.UTF-8		 "5"			"[[:alnum:]]"	       0
  919 +C.UTF-8		 "6"			"[[:alnum:]]"	       0
  920 +C.UTF-8		 "7"			"[[:alnum:]]"	       0
  921 +C.UTF-8		 "8"			"[[:alnum:]]"	       0
  922 +C.UTF-8		 "9"			"[[:alnum:]]"	       0
  923 +C.UTF-8		 "!"			"[[:alnum:]]"	       NOMATCH
  924 +C.UTF-8		 "#"			"[[:alnum:]]"	       NOMATCH
  925 +C.UTF-8		 "%"			"[[:alnum:]]"	       NOMATCH
  926 +C.UTF-8		 "+"			"[[:alnum:]]"	       NOMATCH
  927 +C.UTF-8		 ","			"[[:alnum:]]"	       NOMATCH
  928 +C.UTF-8		 "-"			"[[:alnum:]]"	       NOMATCH
  929 +C.UTF-8		 "."			"[[:alnum:]]"	       NOMATCH
  930 +C.UTF-8		 "/"			"[[:alnum:]]"	       NOMATCH
  931 +C.UTF-8		 ":"			"[[:alnum:]]"	       NOMATCH
  932 +C.UTF-8		 ";"			"[[:alnum:]]"	       NOMATCH
  933 +C.UTF-8		 "="			"[[:alnum:]]"	       NOMATCH
  934 +C.UTF-8		 "@"			"[[:alnum:]]"	       NOMATCH
  935 +C.UTF-8		 "["			"[[:alnum:]]"	       NOMATCH
  936 +C.UTF-8		 "\\"			"[[:alnum:]]"	       NOMATCH
  937 +C.UTF-8		 "]"			"[[:alnum:]]"	       NOMATCH
  938 +C.UTF-8		 "^"			"[[:alnum:]]"	       NOMATCH
  939 +C.UTF-8		 "_"			"[[:alnum:]]"	       NOMATCH
  940 +C.UTF-8		 "{"			"[[:alnum:]]"	       NOMATCH
  941 +C.UTF-8		 "}"			"[[:alnum:]]"	       NOMATCH
  942 +C.UTF-8		 "~"			"[[:alnum:]]"	       NOMATCH
  943 +C.UTF-8		 "\""			"[[:alnum:]]"	       NOMATCH
  944 +C.UTF-8		 "$"			"[[:alnum:]]"	       NOMATCH
  945 +C.UTF-8		 "&"			"[[:alnum:]]"	       NOMATCH
  946 +C.UTF-8		 "'"			"[[:alnum:]]"	       NOMATCH
  947 +C.UTF-8		 "("			"[[:alnum:]]"	       NOMATCH
  948 +C.UTF-8		 ")"			"[[:alnum:]]"	       NOMATCH
  949 +C.UTF-8		 "*"			"[[:alnum:]]"	       NOMATCH
  950 +C.UTF-8		 "?"			"[[:alnum:]]"	       NOMATCH
  951 +C.UTF-8		 "`"			"[[:alnum:]]"	       NOMATCH
  952 +C.UTF-8		 "|"			"[[:alnum:]]"	       NOMATCH
  953 +C.UTF-8		 "<"			"[[:alnum:]]"	       NOMATCH
  954 +C.UTF-8		 ">"			"[[:alnum:]]"	       NOMATCH
  955 +C.UTF-8		 "\t"			"[[:cntrl:]]"	       0
  956 +C.UTF-8		 "t"			"[[:cntrl:]]"	       NOMATCH
  957 +C.UTF-8		 "t"			"[[:lower:]]"	       0
  958 +C.UTF-8		 "\t"			"[[:lower:]]"	       NOMATCH
  959 +C.UTF-8		 "T"			"[[:lower:]]"	       NOMATCH
  960 +C.UTF-8		 "\t"			"[[:space:]]"	       0
  961 +C.UTF-8		 "t"			"[[:space:]]"	       NOMATCH
  962 +C.UTF-8		 "t"			"[[:alpha:]]"	       0
  963 +C.UTF-8		 "\t"			"[[:alpha:]]"	       NOMATCH
  964 +C.UTF-8		 "0"			"[[:digit:]]"	       0
  965 +C.UTF-8		 "\t"			"[[:digit:]]"	       NOMATCH
  966 +C.UTF-8		 "t"			"[[:digit:]]"	       NOMATCH
  967 +C.UTF-8		 "\t"			"[[:print:]]"	       NOMATCH
  968 +C.UTF-8		 "t"			"[[:print:]]"	       0
  969 +C.UTF-8		 "T"			"[[:upper:]]"	       0
  970 +C.UTF-8		 "\t"			"[[:upper:]]"	       NOMATCH
  971 +C.UTF-8		 "t"			"[[:upper:]]"	       NOMATCH
  972 +C.UTF-8		 "\t"			"[[:blank:]]"	       0
  973 +C.UTF-8		 "t"			"[[:blank:]]"	       NOMATCH
  974 +C.UTF-8		 "\t"			"[[:graph:]]"	       NOMATCH
  975 +C.UTF-8		 "t"			"[[:graph:]]"	       0
  976 +C.UTF-8		 "."			"[[:punct:]]"	       0
  977 +C.UTF-8		 "t"			"[[:punct:]]"	       NOMATCH
  978 +C.UTF-8		 "\t"			"[[:punct:]]"	       NOMATCH
  979 +C.UTF-8		 "0"			"[[:xdigit:]]"	       0
  980 +C.UTF-8		 "\t"			"[[:xdigit:]]"	       NOMATCH
  981 +C.UTF-8		 "a"			"[[:xdigit:]]"	       0
  982 +C.UTF-8		 "A"			"[[:xdigit:]]"	       0
  983 +C.UTF-8		 "t"			"[[:xdigit:]]"	       NOMATCH
  984 +C.UTF-8		 "a"			"[[alpha]]"	       NOMATCH
  985 +C.UTF-8		 "a"			"[[alpha:]]"	       NOMATCH
  986 +C.UTF-8		 "a]"			"[[alpha]]"	       0
  987 +C.UTF-8		 "a]"			"[[alpha:]]"	       0
  988 +C.UTF-8		 "a"			"[[:alpha:][.b.]]"     0
  989 +C.UTF-8		 "a"			"[[:alpha:][=b=]]"     0
  990 +C.UTF-8		 "a"			"[[:alpha:][:digit:]]" 0
  991 +C.UTF-8		 "a"			"[[:digit:][:alpha:]]" 0
  992 +
  993 +# B.6 018(C)
  994 +C.UTF-8		 "a"			"[a-c]"		       0
  995 +C.UTF-8		 "b"			"[a-c]"		       0
  996 +C.UTF-8		 "c"			"[a-c]"		       0
  997 +C.UTF-8		 "a"			"[b-c]"		       NOMATCH
  998 +C.UTF-8		 "d"			"[b-c]"		       NOMATCH
  999 +C.UTF-8		 "B"			"[a-c]"		       NOMATCH
 1000 +C.UTF-8		 "b"			"[A-C]"		       NOMATCH
 1001 +C.UTF-8		 ""			"[a-c]"		       NOMATCH
 1002 +C.UTF-8		 "as"			"[a-ca-z]"	       NOMATCH
 1003 +C.UTF-8		 "a"			"[[.a.]-c]"	       0
 1004 +C.UTF-8		 "a"			"[a-[.c.]]"	       0
 1005 +C.UTF-8		 "a"			"[[.a.]-[.c.]]"	       0
 1006 +C.UTF-8		 "b"			"[[.a.]-c]"	       0
 1007 +C.UTF-8		 "b"			"[a-[.c.]]"	       0
 1008 +C.UTF-8		 "b"			"[[.a.]-[.c.]]"	       0
 1009 +C.UTF-8		 "c"			"[[.a.]-c]"	       0
 1010 +C.UTF-8		 "c"			"[a-[.c.]]"	       0
 1011 +C.UTF-8		 "c"			"[[.a.]-[.c.]]"	       0
 1012 +C.UTF-8		 "d"			"[[.a.]-c]"	       NOMATCH
 1013 +C.UTF-8		 "d"			"[a-[.c.]]"	       NOMATCH
 1014 +C.UTF-8		 "d"			"[[.a.]-[.c.]]"	       NOMATCH
 1015 +
 1016 +# B.6 019(C)
 1017 +C.UTF-8		 "a"			"[c-a]"		       NOMATCH
 1018 +C.UTF-8		 "a"			"[[.c.]-a]"	       NOMATCH
 1019 +C.UTF-8		 "a"			"[c-[.a.]]"	       NOMATCH
 1020 +C.UTF-8		 "a"			"[[.c.]-[.a.]]"	       NOMATCH
 1021 +C.UTF-8		 "c"			"[c-a]"		       NOMATCH
 1022 +C.UTF-8		 "c"			"[[.c.]-a]"	       NOMATCH
 1023 +C.UTF-8		 "c"			"[c-[.a.]]"	       NOMATCH
 1024 +C.UTF-8		 "c"			"[[.c.]-[.a.]]"	       NOMATCH
 1025 +
 1026 +# B.6 020(C)
 1027 +C.UTF-8		 "a"			"[a-c0-9]"	       0
 1028 +C.UTF-8		 "d"			"[a-c0-9]"	       NOMATCH
 1029 +C.UTF-8		 "B"			"[a-c0-9]"	       NOMATCH
 1030 +
 1031 +# B.6 021(C)
 1032 +C.UTF-8		 "-"			"[-a]"		       0
 1033 +C.UTF-8		 "a"			"[-b]"		       NOMATCH
 1034 +C.UTF-8		 "-"			"[!-a]"		       NOMATCH
 1035 +C.UTF-8		 "a"			"[!-b]"		       0
 1036 +C.UTF-8		 "-"			"[a-c-0-9]"	       0
 1037 +C.UTF-8		 "b"			"[a-c-0-9]"	       0
 1038 +C.UTF-8		 "a:"			"a[0-9-a]"	       NOMATCH
 1039 +C.UTF-8		 "a:"			"a[09-a]"	       0
 1040 +
 1041 +# B.6 024(C)
 1042 +C.UTF-8		 ""			"*"		       0
 1043 +C.UTF-8		 "asd/sdf"		"*"		       0
 1044 +
 1045 +# B.6 025(C)
 1046 +C.UTF-8		 "as"			"[a-c][a-z]"	       0
 1047 +C.UTF-8		 "as"			"??"		       0
 1048 +
 1049 +# B.6 026(C)
 1050 +C.UTF-8		 "asd/sdf"		"as*df"		       0
 1051 +C.UTF-8		 "asd/sdf"		"as*"		       0
 1052 +C.UTF-8		 "asd/sdf"		"*df"		       0
 1053 +C.UTF-8		 "asd/sdf"		"as*dg"		       NOMATCH
 1054 +C.UTF-8		 "asdf"			"as*df"		       0
 1055 +C.UTF-8		 "asdf"			"as*df?"	       NOMATCH
 1056 +C.UTF-8		 "asdf"			"as*??"		       0
 1057 +C.UTF-8		 "asdf"			"a*???"		       0
 1058 +C.UTF-8		 "asdf"			"*????"		       0
 1059 +C.UTF-8		 "asdf"			"????*"		       0
 1060 +C.UTF-8		 "asdf"			"??*?"		       0
 1061 +
 1062 +# B.6 027(C)
 1063 +C.UTF-8		 "/"			"/"		       0
 1064 +C.UTF-8		 "/"			"/*"		       0
 1065 +C.UTF-8		 "/"			"*/"		       0
 1066 +C.UTF-8		 "/"			"/?"		       NOMATCH
 1067 +C.UTF-8		 "/"			"?/"		       NOMATCH
 1068 +C.UTF-8		 "/"			"?"		       0
 1069 +C.UTF-8		 "."			"?"		       0
 1070 +C.UTF-8		 "/."			"??"		       0
 1071 +C.UTF-8		 "/"			"[!a-c]"	       0
 1072 +C.UTF-8		 "."			"[!a-c]"	       0
 1073 +
 1074 +# B.6 029(C)
 1075 +C.UTF-8		 "/"			"/"		       0       PATHNAME
 1076 +C.UTF-8		 "//"			"//"		       0       PATHNAME
 1077 +C.UTF-8		 "/.a"			"/*"		       0       PATHNAME
 1078 +C.UTF-8		 "/.a"			"/?a"		       0       PATHNAME
 1079 +C.UTF-8		 "/.a"			"/[!a-z]a"	       0       PATHNAME
 1080 +C.UTF-8		 "/.a/.b"		"/*/?b"		       0       PATHNAME
 1081 +
 1082 +# B.6 030(C)
 1083 +C.UTF-8		 "/"			"?"		       NOMATCH PATHNAME
 1084 +C.UTF-8		 "/"			"*"		       NOMATCH PATHNAME
 1085 +C.UTF-8		 "a/b"			"a?b"		       NOMATCH PATHNAME
 1086 +C.UTF-8		 "/.a/.b"		"/*b"		       NOMATCH PATHNAME
 1087 +
 1088 +# B.6 031(C)
 1089 +C.UTF-8		 "/$"			"\\/\\$"	       0
 1090 +C.UTF-8		 "/["			"\\/\\["	       0
 1091 +C.UTF-8		 "/["			"\\/["		       0
 1092 +C.UTF-8		 "/[]"			"\\/\\[]"	       0
 1093 +
 1094 +# B.6 032(C)
 1095 +C.UTF-8		 "/$"			"\\/\\$"	       NOMATCH NOESCAPE
 1096 +C.UTF-8		 "/\\$"			"\\/\\$"	       NOMATCH NOESCAPE
 1097 +C.UTF-8		 "\\/\\$"		"\\/\\$"	       0       NOESCAPE
 1098 +
 1099 +# B.6 033(C)
 1100 +C.UTF-8		 ".asd"			".*"		       0       PERIOD
 1101 +C.UTF-8		 "/.asd"		"*"		       0       PERIOD
 1102 +C.UTF-8		 "/as/.df"		"*/?*f"		       0       PERIOD
 1103 +C.UTF-8		 "..asd"		".[!a-z]*"	       0       PERIOD
 1104 +
 1105 +# B.6 034(C)
 1106 +C.UTF-8		 ".asd"			"*"		       NOMATCH PERIOD
 1107 +C.UTF-8		 ".asd"			"?asd"		       NOMATCH PERIOD
 1108 +C.UTF-8		 ".asd"			"[!a-z]*"	       NOMATCH PERIOD
 1109 +
 1110 +# B.6 035(C)
 1111 +C.UTF-8		 "/."			"/."		       0       PATHNAME|PERIOD
 1112 +C.UTF-8		 "/.a./.b."		"/.*/.*"	       0       PATHNAME|PERIOD
 1113 +C.UTF-8		 "/.a./.b."		"/.??/.??"	       0       PATHNAME|PERIOD
 1114 +
 1115 +# B.6 036(C)
 1116 +C.UTF-8		 "/."			"*"		       NOMATCH PATHNAME|PERIOD
 1117 +C.UTF-8		 "/."			"/*"		       NOMATCH PATHNAME|PERIOD
 1118 +C.UTF-8		 "/."			"/?"		       NOMATCH PATHNAME|PERIOD
 1119 +C.UTF-8		 "/."			"/[!a-z]"	       NOMATCH PATHNAME|PERIOD
 1120 +C.UTF-8		 "/a./.b."		"/*/*"		       NOMATCH PATHNAME|PERIOD
 1121 +C.UTF-8		 "/a./.b."		"/??/???"	       NOMATCH PATHNAME|PERIOD
 1122 +
 1123 +# Some home-grown tests.
 1124 +C.UTF-8		"foobar"		"foo*[abc]z"	       NOMATCH
 1125 +C.UTF-8		"foobaz"		"foo*[abc][xyz]"       0
 1126 +C.UTF-8		"foobaz"		"foo?*[abc][xyz]"      0
 1127 +C.UTF-8		"foobaz"		"foo?*[abc][x/yz]"     0
 1128 +C.UTF-8		"foobaz"		"foo?*[abc]/[xyz]"     NOMATCH PATHNAME
 1129 +C.UTF-8		"a"			"a/"                   NOMATCH PATHNAME
 1130 +C.UTF-8		"a/"			"a"		       NOMATCH PATHNAME
 1131 +C.UTF-8		"//a"			"/a"		       NOMATCH PATHNAME
 1132 +C.UTF-8		"/a"			"//a"		       NOMATCH PATHNAME
 1133 +C.UTF-8		"az"			"[a-]z"		       0
 1134 +C.UTF-8		"bz"			"[ab-]z"	       0
 1135 +C.UTF-8		"cz"			"[ab-]z"	       NOMATCH
 1136 +C.UTF-8		"-z"			"[ab-]z"	       0
 1137 +C.UTF-8		"az"			"[-a]z"		       0
 1138 +C.UTF-8		"bz"			"[-ab]z"	       0
 1139 +C.UTF-8		"cz"			"[-ab]z"	       NOMATCH
 1140 +C.UTF-8		"-z"			"[-ab]z"	       0
 1141 +C.UTF-8		"\\"			"[\\\\-a]"	       0
 1142 +C.UTF-8		"_"			"[\\\\-a]"	       0
 1143 +C.UTF-8		"a"			"[\\\\-a]"	       0
 1144 +C.UTF-8		"-"			"[\\\\-a]"	       NOMATCH
 1145 +C.UTF-8		"\\"			"[\\]-a]"	       NOMATCH
 1146 +C.UTF-8		"_"			"[\\]-a]"	       0
 1147 +C.UTF-8		"a"			"[\\]-a]"	       0
 1148 +C.UTF-8		"]"			"[\\]-a]"	       0
 1149 +C.UTF-8		"-"			"[\\]-a]"	       NOMATCH
 1150 +C.UTF-8		"\\"			"[!\\\\-a]"	       NOMATCH
 1151 +C.UTF-8		"_"			"[!\\\\-a]"	       NOMATCH
 1152 +C.UTF-8		"a"			"[!\\\\-a]"	       NOMATCH
 1153 +C.UTF-8		"-"			"[!\\\\-a]"	       0
 1154 +C.UTF-8		"!"			"[\\!-]"	       0
 1155 +C.UTF-8		"-"			"[\\!-]"	       0
 1156 +C.UTF-8		"\\"			"[\\!-]"	       NOMATCH
 1157 +C.UTF-8		"Z"			"[Z-\\\\]"	       0
 1158 +C.UTF-8		"["			"[Z-\\\\]"	       0
 1159 +C.UTF-8		"\\"			"[Z-\\\\]"	       0
 1160 +C.UTF-8		"-"			"[Z-\\\\]"	       NOMATCH
 1161 +C.UTF-8		"Z"			"[Z-\\]]"	       0
 1162 +C.UTF-8		"["			"[Z-\\]]"	       0
 1163 +C.UTF-8		"\\"			"[Z-\\]]"	       0
 1164 +C.UTF-8		"]"			"[Z-\\]]"	       0
 1165 +C.UTF-8		"-"			"[Z-\\]]"	       NOMATCH
 1166 +
 1167  # Following are tests outside the scope of IEEE 2003.2 since they are using
 1168  # locales other than the C locale.  The main focus of the tests is on the
 1169  # handling of ranges and the recognition of character (vs bytes).
 1170 @@ -677,7 +1068,6 @@ C		 "x/y"			"*"		       0       PATHNAME|LEADING_DIR
 1171  C		 "x/y/z"		"*"		       0       PATHNAME|LEADING_DIR
 1172  C		 "x"			"*x"		       0       PATHNAME|LEADING_DIR
 1173  
 1174 -en_US.UTF-8	 "\366.csv"		"*.csv"                0
 1175  C		 "x/y"			"*x"		       0       PATHNAME|LEADING_DIR
 1176  C		 "x/y/z"		"*x"		       0       PATHNAME|LEADING_DIR
 1177  C		 "x"			"x*"		       0       PATHNAME|LEADING_DIR
 1178 @@ -693,6 +1083,33 @@ C		 "x"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1179  C		 "x/y"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1180  C		 "x/y/z"		"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1181  
 1182 +# Duplicate the "Test of GNU extensions." tests but for C.UTF-8.
 1183 +C.UTF-8		 "x"			"x"		       0       PATHNAME|LEADING_DIR
 1184 +C.UTF-8		 "x/y"			"x"		       0       PATHNAME|LEADING_DIR
 1185 +C.UTF-8		 "x/y/z"		"x"		       0       PATHNAME|LEADING_DIR
 1186 +C.UTF-8		 "x"			"*"		       0       PATHNAME|LEADING_DIR
 1187 +C.UTF-8		 "x/y"			"*"		       0       PATHNAME|LEADING_DIR
 1188 +C.UTF-8		 "x/y/z"		"*"		       0       PATHNAME|LEADING_DIR
 1189 +C.UTF-8		 "x"			"*x"		       0       PATHNAME|LEADING_DIR
 1190 +
 1191 +C.UTF-8		 "x/y"			"*x"		       0       PATHNAME|LEADING_DIR
 1192 +C.UTF-8		 "x/y/z"		"*x"		       0       PATHNAME|LEADING_DIR
 1193 +C.UTF-8		 "x"			"x*"		       0       PATHNAME|LEADING_DIR
 1194 +C.UTF-8		 "x/y"			"x*"		       0       PATHNAME|LEADING_DIR
 1195 +C.UTF-8		 "x/y/z"		"x*"		       0       PATHNAME|LEADING_DIR
 1196 +C.UTF-8		 "x"			"a"		       NOMATCH PATHNAME|LEADING_DIR
 1197 +C.UTF-8		 "x/y"			"a"		       NOMATCH PATHNAME|LEADING_DIR
 1198 +C.UTF-8		 "x/y/z"		"a"		       NOMATCH PATHNAME|LEADING_DIR
 1199 +C.UTF-8		 "x"			"x/y"		       NOMATCH PATHNAME|LEADING_DIR
 1200 +C.UTF-8		 "x/y"			"x/y"		       0       PATHNAME|LEADING_DIR
 1201 +C.UTF-8		 "x/y/z"		"x/y"		       0       PATHNAME|LEADING_DIR
 1202 +C.UTF-8		 "x"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1203 +C.UTF-8		 "x/y"			"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1204 +C.UTF-8		 "x/y/z"		"x?y"		       NOMATCH PATHNAME|LEADING_DIR
 1205 +
 1206 +# Bug 14185
 1207 +en_US.UTF-8	 "\366.csv"		"*.csv"                0
 1208 +
 1209  # ksh style matching.
 1210  C		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
 1211  C		"/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0     PATHNAME|EXTMATCH
 1212 @@ -822,3 +1239,133 @@ C		""			""		       0
 1213  C		""			""		       0       EXTMATCH
 1214  C		""			"*([abc])"	       0       EXTMATCH
 1215  C		""			"?([abc])"	       0       EXTMATCH
 1216 +
 1217 +# Duplicate the "ksh style matching." tests for C.UTF-8.
 1218 +C.UTF-8		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
 1219 +C.UTF-8		"/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0     PATHNAME|EXTMATCH
 1220 +C.UTF-8		"12"			"[1-9]*([0-9])"        0       EXTMATCH
 1221 +C.UTF-8		"12abc"			"[1-9]*([0-9])"        NOMATCH EXTMATCH
 1222 +C.UTF-8		"1"			"[1-9]*([0-9])"	       0       EXTMATCH
 1223 +C.UTF-8		"07"			"+([0-7])"	       0       EXTMATCH
 1224 +C.UTF-8		"0377"			"+([0-7])"	       0       EXTMATCH
 1225 +C.UTF-8		"09"			"+([0-7])"	       NOMATCH EXTMATCH
 1226 +C.UTF-8		"paragraph"		"para@(chute|graph)"   0       EXTMATCH
 1227 +C.UTF-8		"paramour"		"para@(chute|graph)"   NOMATCH EXTMATCH
 1228 +C.UTF-8		"para991"		"para?([345]|99)1"     0       EXTMATCH
 1229 +C.UTF-8		"para381"		"para?([345]|99)1"     NOMATCH EXTMATCH
 1230 +C.UTF-8		"paragraph"		"para*([0-9])"	       NOMATCH EXTMATCH
 1231 +C.UTF-8		"para"			"para*([0-9])"	       0       EXTMATCH
 1232 +C.UTF-8		"para13829383746592"	"para*([0-9])"	       0       EXTMATCH
 1233 +C.UTF-8		"paragraph"		"para+([0-9])"	       NOMATCH EXTMATCH
 1234 +C.UTF-8		"para"			"para+([0-9])"	       NOMATCH EXTMATCH
 1235 +C.UTF-8		"para987346523"		"para+([0-9])"	       0       EXTMATCH
 1236 +C.UTF-8		"paragraph"		"para!(*.[0-9])"       0       EXTMATCH
 1237 +C.UTF-8		"para.38"		"para!(*.[0-9])"       0       EXTMATCH
 1238 +C.UTF-8		"para.graph"		"para!(*.[0-9])"       0       EXTMATCH
 1239 +C.UTF-8		"para39"		"para!(*.[0-9])"       0       EXTMATCH
 1240 +C.UTF-8		""			"*(0|1|3|5|7|9)"       0       EXTMATCH
 1241 +C.UTF-8		"137577991"		"*(0|1|3|5|7|9)"       0       EXTMATCH
 1242 +C.UTF-8		"2468"			"*(0|1|3|5|7|9)"       NOMATCH EXTMATCH
 1243 +C.UTF-8		"1358"			"*(0|1|3|5|7|9)"       NOMATCH EXTMATCH
 1244 +C.UTF-8		"file.c"		"*.c?(c)"	       0       EXTMATCH
 1245 +C.UTF-8		"file.C"		"*.c?(c)"	       NOMATCH EXTMATCH
 1246 +C.UTF-8		"file.cc"		"*.c?(c)"	       0       EXTMATCH
 1247 +C.UTF-8		"file.ccc"		"*.c?(c)"	       NOMATCH EXTMATCH
 1248 +C.UTF-8		"parse.y"		"!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
 1249 +C.UTF-8		"shell.c"		"!(*.c|*.h|Makefile.in|config*|README)" NOMATCH EXTMATCH
 1250 +C.UTF-8		"Makefile"		"!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
 1251 +C.UTF-8		"VMS.FILE;1"		"*\;[1-9]*([0-9])"     0       EXTMATCH
 1252 +C.UTF-8		"VMS.FILE;0"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
 1253 +C.UTF-8		"VMS.FILE;"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
 1254 +C.UTF-8		"VMS.FILE;139"		"*\;[1-9]*([0-9])"     0       EXTMATCH
 1255 +C.UTF-8		"VMS.FILE;1N"		"*\;[1-9]*([0-9])"     NOMATCH EXTMATCH
 1256 +C.UTF-8		"abcfefg"		"ab**(e|f)"	       0       EXTMATCH
 1257 +C.UTF-8		"abcfefg"		"ab**(e|f)g"	       0       EXTMATCH
 1258 +C.UTF-8		"ab"			"ab*+(e|f)"	       NOMATCH EXTMATCH
 1259 +C.UTF-8		"abef"			"ab***ef"	       0       EXTMATCH
 1260 +C.UTF-8		"abef"			"ab**"		       0       EXTMATCH
 1261 +C.UTF-8		"fofo"			"*(f*(o))"	       0       EXTMATCH
 1262 +C.UTF-8		"ffo"			"*(f*(o))"	       0       EXTMATCH
 1263 +C.UTF-8		"foooofo"		"*(f*(o))"	       0       EXTMATCH
 1264 +C.UTF-8		"foooofof"		"*(f*(o))"	       0       EXTMATCH
 1265 +C.UTF-8		"fooofoofofooo"		"*(f*(o))"	       0       EXTMATCH
 1266 +C.UTF-8		"foooofof"		"*(f+(o))"	       NOMATCH EXTMATCH
 1267 +C.UTF-8		"xfoooofof"		"*(f*(o))"	       NOMATCH EXTMATCH
 1268 +C.UTF-8		"foooofofx"		"*(f*(o))"	       NOMATCH EXTMATCH
 1269 +C.UTF-8		"ofxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
 1270 +C.UTF-8		"ofooofoofofooo"	"*(f*(o))"	       NOMATCH EXTMATCH
 1271 +C.UTF-8		"foooxfooxfoxfooox"	"*(f*(o)x)"	       0       EXTMATCH
 1272 +C.UTF-8		"foooxfooxofoxfooox"	"*(f*(o)x)"	       NOMATCH EXTMATCH
 1273 +C.UTF-8		"foooxfooxfxfooox"	"*(f*(o)x)"	       0       EXTMATCH
 1274 +C.UTF-8		"ofxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
 1275 +C.UTF-8		"ofoooxoofxo"		"*(*(of*(o)x)o)"       0       EXTMATCH
 1276 +C.UTF-8		"ofoooxoofxoofoooxoofxo" "*(*(of*(o)x)o)"      0       EXTMATCH
 1277 +C.UTF-8		"ofoooxoofxoofoooxoofxoo" "*(*(of*(o)x)o)"     0       EXTMATCH
 1278 +C.UTF-8		"ofoooxoofxoofoooxoofxofo" "*(*(of*(o)x)o)"    NOMATCH EXTMATCH
 1279 +C.UTF-8		"ofoooxoofxoofoooxoofxooofxofxo" "*(*(of*(o)x)o)" 0    EXTMATCH
 1280 +C.UTF-8		"aac"			"*(@(a))a@(c)"	       0       EXTMATCH
 1281 +C.UTF-8		"ac"			"*(@(a))a@(c)"	       0       EXTMATCH
 1282 +C.UTF-8		"c"			"*(@(a))a@(c)"	       NOMATCH EXTMATCH
 1283 +C.UTF-8		"aaac"			"*(@(a))a@(c)"	       0       EXTMATCH
 1284 +C.UTF-8		"baaac"			"*(@(a))a@(c)"	       NOMATCH EXTMATCH
 1285 +C.UTF-8		"abcd"			"?@(a|b)*@(c)d"	       0       EXTMATCH
 1286 +C.UTF-8		"abcd"			"@(ab|a*@(b))*(c)d"    0       EXTMATCH
 1287 +C.UTF-8		"acd"			"@(ab|a*(b))*(c)d"     0       EXTMATCH
 1288 +C.UTF-8		"abbcd"			"@(ab|a*(b))*(c)d"     0       EXTMATCH
 1289 +C.UTF-8		"effgz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
 1290 +C.UTF-8		"efgz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
 1291 +C.UTF-8		"egz"			"@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
 1292 +C.UTF-8		"egzefffgzbcdij"	"*(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
 1293 +C.UTF-8		"egz"			"@(b+(c)d|e+(f)g?|?(h)i@(j|k))" NOMATCH EXTMATCH
 1294 +C.UTF-8		"ofoofo"		"*(of+(o))"	       0       EXTMATCH
 1295 +C.UTF-8		"oxfoxoxfox"		"*(oxf+(ox))"	       0       EXTMATCH
 1296 +C.UTF-8		"oxfoxfox"		"*(oxf+(ox))"	       NOMATCH EXTMATCH
 1297 +C.UTF-8		"ofoofo"		"*(of+(o)|f)"	       0       EXTMATCH
 1298 +C.UTF-8		"foofoofo"		"@(foo|f|fo)*(f|of+(o))" 0     EXTMATCH
 1299 +C.UTF-8		"oofooofo"		"*(of|oof+(o))"	       0       EXTMATCH
 1300 +C.UTF-8		"fffooofoooooffoofffooofff" "*(*(f)*(o))"      0       EXTMATCH
 1301 +C.UTF-8		"fofoofoofofoo"		"*(fo|foo)"	       0       EXTMATCH
 1302 +C.UTF-8		"foo"			"!(x)"		       0       EXTMATCH
 1303 +C.UTF-8		"foo"			"!(x)*"		       0       EXTMATCH
 1304 +C.UTF-8		"foo"			"!(foo)"	       NOMATCH EXTMATCH
 1305 +C.UTF-8		"foo"			"!(foo)*"	       0       EXTMATCH
 1306 +C.UTF-8		"foobar"		"!(foo)"	       0       EXTMATCH
 1307 +C.UTF-8		"foobar"		"!(foo)*"	       0       EXTMATCH
 1308 +C.UTF-8		"moo.cow"		"!(*.*).!(*.*)"	       0       EXTMATCH
 1309 +C.UTF-8		"mad.moo.cow"		"!(*.*).!(*.*)"	       NOMATCH EXTMATCH
 1310 +C.UTF-8		"mucca.pazza"		"mu!(*(c))?.pa!(*(z))?" NOMATCH EXTMATCH
 1311 +C.UTF-8		"fff"			"!(f)"		       0       EXTMATCH
 1312 +C.UTF-8		"fff"			"*(!(f))"	       0       EXTMATCH
 1313 +C.UTF-8		"fff"			"+(!(f))"	       0       EXTMATCH
 1314 +C.UTF-8		"ooo"			"!(f)"		       0       EXTMATCH
 1315 +C.UTF-8		"ooo"			"*(!(f))"	       0       EXTMATCH
 1316 +C.UTF-8		"ooo"			"+(!(f))"	       0       EXTMATCH
 1317 +C.UTF-8		"foo"			"!(f)"		       0       EXTMATCH
 1318 +C.UTF-8		"foo"			"*(!(f))"	       0       EXTMATCH
 1319 +C.UTF-8		"foo"			"+(!(f))"	       0       EXTMATCH
 1320 +C.UTF-8		"f"			"!(f)"		       NOMATCH EXTMATCH
 1321 +C.UTF-8		"f"			"*(!(f))"	       NOMATCH EXTMATCH
 1322 +C.UTF-8		"f"			"+(!(f))"	       NOMATCH EXTMATCH
 1323 +C.UTF-8		"foot"			"@(!(z*)|*x)"	       0       EXTMATCH
 1324 +C.UTF-8		"zoot"			"@(!(z*)|*x)"	       NOMATCH EXTMATCH
 1325 +C.UTF-8		"foox"			"@(!(z*)|*x)"	       0       EXTMATCH
 1326 +C.UTF-8		"zoox"			"@(!(z*)|*x)"	       0       EXTMATCH
 1327 +C.UTF-8		"foo"			"*(!(foo))"	       0       EXTMATCH
 1328 +C.UTF-8		"foob"			"!(foo)b*"	       NOMATCH EXTMATCH
 1329 +C.UTF-8		"foobb"			"!(foo)b*"	       0       EXTMATCH
 1330 +C.UTF-8		"["			"*([a[])"	       0       EXTMATCH
 1331 +C.UTF-8		"]"			"*([]a[])"	       0       EXTMATCH
 1332 +C.UTF-8		"a"			"*([]a[])"	       0       EXTMATCH
 1333 +C.UTF-8		"b"			"*([!]a[])"	       0       EXTMATCH
 1334 +C.UTF-8		"["			"*([!]a[]|[[])"	       0       EXTMATCH
 1335 +C.UTF-8		"]"			"*([!]a[]|[]])"	       0       EXTMATCH
 1336 +C.UTF-8		"["			"!([!]a[])"	       0       EXTMATCH
 1337 +C.UTF-8		"]"			"!([!]a[])"	       0       EXTMATCH
 1338 +C.UTF-8		")"			"*([)])"	       0       EXTMATCH
 1339 +C.UTF-8		"*"			"*([*(])"	       0       EXTMATCH
 1340 +C.UTF-8		"abcd"			"*!(|a)cd"	       0       EXTMATCH
 1341 +C.UTF-8		"ab/.a"			"+([abc])/*"	       NOMATCH EXTMATCH|PATHNAME|PERIOD
 1342 +C.UTF-8		""			""		       0
 1343 +C.UTF-8		""			""		       0       EXTMATCH
 1344 +C.UTF-8		""			"*([abc])"	       0       EXTMATCH
 1345 +C.UTF-8		""			"?([abc])"	       0       EXTMATCH
 1346 diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
 1347 index 84195fcd2ec153b8..da3f97799e37c607 100644
 1348 --- a/posix/tst-regcomp-truncated.c
 1349 +++ b/posix/tst-regcomp-truncated.c
 1350 @@ -37,6 +37,7 @@
 1351  static const char locales[][17] =
 1352    {
 1353      "C",
 1354 +    "C.UTF-8",
 1355      "en_US.UTF-8",
 1356      "de_DE.ISO-8859-1",
 1357    };
 1358 diff --git a/posix/tst-regex.c b/posix/tst-regex.c
 1359 index e7c2b05e8666a16e..531128de2a9176fa 100644
 1360 --- a/posix/tst-regex.c
 1361 +++ b/posix/tst-regex.c
 1362 @@ -32,6 +32,7 @@
 1363  #include <sys/stat.h>
 1364  #include <sys/types.h>
 1365  #include <regex.h>
 1366 +#include <support/support.h>
 1367  
 1368  
 1369  #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
 1370 @@ -58,7 +59,7 @@ do_test (void)
 1371    const char *file;
 1372    int fd;
 1373    struct stat st;
 1374 -  int result;
 1375 +  int result = 0;
 1376    char *inmem;
 1377    char *outmem;
 1378    size_t inlen;
 1379 @@ -123,7 +124,7 @@ do_test (void)
 1380  
 1381    /* Run the actual tests.  All tests are run in a single-byte and a
 1382       multi-byte locale.  */
 1383 -  result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
 1384 +  result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
 1385    result |= test_expr ("G.ran", 2, 3);
 1386    result |= test_expr ("G.\\{1\\}ran", 2, 3);
 1387    result |= test_expr ("G.*ran", 3, 44);
 1388 @@ -143,19 +144,33 @@ do_test (void)
 1389  static int
 1390  test_expr (const char *expr, int expected, int expectedicase)
 1391  {
 1392 -  int result;
 1393 +  int result = 0;
 1394    char *inmem;
 1395    char *outmem;
 1396    size_t inlen;
 1397    size_t outlen;
 1398    char *uexpr;
 1399  
 1400 -  /* First test: search with an UTF-8 locale.  */
 1401 -  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
 1402 -    error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
 1403 +  /* First test: search with the basic C.UTF-8 locale.  */
 1404 +  printf ("INFO: Testing C.UTF-8.\n");
 1405 +  xsetlocale (LC_ALL, "C.UTF-8");
 1406  
 1407    printf ("\nTest \"%s\" with multi-byte locale\n", expr);
 1408 -  result = run_test (expr, mem, memlen, 0, expected);
 1409 +  result |= run_test (expr, mem, memlen, 0, expected);
 1410 +  printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
 1411 +  result |= run_test (expr, mem, memlen, 1, expectedicase);
 1412 +  printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
 1413 +  result |= run_test_backwards (expr, mem, memlen, 0, expected);
 1414 +  printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
 1415 +	  expr);
 1416 +  result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
 1417 +
 1418 +  /* Second test: search with the de_DE.UTF-8 locale.  */
 1419 +  printf ("INFO: Testing de_DE.UTF-8.\n");
 1420 +  xsetlocale (LC_ALL, "de_DE.UTF-8");
 1421 +
 1422 +  printf ("\nTest \"%s\" with multi-byte locale\n", expr);
 1423 +  result |= run_test (expr, mem, memlen, 0, expected);
 1424    printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
 1425    result |= run_test (expr, mem, memlen, 1, expectedicase);
 1426    printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
 1427 @@ -165,8 +180,8 @@ test_expr (const char *expr, int expected, int expectedicase)
 1428    result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
 1429  
 1430    /* Second test: search with an ISO-8859-1 locale.  */
 1431 -  if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
 1432 -    error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
 1433 +  printf ("INFO: Testing de_DE.ISO-8859-1.\n");
 1434 +  xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
 1435  
 1436    inmem = (char *) expr;
 1437    inlen = strlen (expr);

Generated by cgit