1 commit 466f2be6c08070e9113ae2fdc7acd5d8828cba50
2 Author: Carlos O'Donell <carlos@redhat.com>
3 Date: Wed Sep 1 15:19:19 2021 -0400
4
5 Add generic C.UTF-8 locale (Bug 17318)
6
7 We add a new C.UTF-8 locale. This locale is not builtin to glibc, but
8 is provided as a distinct locale. The locale provides full support for
9 UTF-8 and this includes full code point sorting via STRCMP-based
10 collation (strcmp or wcscmp).
11
12 The collation uses a new keyword 'codepoint_collation' which drops all
13 collation rules and generates an empty zero rules collation to enable
14 STRCMP usage in collation. This ensures that we get full code point
15 sorting for C.UTF-8 with a minimal 1406 bytes of overhead (LC_COLLATE
16 structure information and ASCII collating tables).
17
18 The new locale is added to SUPPORTED. Minimal test data for specific
19 code points (minus those not supported by collate-test) is provided in
20 C.UTF-8.in, and this verifies code point sorting is working reasonably
21 across the range. The locale was tested manually with the full set of
22 code points without failure.
23
24 The locale is harmonized with locales already shipping in various
25 downstream distributions. A new tst-iconv9 test is added which verifies
26 the C.UTF-8 locale is generally usable.
27
28 Testing for fnmatch, regexec, and regcomp is provided by extending
29 bug-regex1, bug-regex19, bug-regex4, bug-regex6, transbug, tst-fnmatch,
30 tst-regcomp-truncated, and tst-regex to use C.UTF-8.
31
32 Tested on x86_64 and i686 without regression.
33
34 Reviewed-by: Florian Weimer <fweimer@redhat.com>
35
36 diff --git a/iconv/Makefile b/iconv/Makefile
37 index 07d77c9ecaafba1f..9993f2d3f3cd7498 100644
38 --- a/iconv/Makefile
39 +++ b/iconv/Makefile
40 @@ -43,8 +43,19 @@ CFLAGS-charmap.c += -DCHARMAP_PATH='"$(i18ndir)/charmaps"' \
41 CFLAGS-linereader.c += -DNO_TRANSLITERATION
42 CFLAGS-simple-hash.c += -I../locale
43
44 -tests = tst-iconv1 tst-iconv2 tst-iconv3 tst-iconv4 tst-iconv5 tst-iconv6 \
45 - tst-iconv7 tst-iconv8 tst-iconv-mt tst-iconv-opt
46 +tests = \
47 + tst-iconv1 \
48 + tst-iconv2 \
49 + tst-iconv3 \
50 + tst-iconv4 \
51 + tst-iconv5 \
52 + tst-iconv6 \
53 + tst-iconv7 \
54 + tst-iconv8 \
55 + tst-iconv9 \
56 + tst-iconv-mt \
57 + tst-iconv-opt \
58 + # tests
59
60 others = iconv_prog iconvconfig
61 install-others-programs = $(inst_bindir)/iconv
62 @@ -83,10 +94,15 @@ endif
63 include ../Rules
64
65 ifeq ($(run-built-tests),yes)
66 -LOCALES := en_US.UTF-8
67 +# We have to generate locales (list sorted alphabetically)
68 +LOCALES := \
69 + C.UTF-8 \
70 + en_US.UTF-8 \
71 + # LOCALES
72 include ../gen-locales.mk
73
74 $(objpfx)tst-iconv-opt.out: $(gen-locales)
75 +$(objpfx)tst-iconv9.out: $(gen-locales)
76 endif
77
78 $(inst_bindir)/iconv: $(objpfx)iconv_prog $(+force)
79 diff --git a/iconv/tst-iconv9.c b/iconv/tst-iconv9.c
80 new file mode 100644
81 index 0000000000000000..c46b1833d87b8e55
82 --- /dev/null
83 +++ b/iconv/tst-iconv9.c
84 @@ -0,0 +1,87 @@
85 +/* Verify that using C.UTF-8 works.
86 +
87 + Copyright (C) 2021 Free Software Foundation, Inc.
88 + This file is part of the GNU C Library.
89 +
90 + The GNU C Library is free software; you can redistribute it and/or
91 + modify it under the terms of the GNU Lesser General Public
92 + License as published by the Free Software Foundation; either
93 + version 2.1 of the License, or (at your option) any later version.
94 +
95 + The GNU C Library is distributed in the hope that it will be useful,
96 + but WITHOUT ANY WARRANTY; without even the implied warranty of
97 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
98 + Lesser General Public License for more details.
99 +
100 + You should have received a copy of the GNU Lesser General Public
101 + License along with the GNU C Library; if not, see
102 + <https://www.gnu.org/licenses/>. */
103 +
104 +#include <iconv.h>
105 +#include <stddef.h>
106 +#include <stdio.h>
107 +#include <string.h>
108 +#include <support/support.h>
109 +#include <support/check.h>
110 +
111 +/* This test does two things:
112 + (1) Verify that we have likely included translit_combining in C.UTF-8.
113 + (2) Verify default_missing is '?' as expected. */
114 +
115 +/* ISO-8859-1 encoding of "für" (0xfc is U WITH DIAERESIS), NUL-terminated. */
116 +char iso88591_in[] = { 0x66, 0xfc, 0x72, 0x0 };
117 +/* ASCII transliteration is "fur" with C.UTF-8 translit_combining. */
118 +char ascii_exp[] = { 0x66, 0x75, 0x72, 0x0 };
119 +
120 +/* First 3-byte UTF-8 code point, U+0800 SAMARITAN LETTER ALAF. */
121 +char utf8_in[] = { 0xe0, 0xa0, 0x80, 0x0 };
122 +/* There is no ASCII transliteration for SAMARITAN LETTER ALAF, so the
123 + locale's default_missing character is emitted instead: '?' (0x3f). */
124 +char default_missing_exp[] = { 0x3f, 0x0 };
125 +
126 +static int
127 +do_test (void)
128 +{
129 + char ascii_out[5];
130 + iconv_t cd;
131 + char *inbuf;
132 + char *outbuf;
133 + size_t inbytes;
134 + size_t outbytes;
135 + size_t n;
136 +
137 + /* The C.UTF-8 locale should include translit_combining, which provides
138 + the transliteration for "LATIN SMALL LETTER U WITH DIAERESIS" which
139 + is not provided by locale/C-translit.h.in. */
140 + xsetlocale (LC_ALL, "C.UTF-8");
141 +
142 + /* From ISO-8859-1 to ASCII: "für" must be transliterated to "fur". */
143 + cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "ISO-8859-1");
144 + TEST_VERIFY_EXIT (cd != (iconv_t) -1); /* Nothing to test without CD. */
145 + inbuf = iso88591_in;
146 + inbytes = 3; /* Input length without the terminating NUL. */
147 + outbuf = ascii_out;
148 + outbytes = 3; /* Leaves room in ascii_out for the NUL stored below. */
149 + n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
150 + TEST_VERIFY (n != (size_t) -1); /* iconv signals errors as (size_t) -1. */
151 + *outbuf = '\0';
152 + TEST_COMPARE_BLOB (ascii_out, 3, ascii_exp, 3);
153 + TEST_VERIFY (iconv_close (cd) == 0);
154 +
155 + /* From UTF-8 to ASCII: no transliteration exists, so expect '?'. */
156 + cd = iconv_open ("ASCII//TRANSLIT,IGNORE", "UTF-8");
157 + TEST_VERIFY_EXIT (cd != (iconv_t) -1); /* Nothing to test without CD. */
158 + inbuf = utf8_in;
159 + inbytes = 3; /* One 3-byte UTF-8 sequence, without the NUL. */
160 + outbuf = ascii_out;
161 + outbytes = 3;
162 + n = iconv (cd, &inbuf, &inbytes, &outbuf, &outbytes);
163 + TEST_VERIFY (n != (size_t) -1); /* iconv signals errors as (size_t) -1. */
164 + *outbuf = '\0';
165 + TEST_COMPARE_BLOB (ascii_out, 1, default_missing_exp, 1);
166 + TEST_VERIFY (iconv_close (cd) == 0);
167 +
168 + return 0;
169 +}
170 +
171 +#include <support/test-driver.c>
172 diff --git a/localedata/C.UTF-8.in b/localedata/C.UTF-8.in
173 new file mode 100644
174 index 0000000000000000..c31dcc2aa045ee61
175 --- /dev/null
176 +++ b/localedata/C.UTF-8.in
177 @@ -0,0 +1,157 @@
178 + ; <U1>
179 + ; <U2>
180 + ; <U3>
181 + ; <U4>
182 + ; <U5>
183 + ; <U6>
184 + ; <U7>
185 + ; <U8>
186 + ; <UE>
187 + ; <UF>
188 + ; <U10>
189 + ; <U11>
190 + ; <U12>
191 + ; <U13>
192 + ; <U14>
193 + ; <U15>
194 + ; <U16>
195 + ; <U17>
196 + ; <U18>
197 + ; <U19>
198 + ; <U1A>
199 + ; <U1B>
200 + ; <U1C>
201 + ; <U1D>
202 + ; <U1E>
203 + ; <U1F>
204 +! ; <U21>
205 +" ; <U22>
206 +# ; <U23>
207 +$ ; <U24>
208 +% ; <U25>
209 +& ; <U26>
210 +' ; <U27>
211 +) ; <U29>
212 +* ; <U2A>
213 ++ ; <U2B>
214 +, ; <U2C>
215 +- ; <U2D>
216 +. ; <U2E>
217 +/ ; <U2F>
218 +0 ; <U30>
219 +1 ; <U31>
220 +2 ; <U32>
221 +3 ; <U33>
222 +4 ; <U34>
223 +5 ; <U35>
224 +6 ; <U36>
225 +7 ; <U37>
226 +8 ; <U38>
227 +9 ; <U39>
228 +< ; <U3C>
229 += ; <U3D>
230 +> ; <U3E>
231 +? ; <U3F>
232 +@ ; <U40>
233 +A ; <U41>
234 +B ; <U42>
235 +C ; <U43>
236 +D ; <U44>
237 +E ; <U45>
238 +F ; <U46>
239 +G ; <U47>
240 +H ; <U48>
241 +I ; <U49>
242 +J ; <U4A>
243 +K ; <U4B>
244 +L ; <U4C>
245 +M ; <U4D>
246 +N ; <U4E>
247 +O ; <U4F>
248 +P ; <U50>
249 +Q ; <U51>
250 +R ; <U52>
251 +S ; <U53>
252 +T ; <U54>
253 +U ; <U55>
254 +V ; <U56>
255 +W ; <U57>
256 +X ; <U58>
257 +Y ; <U59>
258 +Z ; <U5A>
259 +[ ; <U5B>
260 +\ ; <U5C>
261 +] ; <U5D>
262 +^ ; <U5E>
263 +_ ; <U5F>
264 +` ; <U60>
265 +a ; <U61>
266 +b ; <U62>
267 +c ; <U63>
268 +d ; <U64>
269 +e ; <U65>
270 +f ; <U66>
271 +g ; <U67>
272 +h ; <U68>
273 +i ; <U69>
274 +j ; <U6A>
275 +k ; <U6B>
276 +l ; <U6C>
277 +m ; <U6D>
278 +n ; <U6E>
279 +o ; <U6F>
280 +p ; <U70>
281 +q ; <U71>
282 +r ; <U72>
283 +s ; <U73>
284 +t ; <U74>
285 +u ; <U75>
286 +v ; <U76>
287 +w ; <U77>
288 +x ; <U78>
289 +y ; <U79>
290 +z ; <U7A>
291 +{ ; <U7B>
292 +| ; <U7C>
293 +} ; <U7D>
294 +~ ; <U7E>
295 + ; <U7F>
296 + ; <U80>
297 +ÿ ; <UFF>
298 +Ā ; <U100>
299 + ; <UFFF>
300 +က ; <U1000>
301 +� ; <UFFFD>
302 + ; <UFFFF>
303 +𐀀 ; <U10000>
304 + ; <U1FFFF>
305 +𠀀 ; <U20000>
306 + ; <U2FFFF>
307 +𰀀 ; <U30000>
308 + ; <U3FFFE>
309 + ; <U40000>
310 + ; <U4FFFF>
311 + ; <U50000>
312 + ; <U5FFFF>
313 + ; <U60000>
314 + ; <U6FFFF>
315 + ; <U70000>
316 + ; <U7FFFF>
317 + ; <U80000>
318 + ; <U8FFFF>
319 + ; <U90000>
320 + ; <U9FFFF>
321 + ; <UA0000>
322 + ; <UAFFFF>
323 + ; <UB0000>
324 + ; <UBFFFF>
325 + ; <UC0001>
326 + ; <UCFFCC>
327 + ; <UD000E>
328 + ; <UDFFFF>
329 + ; <UE0001>
330 + ; <UEFFFF>
331 + ; <UF0001>
332 + ; <UFFFFF>
333 + ; <U100001>
334 + ; <U10FFFF>
335 diff --git a/localedata/Makefile b/localedata/Makefile
336 index 0341528b0407ae3b..c9dd5a954e8194cc 100644
337 --- a/localedata/Makefile
338 +++ b/localedata/Makefile
339 @@ -47,6 +47,7 @@ test-input := \
340 bg_BG.UTF-8 \
341 br_FR.UTF-8 \
342 bs_BA.UTF-8 \
343 + C.UTF-8 \
344 ckb_IQ.UTF-8 \
345 cmn_TW.UTF-8 \
346 crh_UA.UTF-8 \
347 @@ -206,6 +207,7 @@ LOCALES := \
348 bg_BG.UTF-8 \
349 br_FR.UTF-8 \
350 bs_BA.UTF-8 \
351 + C.UTF-8 \
352 ckb_IQ.UTF-8 \
353 cmn_TW.UTF-8 \
354 crh_UA.UTF-8 \
355 diff --git a/localedata/SUPPORTED b/localedata/SUPPORTED
356 index 34f7a7c3fe2b6526..546ce6cea16a8fdb 100644
357 --- a/localedata/SUPPORTED
358 +++ b/localedata/SUPPORTED
359 @@ -79,6 +79,7 @@ brx_IN/UTF-8 \
360 bs_BA.UTF-8/UTF-8 \
361 bs_BA/ISO-8859-2 \
362 byn_ER/UTF-8 \
363 +C.UTF-8/UTF-8 \
364 ca_AD.UTF-8/UTF-8 \
365 ca_AD/ISO-8859-15 \
366 ca_ES.UTF-8/UTF-8 \
367 diff --git a/localedata/locales/C b/localedata/locales/C
368 new file mode 100644
369 index 0000000000000000..ca801c79cf7e953e
370 --- /dev/null
371 +++ b/localedata/locales/C
372 @@ -0,0 +1,194 @@
373 +escape_char /
374 +comment_char %
375 +% Locale for C locale in UTF-8
376 +
377 +LC_IDENTIFICATION
378 +title "C locale"
379 +source ""
380 +address ""
381 +contact ""
382 +email "bug-glibc-locales@gnu.org"
383 +tel ""
384 +fax ""
385 +language ""
386 +territory ""
387 +revision "2.0"
388 +date "2020-06-28"
389 +category "i18n:2012";LC_IDENTIFICATION
390 +category "i18n:2012";LC_CTYPE
391 +category "i18n:2012";LC_COLLATE
392 +category "i18n:2012";LC_TIME
393 +category "i18n:2012";LC_NUMERIC
394 +category "i18n:2012";LC_MONETARY
395 +category "i18n:2012";LC_MESSAGES
396 +category "i18n:2012";LC_PAPER
397 +category "i18n:2012";LC_NAME
398 +category "i18n:2012";LC_ADDRESS
399 +category "i18n:2012";LC_TELEPHONE
400 +category "i18n:2012";LC_MEASUREMENT
401 +END LC_IDENTIFICATION
402 +
403 +LC_CTYPE
404 +% Include only the i18n character type classes without any of the
405 +% transliteration that i18n uses by default.
406 +copy "i18n_ctype"
407 +
408 +% Include the neutral transliterations. The builtin C and
409 +% POSIX locales have +1600 transliterations that are built into
410 +% the locales, and these are a superset of those.
411 +translit_start
412 +include "translit_neutral";""
413 +% We must use '?' for default_missing because the transliteration
414 +% framework includes it directly into the output and so it must
415 +% be compatible with ASCII if that is the target character set.
416 +default_missing <U003F>
417 +translit_end
418 +
419 +% Include the transliterations that can convert combined characters.
420 +% These are generally expected by users.
421 +translit_start
422 +include "translit_combining";""
423 +translit_end
424 +
425 +END LC_CTYPE
426 +
427 +LC_COLLATE
428 +% The keyword 'codepoint_collation' in any part of any LC_COLLATE
429 +% immediately discards all collation information and causes the
430 +% locale to use strcmp/wcscmp for collation comparison. This is
431 +% exactly what is needed for C (ASCII) or C.UTF-8.
432 +codepoint_collation
433 +END LC_COLLATE
434 +
435 +LC_MONETARY
436 +
437 +% This is the 14652 i18n fdcc-set definition for the LC_MONETARY
438 +% category (except for the int_curr_symbol and currency_symbol, they are
439 +% empty in the 14652 i18n fdcc-set definition and also empty in
440 +% glibc/locale/C-monetary.c.).
441 +int_curr_symbol ""
442 +currency_symbol ""
443 +mon_decimal_point "."
444 +mon_thousands_sep ""
445 +mon_grouping -1
446 +positive_sign ""
447 +negative_sign "-"
448 +int_frac_digits -1
449 +frac_digits -1
450 +p_cs_precedes -1
451 +int_p_sep_by_space -1
452 +p_sep_by_space -1
453 +n_cs_precedes -1
454 +int_n_sep_by_space -1
455 +n_sep_by_space -1
456 +p_sign_posn -1
457 +n_sign_posn -1
458 +%
459 +END LC_MONETARY
460 +
461 +LC_NUMERIC
462 +% This is the POSIX Locale definition for
463 +% the LC_NUMERIC category.
464 +%
465 +decimal_point "."
466 +thousands_sep ""
467 +grouping -1
468 +END LC_NUMERIC
469 +
470 +LC_TIME
471 +% This is the POSIX Locale definition for the LC_TIME category with the
472 +% exception that time is per ISO 8601 and 24-hour.
473 +%
474 +% Abbreviated weekday names (%a)
475 +abday "Sun";"Mon";"Tue";"Wed";"Thu";"Fri";"Sat"
476 +
477 +% Full weekday names (%A)
478 +day "Sunday";"Monday";"Tuesday";"Wednesday";"Thursday";/
479 + "Friday";"Saturday"
480 +
481 +% Abbreviated month names (%b)
482 +abmon "Jan";"Feb";"Mar";"Apr";"May";"Jun";"Jul";"Aug";"Sep";/
483 + "Oct";"Nov";"Dec"
484 +
485 +% Full month names (%B)
486 +mon "January";"February";"March";"April";"May";"June";"July";/
487 + "August";"September";"October";"November";"December"
488 +
489 +% Week description, consists of three fields:
490 +% 1. Number of days in a week.
491 +% 2. Gregorian date that is a first weekday (19971130 for Sunday, 19971201 for Monday).
492 +% 3. The weekday number to be contained in the first week of the year.
493 +%
494 +% ISO 8601 conforming applications should use the values 7, 19971201 (a
495 +% Monday), and 4 (Thursday), respectively.
496 +week 7;19971201;4
497 +first_weekday 1
498 +first_workday 2
499 +
500 +% Appropriate date and time representation (%c)
501 +d_t_fmt "%a %b %e %H:%M:%S %Y"
502 +
503 +% Appropriate date representation (%x)
504 +d_fmt "%m/%d/%y"
505 +
506 +% Appropriate time representation (%X)
507 +t_fmt "%H:%M:%S"
508 +
509 +% Appropriate AM/PM time representation (%r)
510 +t_fmt_ampm "%I:%M:%S %p"
511 +
512 +% Equivalent of AM/PM (%p)
513 +am_pm "AM";"PM"
514 +
515 +% Appropriate date representation (date(1))
516 +date_fmt "%a %b %e %H:%M:%S %Z %Y"
517 +END LC_TIME
518 +
519 +LC_MESSAGES
520 +% This is the POSIX Locale definition for
521 +% the LC_MESSAGES category.
522 +%
523 +yesexpr "^[yY]"
524 +noexpr "^[nN]"
525 +yesstr "Yes"
526 +nostr "No"
527 +END LC_MESSAGES
528 +
529 +LC_PAPER
530 +% This is the ISO/IEC 14652 "i18n" definition for
531 +% the LC_PAPER category.
532 +% (A4 paper, this is also used in the built in C/POSIX
533 +% locale in glibc/locale/C-paper.c)
534 +height 297
535 +width 210
536 +END LC_PAPER
537 +
538 +LC_NAME
539 +% This is the ISO/IEC 14652 "i18n" definition for
540 +% the LC_NAME category.
541 +% (also used in the built in C/POSIX locale in glibc/locale/C-name.c)
542 +name_fmt "%p%t%g%t%m%t%f"
543 +END LC_NAME
544 +
545 +LC_ADDRESS
546 +% This is the ISO/IEC 14652 "i18n" definition for
547 +% the LC_ADDRESS category.
548 +% (also used in the built in C/POSIX locale in glibc/locale/C-address.c)
549 +postal_fmt "%a%N%f%N%d%N%b%N%s %h %e %r%N%C-%z %T%N%c%N"
550 +END LC_ADDRESS
551 +
552 +LC_TELEPHONE
553 +% This is the ISO/IEC 14652 "i18n" definition for
554 +% the LC_TELEPHONE category.
555 +% "+%c %a %l"
556 +tel_int_fmt "+%c %a %l"
557 +% (also used in the built in C/POSIX locale in glibc/locale/C-telephone.c)
558 +END LC_TELEPHONE
559 +
560 +LC_MEASUREMENT
561 +% This is the ISO/IEC 14652 "i18n" definition for
562 +% the LC_MEASUREMENT category.
563 +% (same as in the built in C/POSIX locale in glibc/locale/C-measurement.c)
564 +%metric
565 +measurement 1
566 +END LC_MEASUREMENT
567 diff --git a/posix/Makefile b/posix/Makefile
568 index 059efb3cd2706cbe..a5229777eeb0e067 100644
569 --- a/posix/Makefile
570 +++ b/posix/Makefile
571 @@ -190,9 +190,19 @@ $(objpfx)wordexp-tst.out: wordexp-tst.sh $(objpfx)wordexp-test
572 $(evaluate-test)
573 endif
574
575 -LOCALES := cs_CZ.UTF-8 da_DK.ISO-8859-1 de_DE.ISO-8859-1 de_DE.UTF-8 \
576 - en_US.UTF-8 es_US.ISO-8859-1 es_US.UTF-8 ja_JP.EUC-JP tr_TR.UTF-8 \
577 - cs_CZ.ISO-8859-2
578 +LOCALES := \
579 + cs_CZ.ISO-8859-2 \
580 + cs_CZ.UTF-8 \
581 + C.UTF-8 \
582 + da_DK.ISO-8859-1 \
583 + de_DE.ISO-8859-1 \
584 + de_DE.UTF-8 \
585 + en_US.UTF-8 \
586 + es_US.ISO-8859-1 \
587 + es_US.UTF-8 \
588 + ja_JP.EUC-JP \
589 + tr_TR.UTF-8 \
590 + # LOCALES
591 include ../gen-locales.mk
592
593 $(objpfx)bug-regex1.out: $(gen-locales)
594 diff --git a/posix/bug-regex1.c b/posix/bug-regex1.c
595 index 38eb543951862492..7e9f4ec430a95631 100644
596 --- a/posix/bug-regex1.c
597 +++ b/posix/bug-regex1.c
598 @@ -41,6 +41,26 @@ main (void)
599 puts (" -> OK");
600 }
601
602 + puts ("in C.UTF-8 locale");
603 + setlocale (LC_ALL, "C.UTF-8");
604 + s = re_compile_pattern ("[an\371]*n", 7, &regex);
605 + if (s != NULL)
606 + {
607 + puts ("re_compile_pattern return non-NULL value");
608 + result = 1;
609 + }
610 + else
611 + {
612 + match = re_match (&regex, "an", 2, 0, &regs);
613 + if (match != 2)
614 + {
615 + printf ("re_match returned %d, expected 2\n", match);
616 + result = 1;
617 + }
618 + else
619 + puts (" -> OK");
620 + }
621 +
622 puts ("in de_DE.ISO-8859-1 locale");
623 setlocale (LC_ALL, "de_DE.ISO-8859-1");
624 s = re_compile_pattern ("[an]*n", 7, &regex);
625 diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
626 index b3fee0a7302c3263..e00ff60a14f994bf 100644
627 --- a/posix/bug-regex19.c
628 +++ b/posix/bug-regex19.c
629 @@ -25,6 +25,7 @@
630 #include <string.h>
631 #include <locale.h>
632 #include <libc-diag.h>
633 +#include <support/support.h>
634
635 #define BRE RE_SYNTAX_POSIX_BASIC
636 #define ERE RE_SYNTAX_POSIX_EXTENDED
637 @@ -407,8 +408,8 @@ do_mb_tests (const struct test_s *test)
638 return 0;
639 }
640
641 -int
642 -main (void)
643 +static int
644 +do_test (void)
645 {
646 size_t i;
647 int ret = 0;
648 @@ -417,20 +418,17 @@ main (void)
649
650 for (i = 0; i < sizeof (tests) / sizeof (tests[0]); ++i)
651 {
652 - if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
653 - {
654 - puts ("setlocale de_DE.ISO-8859-1 failed");
655 - ret = 1;
656 - }
657 + xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
658 ret |= do_one_test (&tests[i], "");
659 - if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
660 - {
661 - puts ("setlocale de_DE.UTF-8 failed");
662 - ret = 1;
663 - }
664 + xsetlocale (LC_ALL, "de_DE.UTF-8");
665 + ret |= do_one_test (&tests[i], "UTF-8 ");
666 + ret |= do_mb_tests (&tests[i]);
667 + xsetlocale (LC_ALL, "C.UTF-8");
668 ret |= do_one_test (&tests[i], "UTF-8 ");
669 ret |= do_mb_tests (&tests[i]);
670 }
671
672 return ret;
673 }
674 +
675 +#include <support/test-driver.c>
676 diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c
677 index 8d5ae11567889301..6475833c525176b2 100644
678 --- a/posix/bug-regex4.c
679 +++ b/posix/bug-regex4.c
680 @@ -32,8 +32,33 @@ main (void)
681
682 memset (&regex, '\0', sizeof (regex));
683
684 + printf ("INFO: Checking C.\n");
685 setlocale (LC_ALL, "C");
686
687 + s = re_compile_pattern ("ab[cde]", 7, &regex);
688 + if (s != NULL)
689 + {
690 + puts ("re_compile_pattern returned non-NULL value");
691 + result = 1;
692 + }
693 + else
694 + {
695 + match[0] = re_search_2 (&regex, "xyabez", 6, "", 0, 1, 5, NULL, 6);
696 + match[1] = re_search_2 (&regex, NULL, 0, "abc", 3, 0, 3, NULL, 3);
697 + match[2] = re_search_2 (&regex, "xya", 3, "bd", 2, 2, 3, NULL, 5);
698 + if (match[0] != 2 || match[1] != 0 || match[2] != 2)
699 + {
700 + printf ("re_search_2 returned %d,%d,%d, expected 2,0,2\n",
701 + match[0], match[1], match[2]);
702 + result = 1;
703 + }
704 + else
705 + puts (" -> OK");
706 + }
707 +
708 + printf ("INFO: Checking C.UTF-8.\n");
709 + setlocale (LC_ALL, "C.UTF-8");
710 +
711 s = re_compile_pattern ("ab[cde]", 7, &regex);
712 if (s != NULL)
713 {
714 diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c
715 index 2bdf2126a49ee99b..0929b69b83c91e5e 100644
716 --- a/posix/bug-regex6.c
717 +++ b/posix/bug-regex6.c
718 @@ -30,7 +30,7 @@ main (int argc, char *argv[])
719 regex_t re;
720 regmatch_t mat[10];
721 int i, j, ret = 0;
722 - const char *locales[] = { "C", "de_DE.UTF-8" };
723 + const char *locales[] = { "C", "C.UTF-8", "de_DE.UTF-8" };
724 const char *string = "http://www.regex.com/pattern/matching.html#intro";
725 regmatch_t expect[10] = {
726 { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 },
727 diff --git a/posix/transbug.c b/posix/transbug.c
728 index d0983b4d44d04fd2..b240177cf72326ff 100644
729 --- a/posix/transbug.c
730 +++ b/posix/transbug.c
731 @@ -116,16 +116,32 @@ do_test (void)
732 static const char lower[] = "[[:lower:]]+";
733 static const char upper[] = "[[:upper:]]+";
734 struct re_registers regs[4];
735 + int result = 0;
736
737 +#define CHECK(exp) \
738 + if (exp) { puts (#exp); result = 1; }
739 +
740 + printf ("INFO: Checking C.\n");
741 setlocale (LC_ALL, "C");
742
743 (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
744
745 - int result;
746 -#define CHECK(exp) \
747 - if (exp) { puts (#exp); result = 1; }
748 + result |= run_test (lower, regs);
749 + result |= run_test (upper, &regs[2]);
750 + if (! result)
751 + {
752 + CHECK (regs[0].start[0] != regs[2].start[0]);
753 + CHECK (regs[0].end[0] != regs[2].end[0]);
754 + CHECK (regs[1].start[0] != regs[3].start[0]);
755 + CHECK (regs[1].end[0] != regs[3].end[0]);
756 + }
757 +
758 + printf ("INFO: Checking C.UTF-8.\n");
759 + setlocale (LC_ALL, "C.UTF-8");
760 +
761 + (void) re_set_syntax (RE_SYNTAX_GNU_AWK);
762
763 - result = run_test (lower, regs);
764 + result |= run_test (lower, regs);
765 result |= run_test (upper, &regs[2]);
766 if (! result)
767 {
768 diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input
769 index 67aac5aadafd8aeb..6ff5318032e0afb2 100644
770 --- a/posix/tst-fnmatch.input
771 +++ b/posix/tst-fnmatch.input
772 @@ -472,6 +472,397 @@ C "\\" "[Z-\\]]" 0
773 C "]" "[Z-\\]]" 0
774 C "-" "[Z-\\]]" NOMATCH
775
776 +# B.6 004(C)
777 +C.UTF-8 "!#%+,-./01234567889" "!#%+,-./01234567889" 0
778 +C.UTF-8 ":;=@ABCDEFGHIJKLMNO" ":;=@ABCDEFGHIJKLMNO" 0
779 +C.UTF-8 "PQRSTUVWXYZ]abcdefg" "PQRSTUVWXYZ]abcdefg" 0
780 +C.UTF-8 "hijklmnopqrstuvwxyz" "hijklmnopqrstuvwxyz" 0
781 +C.UTF-8 "^_{}~" "^_{}~" 0
782 +
783 +# B.6 005(C)
784 +C.UTF-8 "\"$&'()" "\\\"\\$\\&\\'\\(\\)" 0
785 +C.UTF-8 "*?[\\`|" "\\*\\?\\[\\\\\\`\\|" 0
786 +C.UTF-8 "<>" "\\<\\>" 0
787 +
788 +# B.6 006(C)
789 +C.UTF-8 "?*[" "[?*[][?*[][?*[]" 0
790 +C.UTF-8 "a/b" "?/b" 0
791 +
792 +# B.6 007(C)
793 +C.UTF-8 "a/b" "a?b" 0
794 +C.UTF-8 "a/b" "a/?" 0
795 +C.UTF-8 "aa/b" "?/b" NOMATCH
796 +C.UTF-8 "aa/b" "a?b" NOMATCH
797 +C.UTF-8 "a/bb" "a/?" NOMATCH
798 +
799 +# B.6 009(C)
800 +C.UTF-8 "abc" "[abc]" NOMATCH
801 +C.UTF-8 "x" "[abc]" NOMATCH
802 +C.UTF-8 "a" "[abc]" 0
803 +C.UTF-8 "[" "[[abc]" 0
804 +C.UTF-8 "a" "[][abc]" 0
805 +C.UTF-8 "a]" "[]a]]" 0
806 +
807 +# B.6 010(C)
808 +C.UTF-8 "xyz" "[!abc]" NOMATCH
809 +C.UTF-8 "x" "[!abc]" 0
810 +C.UTF-8 "a" "[!abc]" NOMATCH
811 +
812 +# B.6 011(C)
813 +C.UTF-8 "]" "[][abc]" 0
814 +C.UTF-8 "abc]" "[][abc]" NOMATCH
815 +C.UTF-8 "[]abc" "[][]abc" NOMATCH
816 +C.UTF-8 "]" "[!]]" NOMATCH
817 +C.UTF-8 "aa]" "[!]a]" NOMATCH
818 +C.UTF-8 "]" "[!a]" 0
819 +C.UTF-8 "]]" "[!a]]" 0
820 +
821 +# B.6 012(C)
822 +C.UTF-8 "a" "[[.a.]]" 0
823 +C.UTF-8 "-" "[[.-.]]" 0
824 +C.UTF-8 "-" "[[.-.][.].]]" 0
825 +C.UTF-8 "-" "[[.].][.-.]]" 0
826 +C.UTF-8 "-" "[[.-.][=u=]]" 0
827 +C.UTF-8 "-" "[[.-.][:alpha:]]" 0
828 +C.UTF-8 "a" "[![.a.]]" NOMATCH
829 +
830 +# B.6 013(C)
831 +C.UTF-8 "a" "[[.b.]]" NOMATCH
832 +C.UTF-8 "a" "[[.b.][.c.]]" NOMATCH
833 +C.UTF-8 "a" "[[.b.][=b=]]" NOMATCH
834 +
835 +
836 +# B.6 015(C)
837 +C.UTF-8 "a" "[[=a=]]" 0
838 +C.UTF-8 "b" "[[=a=]b]" 0
839 +C.UTF-8 "b" "[[=a=][=b=]]" 0
840 +C.UTF-8 "a" "[[=a=][=b=]]" 0
841 +C.UTF-8 "a" "[[=a=][.b.]]" 0
842 +C.UTF-8 "a" "[[=a=][:digit:]]" 0
843 +
844 +# B.6 016(C)
845 +C.UTF-8 "=" "[[=a=]b]" NOMATCH
846 +C.UTF-8 "]" "[[=a=]b]" NOMATCH
847 +C.UTF-8 "a" "[[=b=][=c=]]" NOMATCH
848 +C.UTF-8 "a" "[[=b=][.].]]" NOMATCH
849 +C.UTF-8 "a" "[[=b=][:digit:]]" NOMATCH
850 +
851 +# B.6 017(C)
852 +C.UTF-8 "a" "[[:alnum:]]" 0
853 +C.UTF-8 "a" "[![:alnum:]]" NOMATCH
854 +C.UTF-8 "-" "[[:alnum:]]" NOMATCH
855 +C.UTF-8 "a]a" "[[:alnum:]]a" NOMATCH
856 +C.UTF-8 "-" "[[:alnum:]-]" 0
857 +C.UTF-8 "aa" "[[:alnum:]]a" 0
858 +C.UTF-8 "-" "[![:alnum:]]" 0
859 +C.UTF-8 "]" "[!][:alnum:]]" NOMATCH
860 +C.UTF-8 "[" "[![:alnum:][]" NOMATCH
861 +C.UTF-8 "a" "[[:alnum:]]" 0
862 +C.UTF-8 "b" "[[:alnum:]]" 0
863 +C.UTF-8 "c" "[[:alnum:]]" 0
864 +C.UTF-8 "d" "[[:alnum:]]" 0
865 +C.UTF-8 "e" "[[:alnum:]]" 0
866 +C.UTF-8 "f" "[[:alnum:]]" 0
867 +C.UTF-8 "g" "[[:alnum:]]" 0
868 +C.UTF-8 "h" "[[:alnum:]]" 0
869 +C.UTF-8 "i" "[[:alnum:]]" 0
870 +C.UTF-8 "j" "[[:alnum:]]" 0
871 +C.UTF-8 "k" "[[:alnum:]]" 0
872 +C.UTF-8 "l" "[[:alnum:]]" 0
873 +C.UTF-8 "m" "[[:alnum:]]" 0
874 +C.UTF-8 "n" "[[:alnum:]]" 0
875 +C.UTF-8 "o" "[[:alnum:]]" 0
876 +C.UTF-8 "p" "[[:alnum:]]" 0
877 +C.UTF-8 "q" "[[:alnum:]]" 0
878 +C.UTF-8 "r" "[[:alnum:]]" 0
879 +C.UTF-8 "s" "[[:alnum:]]" 0
880 +C.UTF-8 "t" "[[:alnum:]]" 0
881 +C.UTF-8 "u" "[[:alnum:]]" 0
882 +C.UTF-8 "v" "[[:alnum:]]" 0
883 +C.UTF-8 "w" "[[:alnum:]]" 0
884 +C.UTF-8 "x" "[[:alnum:]]" 0
885 +C.UTF-8 "y" "[[:alnum:]]" 0
886 +C.UTF-8 "z" "[[:alnum:]]" 0
887 +C.UTF-8 "A" "[[:alnum:]]" 0
888 +C.UTF-8 "B" "[[:alnum:]]" 0
889 +C.UTF-8 "C" "[[:alnum:]]" 0
890 +C.UTF-8 "D" "[[:alnum:]]" 0
891 +C.UTF-8 "E" "[[:alnum:]]" 0
892 +C.UTF-8 "F" "[[:alnum:]]" 0
893 +C.UTF-8 "G" "[[:alnum:]]" 0
894 +C.UTF-8 "H" "[[:alnum:]]" 0
895 +C.UTF-8 "I" "[[:alnum:]]" 0
896 +C.UTF-8 "J" "[[:alnum:]]" 0
897 +C.UTF-8 "K" "[[:alnum:]]" 0
898 +C.UTF-8 "L" "[[:alnum:]]" 0
899 +C.UTF-8 "M" "[[:alnum:]]" 0
900 +C.UTF-8 "N" "[[:alnum:]]" 0
901 +C.UTF-8 "O" "[[:alnum:]]" 0
902 +C.UTF-8 "P" "[[:alnum:]]" 0
903 +C.UTF-8 "Q" "[[:alnum:]]" 0
904 +C.UTF-8 "R" "[[:alnum:]]" 0
905 +C.UTF-8 "S" "[[:alnum:]]" 0
906 +C.UTF-8 "T" "[[:alnum:]]" 0
907 +C.UTF-8 "U" "[[:alnum:]]" 0
908 +C.UTF-8 "V" "[[:alnum:]]" 0
909 +C.UTF-8 "W" "[[:alnum:]]" 0
910 +C.UTF-8 "X" "[[:alnum:]]" 0
911 +C.UTF-8 "Y" "[[:alnum:]]" 0
912 +C.UTF-8 "Z" "[[:alnum:]]" 0
913 +C.UTF-8 "0" "[[:alnum:]]" 0
914 +C.UTF-8 "1" "[[:alnum:]]" 0
915 +C.UTF-8 "2" "[[:alnum:]]" 0
916 +C.UTF-8 "3" "[[:alnum:]]" 0
917 +C.UTF-8 "4" "[[:alnum:]]" 0
918 +C.UTF-8 "5" "[[:alnum:]]" 0
919 +C.UTF-8 "6" "[[:alnum:]]" 0
920 +C.UTF-8 "7" "[[:alnum:]]" 0
921 +C.UTF-8 "8" "[[:alnum:]]" 0
922 +C.UTF-8 "9" "[[:alnum:]]" 0
923 +C.UTF-8 "!" "[[:alnum:]]" NOMATCH
924 +C.UTF-8 "#" "[[:alnum:]]" NOMATCH
925 +C.UTF-8 "%" "[[:alnum:]]" NOMATCH
926 +C.UTF-8 "+" "[[:alnum:]]" NOMATCH
927 +C.UTF-8 "," "[[:alnum:]]" NOMATCH
928 +C.UTF-8 "-" "[[:alnum:]]" NOMATCH
929 +C.UTF-8 "." "[[:alnum:]]" NOMATCH
930 +C.UTF-8 "/" "[[:alnum:]]" NOMATCH
931 +C.UTF-8 ":" "[[:alnum:]]" NOMATCH
932 +C.UTF-8 ";" "[[:alnum:]]" NOMATCH
933 +C.UTF-8 "=" "[[:alnum:]]" NOMATCH
934 +C.UTF-8 "@" "[[:alnum:]]" NOMATCH
935 +C.UTF-8 "[" "[[:alnum:]]" NOMATCH
936 +C.UTF-8 "\\" "[[:alnum:]]" NOMATCH
937 +C.UTF-8 "]" "[[:alnum:]]" NOMATCH
938 +C.UTF-8 "^" "[[:alnum:]]" NOMATCH
939 +C.UTF-8 "_" "[[:alnum:]]" NOMATCH
940 +C.UTF-8 "{" "[[:alnum:]]" NOMATCH
941 +C.UTF-8 "}" "[[:alnum:]]" NOMATCH
942 +C.UTF-8 "~" "[[:alnum:]]" NOMATCH
943 +C.UTF-8 "\"" "[[:alnum:]]" NOMATCH
944 +C.UTF-8 "$" "[[:alnum:]]" NOMATCH
945 +C.UTF-8 "&" "[[:alnum:]]" NOMATCH
946 +C.UTF-8 "'" "[[:alnum:]]" NOMATCH
947 +C.UTF-8 "(" "[[:alnum:]]" NOMATCH
948 +C.UTF-8 ")" "[[:alnum:]]" NOMATCH
949 +C.UTF-8 "*" "[[:alnum:]]" NOMATCH
950 +C.UTF-8 "?" "[[:alnum:]]" NOMATCH
951 +C.UTF-8 "`" "[[:alnum:]]" NOMATCH
952 +C.UTF-8 "|" "[[:alnum:]]" NOMATCH
953 +C.UTF-8 "<" "[[:alnum:]]" NOMATCH
954 +C.UTF-8 ">" "[[:alnum:]]" NOMATCH
955 +C.UTF-8 "\t" "[[:cntrl:]]" 0
956 +C.UTF-8 "t" "[[:cntrl:]]" NOMATCH
957 +C.UTF-8 "t" "[[:lower:]]" 0
958 +C.UTF-8 "\t" "[[:lower:]]" NOMATCH
959 +C.UTF-8 "T" "[[:lower:]]" NOMATCH
960 +C.UTF-8 "\t" "[[:space:]]" 0
961 +C.UTF-8 "t" "[[:space:]]" NOMATCH
962 +C.UTF-8 "t" "[[:alpha:]]" 0
963 +C.UTF-8 "\t" "[[:alpha:]]" NOMATCH
964 +C.UTF-8 "0" "[[:digit:]]" 0
965 +C.UTF-8 "\t" "[[:digit:]]" NOMATCH
966 +C.UTF-8 "t" "[[:digit:]]" NOMATCH
967 +C.UTF-8 "\t" "[[:print:]]" NOMATCH
968 +C.UTF-8 "t" "[[:print:]]" 0
969 +C.UTF-8 "T" "[[:upper:]]" 0
970 +C.UTF-8 "\t" "[[:upper:]]" NOMATCH
971 +C.UTF-8 "t" "[[:upper:]]" NOMATCH
972 +C.UTF-8 "\t" "[[:blank:]]" 0
973 +C.UTF-8 "t" "[[:blank:]]" NOMATCH
974 +C.UTF-8 "\t" "[[:graph:]]" NOMATCH
975 +C.UTF-8 "t" "[[:graph:]]" 0
976 +C.UTF-8 "." "[[:punct:]]" 0
977 +C.UTF-8 "t" "[[:punct:]]" NOMATCH
978 +C.UTF-8 "\t" "[[:punct:]]" NOMATCH
979 +C.UTF-8 "0" "[[:xdigit:]]" 0
980 +C.UTF-8 "\t" "[[:xdigit:]]" NOMATCH
981 +C.UTF-8 "a" "[[:xdigit:]]" 0
982 +C.UTF-8 "A" "[[:xdigit:]]" 0
983 +C.UTF-8 "t" "[[:xdigit:]]" NOMATCH
984 +C.UTF-8 "a" "[[alpha]]" NOMATCH
985 +C.UTF-8 "a" "[[alpha:]]" NOMATCH
986 +C.UTF-8 "a]" "[[alpha]]" 0
987 +C.UTF-8 "a]" "[[alpha:]]" 0
988 +C.UTF-8 "a" "[[:alpha:][.b.]]" 0
989 +C.UTF-8 "a" "[[:alpha:][=b=]]" 0
990 +C.UTF-8 "a" "[[:alpha:][:digit:]]" 0
991 +C.UTF-8 "a" "[[:digit:][:alpha:]]" 0
992 +
993 +# B.6 018(C)
994 +C.UTF-8 "a" "[a-c]" 0
995 +C.UTF-8 "b" "[a-c]" 0
996 +C.UTF-8 "c" "[a-c]" 0
997 +C.UTF-8 "a" "[b-c]" NOMATCH
998 +C.UTF-8 "d" "[b-c]" NOMATCH
999 +C.UTF-8 "B" "[a-c]" NOMATCH
1000 +C.UTF-8 "b" "[A-C]" NOMATCH
1001 +C.UTF-8 "" "[a-c]" NOMATCH
1002 +C.UTF-8 "as" "[a-ca-z]" NOMATCH
1003 +C.UTF-8 "a" "[[.a.]-c]" 0
1004 +C.UTF-8 "a" "[a-[.c.]]" 0
1005 +C.UTF-8 "a" "[[.a.]-[.c.]]" 0
1006 +C.UTF-8 "b" "[[.a.]-c]" 0
1007 +C.UTF-8 "b" "[a-[.c.]]" 0
1008 +C.UTF-8 "b" "[[.a.]-[.c.]]" 0
1009 +C.UTF-8 "c" "[[.a.]-c]" 0
1010 +C.UTF-8 "c" "[a-[.c.]]" 0
1011 +C.UTF-8 "c" "[[.a.]-[.c.]]" 0
1012 +C.UTF-8 "d" "[[.a.]-c]" NOMATCH
1013 +C.UTF-8 "d" "[a-[.c.]]" NOMATCH
1014 +C.UTF-8 "d" "[[.a.]-[.c.]]" NOMATCH
1015 +
1016 +# B.6 019(C)
1017 +C.UTF-8 "a" "[c-a]" NOMATCH
1018 +C.UTF-8 "a" "[[.c.]-a]" NOMATCH
1019 +C.UTF-8 "a" "[c-[.a.]]" NOMATCH
1020 +C.UTF-8 "a" "[[.c.]-[.a.]]" NOMATCH
1021 +C.UTF-8 "c" "[c-a]" NOMATCH
1022 +C.UTF-8 "c" "[[.c.]-a]" NOMATCH
1023 +C.UTF-8 "c" "[c-[.a.]]" NOMATCH
1024 +C.UTF-8 "c" "[[.c.]-[.a.]]" NOMATCH
1025 +
1026 +# B.6 020(C)
1027 +C.UTF-8 "a" "[a-c0-9]" 0
1028 +C.UTF-8 "d" "[a-c0-9]" NOMATCH
1029 +C.UTF-8 "B" "[a-c0-9]" NOMATCH
1030 +
1031 +# B.6 021(C)
1032 +C.UTF-8 "-" "[-a]" 0
1033 +C.UTF-8 "a" "[-b]" NOMATCH
1034 +C.UTF-8 "-" "[!-a]" NOMATCH
1035 +C.UTF-8 "a" "[!-b]" 0
1036 +C.UTF-8 "-" "[a-c-0-9]" 0
1037 +C.UTF-8 "b" "[a-c-0-9]" 0
1038 +C.UTF-8 "a:" "a[0-9-a]" NOMATCH
1039 +C.UTF-8 "a:" "a[09-a]" 0
1040 +
1041 +# B.6 024(C)
1042 +C.UTF-8 "" "*" 0
1043 +C.UTF-8 "asd/sdf" "*" 0
1044 +
1045 +# B.6 025(C)
1046 +C.UTF-8 "as" "[a-c][a-z]" 0
1047 +C.UTF-8 "as" "??" 0
1048 +
1049 +# B.6 026(C)
1050 +C.UTF-8 "asd/sdf" "as*df" 0
1051 +C.UTF-8 "asd/sdf" "as*" 0
1052 +C.UTF-8 "asd/sdf" "*df" 0
1053 +C.UTF-8 "asd/sdf" "as*dg" NOMATCH
1054 +C.UTF-8 "asdf" "as*df" 0
1055 +C.UTF-8 "asdf" "as*df?" NOMATCH
1056 +C.UTF-8 "asdf" "as*??" 0
1057 +C.UTF-8 "asdf" "a*???" 0
1058 +C.UTF-8 "asdf" "*????" 0
1059 +C.UTF-8 "asdf" "????*" 0
1060 +C.UTF-8 "asdf" "??*?" 0
1061 +
1062 +# B.6 027(C)
1063 +C.UTF-8 "/" "/" 0
1064 +C.UTF-8 "/" "/*" 0
1065 +C.UTF-8 "/" "*/" 0
1066 +C.UTF-8 "/" "/?" NOMATCH
1067 +C.UTF-8 "/" "?/" NOMATCH
1068 +C.UTF-8 "/" "?" 0
1069 +C.UTF-8 "." "?" 0
1070 +C.UTF-8 "/." "??" 0
1071 +C.UTF-8 "/" "[!a-c]" 0
1072 +C.UTF-8 "." "[!a-c]" 0
1073 +
1074 +# B.6 029(C)
1075 +C.UTF-8 "/" "/" 0 PATHNAME
1076 +C.UTF-8 "//" "//" 0 PATHNAME
1077 +C.UTF-8 "/.a" "/*" 0 PATHNAME
1078 +C.UTF-8 "/.a" "/?a" 0 PATHNAME
1079 +C.UTF-8 "/.a" "/[!a-z]a" 0 PATHNAME
1080 +C.UTF-8 "/.a/.b" "/*/?b" 0 PATHNAME
1081 +
1082 +# B.6 030(C)
1083 +C.UTF-8 "/" "?" NOMATCH PATHNAME
1084 +C.UTF-8 "/" "*" NOMATCH PATHNAME
1085 +C.UTF-8 "a/b" "a?b" NOMATCH PATHNAME
1086 +C.UTF-8 "/.a/.b" "/*b" NOMATCH PATHNAME
1087 +
1088 +# B.6 031(C)
1089 +C.UTF-8 "/$" "\\/\\$" 0
1090 +C.UTF-8 "/[" "\\/\\[" 0
1091 +C.UTF-8 "/[" "\\/[" 0
1092 +C.UTF-8 "/[]" "\\/\\[]" 0
1093 +
1094 +# B.6 032(C)
1095 +C.UTF-8 "/$" "\\/\\$" NOMATCH NOESCAPE
1096 +C.UTF-8 "/\\$" "\\/\\$" NOMATCH NOESCAPE
1097 +C.UTF-8 "\\/\\$" "\\/\\$" 0 NOESCAPE
1098 +
1099 +# B.6 033(C)
1100 +C.UTF-8 ".asd" ".*" 0 PERIOD
1101 +C.UTF-8 "/.asd" "*" 0 PERIOD
1102 +C.UTF-8 "/as/.df" "*/?*f" 0 PERIOD
1103 +C.UTF-8 "..asd" ".[!a-z]*" 0 PERIOD
1104 +
1105 +# B.6 034(C)
1106 +C.UTF-8 ".asd" "*" NOMATCH PERIOD
1107 +C.UTF-8 ".asd" "?asd" NOMATCH PERIOD
1108 +C.UTF-8 ".asd" "[!a-z]*" NOMATCH PERIOD
1109 +
1110 +# B.6 035(C)
1111 +C.UTF-8 "/." "/." 0 PATHNAME|PERIOD
1112 +C.UTF-8 "/.a./.b." "/.*/.*" 0 PATHNAME|PERIOD
1113 +C.UTF-8 "/.a./.b." "/.??/.??" 0 PATHNAME|PERIOD
1114 +
1115 +# B.6 036(C)
1116 +C.UTF-8 "/." "*" NOMATCH PATHNAME|PERIOD
1117 +C.UTF-8 "/." "/*" NOMATCH PATHNAME|PERIOD
1118 +C.UTF-8 "/." "/?" NOMATCH PATHNAME|PERIOD
1119 +C.UTF-8 "/." "/[!a-z]" NOMATCH PATHNAME|PERIOD
1120 +C.UTF-8 "/a./.b." "/*/*" NOMATCH PATHNAME|PERIOD
1121 +C.UTF-8 "/a./.b." "/??/???" NOMATCH PATHNAME|PERIOD
1122 +
1123 +# Some home-grown tests.
1124 +C.UTF-8 "foobar" "foo*[abc]z" NOMATCH
1125 +C.UTF-8 "foobaz" "foo*[abc][xyz]" 0
1126 +C.UTF-8 "foobaz" "foo?*[abc][xyz]" 0
1127 +C.UTF-8 "foobaz" "foo?*[abc][x/yz]" 0
1128 +C.UTF-8 "foobaz" "foo?*[abc]/[xyz]" NOMATCH PATHNAME
1129 +C.UTF-8 "a" "a/" NOMATCH PATHNAME
1130 +C.UTF-8 "a/" "a" NOMATCH PATHNAME
1131 +C.UTF-8 "//a" "/a" NOMATCH PATHNAME
1132 +C.UTF-8 "/a" "//a" NOMATCH PATHNAME
1133 +C.UTF-8 "az" "[a-]z" 0
1134 +C.UTF-8 "bz" "[ab-]z" 0
1135 +C.UTF-8 "cz" "[ab-]z" NOMATCH
1136 +C.UTF-8 "-z" "[ab-]z" 0
1137 +C.UTF-8 "az" "[-a]z" 0
1138 +C.UTF-8 "bz" "[-ab]z" 0
1139 +C.UTF-8 "cz" "[-ab]z" NOMATCH
1140 +C.UTF-8 "-z" "[-ab]z" 0
1141 +C.UTF-8 "\\" "[\\\\-a]" 0
1142 +C.UTF-8 "_" "[\\\\-a]" 0
1143 +C.UTF-8 "a" "[\\\\-a]" 0
1144 +C.UTF-8 "-" "[\\\\-a]" NOMATCH
1145 +C.UTF-8 "\\" "[\\]-a]" NOMATCH
1146 +C.UTF-8 "_" "[\\]-a]" 0
1147 +C.UTF-8 "a" "[\\]-a]" 0
1148 +C.UTF-8 "]" "[\\]-a]" 0
1149 +C.UTF-8 "-" "[\\]-a]" NOMATCH
1150 +C.UTF-8 "\\" "[!\\\\-a]" NOMATCH
1151 +C.UTF-8 "_" "[!\\\\-a]" NOMATCH
1152 +C.UTF-8 "a" "[!\\\\-a]" NOMATCH
1153 +C.UTF-8 "-" "[!\\\\-a]" 0
1154 +C.UTF-8 "!" "[\\!-]" 0
1155 +C.UTF-8 "-" "[\\!-]" 0
1156 +C.UTF-8 "\\" "[\\!-]" NOMATCH
1157 +C.UTF-8 "Z" "[Z-\\\\]" 0
1158 +C.UTF-8 "[" "[Z-\\\\]" 0
1159 +C.UTF-8 "\\" "[Z-\\\\]" 0
1160 +C.UTF-8 "-" "[Z-\\\\]" NOMATCH
1161 +C.UTF-8 "Z" "[Z-\\]]" 0
1162 +C.UTF-8 "[" "[Z-\\]]" 0
1163 +C.UTF-8 "\\" "[Z-\\]]" 0
1164 +C.UTF-8 "]" "[Z-\\]]" 0
1165 +C.UTF-8 "-" "[Z-\\]]" NOMATCH
1166 +
1167 # Following are tests outside the scope of IEEE 2003.2 since they are using
1168 # locales other than the C locale. The main focus of the tests is on the
1169 # handling of ranges and the recognition of character (vs bytes).
1170 @@ -677,7 +1068,6 @@ C "x/y" "*" 0 PATHNAME|LEADING_DIR
1171 C "x/y/z" "*" 0 PATHNAME|LEADING_DIR
1172 C "x" "*x" 0 PATHNAME|LEADING_DIR
1173
1174 -en_US.UTF-8 "\366.csv" "*.csv" 0
1175 C "x/y" "*x" 0 PATHNAME|LEADING_DIR
1176 C "x/y/z" "*x" 0 PATHNAME|LEADING_DIR
1177 C "x" "x*" 0 PATHNAME|LEADING_DIR
1178 @@ -693,6 +1083,33 @@ C "x" "x?y" NOMATCH PATHNAME|LEADING_DIR
1179 C "x/y" "x?y" NOMATCH PATHNAME|LEADING_DIR
1180 C "x/y/z" "x?y" NOMATCH PATHNAME|LEADING_DIR
1181
1182 +# Duplicate the "Test of GNU extensions." tests but for C.UTF-8.
1183 +C.UTF-8 "x" "x" 0 PATHNAME|LEADING_DIR
1184 +C.UTF-8 "x/y" "x" 0 PATHNAME|LEADING_DIR
1185 +C.UTF-8 "x/y/z" "x" 0 PATHNAME|LEADING_DIR
1186 +C.UTF-8 "x" "*" 0 PATHNAME|LEADING_DIR
1187 +C.UTF-8 "x/y" "*" 0 PATHNAME|LEADING_DIR
1188 +C.UTF-8 "x/y/z" "*" 0 PATHNAME|LEADING_DIR
1189 +C.UTF-8 "x" "*x" 0 PATHNAME|LEADING_DIR
1190 +
1191 +C.UTF-8 "x/y" "*x" 0 PATHNAME|LEADING_DIR
1192 +C.UTF-8 "x/y/z" "*x" 0 PATHNAME|LEADING_DIR
1193 +C.UTF-8 "x" "x*" 0 PATHNAME|LEADING_DIR
1194 +C.UTF-8 "x/y" "x*" 0 PATHNAME|LEADING_DIR
1195 +C.UTF-8 "x/y/z" "x*" 0 PATHNAME|LEADING_DIR
1196 +C.UTF-8 "x" "a" NOMATCH PATHNAME|LEADING_DIR
1197 +C.UTF-8 "x/y" "a" NOMATCH PATHNAME|LEADING_DIR
1198 +C.UTF-8 "x/y/z" "a" NOMATCH PATHNAME|LEADING_DIR
1199 +C.UTF-8 "x" "x/y" NOMATCH PATHNAME|LEADING_DIR
1200 +C.UTF-8 "x/y" "x/y" 0 PATHNAME|LEADING_DIR
1201 +C.UTF-8 "x/y/z" "x/y" 0 PATHNAME|LEADING_DIR
1202 +C.UTF-8 "x" "x?y" NOMATCH PATHNAME|LEADING_DIR
1203 +C.UTF-8 "x/y" "x?y" NOMATCH PATHNAME|LEADING_DIR
1204 +C.UTF-8 "x/y/z" "x?y" NOMATCH PATHNAME|LEADING_DIR
1205 +
1206 +# Bug 14185
1207 +en_US.UTF-8 "\366.csv" "*.csv" 0
1208 +
1209 # ksh style matching.
1210 C "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH
1211 C "/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0 PATHNAME|EXTMATCH
1212 @@ -822,3 +1239,133 @@ C "" "" 0
1213 C "" "" 0 EXTMATCH
1214 C "" "*([abc])" 0 EXTMATCH
1215 C "" "?([abc])" 0 EXTMATCH
1216 +
1217 +# Duplicate the "ksh style matching." tests but for C.UTF-8.
1218 +C.UTF-8 "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH
1219 +C.UTF-8 "/dev/udp/129.22.8.102/45" "/dev/@(tcp|udp)/*/*" 0 PATHNAME|EXTMATCH
1220 +C.UTF-8 "12" "[1-9]*([0-9])" 0 EXTMATCH
1221 +C.UTF-8 "12abc" "[1-9]*([0-9])" NOMATCH EXTMATCH
1222 +C.UTF-8 "1" "[1-9]*([0-9])" 0 EXTMATCH
1223 +C.UTF-8 "07" "+([0-7])" 0 EXTMATCH
1224 +C.UTF-8 "0377" "+([0-7])" 0 EXTMATCH
1225 +C.UTF-8 "09" "+([0-7])" NOMATCH EXTMATCH
1226 +C.UTF-8 "paragraph" "para@(chute|graph)" 0 EXTMATCH
1227 +C.UTF-8 "paramour" "para@(chute|graph)" NOMATCH EXTMATCH
1228 +C.UTF-8 "para991" "para?([345]|99)1" 0 EXTMATCH
1229 +C.UTF-8 "para381" "para?([345]|99)1" NOMATCH EXTMATCH
1230 +C.UTF-8 "paragraph" "para*([0-9])" NOMATCH EXTMATCH
1231 +C.UTF-8 "para" "para*([0-9])" 0 EXTMATCH
1232 +C.UTF-8 "para13829383746592" "para*([0-9])" 0 EXTMATCH
1233 +C.UTF-8 "paragraph" "para+([0-9])" NOMATCH EXTMATCH
1234 +C.UTF-8 "para" "para+([0-9])" NOMATCH EXTMATCH
1235 +C.UTF-8 "para987346523" "para+([0-9])" 0 EXTMATCH
1236 +C.UTF-8 "paragraph" "para!(*.[0-9])" 0 EXTMATCH
1237 +C.UTF-8 "para.38" "para!(*.[0-9])" 0 EXTMATCH
1238 +C.UTF-8 "para.graph" "para!(*.[0-9])" 0 EXTMATCH
1239 +C.UTF-8 "para39" "para!(*.[0-9])" 0 EXTMATCH
1240 +C.UTF-8 "" "*(0|1|3|5|7|9)" 0 EXTMATCH
1241 +C.UTF-8 "137577991" "*(0|1|3|5|7|9)" 0 EXTMATCH
1242 +C.UTF-8 "2468" "*(0|1|3|5|7|9)" NOMATCH EXTMATCH
1243 +C.UTF-8 "1358" "*(0|1|3|5|7|9)" NOMATCH EXTMATCH
1244 +C.UTF-8 "file.c" "*.c?(c)" 0 EXTMATCH
1245 +C.UTF-8 "file.C" "*.c?(c)" NOMATCH EXTMATCH
1246 +C.UTF-8 "file.cc" "*.c?(c)" 0 EXTMATCH
1247 +C.UTF-8 "file.ccc" "*.c?(c)" NOMATCH EXTMATCH
1248 +C.UTF-8 "parse.y" "!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
1249 +C.UTF-8 "shell.c" "!(*.c|*.h|Makefile.in|config*|README)" NOMATCH EXTMATCH
1250 +C.UTF-8 "Makefile" "!(*.c|*.h|Makefile.in|config*|README)" 0 EXTMATCH
1251 +C.UTF-8 "VMS.FILE;1" "*\;[1-9]*([0-9])" 0 EXTMATCH
1252 +C.UTF-8 "VMS.FILE;0" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH
1253 +C.UTF-8 "VMS.FILE;" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH
1254 +C.UTF-8 "VMS.FILE;139" "*\;[1-9]*([0-9])" 0 EXTMATCH
1255 +C.UTF-8 "VMS.FILE;1N" "*\;[1-9]*([0-9])" NOMATCH EXTMATCH
1256 +C.UTF-8 "abcfefg" "ab**(e|f)" 0 EXTMATCH
1257 +C.UTF-8 "abcfefg" "ab**(e|f)g" 0 EXTMATCH
1258 +C.UTF-8 "ab" "ab*+(e|f)" NOMATCH EXTMATCH
1259 +C.UTF-8 "abef" "ab***ef" 0 EXTMATCH
1260 +C.UTF-8 "abef" "ab**" 0 EXTMATCH
1261 +C.UTF-8 "fofo" "*(f*(o))" 0 EXTMATCH
1262 +C.UTF-8 "ffo" "*(f*(o))" 0 EXTMATCH
1263 +C.UTF-8 "foooofo" "*(f*(o))" 0 EXTMATCH
1264 +C.UTF-8 "foooofof" "*(f*(o))" 0 EXTMATCH
1265 +C.UTF-8 "fooofoofofooo" "*(f*(o))" 0 EXTMATCH
1266 +C.UTF-8 "foooofof" "*(f+(o))" NOMATCH EXTMATCH
1267 +C.UTF-8 "xfoooofof" "*(f*(o))" NOMATCH EXTMATCH
1268 +C.UTF-8 "foooofofx" "*(f*(o))" NOMATCH EXTMATCH
1269 +C.UTF-8 "ofxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH
1270 +C.UTF-8 "ofooofoofofooo" "*(f*(o))" NOMATCH EXTMATCH
1271 +C.UTF-8 "foooxfooxfoxfooox" "*(f*(o)x)" 0 EXTMATCH
1272 +C.UTF-8 "foooxfooxofoxfooox" "*(f*(o)x)" NOMATCH EXTMATCH
1273 +C.UTF-8 "foooxfooxfxfooox" "*(f*(o)x)" 0 EXTMATCH
1274 +C.UTF-8 "ofxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH
1275 +C.UTF-8 "ofoooxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH
1276 +C.UTF-8 "ofoooxoofxoofoooxoofxo" "*(*(of*(o)x)o)" 0 EXTMATCH
1277 +C.UTF-8 "ofoooxoofxoofoooxoofxoo" "*(*(of*(o)x)o)" 0 EXTMATCH
1278 +C.UTF-8 "ofoooxoofxoofoooxoofxofo" "*(*(of*(o)x)o)" NOMATCH EXTMATCH
1279 +C.UTF-8 "ofoooxoofxoofoooxoofxooofxofxo" "*(*(of*(o)x)o)" 0 EXTMATCH
1280 +C.UTF-8 "aac" "*(@(a))a@(c)" 0 EXTMATCH
1281 +C.UTF-8 "ac" "*(@(a))a@(c)" 0 EXTMATCH
1282 +C.UTF-8 "c" "*(@(a))a@(c)" NOMATCH EXTMATCH
1283 +C.UTF-8 "aaac" "*(@(a))a@(c)" 0 EXTMATCH
1284 +C.UTF-8 "baaac" "*(@(a))a@(c)" NOMATCH EXTMATCH
1285 +C.UTF-8 "abcd" "?@(a|b)*@(c)d" 0 EXTMATCH
1286 +C.UTF-8 "abcd" "@(ab|a*@(b))*(c)d" 0 EXTMATCH
1287 +C.UTF-8 "acd" "@(ab|a*(b))*(c)d" 0 EXTMATCH
1288 +C.UTF-8 "abbcd" "@(ab|a*(b))*(c)d" 0 EXTMATCH
1289 +C.UTF-8 "effgz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
1290 +C.UTF-8 "efgz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
1291 +C.UTF-8 "egz" "@(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
1292 +C.UTF-8 "egzefffgzbcdij" "*(b+(c)d|e*(f)g?|?(h)i@(j|k))" 0 EXTMATCH
1293 +C.UTF-8 "egz" "@(b+(c)d|e+(f)g?|?(h)i@(j|k))" NOMATCH EXTMATCH
1294 +C.UTF-8 "ofoofo" "*(of+(o))" 0 EXTMATCH
1295 +C.UTF-8 "oxfoxoxfox" "*(oxf+(ox))" 0 EXTMATCH
1296 +C.UTF-8 "oxfoxfox" "*(oxf+(ox))" NOMATCH EXTMATCH
1297 +C.UTF-8 "ofoofo" "*(of+(o)|f)" 0 EXTMATCH
1298 +C.UTF-8 "foofoofo" "@(foo|f|fo)*(f|of+(o))" 0 EXTMATCH
1299 +C.UTF-8 "oofooofo" "*(of|oof+(o))" 0 EXTMATCH
1300 +C.UTF-8 "fffooofoooooffoofffooofff" "*(*(f)*(o))" 0 EXTMATCH
1301 +C.UTF-8 "fofoofoofofoo" "*(fo|foo)" 0 EXTMATCH
1302 +C.UTF-8 "foo" "!(x)" 0 EXTMATCH
1303 +C.UTF-8 "foo" "!(x)*" 0 EXTMATCH
1304 +C.UTF-8 "foo" "!(foo)" NOMATCH EXTMATCH
1305 +C.UTF-8 "foo" "!(foo)*" 0 EXTMATCH
1306 +C.UTF-8 "foobar" "!(foo)" 0 EXTMATCH
1307 +C.UTF-8 "foobar" "!(foo)*" 0 EXTMATCH
1308 +C.UTF-8 "moo.cow" "!(*.*).!(*.*)" 0 EXTMATCH
1309 +C.UTF-8 "mad.moo.cow" "!(*.*).!(*.*)" NOMATCH EXTMATCH
1310 +C.UTF-8 "mucca.pazza" "mu!(*(c))?.pa!(*(z))?" NOMATCH EXTMATCH
1311 +C.UTF-8 "fff" "!(f)" 0 EXTMATCH
1312 +C.UTF-8 "fff" "*(!(f))" 0 EXTMATCH
1313 +C.UTF-8 "fff" "+(!(f))" 0 EXTMATCH
1314 +C.UTF-8 "ooo" "!(f)" 0 EXTMATCH
1315 +C.UTF-8 "ooo" "*(!(f))" 0 EXTMATCH
1316 +C.UTF-8 "ooo" "+(!(f))" 0 EXTMATCH
1317 +C.UTF-8 "foo" "!(f)" 0 EXTMATCH
1318 +C.UTF-8 "foo" "*(!(f))" 0 EXTMATCH
1319 +C.UTF-8 "foo" "+(!(f))" 0 EXTMATCH
1320 +C.UTF-8 "f" "!(f)" NOMATCH EXTMATCH
1321 +C.UTF-8 "f" "*(!(f))" NOMATCH EXTMATCH
1322 +C.UTF-8 "f" "+(!(f))" NOMATCH EXTMATCH
1323 +C.UTF-8 "foot" "@(!(z*)|*x)" 0 EXTMATCH
1324 +C.UTF-8 "zoot" "@(!(z*)|*x)" NOMATCH EXTMATCH
1325 +C.UTF-8 "foox" "@(!(z*)|*x)" 0 EXTMATCH
1326 +C.UTF-8 "zoox" "@(!(z*)|*x)" 0 EXTMATCH
1327 +C.UTF-8 "foo" "*(!(foo))" 0 EXTMATCH
1328 +C.UTF-8 "foob" "!(foo)b*" NOMATCH EXTMATCH
1329 +C.UTF-8 "foobb" "!(foo)b*" 0 EXTMATCH
1330 +C.UTF-8 "[" "*([a[])" 0 EXTMATCH
1331 +C.UTF-8 "]" "*([]a[])" 0 EXTMATCH
1332 +C.UTF-8 "a" "*([]a[])" 0 EXTMATCH
1333 +C.UTF-8 "b" "*([!]a[])" 0 EXTMATCH
1334 +C.UTF-8 "[" "*([!]a[]|[[])" 0 EXTMATCH
1335 +C.UTF-8 "]" "*([!]a[]|[]])" 0 EXTMATCH
1336 +C.UTF-8 "[" "!([!]a[])" 0 EXTMATCH
1337 +C.UTF-8 "]" "!([!]a[])" 0 EXTMATCH
1338 +C.UTF-8 ")" "*([)])" 0 EXTMATCH
1339 +C.UTF-8 "*" "*([*(])" 0 EXTMATCH
1340 +C.UTF-8 "abcd" "*!(|a)cd" 0 EXTMATCH
1341 +C.UTF-8 "ab/.a" "+([abc])/*" NOMATCH EXTMATCH|PATHNAME|PERIOD
1342 +C.UTF-8 "" "" 0
1343 +C.UTF-8 "" "" 0 EXTMATCH
1344 +C.UTF-8 "" "*([abc])" 0 EXTMATCH
1345 +C.UTF-8 "" "?([abc])" 0 EXTMATCH
1346 diff --git a/posix/tst-regcomp-truncated.c b/posix/tst-regcomp-truncated.c
1347 index 84195fcd2ec153b8..da3f97799e37c607 100644
1348 --- a/posix/tst-regcomp-truncated.c
1349 +++ b/posix/tst-regcomp-truncated.c
1350 @@ -37,6 +37,7 @@
1351 static const char locales[][17] =
1352 {
1353 "C",
1354 + "C.UTF-8",
1355 "en_US.UTF-8",
1356 "de_DE.ISO-8859-1",
1357 };
1358 diff --git a/posix/tst-regex.c b/posix/tst-regex.c
1359 index e7c2b05e8666a16e..531128de2a9176fa 100644
1360 --- a/posix/tst-regex.c
1361 +++ b/posix/tst-regex.c
1362 @@ -32,6 +32,7 @@
1363 #include <sys/stat.h>
1364 #include <sys/types.h>
1365 #include <regex.h>
1366 +#include <support/support.h>
1367
1368
1369 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
1370 @@ -58,7 +59,7 @@ do_test (void)
1371 const char *file;
1372 int fd;
1373 struct stat st;
1374 - int result;
1375 + int result = 0;
1376 char *inmem;
1377 char *outmem;
1378 size_t inlen;
1379 @@ -123,7 +124,7 @@ do_test (void)
1380
1381 /* Run the actual tests. All tests are run in a single-byte and a
1382 multi-byte locale. */
1383 - result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
1384 + result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
1385 result |= test_expr ("G.ran", 2, 3);
1386 result |= test_expr ("G.\\{1\\}ran", 2, 3);
1387 result |= test_expr ("G.*ran", 3, 44);
1388 @@ -143,19 +144,33 @@ do_test (void)
1389 static int
1390 test_expr (const char *expr, int expected, int expectedicase)
1391 {
1392 - int result;
1393 + int result = 0;
1394 char *inmem;
1395 char *outmem;
1396 size_t inlen;
1397 size_t outlen;
1398 char *uexpr;
1399
1400 - /* First test: search with an UTF-8 locale. */
1401 - if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
1402 - error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
1403 + /* First test: search with the basic C.UTF-8 locale. */
1404 + printf ("INFO: Testing C.UTF-8.\n");
1405 + xsetlocale (LC_ALL, "C.UTF-8");
1406
1407 printf ("\nTest \"%s\" with multi-byte locale\n", expr);
1408 - result = run_test (expr, mem, memlen, 0, expected);
1409 + result |= run_test (expr, mem, memlen, 0, expected);
1410 + printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
1411 + result |= run_test (expr, mem, memlen, 1, expectedicase);
1412 + printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
1413 + result |= run_test_backwards (expr, mem, memlen, 0, expected);
1414 + printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
1415 + expr);
1416 + result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
1417 +
1418 + /* First test (continued): repeat the search with the de_DE.UTF-8 locale. */
1419 + printf ("INFO: Testing de_DE.UTF-8.\n");
1420 + xsetlocale (LC_ALL, "de_DE.UTF-8");
1421 +
1422 + printf ("\nTest \"%s\" with multi-byte locale\n", expr);
1423 + result |= run_test (expr, mem, memlen, 0, expected);
1424 printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
1425 result |= run_test (expr, mem, memlen, 1, expectedicase);
1426 printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
1427 @@ -165,8 +180,8 @@ test_expr (const char *expr, int expected, int expectedicase)
1428 result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
1429
1430 /* Second test: search with an ISO-8859-1 locale. */
1431 - if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
1432 - error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
1433 + printf ("INFO: Testing de_DE.ISO-8859-1.\n");
1434 + xsetlocale (LC_ALL, "de_DE.ISO-8859-1");
1435
1436 inmem = (char *) expr;
1437 inlen = strlen (expr);
|