summary refs log tree commit diff
path: root/glibc
diff options
context:
space:
mode:
author Juergen Daubert <jue@jue.li> 2022-02-09 11:53:46 +0100
committer Juergen Daubert <jue@jue.li> 2022-02-09 11:53:46 +0100
commit 701f83bc54d5b4f1919603381694362c57bb001c (patch)
tree ed268ed4d725e1738462e91e0fbc597cea953cd8 /glibc
parent e4c63b30d891690fea7afab7df959aaa80f5820b (diff)
download core-701f83bc54d5b4f1919603381694362c57bb001c.tar.gz
core-701f83bc54d5b4f1919603381694362c57bb001c.tar.xz
glibc: sync with upstream 2.32 branch
Diffstat (limited to 'glibc')
-rw-r--r-- glibc/.signature 6
-rw-r--r-- glibc/Pkgfile 6
-rw-r--r-- glibc/glibc-2.32-6.patch 6671
-rw-r--r-- glibc/glibc-2.32-7.patch 17967
4 files changed, 17973 insertions, 6677 deletions
diff --git a/glibc/.signature b/glibc/.signature
index e5d5a599..dbf06195 100644
--- a/glibc/.signature
+++ b/glibc/.signature
@@ -1,10 +1,10 @@
untrusted comment: verify with /etc/ports/core.pub
-RWRJc1FUaeVeqk2uwhHr5qBGXvnIwjOlb4TZ9ezJT2bi3Z+8Ym4D2uHdm5+VLuIIRBBmUYjvoKwBTb1m7j6tV9D+0QxRS+wJeA0=
-SHA256 (Pkgfile) = d319ce6af6100d9570cdff2c31542c6404ffb7461283de1ae4d098d0853d5d5a
+RWRJc1FUaeVeqjtuBDt9yWal+iJYK0LWLJghiZ1hQdV2j6Ll57N+c9XCp1EGfeMSTdC+Gvnan8BuI2jIRtKjih5DTvd+TEJDrw8=
+SHA256 (Pkgfile) = bf6a5730d314fecf8a04f0029193a9c64eafabde1e8fc77a0860076d7eb37d7b
SHA256 (.footprint) = aa29daaba0d990bb954964b7605cf132588bcb5ee9cf56d219f2ed26e60eba7b
SHA256 (glibc-2.32.tar.xz) = 1627ea54f5a1a8467032563393e0901077626dc66f37f10ee6363bb722222836
SHA256 (linux-5.4.72.tar.xz) = 0e24645bd56fe5b55a7a662895f5562c103d71b54d097281f0c9c71ff22c1172
-SHA256 (glibc-2.32-6.patch) = 70f33f14f62cb2daddd9bbfe0ffdfb3bb01880f7cedbc71cb534e82343d4a3d4
+SHA256 (glibc-2.32-7.patch) = 765880b63e386bfab3898bbc273dbc683f737d6327289dcf968da2fe807575e4
SHA256 (hosts) = 5c02b256c105f1d4a12fb738d71c1bab9eb126533074d7a0c8a14b92670c9431
SHA256 (resolv.conf) = 72ccb58768a72a771ec37142bc361a18478a07ec9de6e925a20760794389bf51
SHA256 (nsswitch.conf) = 859b8984e5e90aff3cce8f9779996ae4033b280d2122840e9411e2f44a1c2e61
diff --git a/glibc/Pkgfile b/glibc/Pkgfile
index 3c179148..a761b676 100644
--- a/glibc/Pkgfile
+++ b/glibc/Pkgfile
@@ -4,10 +4,10 @@
name=glibc
version=2.32
-release=6
+release=7
source=(https://ftp.gnu.org/gnu/glibc/glibc-$version.tar.xz \
https://www.kernel.org/pub/linux/kernel/v5.x/linux-5.4.72.tar.xz \
- glibc-$version-6.patch \
+ glibc-$version-7.patch \
hosts resolv.conf nsswitch.conf host.conf ld.so.conf)
build() {
@@ -18,7 +18,7 @@ build() {
make INSTALL_HDR_PATH=$PKG/usr headers_install
chown root:root $PKG/usr
- patch -p1 -d $SRC/$name-${version:0:4} -i $SRC/$name-$version-6.patch
+ patch -p1 -d $SRC/$name-${version:0:4} -i $SRC/$name-$version-7.patch
mkdir $SRC/build
cd $SRC/build
diff --git a/glibc/glibc-2.32-6.patch b/glibc/glibc-2.32-6.patch
deleted file mode 100644
index 5d4a58d2..00000000
--- a/glibc/glibc-2.32-6.patch
+++ /dev/null
@@ -1,6671 +0,0 @@
-diff --git a/NEWS b/NEWS
-index 485b8ddffa..b29826f4f5 100644
---- a/NEWS
-+++ b/NEWS
-@@ -5,6 +5,27 @@ See the end for copying conditions.
- Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
- using `glibc' in the "product" field.
-
-+The following bugs are resolved with this release:
-+
-+ [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT
-+ [26224] iconv hangs when converting some invalid inputs from several IBM
-+ character sets (CVE-2020-27618)
-+ [26534] libm.so 2.32 SIGILL in pow() due to FMA4 instruction on non-FMA4
-+ system
-+ [26555] string: strerrorname_np does not return the documented value
-+ [26600] Transaction ID collisions cause slow DNS lookups in getaddrinfo
-+ [26636] libc: 32-bit shmctl(IPC_INFO) crashes when shminfo struct is
-+ at the end of a memory mapping
-+ [26637] libc: semctl SEM_STAT_ANY fails to pass the buffer specified
-+ by the caller to the kernel
-+ [26639] libc: msgctl IPC_INFO and MSG_INFO return garbage
-+ [26853] aarch64: Missing unwind information in statically linked startup code
-+ [26932] libc: sh: Multiple floating point functions defined as stubs only
-+ [27130] "rep movsb" performance issue
-+ [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work
-+ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs
-+ [28607] Masked signals are delivered on thread exit
-+
- Version 2.32
-
- Major new features:
-@@ -185,6 +206,14 @@ Security related changes:
- Dytrych of the Cisco Security Assessment and Penetration Team (See
- TALOS-2020-1019).
-
-+ CVE-2020-27618: An infinite loop has been fixed in the iconv program when
-+ invoked with input containing redundant shift sequences in the IBM1364,
-+ IBM1371, IBM1388, IBM1390, or IBM1399 character sets.
-+
-+ CVE-2021-33574: The mq_notify function has a potential use-after-free
-+ issue when using a notification type of SIGEV_THREAD and a thread
-+ attribute with a non-default affinity mask.
-+
- The following bugs are resolved with this release:
-
- [9809] localedata: ckb_IQ: new Kurdish Sorani locale
-diff --git a/Rules b/Rules
-index 8b771f6095..beab969fde 100644
---- a/Rules
-+++ b/Rules
-@@ -155,6 +155,7 @@ xtests: tests $(xtests-special)
- else
- tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
- $(tests-container:%=$(objpfx)%.out) \
-+ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
- $(tests-special) $(tests-printers-out)
- xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
- endif
-@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no)
- tests-expected =
- else
- tests-expected = $(tests) $(tests-internal) $(tests-printers) \
-- $(tests-container)
-+ $(tests-container) $(tests-mcheck:%=%-mcheck)
- endif
- tests:
- $(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \
-@@ -191,6 +192,7 @@ else
- binaries-pie-tests =
- binaries-pie-notests =
- endif
-+binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
- else
- binaries-all-notests =
- binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
-@@ -200,6 +202,7 @@ binaries-static-tests =
- binaries-static =
- binaries-pie-tests =
- binaries-pie-notests =
-+binaries-mcheck-tests =
- endif
-
- binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
-@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \
- $(+link-tests)
- endif
-
-+ifneq "$(strip $(binaries-mcheck-tests))" ""
-+$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \
-+ $(link-extra-libs-tests) \
-+ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
-+ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
-+ $(+link-tests)
-+endif
-+
- ifneq "$(strip $(binaries-pie-tests))" ""
- $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
- $(link-extra-libs-tests) \
-@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \
- $(+link-static-tests)
- endif
-
-+# All mcheck tests will be run with MALLOC_CHECK_=3
-+define mcheck-ENVS
-+$(1)-mcheck-ENV = MALLOC_CHECK_=3
-+endef
-+$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t))))
-+
- ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" ""
- # These are the implicit rules for making test outputs
- # from the test programs and whatever input files are present.
-diff --git a/debug/Makefile b/debug/Makefile
-index 3a60d7af7a..0036edd187 100644
---- a/debug/Makefile
-+++ b/debug/Makefile
-@@ -51,7 +51,7 @@ routines = backtrace backtracesyms backtracesymsfd noophooks \
- explicit_bzero_chk \
- stack_chk_fail fortify_fail \
- $(static-only-routines)
--static-only-routines := warning-nop stack_chk_fail_local
-+static-only-routines := stack_chk_fail_local
-
- # Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local
- # is an alias of __stack_chk_fail in stack_chk_fail.o.
-diff --git a/debug/warning-nop.c b/debug/warning-nop.c
-deleted file mode 100644
-index 4ab7e182b7..0000000000
---- a/debug/warning-nop.c
-+++ /dev/null
-@@ -1,70 +0,0 @@
--/* Dummy nop functions to elicit link-time warnings.
-- Copyright (C) 2005-2020 Free Software Foundation, Inc.
-- This file is part of the GNU C Library.
--
-- The GNU C Library is free software; you can redistribute it and/or
-- modify it under the terms of the GNU Lesser General Public
-- License as published by the Free Software Foundation; either
-- version 2.1 of the License, or (at your option) any later version.
--
-- In addition to the permissions in the GNU Lesser General Public
-- License, the Free Software Foundation gives you unlimited
-- permission to link the compiled version of this file with other
-- programs, and to distribute those programs without any restriction
-- coming from the use of this file. (The GNU Lesser General Public
-- License restrictions do apply in other respects; for example, they
-- cover modification of the file, and distribution when not linked
-- into another program.)
--
-- Note that people who make modified versions of this file are not
-- obligated to grant this special exception for their modified
-- versions; it is their choice whether to do so. The GNU Lesser
-- General Public License gives permission to release a modified
-- version without this exception; this exception also makes it
-- possible to release a modified version which carries forward this
-- exception.
--
-- The GNU C Library is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-- Lesser General Public License for more details.
--
-- You should have received a copy of the GNU Lesser General Public
-- License along with the GNU C Library; if not, see
-- <https://www.gnu.org/licenses/>. */
--
--#include <sys/cdefs.h>
--
--static void
--__attribute__ ((used))
--nop (void)
--{
--}
--
--/* Don't insert any other #include's before this #undef! */
--
--#undef __warndecl
--#define __warndecl(name, msg) \
-- extern void name (void) __attribute__ ((alias ("nop"))) attribute_hidden; \
-- link_warning (name, msg)
--
--#undef __USE_FORTIFY_LEVEL
--#define __USE_FORTIFY_LEVEL 99
--
--/* Following here we need an #include for each public header file
-- that uses __warndecl. */
--
--/* Define away to avoid warnings with compilers that do not have these
-- builtins. */
--#define __builtin___memcpy_chk(dest, src, len, bos) NULL
--#define __builtin___memmove_chk(dest, src, len, bos) NULL
--#define __builtin___mempcpy_chk(dest, src, len, bos) NULL
--#define __builtin___memset_chk(dest, ch, len, bos) NULL
--#define __builtin___stpcpy_chk(dest, src, bos) NULL
--#define __builtin___strcat_chk(dest, src, bos) NULL
--#define __builtin___strcpy_chk(dest, src, bos) NULL
--#define __builtin___strncat_chk(dest, src, len, bos) NULL
--#define __builtin___strncpy_chk(dest, src, len, bos) NULL
--#define __builtin_object_size(bos, level) 0
--
--#include <string.h>
-diff --git a/elf/Makefile b/elf/Makefile
-index 0b78721848..3ba7f4ecfc 100644
---- a/elf/Makefile
-+++ b/elf/Makefile
-@@ -1381,6 +1381,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag)
- CFLAGS-ifuncmain9pie.c += $(pie-ccflag)
- CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag)
-
-+LDFLAGS-ifuncmain6pie = -Wl,-z,lazy
-+
- $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so
- $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o
- $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so
-@@ -1630,8 +1632,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \
-
- tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \
- LD_HWCAP_MASK=0x1
--tst-env-setuid-tunables-ENV = \
-- GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096
-
- $(objpfx)tst-debug1: $(libdl)
- $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so
-diff --git a/elf/dl-load.c b/elf/dl-load.c
-index e39980fb19..71867e7c1a 100644
---- a/elf/dl-load.c
-+++ b/elf/dl-load.c
-@@ -855,10 +855,12 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
-
- /* Process PT_GNU_PROPERTY program header PH in module L after
- PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0
-- note is handled which contains processor specific properties. */
-+ note is handled which contains processor specific properties.
-+ FD is -1 for the kernel mapped main executable otherwise it is
-+ the fd used for loading module L. */
-
- void
--_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
-+_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
- {
- const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
- const ElfW(Addr) size = ph->p_memsz;
-@@ -905,7 +907,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
- last_type = type;
-
- /* Target specific property processing. */
-- if (_dl_process_gnu_property (l, type, datasz, ptr) == 0)
-+ if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0)
- return;
-
- /* Check the next property item. */
-@@ -1251,21 +1253,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
- maplength, has_holes, loader);
- if (__glibc_unlikely (errstring != NULL))
- goto call_lose;
--
-- /* Process program headers again after load segments are mapped in
-- case processing requires accessing those segments. Scan program
-- headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
-- exits. */
-- for (ph = &phdr[l->l_phnum]; ph != phdr; --ph)
-- switch (ph[-1].p_type)
-- {
-- case PT_NOTE:
-- _dl_process_pt_note (l, &ph[-1]);
-- break;
-- case PT_GNU_PROPERTY:
-- _dl_process_pt_gnu_property (l, &ph[-1]);
-- break;
-- }
- }
-
- if (l->l_ld == 0)
-@@ -1377,6 +1364,21 @@ cannot enable executable stack as shared object requires");
- if (l->l_tls_initimage != NULL)
- l->l_tls_initimage = (char *) l->l_tls_initimage + l->l_addr;
-
-+ /* Process program headers again after load segments are mapped in
-+ case processing requires accessing those segments. Scan program
-+ headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
-+ exits. */
-+ for (ph = &l->l_phdr[l->l_phnum]; ph != l->l_phdr; --ph)
-+ switch (ph[-1].p_type)
-+ {
-+ case PT_NOTE:
-+ _dl_process_pt_note (l, fd, &ph[-1]);
-+ break;
-+ case PT_GNU_PROPERTY:
-+ _dl_process_pt_gnu_property (l, fd, &ph[-1]);
-+ break;
-+ }
-+
- /* We are done mapping in the file. We no longer need the descriptor. */
- if (__glibc_unlikely (__close_nocancel (fd) != 0))
- {
-diff --git a/elf/dl-open.c b/elf/dl-open.c
-index 8769e47051..55b39e1bbe 100644
---- a/elf/dl-open.c
-+++ b/elf/dl-open.c
-@@ -887,7 +887,7 @@ no more namespaces available for dlmopen()"));
- /* Avoid keeping around a dangling reference to the libc.so link
- map in case it has been cached in libc_map. */
- if (!args.libc_already_loaded)
-- GL(dl_ns)[nsid].libc_map = NULL;
-+ GL(dl_ns)[args.nsid].libc_map = NULL;
-
- /* Remove the object from memory. It may be in an inconsistent
- state if relocation failed, for example. */
-diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
-index 26e6e26612..15b29bcb90 100644
---- a/elf/dl-tunables.c
-+++ b/elf/dl-tunables.c
-@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring)
- return;
-
- char *p = tunestr;
-+ size_t off = 0;
-
- while (true)
- {
-@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring)
- /* If we reach the end of the string before getting a valid name-value
- pair, bail out. */
- if (p[len] == '\0')
-- return;
-+ {
-+ if (__libc_enable_secure)
-+ tunestr[off] = '\0';
-+ return;
-+ }
-
- /* We did not find a valid name-value pair before encountering the
- colon. */
-@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring)
-
- if (tunable_is_name (cur->name, name))
- {
-- /* If we are in a secure context (AT_SECURE) then ignore the tunable
-- unless it is explicitly marked as secure. Tunable values take
-- precedence over their envvar aliases. */
-+ /* If we are in a secure context (AT_SECURE) then ignore the
-+ tunable unless it is explicitly marked as secure. Tunable
-+ values take precedence over their envvar aliases. We write
-+ the tunables that are not SXID_ERASE back to TUNESTR, thus
-+ dropping all SXID_ERASE tunables and any invalid or
-+ unrecognized tunables. */
- if (__libc_enable_secure)
- {
-- if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE)
-+ if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE)
- {
-- if (p[len] == '\0')
-- {
-- /* Last tunable in the valstring. Null-terminate and
-- return. */
-- *name = '\0';
-- return;
-- }
-- else
-- {
-- /* Remove the current tunable from the string. We do
-- this by overwriting the string starting from NAME
-- (which is where the current tunable begins) with
-- the remainder of the string. We then have P point
-- to NAME so that we continue in the correct
-- position in the valstring. */
-- char *q = &p[len + 1];
-- p = name;
-- while (*q != '\0')
-- *name++ = *q++;
-- name[0] = '\0';
-- len = 0;
-- }
-+ if (off > 0)
-+ tunestr[off++] = ':';
-+
-+ const char *n = cur->name;
-+
-+ while (*n != '\0')
-+ tunestr[off++] = *n++;
-+
-+ tunestr[off++] = '=';
-+
-+ for (size_t j = 0; j < len; j++)
-+ tunestr[off++] = value[j];
- }
-
- if (cur->security_level != TUNABLE_SECLEVEL_NONE)
-@@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring)
- }
- }
-
-- if (p[len] == '\0')
-- return;
-- else
-+ if (p[len] != '\0')
- p += len + 1;
- }
- }
-diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c
-index 04faeb86ef..4a01906836 100644
---- a/elf/ifuncmain6pie.c
-+++ b/elf/ifuncmain6pie.c
-@@ -9,7 +9,6 @@
- #include "ifunc-sel.h"
-
- typedef int (*foo_p) (void);
--extern foo_p foo_ptr;
-
- static int
- one (void)
-@@ -28,20 +27,17 @@ foo_ifunc (void)
- }
-
- extern int foo (void);
--extern foo_p get_foo (void);
-+extern int call_foo (void);
- extern foo_p get_foo_p (void);
-
--foo_p my_foo_ptr = foo;
-+foo_p foo_ptr = foo;
-
- int
- main (void)
- {
- foo_p p;
-
-- p = get_foo ();
-- if (p != foo)
-- abort ();
-- if ((*p) () != -30)
-+ if (call_foo () != -30)
- abort ();
-
- p = get_foo_p ();
-@@ -52,12 +48,8 @@ main (void)
-
- if (foo_ptr != foo)
- abort ();
-- if (my_foo_ptr != foo)
-- abort ();
- if ((*foo_ptr) () != -30)
- abort ();
-- if ((*my_foo_ptr) () != -30)
-- abort ();
- if (foo () != -30)
- abort ();
-
-diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c
-index 2e16c1d06d..2f6d0715e6 100644
---- a/elf/ifuncmod6.c
-+++ b/elf/ifuncmod6.c
-@@ -4,7 +4,7 @@ extern int foo (void);
-
- typedef int (*foo_p) (void);
-
--foo_p foo_ptr = foo;
-+extern foo_p foo_ptr;
-
- foo_p
- get_foo_p (void)
-@@ -12,8 +12,8 @@ get_foo_p (void)
- return foo_ptr;
- }
-
--foo_p
--get_foo (void)
-+int
-+call_foo (void)
- {
-- return foo;
-+ return foo ();
- }
-diff --git a/elf/rtld.c b/elf/rtld.c
-index 5b882163fa..14a42ed00a 100644
---- a/elf/rtld.c
-+++ b/elf/rtld.c
-@@ -1534,10 +1534,10 @@ of this helper program; chances are you did not intend to run this program.\n\
- switch (ph[-1].p_type)
- {
- case PT_NOTE:
-- _dl_process_pt_note (main_map, &ph[-1]);
-+ _dl_process_pt_note (main_map, -1, &ph[-1]);
- break;
- case PT_GNU_PROPERTY:
-- _dl_process_pt_gnu_property (main_map, &ph[-1]);
-+ _dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
- break;
- }
-
-diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c
-index 971d5892b1..ca0c8c245c 100644
---- a/elf/tst-env-setuid-tunables.c
-+++ b/elf/tst-env-setuid-tunables.c
-@@ -25,35 +25,76 @@
- #include "config.h"
- #undef _LIBC
-
--#define test_parent test_parent_tunables
--#define test_child test_child_tunables
--
--static int test_child_tunables (void);
--static int test_parent_tunables (void);
--
--#include "tst-env-setuid.c"
--
--#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096"
--#define PARENT_VALSTRING_VALUE \
-- "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096"
-+#include <errno.h>
-+#include <fcntl.h>
-+#include <stdlib.h>
-+#include <stdint.h>
-+#include <stdio.h>
-+#include <string.h>
-+#include <sys/stat.h>
-+#include <sys/wait.h>
-+#include <unistd.h>
-+#include <intprops.h>
-+#include <array_length.h>
-+
-+#include <support/check.h>
-+#include <support/support.h>
-+#include <support/test-driver.h>
-+#include <support/capture_subprocess.h>
-+
-+const char *teststrings[] =
-+{
-+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2",
-+ "glibc.malloc.perturb=0x800",
-+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
-+ "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096",
-+ "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2",
-+ "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096",
-+ ":glibc.malloc.garbage=2:glibc.malloc.check=1",
-+ "glibc.malloc.check=1:glibc.malloc.check=2",
-+ "not_valid.malloc.check=2",
-+ "glibc.not_valid.check=2",
-+};
-+
-+const char *resultstrings[] =
-+{
-+ "glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.perturb=0x800",
-+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.mmap_threshold=4096",
-+ "glibc.malloc.mmap_threshold=4096",
-+ "",
-+ "",
-+ "",
-+ "",
-+ "",
-+ "",
-+};
-
- static int
--test_child_tunables (void)
-+test_child (int off)
- {
- const char *val = getenv ("GLIBC_TUNABLES");
-
- #if HAVE_TUNABLES
-- if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0)
-+ if (val != NULL && strcmp (val, resultstrings[off]) == 0)
- return 0;
-
- if (val != NULL)
-- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
-+ printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val);
-
- return 1;
- #else
- if (val != NULL)
- {
-- printf ("GLIBC_TUNABLES not cleared\n");
-+ printf ("[%d] GLIBC_TUNABLES not cleared\n", off);
- return 1;
- }
- return 0;
-@@ -61,15 +102,48 @@ test_child_tunables (void)
- }
-
- static int
--test_parent_tunables (void)
-+do_test (int argc, char **argv)
- {
-- const char *val = getenv ("GLIBC_TUNABLES");
-+ /* Setgid child process. */
-+ if (argc == 2)
-+ {
-+ if (getgid () == getegid ())
-+ /* This can happen if the file system is mounted nosuid. */
-+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
-+ (intmax_t) getgid ());
-
-- if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0)
-- return 0;
-+ int ret = test_child (atoi (argv[1]));
-
-- if (val != NULL)
-- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
-+ if (ret != 0)
-+ exit (1);
-
-- return 1;
-+ exit (EXIT_SUCCESS);
-+ }
-+ else
-+ {
-+ int ret = 0;
-+
-+ /* Spawn tests. */
-+ for (int i = 0; i < array_length (teststrings); i++)
-+ {
-+ char buf[INT_BUFSIZE_BOUND (int)];
-+
-+ printf ("Spawned test for %s (%d)\n", teststrings[i], i);
-+ snprintf (buf, sizeof (buf), "%d\n", i);
-+ if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0)
-+ exit (1);
-+
-+ int status = support_capture_subprogram_self_sgid (buf);
-+
-+ /* Bail out early if unsupported. */
-+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
-+ return EXIT_UNSUPPORTED;
-+
-+ ret |= status;
-+ }
-+ return ret;
-+ }
- }
-+
-+#define TEST_FUNCTION_ARGV do_test
-+#include <support/test-driver.c>
-diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c
-index 41dc79e83a..2dbccdb69e 100644
---- a/elf/tst-env-setuid.c
-+++ b/elf/tst-env-setuid.c
-@@ -29,173 +29,12 @@
- #include <sys/wait.h>
- #include <unistd.h>
-
-+#include <support/check.h>
- #include <support/support.h>
- #include <support/test-driver.h>
-+#include <support/capture_subprocess.h>
-
- static char SETGID_CHILD[] = "setgid-child";
--#define CHILD_STATUS 42
--
--/* Return a GID which is not our current GID, but is present in the
-- supplementary group list. */
--static gid_t
--choose_gid (void)
--{
-- const int count = 64;
-- gid_t groups[count];
-- int ret = getgroups (count, groups);
-- if (ret < 0)
-- {
-- printf ("getgroups: %m\n");
-- exit (1);
-- }
-- gid_t current = getgid ();
-- for (int i = 0; i < ret; ++i)
-- {
-- if (groups[i] != current)
-- return groups[i];
-- }
-- return 0;
--}
--
--/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */
--static pid_t
--do_execve (char **args)
--{
-- pid_t kid = vfork ();
--
-- if (kid < 0)
-- {
-- printf ("vfork: %m\n");
-- return -1;
-- }
--
-- if (kid == 0)
-- {
-- /* Child process. */
-- execve (args[0], args, environ);
-- _exit (-errno);
-- }
--
-- if (kid < 0)
-- return 1;
--
-- int status;
--
-- if (waitpid (kid, &status, 0) < 0)
-- {
-- printf ("waitpid: %m\n");
-- return 1;
-- }
--
-- if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
-- return EXIT_UNSUPPORTED;
--
-- if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS)
-- {
-- printf ("Unexpected exit status %d from child process\n",
-- WEXITSTATUS (status));
-- return 1;
-- }
-- return 0;
--}
--
--/* Copies the executable into a restricted directory, so that we can
-- safely make it SGID with the TARGET group ID. Then runs the
-- executable. */
--static int
--run_executable_sgid (gid_t target)
--{
-- char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
-- test_dir, (intmax_t) getpid ());
-- char *execname = xasprintf ("%s/bin", dirname);
-- int infd = -1;
-- int outfd = -1;
-- int ret = 0;
-- if (mkdir (dirname, 0700) < 0)
-- {
-- printf ("mkdir: %m\n");
-- goto err;
-- }
-- infd = open ("/proc/self/exe", O_RDONLY);
-- if (infd < 0)
-- {
-- printf ("open (/proc/self/exe): %m\n");
-- goto err;
-- }
-- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
-- if (outfd < 0)
-- {
-- printf ("open (%s): %m\n", execname);
-- goto err;
-- }
-- char buf[4096];
-- for (;;)
-- {
-- ssize_t rdcount = read (infd, buf, sizeof (buf));
-- if (rdcount < 0)
-- {
-- printf ("read: %m\n");
-- goto err;
-- }
-- if (rdcount == 0)
-- break;
-- char *p = buf;
-- char *end = buf + rdcount;
-- while (p != end)
-- {
-- ssize_t wrcount = write (outfd, buf, end - p);
-- if (wrcount == 0)
-- errno = ENOSPC;
-- if (wrcount <= 0)
-- {
-- printf ("write: %m\n");
-- goto err;
-- }
-- p += wrcount;
-- }
-- }
-- if (fchown (outfd, getuid (), target) < 0)
-- {
-- printf ("fchown (%s): %m\n", execname);
-- goto err;
-- }
-- if (fchmod (outfd, 02750) < 0)
-- {
-- printf ("fchmod (%s): %m\n", execname);
-- goto err;
-- }
-- if (close (outfd) < 0)
-- {
-- printf ("close (outfd): %m\n");
-- goto err;
-- }
-- if (close (infd) < 0)
-- {
-- printf ("close (infd): %m\n");
-- goto err;
-- }
--
-- char *args[] = {execname, SETGID_CHILD, NULL};
--
-- ret = do_execve (args);
--
--err:
-- if (outfd >= 0)
-- close (outfd);
-- if (infd >= 0)
-- close (infd);
-- if (execname)
-- {
-- unlink (execname);
-- free (execname);
-- }
-- if (dirname)
-- {
-- rmdir (dirname);
-- free (dirname);
-- }
-- return ret;
--}
-
- #ifndef test_child
- static int
-@@ -256,40 +95,32 @@ do_test (int argc, char **argv)
- if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0)
- {
- if (getgid () == getegid ())
-- {
-- /* This can happen if the file system is mounted nosuid. */
-- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
-- (intmax_t) getgid ());
-- exit (EXIT_UNSUPPORTED);
-- }
-+ /* This can happen if the file system is mounted nosuid. */
-+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
-+ (intmax_t) getgid ());
-
- int ret = test_child ();
-
- if (ret != 0)
- exit (1);
-
-- exit (CHILD_STATUS);
-+ exit (EXIT_SUCCESS);
- }
- else
- {
- if (test_parent () != 0)
- exit (1);
-
-- /* Try running a setgid program. */
-- gid_t target = choose_gid ();
-- if (target == 0)
-- {
-- fprintf (stderr,
-- "Could not find a suitable GID for user %jd, skipping test\n",
-- (intmax_t) getuid ());
-- exit (0);
-- }
-+ int status = support_capture_subprogram_self_sgid (SETGID_CHILD);
-
-- return run_executable_sgid (target);
-- }
-+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
-+ return EXIT_UNSUPPORTED;
-+
-+ if (!WIFEXITED (status))
-+ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
-
-- /* Something went wrong and our argv was corrupted. */
-- _exit (1);
-+ return 0;
-+ }
- }
-
- #define TEST_FUNCTION_ARGV do_test
-diff --git a/iconv/Versions b/iconv/Versions
-index 8a5f4cf780..d51af52fa3 100644
---- a/iconv/Versions
-+++ b/iconv/Versions
-@@ -6,7 +6,9 @@ libc {
- GLIBC_PRIVATE {
- # functions shared with iconv program
- __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
-- __gconv_open; __gconv_create_spec;
-+
-+ # functions used elsewhere in glibc
-+ __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
-
- # function used by the gconv modules
- __gconv_transliterate;
-diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
-index 6ccd0773cc..4ba0aa99f5 100644
---- a/iconv/gconv_charset.c
-+++ b/iconv/gconv_charset.c
-@@ -216,3 +216,13 @@ out:
- return ret;
- }
- libc_hidden_def (__gconv_create_spec)
-+
-+
-+void
-+__gconv_destroy_spec (struct gconv_spec *conv_spec)
-+{
-+ free (conv_spec->fromcode);
-+ free (conv_spec->tocode);
-+ return;
-+}
-+libc_hidden_def (__gconv_destroy_spec)
-diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
-index b39b09aea1..e9c122cf7e 100644
---- a/iconv/gconv_charset.h
-+++ b/iconv/gconv_charset.h
-@@ -48,33 +48,6 @@
- #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
-
-
--/* This function accepts the charset names of the source and destination of the
-- conversion and populates *conv_spec with an equivalent conversion
-- specification that may later be used by __gconv_open. The charset names
-- might contain options in the form of suffixes that alter the conversion,
-- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
-- and truncating any suffix options in fromcode, and processing and truncating
-- any suffix options in tocode. Supported suffix options ("TRANSLIT" or
-- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
-- to be set to true. Unrecognized suffix options are silently discarded. If
-- the function succeeds, it returns conv_spec back to the caller. It returns
-- NULL upon failure. */
--struct gconv_spec *
--__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
-- const char *tocode);
--libc_hidden_proto (__gconv_create_spec)
--
--
--/* This function frees all heap memory allocated by __gconv_create_spec. */
--static void __attribute__ ((unused))
--gconv_destroy_spec (struct gconv_spec *conv_spec)
--{
-- free (conv_spec->fromcode);
-- free (conv_spec->tocode);
-- return;
--}
--
--
- /* This function copies in-order, characters from the source 's' that are
- either alpha-numeric or one in one of these: "_-.,:/" - into the destination
- 'wp' while dropping all other characters. In the process, it converts all
-diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
-index e86938dae7..f721ce30ff 100644
---- a/iconv/gconv_int.h
-+++ b/iconv/gconv_int.h
-@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
- __gconv_t *handle, int flags);
- libc_hidden_proto (__gconv_open)
-
-+/* This function accepts the charset names of the source and destination of the
-+ conversion and populates *conv_spec with an equivalent conversion
-+ specification that may later be used by __gconv_open. The charset names
-+ might contain options in the form of suffixes that alter the conversion,
-+ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
-+ and truncating any suffix options in fromcode, and processing and truncating
-+ any suffix options in tocode. Supported suffix options ("TRANSLIT" or
-+ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
-+ to be set to true. Unrecognized suffix options are silently discarded. If
-+ the function succeeds, it returns conv_spec back to the caller. It returns
-+ NULL upon failure. */
-+extern struct gconv_spec *
-+__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
-+ const char *tocode);
-+libc_hidden_proto (__gconv_create_spec)
-+
-+/* This function frees all heap memory allocated by __gconv_create_spec. */
-+extern void
-+__gconv_destroy_spec (struct gconv_spec *conv_spec);
-+libc_hidden_proto (__gconv_destroy_spec)
-+
- /* Free resources associated with transformation descriptor CD. */
- extern int __gconv_close (__gconv_t cd)
- attribute_hidden;
-diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
-index dd54bc12e0..5b30055c04 100644
---- a/iconv/iconv_open.c
-+++ b/iconv/iconv_open.c
-@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
-
- int res = __gconv_open (&conv_spec, &cd, 0);
-
-- gconv_destroy_spec (&conv_spec);
-+ __gconv_destroy_spec (&conv_spec);
-
- if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
- {
-diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
-index b4334faa57..d59979759c 100644
---- a/iconv/iconv_prog.c
-+++ b/iconv/iconv_prog.c
-@@ -184,7 +184,7 @@ main (int argc, char *argv[])
- /* Let's see whether we have these coded character sets. */
- res = __gconv_open (&conv_spec, &cd, 0);
-
-- gconv_destroy_spec (&conv_spec);
-+ __gconv_destroy_spec (&conv_spec);
-
- if (res != __GCONV_OK)
- {
-diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh
-index 8298136b7f..d8db7b335c 100644
---- a/iconv/tst-iconv_prog.sh
-+++ b/iconv/tst-iconv_prog.sh
-@@ -102,12 +102,16 @@ hangarray=(
- "\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE"
- "\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE"
- "\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE"
--# These are known hangs that are yet to be fixed:
--# "\x00\x0f;-c;IBM1364;UTF-8"
--# "\x00\x0f;-c;IBM1371;UTF-8"
--# "\x00\x0f;-c;IBM1388;UTF-8"
--# "\x00\x0f;-c;IBM1390;UTF-8"
--# "\x00\x0f;-c;IBM1399;UTF-8"
-+"\x00\x0f;-c;IBM1364;UTF-8"
-+"\x0e\x0e;-c;IBM1364;UTF-8"
-+"\x00\x0f;-c;IBM1371;UTF-8"
-+"\x0e\x0e;-c;IBM1371;UTF-8"
-+"\x00\x0f;-c;IBM1388;UTF-8"
-+"\x0e\x0e;-c;IBM1388;UTF-8"
-+"\x00\x0f;-c;IBM1390;UTF-8"
-+"\x0e\x0e;-c;IBM1390;UTF-8"
-+"\x00\x0f;-c;IBM1399;UTF-8"
-+"\x0e\x0e;-c;IBM1399;UTF-8"
- "\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE"
- "\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE"
- "\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE"
-diff --git a/iconvdata/Makefile b/iconvdata/Makefile
-index 4ec2741cdc..b67b4feeb4 100644
---- a/iconvdata/Makefile
-+++ b/iconvdata/Makefile
-@@ -1,4 +1,5 @@
- # Copyright (C) 1997-2020 Free Software Foundation, Inc.
-+# Copyright (C) The GNU Toolchain Authors.
- # This file is part of the GNU C Library.
-
- # The GNU C Library is free software; you can redistribute it and/or
-@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules))
- ifeq (yes,$(build-shared))
- tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
- tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
-- bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4
-+ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
-+ bug-iconv13 bug-iconv14 bug-iconv15
- ifeq ($(have-thread-library),yes)
- tests += bug-iconv3
- endif
-@@ -321,6 +323,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
- $(addprefix $(objpfx),$(modules.so))
- $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
- $(addprefix $(objpfx),$(modules.so))
-+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
-+ $(addprefix $(objpfx),$(modules.so))
-+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
-+ $(addprefix $(objpfx),$(modules.so))
-
- $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
- $(addprefix $(objpfx),$(modules.so)) \
-diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c
-new file mode 100644
-index 0000000000..87aaff398e
---- /dev/null
-+++ b/iconvdata/bug-iconv13.c
-@@ -0,0 +1,53 @@
-+/* bug 24973: Test EUC-KR module
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <errno.h>
-+#include <iconv.h>
-+#include <stdio.h>
-+#include <support/check.h>
-+
-+static int
-+do_test (void)
-+{
-+ iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR");
-+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
-+
-+ /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined
-+ areas, which are not allowed and should be skipped over due to
-+ //IGNORE. The trailing 0xfe also is an incomplete sequence, which
-+ should be checked first. */
-+ char input[4] = { '\xc9', '\xa1', '\0', '\xfe' };
-+ char *inptr = input;
-+ size_t insize = sizeof (input);
-+ char output[4];
-+ char *outptr = output;
-+ size_t outsize = sizeof (output);
-+
-+ /* This used to crash due to buffer overrun. */
-+ TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1);
-+ TEST_VERIFY (errno == EINVAL);
-+ /* The conversion should produce one character, the converted null
-+ character. */
-+ TEST_VERIFY (sizeof (output) - outsize == 1);
-+
-+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
-new file mode 100644
-index 0000000000..902f140fa9
---- /dev/null
-+++ b/iconvdata/bug-iconv14.c
-@@ -0,0 +1,127 @@
-+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
-+ Copyright (C) 2021 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <iconv.h>
-+#include <string.h>
-+#include <errno.h>
-+#include <support/check.h>
-+
-+/* Use an escape sequence to return to the initial state. */
-+static void
-+with_escape_sequence (void)
-+{
-+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
-+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
-+
-+ char in[] = "\e$(O+D\e(B";
-+ char *inbuf = in;
-+ size_t inleft = strlen (in);
-+ char out[3]; /* Space for one output character. */
-+ char *outbuf;
-+ size_t outleft;
-+
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
-+ TEST_COMPARE (errno, E2BIG);
-+ TEST_COMPARE (inleft, 3);
-+ TEST_COMPARE (inbuf - in, strlen (in) - 3);
-+ TEST_COMPARE (outleft, sizeof (out) - 2);
-+ TEST_COMPARE (outbuf - out, 2);
-+ TEST_COMPARE (out[0] & 0xff, 0xc3);
-+ TEST_COMPARE (out[1] & 0xff, 0xa6);
-+
-+ /* Return to the initial shift state, producing the pending
-+ character. */
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
-+ TEST_COMPARE (inleft, 0);
-+ TEST_COMPARE (inbuf - in, strlen (in));
-+ TEST_COMPARE (outleft, sizeof (out) - 2);
-+ TEST_COMPARE (outbuf - out, 2);
-+ TEST_COMPARE (out[0] & 0xff, 0xcc);
-+ TEST_COMPARE (out[1] & 0xff, 0x80);
-+
-+ /* Nothing should be flushed the second time. */
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
-+ TEST_COMPARE (outleft, sizeof (out));
-+ TEST_COMPARE (outbuf - out, 0);
-+ TEST_COMPARE (out[0] & 0xff, 0xcc);
-+ TEST_COMPARE (out[1] & 0xff, 0x80);
-+
-+ TEST_COMPARE (iconv_close (c), 0);
-+}
-+
-+/* Use an explicit flush to return to the initial state. */
-+static void
-+with_flush (void)
-+{
-+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
-+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
-+
-+ char in[] = "\e$(O+D";
-+ char *inbuf = in;
-+ size_t inleft = strlen (in);
-+ char out[3]; /* Space for one output character. */
-+ char *outbuf;
-+ size_t outleft;
-+
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
-+ TEST_COMPARE (errno, E2BIG);
-+ TEST_COMPARE (inleft, 0);
-+ TEST_COMPARE (inbuf - in, strlen (in));
-+ TEST_COMPARE (outleft, sizeof (out) - 2);
-+ TEST_COMPARE (outbuf - out, 2);
-+ TEST_COMPARE (out[0] & 0xff, 0xc3);
-+ TEST_COMPARE (out[1] & 0xff, 0xa6);
-+
-+ /* Flush the pending character. */
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
-+ TEST_COMPARE (outleft, sizeof (out) - 2);
-+ TEST_COMPARE (outbuf - out, 2);
-+ TEST_COMPARE (out[0] & 0xff, 0xcc);
-+ TEST_COMPARE (out[1] & 0xff, 0x80);
-+
-+ /* Nothing should be flushed the second time. */
-+ outbuf = out;
-+ outleft = sizeof (out);
-+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
-+ TEST_COMPARE (outleft, sizeof (out));
-+ TEST_COMPARE (outbuf - out, 0);
-+ TEST_COMPARE (out[0] & 0xff, 0xcc);
-+ TEST_COMPARE (out[1] & 0xff, 0x80);
-+
-+ TEST_COMPARE (iconv_close (c), 0);
-+}
-+
-+static int
-+do_test (void)
-+{
-+ with_escape_sequence ();
-+ with_flush ();
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
-new file mode 100644
-index 0000000000..cc04bd0313
---- /dev/null
-+++ b/iconvdata/bug-iconv15.c
-@@ -0,0 +1,60 @@
-+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
-+ may emit spurious NUL character on state reset.
-+ Copyright (C) The GNU Toolchain Authors.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <stddef.h>
-+#include <iconv.h>
-+#include <support/check.h>
-+
-+static int
-+do_test (void)
-+{
-+ char in[] = "\x1b(I";
-+ char *inbuf = in;
-+ size_t inleft = sizeof (in) - 1;
-+ char out[1];
-+ char *outbuf = out;
-+ size_t outleft = sizeof (out);
-+ iconv_t cd;
-+
-+ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
-+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
-+
-+ /* First call to iconv should alter internal state.
-+ Now, JISX0201_Kana_set is selected and
-+ state value != ASCII_set. */
-+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
-+
-+ /* No bytes should have been added to
-+ the output buffer at this point. */
-+ TEST_VERIFY (outbuf == out);
-+ TEST_VERIFY (outleft == sizeof (out));
-+
-+ /* Second call shall emit spurious NUL character in unpatched glibc. */
-+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
-+
-+ /* No characters are expected to be produced. */
-+ TEST_VERIFY (outbuf == out);
-+ TEST_VERIFY (outleft == sizeof (out));
-+
-+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c
-index b0d56cf3ee..1045bae926 100644
---- a/iconvdata/euc-kr.c
-+++ b/iconvdata/euc-kr.c
-@@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
- \
- if (ch <= 0x9f) \
- ++inptr; \
-- /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \
-- user-defined areas. */ \
-- else if (__builtin_expect (ch == 0xa0, 0) \
-- || __builtin_expect (ch > 0xfe, 0) \
-- || __builtin_expect (ch == 0xc9, 0)) \
-+ else if (__glibc_unlikely (ch == 0xa0)) \
- { \
- /* This is illegal. */ \
- STANDARD_FROM_LOOP_ERR_HANDLER (1); \
-diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
-index 49e7267ab4..521f0825b7 100644
---- a/iconvdata/ibm1364.c
-+++ b/iconvdata/ibm1364.c
-@@ -158,24 +158,14 @@ enum
- \
- if (__builtin_expect (ch, 0) == SO) \
- { \
-- /* Shift OUT, change to DBCS converter. */ \
-- if (curcs == db) \
-- { \
-- result = __GCONV_ILLEGAL_INPUT; \
-- break; \
-- } \
-+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
- curcs = db; \
- ++inptr; \
- continue; \
- } \
- if (__builtin_expect (ch, 0) == SI) \
- { \
-- /* Shift IN, change to SBCS converter. */ \
-- if (curcs == sb) \
-- { \
-- result = __GCONV_ILLEGAL_INPUT; \
-- break; \
-- } \
-+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
- curcs = sb; \
- ++inptr; \
- continue; \
-diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
-index 8c3b7e627e..c7b470db61 100644
---- a/iconvdata/iso-2022-jp-3.c
-+++ b/iconvdata/iso-2022-jp-3.c
-@@ -1,5 +1,6 @@
- /* Conversion module for ISO-2022-JP-3.
- Copyright (C) 1998-2020 Free Software Foundation, Inc.
-+ Copyright (C) The GNU Toolchain Authors.
- This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
- and Bruno Haible <bruno@clisp.org>, 2002.
-@@ -67,10 +68,15 @@ enum
- CURRENT_SEL_MASK = 7 << 3
- };
-
--/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
-- also contains the last two bytes to be output, shifted by 6 bits, and a
-- one-bit indicator whether they must be preceded by the shift sequence,
-- in bit 22. */
-+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
-+ state also contains the last two bytes to be output, shifted by 6
-+ bits, and a one-bit indicator whether they must be preceded by the
-+ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
-+ conversion, COUNT may also contain a non-zero pending wide
-+ character, shifted by six bits. This happens for certain inputs in
-+ JISX0213_1_2004_set and JISX0213_2_set if the second wide character
-+ in a combining sequence cannot be written because the buffer is
-+ full. */
-
- /* Since this is a stateful encoding we have to provide code which resets
- the output state to the initial state. This has to be done during the
-@@ -80,10 +86,27 @@ enum
- { \
- if (FROM_DIRECTION) \
- { \
-- /* It's easy, we don't have to emit anything, we just reset the \
-- state for the input. */ \
-- data->__statep->__count &= 7; \
-- data->__statep->__count |= ASCII_set; \
-+ uint32_t ch = data->__statep->__count >> 6; \
-+ \
-+ if (__glibc_unlikely (ch != 0)) \
-+ { \
-+ if (__glibc_likely (outbuf + 4 <= outend)) \
-+ { \
-+ /* Write out the last character. */ \
-+ put32u (outbuf, ch); \
-+ outbuf += 4; \
-+ data->__statep->__count &= 7; \
-+ data->__statep->__count |= ASCII_set; \
-+ } \
-+ else \
-+ /* We don't have enough room in the output buffer. */ \
-+ status = __GCONV_FULL_OUTPUT; \
-+ } \
-+ else \
-+ { \
-+ data->__statep->__count &= 7; \
-+ data->__statep->__count |= ASCII_set; \
-+ } \
- } \
- else \
- { \
-@@ -151,7 +174,21 @@ enum
- #define LOOPFCT FROM_LOOP
- #define BODY \
- { \
-- uint32_t ch = *inptr; \
-+ uint32_t ch; \
-+ \
-+ /* Output any pending character. */ \
-+ ch = set >> 6; \
-+ if (__glibc_unlikely (ch != 0)) \
-+ { \
-+ put32 (outptr, ch); \
-+ outptr += 4; \
-+ /* Remove the pending character, but preserve state bits. */ \
-+ set &= (1 << 6) - 1; \
-+ continue; \
-+ } \
-+ \
-+ /* Otherwise read the next input byte. */ \
-+ ch = *inptr; \
- \
- /* Recognize escape sequences. */ \
- if (__glibc_unlikely (ch == ESC)) \
-@@ -297,21 +334,25 @@ enum
- uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
- uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
- \
-+ inptr += 2; \
-+ \
-+ put32 (outptr, u1); \
-+ outptr += 4; \
-+ \
- /* See whether we have room for two characters. */ \
-- if (outptr + 8 <= outend) \
-+ if (outptr + 4 <= outend) \
- { \
-- inptr += 2; \
-- put32 (outptr, u1); \
-- outptr += 4; \
- put32 (outptr, u2); \
- outptr += 4; \
- continue; \
- } \
-- else \
-- { \
-- result = __GCONV_FULL_OUTPUT; \
-- break; \
-- } \
-+ \
-+ /* Otherwise store only the first character now, and \
-+ put the second one into the queue. */ \
-+ set |= u2 << 6; \
-+ /* Tell the caller why we terminate the loop. */ \
-+ result = __GCONV_FULL_OUTPUT; \
-+ break; \
- } \
- \
- inptr += 2; \
-diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h
-index d3eb3a4ff8..f5cdc72797 100644
---- a/iconvdata/ksc5601.h
-+++ b/iconvdata/ksc5601.h
-@@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
- unsigned char ch2;
- int idx;
-
-+ if (avail < 2)
-+ return 0;
-+
- /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */
-
- if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e
- || (ch - offset) == 0x49)
- return __UNKNOWN_10646_CHAR;
-
-- if (avail < 2)
-- return 0;
--
- ch2 = (*s)[1];
- if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f)
- return __UNKNOWN_10646_CHAR;
-diff --git a/intl/dcigettext.c b/intl/dcigettext.c
-index 2e7c662bc7..bd332e71da 100644
---- a/intl/dcigettext.c
-+++ b/intl/dcigettext.c
-@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
-
- # ifdef _LIBC
-
-- struct gconv_spec conv_spec
-- = { .fromcode = norm_add_slashes (charset, ""),
-- .tocode = norm_add_slashes (outcharset, ""),
-- /* We always want to use transliteration. */
-- .translit = true,
-- .ignore = false
-- };
-+ struct gconv_spec conv_spec;
-+
-+ __gconv_create_spec (&conv_spec, charset, outcharset);
-+
-+ /* We always want to use transliteration. */
-+ conv_spec.translit = true;
-+
- int r = __gconv_open (&conv_spec, &convd->conv,
- GCONV_AVOID_NOCONV);
-+
-+ __gconv_destroy_spec (&conv_spec);
-+
- if (__builtin_expect (r != __GCONV_OK, 0))
- {
- /* If the output encoding is the same there is
-diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
-index fd70432eca..e9f6e5e09f 100644
---- a/intl/tst-codeset.c
-+++ b/intl/tst-codeset.c
-@@ -22,13 +22,11 @@
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
-+#include <support/check.h>
-
- static int
- do_test (void)
- {
-- char *s;
-- int result = 0;
--
- unsetenv ("LANGUAGE");
- unsetenv ("OUTPUT_CHARSET");
- setlocale (LC_ALL, "de_DE.ISO-8859-1");
-@@ -36,25 +34,21 @@ do_test (void)
- bindtextdomain ("codeset", OBJPFX "domaindir");
-
- /* Here we expect output in ISO-8859-1. */
-- s = gettext ("cheese");
-- if (strcmp (s, "K\344se"))
-- {
-- printf ("call 1 returned: %s\n", s);
-- result = 1;
-- }
-+ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
-
-+ /* Here we expect output in UTF-8. */
- bind_textdomain_codeset ("codeset", "UTF-8");
-+ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
-
-- /* Here we expect output in UTF-8. */
-- s = gettext ("cheese");
-- if (strcmp (s, "K\303\244se"))
-- {
-- printf ("call 2 returned: %s\n", s);
-- result = 1;
-- }
--
-- return result;
-+ /* `a with umlaut' is transliterated to `ae'. */
-+ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
-+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
-+
-+ /* Transliteration also works by default even if not set. */
-+ bind_textdomain_codeset ("codeset", "ASCII");
-+ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
-+
-+ return 0;
- }
-
--#define TEST_FUNCTION do_test ()
--#include "../test-skeleton.c"
-+#include <support/test-driver.c>
-diff --git a/malloc/Makefile b/malloc/Makefile
-index e22cbde22d..5093e8730e 100644
---- a/malloc/Makefile
-+++ b/malloc/Makefile
-@@ -62,6 +62,16 @@ endif
- tests += $(tests-static)
- test-srcs = tst-mtrace
-
-+# These tests either are run with MALLOC_CHECK_=3 by default or do not work
-+# with MALLOC_CHECK_=3 because they expect a specific failure.
-+tests-exclude-mcheck = tst-mcheck tst-malloc-usable \
-+ tst-interpose-nothread tst-interpose-static-nothread \
-+ tst-interpose-static-thread tst-malloc-too-large \
-+ tst-mxfast tst-safe-linking
-+
-+# Run all tests with MALLOC_CHECK_=3
-+tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests))
-+
- routines = malloc morecore mcheck mtrace obstack reallocarray \
- scratch_buffer_grow scratch_buffer_grow_preserve \
- scratch_buffer_set_array_size \
-@@ -100,6 +110,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library)
- $(objpfx)tst-malloc-thread-fail: $(shared-thread-library)
- $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library)
- $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library)
-+$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library)
-+$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library)
-+$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library)
-+$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library)
-+$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library)
-
- # Export the __malloc_initialize_hook variable to libc.so.
- LDFLAGS-tst-mallocstate = -rdynamic
-@@ -239,6 +254,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT
- $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o
- $(objpfx)tst-interpose-thread: \
- $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
-+$(objpfx)tst-interpose-thread-mcheck: \
-+ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
- $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o
- $(objpfx)tst-interpose-static-thread: \
- $(objpfx)tst-interpose-aux-thread.o $(static-thread-library)
-@@ -256,3 +273,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out
- $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library)
- $(objpfx)tst-malloc_info: $(shared-thread-library)
- $(objpfx)tst-mallocfork2: $(shared-thread-library)
-+$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library)
-+$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library)
-+$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library)
-diff --git a/manual/tunables.texi b/manual/tunables.texi
-index 23ef0d40e7..d72d7a5ec0 100644
---- a/manual/tunables.texi
-+++ b/manual/tunables.texi
-@@ -432,7 +432,11 @@ set shared cache size in bytes for use in memory and string routines.
-
- @deftp Tunable glibc.cpu.x86_non_temporal_threshold
- The @code{glibc.cpu.x86_non_temporal_threshold} tunable allows the user
--to set threshold in bytes for non temporal store.
-+to set threshold in bytes for non temporal store. Non temporal stores
-+give a hint to the hardware to move data directly to memory without
-+displacing other data from the cache. This tunable is used by some
-+platforms to determine when to use non temporal stores in operations
-+like memmove and memcpy.
-
- This tunable is specific to i386 and x86-64.
- @end deftp
-diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
-index 19d9cc5cfe..38221d0b2a 100644
---- a/misc/sys/cdefs.h
-+++ b/misc/sys/cdefs.h
-@@ -124,13 +124,10 @@
- #define __bos0(ptr) __builtin_object_size (ptr, 0)
-
- #if __GNUC_PREREQ (4,3)
--# define __warndecl(name, msg) \
-- extern void name (void) __attribute__((__warning__ (msg)))
- # define __warnattr(msg) __attribute__((__warning__ (msg)))
- # define __errordecl(name, msg) \
- extern void name (void) __attribute__((__error__ (msg)))
- #else
--# define __warndecl(name, msg) extern void name (void)
- # define __warnattr(msg)
- # define __errordecl(name, msg) extern void name (void)
- #endif
-diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
-index 2cba3da38c..c217cda608 100644
---- a/nptl/pthread_create.c
-+++ b/nptl/pthread_create.c
-@@ -416,8 +416,6 @@ START_THREAD_DEFN
- unwind_buf.priv.data.prev = NULL;
- unwind_buf.priv.data.cleanup = NULL;
-
-- __libc_signal_restore_set (&pd->sigmask);
--
- /* Allow setxid from now onwards. */
- if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2))
- futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
-@@ -427,6 +425,8 @@ START_THREAD_DEFN
- /* Store the new cleanup handler info. */
- THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf);
-
-+ __libc_signal_restore_set (&pd->sigmask);
-+
- /* We are either in (a) or (b), and in either case we either own
- PD already (2) or are about to own PD (1), and so our only
- restriction would be that we can't free PD until we know we
-diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
-index 88c69d1e9c..381aa721ef 100644
---- a/nscd/netgroupcache.c
-+++ b/nscd/netgroupcache.c
-@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
- : NULL);
- ndomain = (ndomain ? newbuf + ndomaindiff
- : NULL);
-- buffer = newbuf;
-+ *tofreep = buffer = newbuf;
- }
-
- nhost = memcpy (buffer + bufused,
-@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
- else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
- {
- buflen *= 2;
-- buffer = xrealloc (buffer, buflen);
-+ *tofreep = buffer = xrealloc (buffer, buflen);
- }
- else if (status == NSS_STATUS_RETURN
- || status == NSS_STATUS_NOTFOUND
-diff --git a/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
-new file mode 100644
-index 0000000000..5b0c6a4199
---- /dev/null
-+++ b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
-@@ -0,0 +1 @@
-+hosts: files
-diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h
-index 725a83eb0d..7e5bb6fb1e 100644
---- a/posix/bits/unistd.h
-+++ b/posix/bits/unistd.h
-@@ -193,10 +193,9 @@ __NTH (readlinkat (int __fd, const char *__restrict __path,
- #endif
-
- extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen)
-- __THROW __wur __attr_access ((__write_only__, 1, 2));
-+ __THROW __wur;
- extern char *__REDIRECT_NTH (__getcwd_alias,
-- (char *__buf, size_t __size), getcwd)
-- __wur __attr_access ((__write_only__, 1, 2));
-+ (char *__buf, size_t __size), getcwd) __wur;
- extern char *__REDIRECT_NTH (__getcwd_chk_warn,
- (char *__buf, size_t __size, size_t __buflen),
- __getcwd_chk)
-diff --git a/posix/unistd.h b/posix/unistd.h
-index 32b8161619..acf9ee7e79 100644
---- a/posix/unistd.h
-+++ b/posix/unistd.h
-@@ -517,8 +517,7 @@ extern int fchdir (int __fd) __THROW __wur;
- an array is allocated with `malloc'; the array is SIZE
- bytes long, unless SIZE == 0, in which case it is as
- big as necessary. */
--extern char *getcwd (char *__buf, size_t __size) __THROW __wur
-- __attr_access ((__write_only__, 1, 2));
-+extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
-
- #ifdef __USE_GNU
- /* Return a malloc'd string containing the current directory name.
-@@ -831,7 +830,7 @@ extern int symlinkat (const char *__from, int __tofd,
- /* Like readlink but a relative PATH is interpreted relative to FD. */
- extern ssize_t readlinkat (int __fd, const char *__restrict __path,
- char *__restrict __buf, size_t __len)
-- __THROW __nonnull ((2, 3)) __wur __attr_access ((__read_only__, 3, 4));
-+ __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4));
- #endif
-
- /* Remove the link NAME. */
-diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c
-index ed1b22308e..cb3f989cba 100644
---- a/posix/wordexp-test.c
-+++ b/posix/wordexp-test.c
-@@ -183,6 +183,7 @@ struct test_case_struct
- { 0, NULL, "$var", 0, 0, { NULL, }, IFS },
- { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS },
- { 0, NULL, "", 0, 0, { NULL, }, IFS },
-+ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS },
-
- /* Flags not already covered (testit() has special handling for these) */
- { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS },
-diff --git a/posix/wordexp.c b/posix/wordexp.c
-index e082d94895..56289503a1 100644
---- a/posix/wordexp.c
-+++ b/posix/wordexp.c
-@@ -1399,7 +1399,7 @@ envsubst:
- /* Is it a numeric parameter? */
- else if (isdigit (env[0]))
- {
-- int n = atoi (env);
-+ unsigned long n = strtoul (env, NULL, 10);
-
- if (n >= __libc_argc)
- /* Substitute NULL. */
-diff --git a/resolv/Makefile b/resolv/Makefile
-index b61c0c3e0c..dbd8f8bf4f 100644
---- a/resolv/Makefile
-+++ b/resolv/Makefile
-@@ -61,6 +61,11 @@ tests += \
- tst-resolv-search \
- tst-resolv-trailing \
-
-+# This test calls __res_context_send directly, which is not exported
-+# from libresolv.
-+tests-internal += tst-resolv-txnid-collision
-+tests-static += tst-resolv-txnid-collision
-+
- # These tests need libdl.
- ifeq (yes,$(build-shared))
- tests += \
-@@ -191,6 +196,8 @@ $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library)
- $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library)
- $(objpfx)tst-resolv-threads: \
- $(libdl) $(objpfx)libresolv.so $(shared-thread-library)
-+$(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \
-+ $(static-thread-library)
- $(objpfx)tst-resolv-canonname: \
- $(libdl) $(objpfx)libresolv.so $(shared-thread-library)
- $(objpfx)tst-resolv-trustad: $(objpfx)libresolv.so $(shared-thread-library)
-diff --git a/resolv/res_send.c b/resolv/res_send.c
-index 7e5fec6646..70e5066031 100644
---- a/resolv/res_send.c
-+++ b/resolv/res_send.c
-@@ -1342,15 +1342,6 @@ send_dg(res_state statp,
- *terrno = EMSGSIZE;
- return close_and_return_error (statp, resplen2);
- }
-- if ((recvresp1 || hp->id != anhp->id)
-- && (recvresp2 || hp2->id != anhp->id)) {
-- /*
-- * response from old query, ignore it.
-- * XXX - potential security hazard could
-- * be detected here.
-- */
-- goto wait;
-- }
-
- /* Paranoia check. Due to the connected UDP socket,
- the kernel has already filtered invalid addresses
-@@ -1360,15 +1351,24 @@ send_dg(res_state statp,
-
- /* Check for the correct header layout and a matching
- question. */
-- if ((recvresp1 || !res_queriesmatch(buf, buf + buflen,
-- *thisansp,
-- *thisansp
-- + *thisanssizp))
-- && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2,
-- *thisansp,
-- *thisansp
-- + *thisanssizp)))
-- goto wait;
-+ int matching_query = 0; /* Default to no matching query. */
-+ if (!recvresp1
-+ && anhp->id == hp->id
-+ && res_queriesmatch (buf, buf + buflen,
-+ *thisansp, *thisansp + *thisanssizp))
-+ matching_query = 1;
-+ if (!recvresp2
-+ && anhp->id == hp2->id
-+ && res_queriesmatch (buf2, buf2 + buflen2,
-+ *thisansp, *thisansp + *thisanssizp))
-+ matching_query = 2;
-+ if (matching_query == 0)
-+ /* Spurious UDP packet. Drop it and continue
-+ waiting. */
-+ {
-+ need_recompute = 1;
-+ goto wait;
-+ }
-
- if (anhp->rcode == SERVFAIL ||
- anhp->rcode == NOTIMP ||
-@@ -1383,7 +1383,7 @@ send_dg(res_state statp,
- /* No data from the first reply. */
- resplen = 0;
- /* We are waiting for a possible second reply. */
-- if (hp->id == anhp->id)
-+ if (matching_query == 1)
- recvresp1 = 1;
- else
- recvresp2 = 1;
-@@ -1414,7 +1414,7 @@ send_dg(res_state statp,
- return (1);
- }
- /* Mark which reply we received. */
-- if (recvresp1 == 0 && hp->id == anhp->id)
-+ if (matching_query == 1)
- recvresp1 = 1;
- else
- recvresp2 = 1;
-diff --git a/resolv/tst-resolv-txnid-collision.c b/resolv/tst-resolv-txnid-collision.c
-new file mode 100644
-index 0000000000..189b76f126
---- /dev/null
-+++ b/resolv/tst-resolv-txnid-collision.c
-@@ -0,0 +1,334 @@
-+/* Test parallel queries with transaction ID collisions.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <arpa/nameser.h>
-+#include <array_length.h>
-+#include <resolv-internal.h>
-+#include <resolv_context.h>
-+#include <stdbool.h>
-+#include <stdio.h>
-+#include <string.h>
-+#include <support/check.h>
-+#include <support/check_nss.h>
-+#include <support/resolv_test.h>
-+#include <support/support.h>
-+#include <support/test-driver.h>
-+
-+/* Result of parsing a DNS question name.
-+
-+ A question name has the form reorder-N-M-rcode-C.example.net, where
-+ N and M are either 0 and 1, corresponding to the reorder member,
-+ and C is a number that will be stored in the rcode field.
-+
-+ Also see parse_qname below. */
-+struct parsed_qname
-+{
-+ /* The DNS response code requested from the first server. The
-+ second server always responds with RCODE zero. */
-+ int rcode;
-+
-+ /* Indicates whether to perform reordering in the responses from the
-+ respective server. */
-+ bool reorder[2];
-+};
-+
-+/* Fills *PARSED based on QNAME. */
-+static void
-+parse_qname (struct parsed_qname *parsed, const char *qname)
-+{
-+ int reorder0;
-+ int reorder1;
-+ int rcode;
-+ char *suffix;
-+ if (sscanf (qname, "reorder-%d-%d.rcode-%d.%ms",
-+ &reorder0, &reorder1, &rcode, &suffix) == 4)
-+ {
-+ if (reorder0 != 0)
-+ TEST_COMPARE (reorder0, 1);
-+ if (reorder1 != 0)
-+ TEST_COMPARE (reorder1, 1);
-+ TEST_VERIFY (rcode >= 0 && rcode <= 15);
-+ TEST_COMPARE_STRING (suffix, "example.net");
-+ free (suffix);
-+
-+ parsed->rcode = rcode;
-+ parsed->reorder[0] = reorder0;
-+ parsed->reorder[1] = reorder1;
-+ }
-+ else
-+ FAIL_EXIT1 ("unexpected query: %s", qname);
-+}
-+
-+/* Used to construct a response. The first server responds with an
-+ error, the second server succeeds. */
-+static void
-+build_response (const struct resolv_response_context *ctx,
-+ struct resolv_response_builder *b,
-+ const char *qname, uint16_t qclass, uint16_t qtype)
-+{
-+ struct parsed_qname parsed;
-+ parse_qname (&parsed, qname);
-+
-+ switch (ctx->server_index)
-+ {
-+ case 0:
-+ {
-+ struct resolv_response_flags flags = { 0 };
-+ if (parsed.rcode == 0)
-+ /* Simulate a delegation in case a NODATA (RCODE zero)
-+ response is requested. */
-+ flags.clear_ra = true;
-+ else
-+ flags.rcode = parsed.rcode;
-+
-+ resolv_response_init (b, flags);
-+ resolv_response_add_question (b, qname, qclass, qtype);
-+ }
-+ break;
-+
-+ case 1:
-+ {
-+ struct resolv_response_flags flags = { 0, };
-+ resolv_response_init (b, flags);
-+ resolv_response_add_question (b, qname, qclass, qtype);
-+
-+ resolv_response_section (b, ns_s_an);
-+ resolv_response_open_record (b, qname, qclass, qtype, 0);
-+ if (qtype == T_A)
-+ {
-+ char ipv4[4] = { 192, 0, 2, 1 };
-+ resolv_response_add_data (b, &ipv4, sizeof (ipv4));
-+ }
-+ else
-+ {
-+ char ipv6[16]
-+ = { 0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
-+ resolv_response_add_data (b, &ipv6, sizeof (ipv6));
-+ }
-+ resolv_response_close_record (b);
-+ }
-+ break;
-+ }
-+}
-+
-+/* Used to reorder responses. */
-+struct resolv_response_context *previous_query;
-+
-+/* Used to keep track of the queries received. */
-+static int previous_server_index = -1;
-+static uint16_t previous_qtype;
-+
-+/* For each server, buffer the first query and then send both answers
-+ to the second query, reordered if requested. */
-+static void
-+response (const struct resolv_response_context *ctx,
-+ struct resolv_response_builder *b,
-+ const char *qname, uint16_t qclass, uint16_t qtype)
-+{
-+ TEST_VERIFY (qtype == T_A || qtype == T_AAAA);
-+ if (ctx->server_index != 0)
-+ TEST_COMPARE (ctx->server_index, 1);
-+
-+ struct parsed_qname parsed;
-+ parse_qname (&parsed, qname);
-+
-+ if (previous_query == NULL)
-+ {
-+ /* No buffered query. Record this query and do not send a
-+ response. */
-+ TEST_COMPARE (previous_qtype, 0);
-+ previous_query = resolv_response_context_duplicate (ctx);
-+ previous_qtype = qtype;
-+ resolv_response_drop (b);
-+ previous_server_index = ctx->server_index;
-+
-+ if (test_verbose)
-+ printf ("info: buffering first query for: %s\n", qname);
-+ }
-+ else
-+ {
-+ TEST_VERIFY (previous_query != 0);
-+ TEST_COMPARE (ctx->server_index, previous_server_index);
-+ TEST_VERIFY (previous_qtype != qtype); /* Not a duplicate. */
-+
-+ /* If reordering, send a response for this query explicitly, and
-+ then skip the implicit send. */
-+ if (parsed.reorder[ctx->server_index])
-+ {
-+ if (test_verbose)
-+ printf ("info: sending reordered second response for: %s\n",
-+ qname);
-+ build_response (ctx, b, qname, qclass, qtype);
-+ resolv_response_send_udp (ctx, b);
-+ resolv_response_drop (b);
-+ }
-+
-+ /* Build a response for the previous query and send it, thus
-+ reordering the two responses. */
-+ {
-+ if (test_verbose)
-+ printf ("info: sending first response for: %s\n", qname);
-+ struct resolv_response_builder *btmp
-+ = resolv_response_builder_allocate (previous_query->query_buffer,
-+ previous_query->query_length);
-+ build_response (ctx, btmp, qname, qclass, previous_qtype);
-+ resolv_response_send_udp (ctx, btmp);
-+ resolv_response_builder_free (btmp);
-+ }
-+
-+ /* If not reordering, send the reply as usual. */
-+ if (!parsed.reorder[ctx->server_index])
-+ {
-+ if (test_verbose)
-+ printf ("info: sending non-reordered second response for: %s\n",
-+ qname);
-+ build_response (ctx, b, qname, qclass, qtype);
-+ }
-+
-+ /* Unbuffer the response and prepare for the next query. */
-+ resolv_response_context_free (previous_query);
-+ previous_query = NULL;
-+ previous_qtype = 0;
-+ previous_server_index = -1;
-+ }
-+}
-+
-+/* Runs a query for QNAME and checks for the expected reply. See
-+ struct parsed_qname for the expected format for QNAME. */
-+static void
-+test_qname (const char *qname, int rcode)
-+{
-+ struct resolv_context *ctx = __resolv_context_get ();
-+ TEST_VERIFY_EXIT (ctx != NULL);
-+
-+ unsigned char q1[512];
-+ int q1len = res_mkquery (QUERY, qname, C_IN, T_A, NULL, 0, NULL,
-+ q1, sizeof (q1));
-+ TEST_VERIFY_EXIT (q1len > 12);
-+
-+ unsigned char q2[512];
-+ int q2len = res_mkquery (QUERY, qname, C_IN, T_AAAA, NULL, 0, NULL,
-+ q2, sizeof (q2));
-+ TEST_VERIFY_EXIT (q2len > 12);
-+
-+ /* Produce a transaction ID collision. */
-+ memcpy (q2, q1, 2);
-+
-+ unsigned char ans1[512];
-+ unsigned char *ans1p = ans1;
-+ unsigned char *ans2p = NULL;
-+ int nans2p = 0;
-+ int resplen2 = 0;
-+ int ans2p_malloced = 0;
-+
-+ /* Perform a parallel A/AAAA query. */
-+ int resplen1 = __res_context_send (ctx, q1, q1len, q2, q2len,
-+ ans1, sizeof (ans1), &ans1p,
-+ &ans2p, &nans2p,
-+ &resplen2, &ans2p_malloced);
-+
-+ TEST_VERIFY (resplen1 > 12);
-+ TEST_VERIFY (resplen2 > 12);
-+ if (resplen1 <= 12 || resplen2 <= 12)
-+ return;
-+
-+ if (rcode == 1 || rcode == 3)
-+ {
-+ /* Format Error and Name Error responses does not trigger
-+ switching to the next server. */
-+ TEST_COMPARE (ans1p[3] & 0x0f, rcode);
-+ TEST_COMPARE (ans2p[3] & 0x0f, rcode);
-+ return;
-+ }
-+
-+ /* The response should be successful. */
-+ TEST_COMPARE (ans1p[3] & 0x0f, 0);
-+ TEST_COMPARE (ans2p[3] & 0x0f, 0);
-+
-+ /* Due to bug 19691, the answer may not be in the slot matching the
-+ query. Assume that the AAAA response is the longer one. */
-+ unsigned char *a_answer;
-+ int a_answer_length;
-+ unsigned char *aaaa_answer;
-+ int aaaa_answer_length;
-+ if (resplen2 > resplen1)
-+ {
-+ a_answer = ans1p;
-+ a_answer_length = resplen1;
-+ aaaa_answer = ans2p;
-+ aaaa_answer_length = resplen2;
-+ }
-+ else
-+ {
-+ a_answer = ans2p;
-+ a_answer_length = resplen2;
-+ aaaa_answer = ans1p;
-+ aaaa_answer_length = resplen1;
-+ }
-+
-+ {
-+ char *expected = xasprintf ("name: %s\n"
-+ "address: 192.0.2.1\n",
-+ qname);
-+ check_dns_packet (qname, a_answer, a_answer_length, expected);
-+ free (expected);
-+ }
-+ {
-+ char *expected = xasprintf ("name: %s\n"
-+ "address: 2001:db8::1\n",
-+ qname);
-+ check_dns_packet (qname, aaaa_answer, aaaa_answer_length, expected);
-+ free (expected);
-+ }
-+
-+ if (ans2p_malloced)
-+ free (ans2p);
-+
-+ __resolv_context_put (ctx);
-+}
-+
-+static int
-+do_test (void)
-+{
-+ struct resolv_test *aux = resolv_test_start
-+ ((struct resolv_redirect_config)
-+ {
-+ .response_callback = response,
-+
-+ /* The response callback use global state (the previous_*
-+ variables), and query processing must therefore be
-+ serialized. */
-+ .single_thread_udp = true,
-+ });
-+
-+ for (int rcode = 0; rcode <= 5; ++rcode)
-+ for (int do_reorder_0 = 0; do_reorder_0 < 2; ++do_reorder_0)
-+ for (int do_reorder_1 = 0; do_reorder_1 < 2; ++do_reorder_1)
-+ {
-+ char *qname = xasprintf ("reorder-%d-%d.rcode-%d.example.net",
-+ do_reorder_0, do_reorder_1, rcode);
-+ test_qname (qname, rcode);
-+ free (qname);
-+ }
-+
-+ resolv_test_end (aux);
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/rt/Makefile b/rt/Makefile
-index dab5d62a57..93502cfaa7 100644
---- a/rt/Makefile
-+++ b/rt/Makefile
-@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \
- tst-aio7 tst-aio8 tst-aio9 tst-aio10 \
- tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \
- tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \
-+ tst-bz28213 \
- tst-timer3 tst-timer4 tst-timer5 \
- tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \
- tst-shm-cancel
-diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c
-new file mode 100644
-index 0000000000..0c096b5a0a
---- /dev/null
-+++ b/rt/tst-bz28213.c
-@@ -0,0 +1,101 @@
-+/* Bug 28213: test for NULL pointer dereference in mq_notify.
-+ Copyright (C) The GNU Toolchain Authors.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <errno.h>
-+#include <sys/types.h>
-+#include <sys/stat.h>
-+#include <fcntl.h>
-+#include <unistd.h>
-+#include <mqueue.h>
-+#include <signal.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <support/check.h>
-+
-+static mqd_t m = -1;
-+static const char msg[] = "hello";
-+
-+static void
-+check_bz28213_cb (union sigval sv)
-+{
-+ char buf[sizeof (msg)];
-+
-+ (void) sv;
-+
-+ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL)
-+ == sizeof (buf));
-+ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0);
-+
-+ exit (0);
-+}
-+
-+static void
-+check_bz28213 (void)
-+{
-+ struct sigevent sev;
-+
-+ memset (&sev, '\0', sizeof (sev));
-+ sev.sigev_notify = SIGEV_THREAD;
-+ sev.sigev_notify_function = check_bz28213_cb;
-+
-+ /* Step 1: Register & unregister notifier.
-+ Helper thread should receive NOTIFY_REMOVED notification.
-+ In a vulnerable version of glibc, NULL pointer dereference follows. */
-+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
-+ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0);
-+
-+ /* Step 2: Once again, register notification.
-+ Try to send one message.
-+ Test is considered successful, if the callback does exit (0). */
-+ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
-+ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0);
-+
-+ /* Wait... */
-+ pause ();
-+}
-+
-+static int
-+do_test (void)
-+{
-+ static const char m_name[] = "/bz28213_queue";
-+ struct mq_attr m_attr;
-+
-+ memset (&m_attr, '\0', sizeof (m_attr));
-+ m_attr.mq_maxmsg = 1;
-+ m_attr.mq_msgsize = sizeof (msg);
-+
-+ m = mq_open (m_name,
-+ O_RDWR | O_CREAT | O_EXCL,
-+ 0600,
-+ &m_attr);
-+
-+ if (m < 0)
-+ {
-+ if (errno == ENOSYS)
-+ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n");
-+ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n");
-+ }
-+
-+ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0);
-+
-+ check_bz28213 ();
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/stdio-common/Makefile b/stdio-common/Makefile
-index 8475fd1f09..eff0c98d82 100644
---- a/stdio-common/Makefile
-+++ b/stdio-common/Makefile
-@@ -69,7 +69,8 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
- tst-printf-bz25691 \
- tst-vfprintf-width-prec-alloc \
- tst-printf-fp-free \
-- tst-printf-fp-leak
-+ tst-printf-fp-leak \
-+ test-strerr
-
-
- test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble
-diff --git a/stdio-common/errlist.c b/stdio-common/errlist.c
-index d15f13a22a..2ecf121674 100644
---- a/stdio-common/errlist.c
-+++ b/stdio-common/errlist.c
-@@ -20,9 +20,13 @@
- #include <libintl.h>
- #include <array_length.h>
-
-+#ifndef ERR_MAP
-+# define ERR_MAP(n) n
-+#endif
-+
- const char *const _sys_errlist_internal[] =
- {
--#define _S(n, str) [n] = str,
-+#define _S(n, str) [ERR_MAP(n)] = str,
- #include <errlist.h>
- #undef _S
- };
-@@ -41,20 +45,21 @@ static const union sys_errname_t
- {
- #define MSGSTRFIELD1(line) str##line
- #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
--#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(str)];
-+#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(#n)];
- #include <errlist.h>
- #undef _S
- };
- char str[0];
- } _sys_errname = { {
--#define _S(n, s) s,
-+#define _S(n, s) #n,
- #include <errlist.h>
- #undef _S
- } };
-
- static const unsigned short _sys_errnameidx[] =
- {
--#define _S(n, s) [n] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
-+#define _S(n, s) \
-+ [ERR_MAP(n)] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
- #include <errlist.h>
- #undef _S
- };
-diff --git a/stdio-common/test-strerr.c b/stdio-common/test-strerr.c
-index fded208118..d77b81d507 100644
---- a/stdio-common/test-strerr.c
-+++ b/stdio-common/test-strerr.c
-@@ -18,46 +18,672 @@
-
- #include <string.h>
- #include <errno.h>
--#include <array_length.h>
-
- #include <support/support.h>
- #include <support/check.h>
-
--#define N_(name) name
--
--static const char *const errlist[] =
-- {
--/* This file is auto-generated from errlist.def. */
--#include <errlist.h>
-- };
--
--#define MSGSTR_T errname_t
--#define MSGSTR errname
--#define MSGIDX errnameidx
--#include <errlist-name.h>
--#undef MSGSTR
--#undef MSGIDX
--
- static int
- do_test (void)
- {
-- TEST_VERIFY (strerrordesc_np (-1) == NULL);
-- TEST_VERIFY (strerrordesc_np (array_length (errlist)) == NULL);
-- for (size_t i = 0; i < array_length (errlist); i++)
-- {
-- if (errlist[i] == NULL)
-- continue;
-- TEST_COMPARE_STRING (strerrordesc_np (i), errlist[i]);
-- }
-+ TEST_COMPARE_STRING (strerrordesc_np (0), "Success");
-+ TEST_COMPARE_STRING (strerrorname_np (0), "0");
-
-- TEST_VERIFY (strerrorname_np (-1) == NULL);
-- TEST_VERIFY (strerrorname_np (array_length (errlist)) == NULL);
-- for (size_t i = 0; i < array_length (errlist); i++)
-- {
-- if (errlist[i] == NULL)
-- continue;
-- TEST_COMPARE_STRING (strerrorname_np (i), errname.str + errnameidx[i]);
-- }
-+#ifdef EPERM
-+ TEST_COMPARE_STRING (strerrordesc_np (EPERM), "Operation not permitted");
-+ TEST_COMPARE_STRING (strerrorname_np (EPERM), "EPERM");
-+#endif
-+#ifdef ENOENT
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOENT),
-+ "No such file or directory");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOENT), "ENOENT");
-+#endif
-+#ifdef ESRCH
-+ TEST_COMPARE_STRING (strerrordesc_np (ESRCH), "No such process");
-+ TEST_COMPARE_STRING (strerrorname_np (ESRCH), "ESRCH");
-+#endif
-+#ifdef EINTR
-+ TEST_COMPARE_STRING (strerrordesc_np (EINTR), "Interrupted system call");
-+ TEST_COMPARE_STRING (strerrorname_np (EINTR), "EINTR");
-+#endif
-+#ifdef EIO
-+ TEST_COMPARE_STRING (strerrordesc_np (EIO), "Input/output error");
-+ TEST_COMPARE_STRING (strerrorname_np (EIO), "EIO");
-+#endif
-+#ifdef ENXIO
-+ TEST_COMPARE_STRING (strerrordesc_np (ENXIO), "No such device or address");
-+ TEST_COMPARE_STRING (strerrorname_np (ENXIO), "ENXIO");
-+#endif
-+#ifdef E2BIG
-+ TEST_COMPARE_STRING (strerrordesc_np (E2BIG), "Argument list too long");
-+ TEST_COMPARE_STRING (strerrorname_np (E2BIG), "E2BIG");
-+#endif
-+#ifdef ENOEXEC
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOEXEC), "Exec format error");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOEXEC), "ENOEXEC");
-+#endif
-+#ifdef EBADF
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADF), "Bad file descriptor");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADF), "EBADF");
-+#endif
-+#ifdef ECHILD
-+ TEST_COMPARE_STRING (strerrordesc_np (ECHILD), "No child processes");
-+ TEST_COMPARE_STRING (strerrorname_np (ECHILD), "ECHILD");
-+#endif
-+#ifdef EDEADLK
-+ TEST_COMPARE_STRING (strerrordesc_np (EDEADLK),
-+ "Resource deadlock avoided");
-+ TEST_COMPARE_STRING (strerrorname_np (EDEADLK), "EDEADLK");
-+#endif
-+#ifdef ENOMEM
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEM), "Cannot allocate memory");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOMEM), "ENOMEM");
-+#endif
-+#ifdef EACCES
-+ TEST_COMPARE_STRING (strerrordesc_np (EACCES), "Permission denied");
-+ TEST_COMPARE_STRING (strerrorname_np (EACCES), "EACCES");
-+#endif
-+#ifdef EFAULT
-+ TEST_COMPARE_STRING (strerrordesc_np (EFAULT), "Bad address");
-+ TEST_COMPARE_STRING (strerrorname_np (EFAULT), "EFAULT");
-+#endif
-+#ifdef ENOTBLK
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTBLK), "Block device required");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTBLK), "ENOTBLK");
-+#endif
-+#ifdef EBUSY
-+ TEST_COMPARE_STRING (strerrordesc_np (EBUSY), "Device or resource busy");
-+ TEST_COMPARE_STRING (strerrorname_np (EBUSY), "EBUSY");
-+#endif
-+#ifdef EEXIST
-+ TEST_COMPARE_STRING (strerrordesc_np (EEXIST), "File exists");
-+ TEST_COMPARE_STRING (strerrorname_np (EEXIST), "EEXIST");
-+#endif
-+#ifdef EXDEV
-+ TEST_COMPARE_STRING (strerrordesc_np (EXDEV), "Invalid cross-device link");
-+ TEST_COMPARE_STRING (strerrorname_np (EXDEV), "EXDEV");
-+#endif
-+#ifdef ENODEV
-+ TEST_COMPARE_STRING (strerrordesc_np (ENODEV), "No such device");
-+ TEST_COMPARE_STRING (strerrorname_np (ENODEV), "ENODEV");
-+#endif
-+#ifdef ENOTDIR
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTDIR), "Not a directory");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTDIR), "ENOTDIR");
-+#endif
-+#ifdef EISDIR
-+ TEST_COMPARE_STRING (strerrordesc_np (EISDIR), "Is a directory");
-+ TEST_COMPARE_STRING (strerrorname_np (EISDIR), "EISDIR");
-+#endif
-+#ifdef EINVAL
-+ TEST_COMPARE_STRING (strerrordesc_np (EINVAL), "Invalid argument");
-+ TEST_COMPARE_STRING (strerrorname_np (EINVAL), "EINVAL");
-+#endif
-+#ifdef EMFILE
-+ TEST_COMPARE_STRING (strerrordesc_np (EMFILE), "Too many open files");
-+ TEST_COMPARE_STRING (strerrorname_np (EMFILE), "EMFILE");
-+#endif
-+#ifdef ENFILE
-+ TEST_COMPARE_STRING (strerrordesc_np (ENFILE),
-+ "Too many open files in system");
-+ TEST_COMPARE_STRING (strerrorname_np (ENFILE), "ENFILE");
-+#endif
-+#ifdef ENOTTY
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTTY),
-+ "Inappropriate ioctl for device");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTTY), "ENOTTY");
-+#endif
-+#ifdef ETXTBSY
-+ TEST_COMPARE_STRING (strerrordesc_np (ETXTBSY), "Text file busy");
-+ TEST_COMPARE_STRING (strerrorname_np (ETXTBSY), "ETXTBSY");
-+#endif
-+#ifdef EFBIG
-+ TEST_COMPARE_STRING (strerrordesc_np (EFBIG), "File too large");
-+ TEST_COMPARE_STRING (strerrorname_np (EFBIG), "EFBIG");
-+#endif
-+#ifdef ENOSPC
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOSPC), "No space left on device");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOSPC), "ENOSPC");
-+#endif
-+#ifdef ESPIPE
-+ TEST_COMPARE_STRING (strerrordesc_np (ESPIPE), "Illegal seek");
-+ TEST_COMPARE_STRING (strerrorname_np (ESPIPE), "ESPIPE");
-+#endif
-+#ifdef EROFS
-+ TEST_COMPARE_STRING (strerrordesc_np (EROFS), "Read-only file system");
-+ TEST_COMPARE_STRING (strerrorname_np (EROFS), "EROFS");
-+#endif
-+#ifdef EMLINK
-+ TEST_COMPARE_STRING (strerrordesc_np (EMLINK), "Too many links");
-+ TEST_COMPARE_STRING (strerrorname_np (EMLINK), "EMLINK");
-+#endif
-+#ifdef EPIPE
-+ TEST_COMPARE_STRING (strerrordesc_np (EPIPE), "Broken pipe");
-+ TEST_COMPARE_STRING (strerrorname_np (EPIPE), "EPIPE");
-+#endif
-+#ifdef EDOM
-+ TEST_COMPARE_STRING (strerrordesc_np (EDOM),
-+ "Numerical argument out of domain");
-+ TEST_COMPARE_STRING (strerrorname_np (EDOM), "EDOM");
-+#endif
-+#ifdef ERANGE
-+ TEST_COMPARE_STRING (strerrordesc_np (ERANGE),
-+ "Numerical result out of range");
-+ TEST_COMPARE_STRING (strerrorname_np (ERANGE), "ERANGE");
-+#endif
-+#ifdef EAGAIN
-+ TEST_COMPARE_STRING (strerrordesc_np (EAGAIN),
-+ "Resource temporarily unavailable");
-+ TEST_COMPARE_STRING (strerrorname_np (EAGAIN), "EAGAIN");
-+#endif
-+#ifdef EINPROGRESS
-+ TEST_COMPARE_STRING (strerrordesc_np (EINPROGRESS),
-+ "Operation now in progress");
-+ TEST_COMPARE_STRING (strerrorname_np (EINPROGRESS), "EINPROGRESS");
-+#endif
-+#ifdef EALREADY
-+ TEST_COMPARE_STRING (strerrordesc_np (EALREADY),
-+ "Operation already in progress");
-+ TEST_COMPARE_STRING (strerrorname_np (EALREADY), "EALREADY");
-+#endif
-+#ifdef ENOTSOCK
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSOCK),
-+ "Socket operation on non-socket");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTSOCK), "ENOTSOCK");
-+#endif
-+#ifdef EMSGSIZE
-+ TEST_COMPARE_STRING (strerrordesc_np (EMSGSIZE), "Message too long");
-+ TEST_COMPARE_STRING (strerrorname_np (EMSGSIZE), "EMSGSIZE");
-+#endif
-+#ifdef EPROTOTYPE
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROTOTYPE),
-+ "Protocol wrong type for socket");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROTOTYPE), "EPROTOTYPE");
-+#endif
-+#ifdef ENOPROTOOPT
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOPROTOOPT),
-+ "Protocol not available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOPROTOOPT), "ENOPROTOOPT");
-+#endif
-+#ifdef EPROTONOSUPPORT
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROTONOSUPPORT),
-+ "Protocol not supported");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROTONOSUPPORT), "EPROTONOSUPPORT");
-+#endif
-+#ifdef ESOCKTNOSUPPORT
-+ TEST_COMPARE_STRING (strerrordesc_np (ESOCKTNOSUPPORT),
-+ "Socket type not supported");
-+ TEST_COMPARE_STRING (strerrorname_np (ESOCKTNOSUPPORT), "ESOCKTNOSUPPORT");
-+#endif
-+#ifdef EOPNOTSUPP
-+ TEST_COMPARE_STRING (strerrordesc_np (EOPNOTSUPP),
-+ "Operation not supported");
-+ TEST_COMPARE_STRING (strerrorname_np (EOPNOTSUPP), "EOPNOTSUPP");
-+#endif
-+#ifdef EPFNOSUPPORT
-+ TEST_COMPARE_STRING (strerrordesc_np (EPFNOSUPPORT),
-+ "Protocol family not supported");
-+ TEST_COMPARE_STRING (strerrorname_np (EPFNOSUPPORT), "EPFNOSUPPORT");
-+#endif
-+#ifdef EAFNOSUPPORT
-+ TEST_COMPARE_STRING (strerrordesc_np (EAFNOSUPPORT),
-+ "Address family not supported by protocol");
-+ TEST_COMPARE_STRING (strerrorname_np (EAFNOSUPPORT), "EAFNOSUPPORT");
-+#endif
-+#ifdef EADDRINUSE
-+ TEST_COMPARE_STRING (strerrordesc_np (EADDRINUSE),
-+ "Address already in use");
-+ TEST_COMPARE_STRING (strerrorname_np (EADDRINUSE), "EADDRINUSE");
-+#endif
-+#ifdef EADDRNOTAVAIL
-+ TEST_COMPARE_STRING (strerrordesc_np (EADDRNOTAVAIL),
-+ "Cannot assign requested address");
-+ TEST_COMPARE_STRING (strerrorname_np (EADDRNOTAVAIL), "EADDRNOTAVAIL");
-+#endif
-+#ifdef ENETDOWN
-+ TEST_COMPARE_STRING (strerrordesc_np (ENETDOWN), "Network is down");
-+ TEST_COMPARE_STRING (strerrorname_np (ENETDOWN), "ENETDOWN");
-+#endif
-+#ifdef ENETUNREACH
-+ TEST_COMPARE_STRING (strerrordesc_np (ENETUNREACH),
-+ "Network is unreachable");
-+ TEST_COMPARE_STRING (strerrorname_np (ENETUNREACH), "ENETUNREACH");
-+#endif
-+#ifdef ENETRESET
-+ TEST_COMPARE_STRING (strerrordesc_np (ENETRESET),
-+ "Network dropped connection on reset");
-+ TEST_COMPARE_STRING (strerrorname_np (ENETRESET), "ENETRESET");
-+#endif
-+#ifdef ECONNABORTED
-+ TEST_COMPARE_STRING (strerrordesc_np (ECONNABORTED),
-+ "Software caused connection abort");
-+ TEST_COMPARE_STRING (strerrorname_np (ECONNABORTED), "ECONNABORTED");
-+#endif
-+#ifdef ECONNRESET
-+ TEST_COMPARE_STRING (strerrordesc_np (ECONNRESET),
-+ "Connection reset by peer");
-+ TEST_COMPARE_STRING (strerrorname_np (ECONNRESET), "ECONNRESET");
-+#endif
-+#ifdef ENOBUFS
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOBUFS),
-+ "No buffer space available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOBUFS), "ENOBUFS");
-+#endif
-+#ifdef EISCONN
-+ TEST_COMPARE_STRING (strerrordesc_np (EISCONN),
-+ "Transport endpoint is already connected");
-+ TEST_COMPARE_STRING (strerrorname_np (EISCONN), "EISCONN");
-+#endif
-+#ifdef ENOTCONN
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTCONN),
-+ "Transport endpoint is not connected");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTCONN), "ENOTCONN");
-+#endif
-+#ifdef EDESTADDRREQ
-+ TEST_COMPARE_STRING (strerrordesc_np (EDESTADDRREQ),
-+ "Destination address required");
-+ TEST_COMPARE_STRING (strerrorname_np (EDESTADDRREQ), "EDESTADDRREQ");
-+#endif
-+#ifdef ESHUTDOWN
-+ TEST_COMPARE_STRING (strerrordesc_np (ESHUTDOWN),
-+ "Cannot send after transport endpoint shutdown");
-+ TEST_COMPARE_STRING (strerrorname_np (ESHUTDOWN), "ESHUTDOWN");
-+#endif
-+#ifdef ETOOMANYREFS
-+ TEST_COMPARE_STRING (strerrordesc_np (ETOOMANYREFS),
-+ "Too many references: cannot splice");
-+ TEST_COMPARE_STRING (strerrorname_np (ETOOMANYREFS), "ETOOMANYREFS");
-+#endif
-+#ifdef ETIMEDOUT
-+ TEST_COMPARE_STRING (strerrordesc_np (ETIMEDOUT), "Connection timed out");
-+ TEST_COMPARE_STRING (strerrorname_np (ETIMEDOUT), "ETIMEDOUT");
-+#endif
-+#ifdef ECONNREFUSED
-+ TEST_COMPARE_STRING (strerrordesc_np (ECONNREFUSED), "Connection refused");
-+ TEST_COMPARE_STRING (strerrorname_np (ECONNREFUSED), "ECONNREFUSED");
-+#endif
-+#ifdef ELOOP
-+ TEST_COMPARE_STRING (strerrordesc_np (ELOOP),
-+ "Too many levels of symbolic links");
-+ TEST_COMPARE_STRING (strerrorname_np (ELOOP), "ELOOP");
-+#endif
-+#ifdef ENAMETOOLONG
-+ TEST_COMPARE_STRING (strerrordesc_np (ENAMETOOLONG), "File name too long");
-+ TEST_COMPARE_STRING (strerrorname_np (ENAMETOOLONG), "ENAMETOOLONG");
-+#endif
-+#ifdef EHOSTDOWN
-+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTDOWN), "Host is down");
-+ TEST_COMPARE_STRING (strerrorname_np (EHOSTDOWN), "EHOSTDOWN");
-+#endif
-+#ifdef EHOSTUNREACH
-+ TEST_COMPARE_STRING (strerrordesc_np (EHOSTUNREACH), "No route to host");
-+ TEST_COMPARE_STRING (strerrorname_np (EHOSTUNREACH), "EHOSTUNREACH");
-+#endif
-+#ifdef ENOTEMPTY
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTEMPTY), "Directory not empty");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTEMPTY), "ENOTEMPTY");
-+#endif
-+#ifdef EUSERS
-+ TEST_COMPARE_STRING (strerrordesc_np (EUSERS), "Too many users");
-+ TEST_COMPARE_STRING (strerrorname_np (EUSERS), "EUSERS");
-+#endif
-+#ifdef EDQUOT
-+ TEST_COMPARE_STRING (strerrordesc_np (EDQUOT), "Disk quota exceeded");
-+ TEST_COMPARE_STRING (strerrorname_np (EDQUOT), "EDQUOT");
-+#endif
-+#ifdef ESTALE
-+ TEST_COMPARE_STRING (strerrordesc_np (ESTALE), "Stale file handle");
-+ TEST_COMPARE_STRING (strerrorname_np (ESTALE), "ESTALE");
-+#endif
-+#ifdef EREMOTE
-+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTE), "Object is remote");
-+ TEST_COMPARE_STRING (strerrorname_np (EREMOTE), "EREMOTE");
-+#endif
-+#ifdef ENOLCK
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOLCK), "No locks available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOLCK), "ENOLCK");
-+#endif
-+#ifdef ENOSYS
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOSYS), "Function not implemented");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOSYS), "ENOSYS");
-+#endif
-+#ifdef EILSEQ
-+ TEST_COMPARE_STRING (strerrordesc_np (EILSEQ),
-+ "Invalid or incomplete multibyte or wide character");
-+ TEST_COMPARE_STRING (strerrorname_np (EILSEQ), "EILSEQ");
-+#endif
-+#ifdef EBADMSG
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADMSG), "Bad message");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADMSG), "EBADMSG");
-+#endif
-+#ifdef EIDRM
-+ TEST_COMPARE_STRING (strerrordesc_np (EIDRM), "Identifier removed");
-+ TEST_COMPARE_STRING (strerrorname_np (EIDRM), "EIDRM");
-+#endif
-+#ifdef EMULTIHOP
-+ TEST_COMPARE_STRING (strerrordesc_np (EMULTIHOP), "Multihop attempted");
-+ TEST_COMPARE_STRING (strerrorname_np (EMULTIHOP), "EMULTIHOP");
-+#endif
-+#ifdef ENODATA
-+ TEST_COMPARE_STRING (strerrordesc_np (ENODATA), "No data available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENODATA), "ENODATA");
-+#endif
-+#ifdef ENOLINK
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOLINK), "Link has been severed");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOLINK), "ENOLINK");
-+#endif
-+#ifdef ENOMSG
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOMSG),
-+ "No message of desired type");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOMSG), "ENOMSG");
-+#endif
-+#ifdef ENOSR
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOSR), "Out of streams resources");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOSR), "ENOSR");
-+#endif
-+#ifdef ENOSTR
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOSTR), "Device not a stream");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOSTR), "ENOSTR");
-+#endif
-+#ifdef EOVERFLOW
-+ TEST_COMPARE_STRING (strerrordesc_np (EOVERFLOW),
-+ "Value too large for defined data type");
-+ TEST_COMPARE_STRING (strerrorname_np (EOVERFLOW), "EOVERFLOW");
-+#endif
-+#ifdef EPROTO
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROTO), "Protocol error");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROTO), "EPROTO");
-+#endif
-+#ifdef ETIME
-+ TEST_COMPARE_STRING (strerrordesc_np (ETIME), "Timer expired");
-+ TEST_COMPARE_STRING (strerrorname_np (ETIME), "ETIME");
-+#endif
-+#ifdef ECANCELED
-+ TEST_COMPARE_STRING (strerrordesc_np (ECANCELED), "Operation canceled");
-+ TEST_COMPARE_STRING (strerrorname_np (ECANCELED), "ECANCELED");
-+#endif
-+#ifdef EOWNERDEAD
-+ TEST_COMPARE_STRING (strerrordesc_np (EOWNERDEAD), "Owner died");
-+ TEST_COMPARE_STRING (strerrorname_np (EOWNERDEAD), "EOWNERDEAD");
-+#endif
-+#ifdef ENOTRECOVERABLE
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTRECOVERABLE),
-+ "State not recoverable");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTRECOVERABLE), "ENOTRECOVERABLE");
-+#endif
-+#ifdef ERESTART
-+ TEST_COMPARE_STRING (strerrordesc_np (ERESTART),
-+ "Interrupted system call should be restarted");
-+ TEST_COMPARE_STRING (strerrorname_np (ERESTART), "ERESTART");
-+#endif
-+#ifdef ECHRNG
-+ TEST_COMPARE_STRING (strerrordesc_np (ECHRNG),
-+ "Channel number out of range");
-+ TEST_COMPARE_STRING (strerrorname_np (ECHRNG), "ECHRNG");
-+#endif
-+#ifdef EL2NSYNC
-+ TEST_COMPARE_STRING (strerrordesc_np (EL2NSYNC),
-+ "Level 2 not synchronized");
-+ TEST_COMPARE_STRING (strerrorname_np (EL2NSYNC), "EL2NSYNC");
-+#endif
-+#ifdef EL3HLT
-+ TEST_COMPARE_STRING (strerrordesc_np (EL3HLT), "Level 3 halted");
-+ TEST_COMPARE_STRING (strerrorname_np (EL3HLT), "EL3HLT");
-+#endif
-+#ifdef EL3RST
-+ TEST_COMPARE_STRING (strerrordesc_np (EL3RST), "Level 3 reset");
-+ TEST_COMPARE_STRING (strerrorname_np (EL3RST), "EL3RST");
-+#endif
-+#ifdef ELNRNG
-+ TEST_COMPARE_STRING (strerrordesc_np (ELNRNG), "Link number out of range");
-+ TEST_COMPARE_STRING (strerrorname_np (ELNRNG), "ELNRNG");
-+#endif
-+#ifdef EUNATCH
-+ TEST_COMPARE_STRING (strerrordesc_np (EUNATCH),
-+ "Protocol driver not attached");
-+ TEST_COMPARE_STRING (strerrorname_np (EUNATCH), "EUNATCH");
-+#endif
-+#ifdef ENOCSI
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOCSI),
-+ "No CSI structure available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOCSI), "ENOCSI");
-+#endif
-+#ifdef EL2HLT
-+ TEST_COMPARE_STRING (strerrordesc_np (EL2HLT), "Level 2 halted");
-+ TEST_COMPARE_STRING (strerrorname_np (EL2HLT), "EL2HLT");
-+#endif
-+#ifdef EBADE
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADE), "Invalid exchange");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADE), "EBADE");
-+#endif
-+#ifdef EBADR
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADR),
-+ "Invalid request descriptor");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADR), "EBADR");
-+#endif
-+#ifdef EXFULL
-+ TEST_COMPARE_STRING (strerrordesc_np (EXFULL), "Exchange full");
-+ TEST_COMPARE_STRING (strerrorname_np (EXFULL), "EXFULL");
-+#endif
-+#ifdef ENOANO
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOANO), "No anode");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOANO), "ENOANO");
-+#endif
-+#ifdef EBADRQC
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADRQC), "Invalid request code");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADRQC), "EBADRQC");
-+#endif
-+#ifdef EBADSLT
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADSLT), "Invalid slot");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADSLT), "EBADSLT");
-+#endif
-+#ifdef EBFONT
-+ TEST_COMPARE_STRING (strerrordesc_np (EBFONT), "Bad font file format");
-+ TEST_COMPARE_STRING (strerrorname_np (EBFONT), "EBFONT");
-+#endif
-+#ifdef ENONET
-+ TEST_COMPARE_STRING (strerrordesc_np (ENONET),
-+ "Machine is not on the network");
-+ TEST_COMPARE_STRING (strerrorname_np (ENONET), "ENONET");
-+#endif
-+#ifdef ENOPKG
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOPKG), "Package not installed");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOPKG), "ENOPKG");
-+#endif
-+#ifdef EADV
-+ TEST_COMPARE_STRING (strerrordesc_np (EADV), "Advertise error");
-+ TEST_COMPARE_STRING (strerrorname_np (EADV), "EADV");
-+#endif
-+#ifdef ESRMNT
-+ TEST_COMPARE_STRING (strerrordesc_np (ESRMNT), "Srmount error");
-+ TEST_COMPARE_STRING (strerrorname_np (ESRMNT), "ESRMNT");
-+#endif
-+#ifdef ECOMM
-+ TEST_COMPARE_STRING (strerrordesc_np (ECOMM),
-+ "Communication error on send");
-+ TEST_COMPARE_STRING (strerrorname_np (ECOMM), "ECOMM");
-+#endif
-+#ifdef EDOTDOT
-+ TEST_COMPARE_STRING (strerrordesc_np (EDOTDOT), "RFS specific error");
-+ TEST_COMPARE_STRING (strerrorname_np (EDOTDOT), "EDOTDOT");
-+#endif
-+#ifdef ENOTUNIQ
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTUNIQ),
-+ "Name not unique on network");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTUNIQ), "ENOTUNIQ");
-+#endif
-+#ifdef EBADFD
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADFD),
-+ "File descriptor in bad state");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADFD), "EBADFD");
-+#endif
-+#ifdef EREMCHG
-+ TEST_COMPARE_STRING (strerrordesc_np (EREMCHG), "Remote address changed");
-+ TEST_COMPARE_STRING (strerrorname_np (EREMCHG), "EREMCHG");
-+#endif
-+#ifdef ELIBACC
-+ TEST_COMPARE_STRING (strerrordesc_np (ELIBACC),
-+ "Can not access a needed shared library");
-+ TEST_COMPARE_STRING (strerrorname_np (ELIBACC), "ELIBACC");
-+#endif
-+#ifdef ELIBBAD
-+ TEST_COMPARE_STRING (strerrordesc_np (ELIBBAD),
-+ "Accessing a corrupted shared library");
-+ TEST_COMPARE_STRING (strerrorname_np (ELIBBAD), "ELIBBAD");
-+#endif
-+#ifdef ELIBSCN
-+ TEST_COMPARE_STRING (strerrordesc_np (ELIBSCN),
-+ ".lib section in a.out corrupted");
-+ TEST_COMPARE_STRING (strerrorname_np (ELIBSCN), "ELIBSCN");
-+#endif
-+#ifdef ELIBMAX
-+ TEST_COMPARE_STRING (strerrordesc_np (ELIBMAX),
-+ "Attempting to link in too many shared libraries");
-+ TEST_COMPARE_STRING (strerrorname_np (ELIBMAX), "ELIBMAX");
-+#endif
-+#ifdef ELIBEXEC
-+ TEST_COMPARE_STRING (strerrordesc_np (ELIBEXEC),
-+ "Cannot exec a shared library directly");
-+ TEST_COMPARE_STRING (strerrorname_np (ELIBEXEC), "ELIBEXEC");
-+#endif
-+#ifdef ESTRPIPE
-+ TEST_COMPARE_STRING (strerrordesc_np (ESTRPIPE), "Streams pipe error");
-+ TEST_COMPARE_STRING (strerrorname_np (ESTRPIPE), "ESTRPIPE");
-+#endif
-+#ifdef EUCLEAN
-+ TEST_COMPARE_STRING (strerrordesc_np (EUCLEAN),
-+ "Structure needs cleaning");
-+ TEST_COMPARE_STRING (strerrorname_np (EUCLEAN), "EUCLEAN");
-+#endif
-+#ifdef ENOTNAM
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTNAM),
-+ "Not a XENIX named type file");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTNAM), "ENOTNAM");
-+#endif
-+#ifdef ENAVAIL
-+ TEST_COMPARE_STRING (strerrordesc_np (ENAVAIL),
-+ "No XENIX semaphores available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENAVAIL), "ENAVAIL");
-+#endif
-+#ifdef EISNAM
-+ TEST_COMPARE_STRING (strerrordesc_np (EISNAM), "Is a named type file");
-+ TEST_COMPARE_STRING (strerrorname_np (EISNAM), "EISNAM");
-+#endif
-+#ifdef EREMOTEIO
-+ TEST_COMPARE_STRING (strerrordesc_np (EREMOTEIO), "Remote I/O error");
-+ TEST_COMPARE_STRING (strerrorname_np (EREMOTEIO), "EREMOTEIO");
-+#endif
-+#ifdef ENOMEDIUM
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOMEDIUM), "No medium found");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOMEDIUM), "ENOMEDIUM");
-+#endif
-+#ifdef EMEDIUMTYPE
-+ TEST_COMPARE_STRING (strerrordesc_np (EMEDIUMTYPE), "Wrong medium type");
-+ TEST_COMPARE_STRING (strerrorname_np (EMEDIUMTYPE), "EMEDIUMTYPE");
-+#endif
-+#ifdef ENOKEY
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOKEY),
-+ "Required key not available");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOKEY), "ENOKEY");
-+#endif
-+#ifdef EKEYEXPIRED
-+ TEST_COMPARE_STRING (strerrordesc_np (EKEYEXPIRED), "Key has expired");
-+ TEST_COMPARE_STRING (strerrorname_np (EKEYEXPIRED), "EKEYEXPIRED");
-+#endif
-+#ifdef EKEYREVOKED
-+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREVOKED),
-+ "Key has been revoked");
-+ TEST_COMPARE_STRING (strerrorname_np (EKEYREVOKED), "EKEYREVOKED");
-+#endif
-+#ifdef EKEYREJECTED
-+ TEST_COMPARE_STRING (strerrordesc_np (EKEYREJECTED),
-+ "Key was rejected by service");
-+ TEST_COMPARE_STRING (strerrorname_np (EKEYREJECTED), "EKEYREJECTED");
-+#endif
-+#ifdef ERFKILL
-+ TEST_COMPARE_STRING (strerrordesc_np (ERFKILL),
-+ "Operation not possible due to RF-kill");
-+ TEST_COMPARE_STRING (strerrorname_np (ERFKILL), "ERFKILL");
-+#endif
-+#ifdef EHWPOISON
-+ TEST_COMPARE_STRING (strerrordesc_np (EHWPOISON),
-+ "Memory page has hardware error");
-+ TEST_COMPARE_STRING (strerrorname_np (EHWPOISON), "EHWPOISON");
-+#endif
-+#ifdef EBADRPC
-+ TEST_COMPARE_STRING (strerrordesc_np (EBADRPC), "RPC struct is bad");
-+ TEST_COMPARE_STRING (strerrorname_np (EBADRPC), "EBADRPC");
-+#endif
-+#ifdef EFTYPE
-+ TEST_COMPARE_STRING (strerrordesc_np (EFTYPE),
-+ "Inappropriate file type or format");
-+ TEST_COMPARE_STRING (strerrorname_np (EFTYPE), "EFTYPE");
-+#endif
-+#ifdef EPROCUNAVAIL
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROCUNAVAIL),
-+ "RPC bad procedure for program");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROCUNAVAIL), "EPROCUNAVAIL");
-+#endif
-+#ifdef EAUTH
-+ TEST_COMPARE_STRING (strerrordesc_np (EAUTH), "Authentication error");
-+ TEST_COMPARE_STRING (strerrorname_np (EAUTH), "EAUTH");
-+#endif
-+#ifdef EDIED
-+ TEST_COMPARE_STRING (strerrordesc_np (EDIED), "Translator died");
-+ TEST_COMPARE_STRING (strerrorname_np (EDIED), "EDIED");
-+#endif
-+#ifdef ERPCMISMATCH
-+ TEST_COMPARE_STRING (strerrordesc_np (ERPCMISMATCH), "RPC version wrong");
-+ TEST_COMPARE_STRING (strerrorname_np (ERPCMISMATCH), "ERPCMISMATCH");
-+#endif
-+#ifdef EGREGIOUS
-+ TEST_COMPARE_STRING (strerrordesc_np (EGREGIOUS),
-+ "You really blew it this time");
-+ TEST_COMPARE_STRING (strerrorname_np (EGREGIOUS), "EGREGIOUS");
-+#endif
-+#ifdef EPROCLIM
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROCLIM), "Too many processes");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROCLIM), "EPROCLIM");
-+#endif
-+#ifdef EGRATUITOUS
-+ TEST_COMPARE_STRING (strerrordesc_np (EGRATUITOUS), "Gratuitous error");
-+ TEST_COMPARE_STRING (strerrorname_np (EGRATUITOUS), "EGRATUITOUS");
-+#endif
-+#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
-+ TEST_COMPARE_STRING (strerrordesc_np (ENOTSUP), "Not supported");
-+ TEST_COMPARE_STRING (strerrorname_np (ENOTSUP), "ENOTSUP");
-+#endif
-+#ifdef EPROGMISMATCH
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROGMISMATCH),
-+ "RPC program version wrong");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROGMISMATCH), "EPROGMISMATCH");
-+#endif
-+#ifdef EBACKGROUND
-+ TEST_COMPARE_STRING (strerrordesc_np (EBACKGROUND),
-+ "Inappropriate operation for background process");
-+ TEST_COMPARE_STRING (strerrorname_np (EBACKGROUND), "EBACKGROUND");
-+#endif
-+#ifdef EIEIO
-+ TEST_COMPARE_STRING (strerrordesc_np (EIEIO), "Computer bought the farm");
-+ TEST_COMPARE_STRING (strerrorname_np (EIEIO), "EIEIO");
-+#endif
-+#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
-+ TEST_COMPARE_STRING (strerrordesc_np (EWOULDBLOCK),
-+ "Operation would block");
-+ TEST_COMPARE_STRING (strerrorname_np (EWOULDBLOCK), "EWOULDBLOCK");
-+#endif
-+#ifdef ENEEDAUTH
-+ TEST_COMPARE_STRING (strerrordesc_np (ENEEDAUTH), "Need authenticator");
-+ TEST_COMPARE_STRING (strerrorname_np (ENEEDAUTH), "ENEEDAUTH");
-+#endif
-+#ifdef ED
-+ TEST_COMPARE_STRING (strerrordesc_np (ED), "?");
-+ TEST_COMPARE_STRING (strerrorname_np (ED), "ED");
-+#endif
-+#ifdef EPROGUNAVAIL
-+ TEST_COMPARE_STRING (strerrordesc_np (EPROGUNAVAIL),
-+ "RPC program not available");
-+ TEST_COMPARE_STRING (strerrorname_np (EPROGUNAVAIL), "EPROGUNAVAIL");
-+#endif
-
- return 0;
- }
-diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
-index 95b46dcbeb..3a323547f9 100644
---- a/stdio-common/vfscanf-internal.c
-+++ b/stdio-common/vfscanf-internal.c
-@@ -277,7 +277,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
- #endif
- {
- va_list arg;
-- const CHAR_T *f = format;
-+ const UCHAR_T *f = (const UCHAR_T *) format;
- UCHAR_T fc; /* Current character of the format. */
- WINT_T done = 0; /* Assignments done. */
- size_t read_in = 0; /* Chars read in. */
-@@ -415,10 +415,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
- #endif
-
- #ifndef COMPILE_WSCANF
-- if (!isascii ((unsigned char) *f))
-+ if (!isascii (*f))
- {
- /* Non-ASCII, may be a multibyte. */
-- int len = __mbrlen (f, strlen (f), &state);
-+ int len = __mbrlen ((const char *) f, strlen ((const char *) f),
-+ &state);
- if (len > 0)
- {
- do
-@@ -426,7 +427,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
- c = inchar ();
- if (__glibc_unlikely (c == EOF))
- input_error ();
-- else if (c != (unsigned char) *f++)
-+ else if (c != *f++)
- {
- ungetc_not_eof (c, s);
- conv_error ();
-@@ -484,9 +485,9 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
- char_buffer_rewind (&charbuf);
-
- /* Check for a positional parameter specification. */
-- if (ISDIGIT ((UCHAR_T) *f))
-+ if (ISDIGIT (*f))
- {
-- argpos = read_int ((const UCHAR_T **) &f);
-+ argpos = read_int (&f);
- if (*f == L_('$'))
- ++f;
- else
-@@ -521,8 +522,8 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
-
- /* Find the maximum field width. */
- width = 0;
-- if (ISDIGIT ((UCHAR_T) *f))
-- width = read_int ((const UCHAR_T **) &f);
-+ if (ISDIGIT (*f))
-+ width = read_int (&f);
- got_width:
- if (width == 0)
- width = -1;
-@@ -2522,12 +2523,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
- }
-
- while ((fc = *f++) != '\0' && fc != ']')
-- if (fc == '-' && *f != '\0' && *f != ']'
-- && (unsigned char) f[-2] <= (unsigned char) *f)
-+ if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
- {
- /* Add all characters from the one before the '-'
- up to (but not including) the next format char. */
-- for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
-+ for (fc = f[-2]; fc < *f; ++fc)
- ((char *)charbuf.scratch.data)[fc] = 1;
- }
- else
-diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c
-index 3cfe9a05c3..d4b1139c5e 100644
---- a/stdlib/tst-secure-getenv.c
-+++ b/stdlib/tst-secure-getenv.c
-@@ -30,167 +30,12 @@
- #include <sys/wait.h>
- #include <unistd.h>
-
-+#include <support/check.h>
- #include <support/support.h>
-+#include <support/capture_subprocess.h>
- #include <support/test-driver.h>
-
- static char MAGIC_ARGUMENT[] = "run-actual-test";
--#define MAGIC_STATUS 19
--
--/* Return a GID which is not our current GID, but is present in the
-- supplementary group list. */
--static gid_t
--choose_gid (void)
--{
-- int count = getgroups (0, NULL);
-- if (count < 0)
-- {
-- printf ("getgroups: %m\n");
-- exit (1);
-- }
-- gid_t *groups;
-- groups = xcalloc (count, sizeof (*groups));
-- int ret = getgroups (count, groups);
-- if (ret < 0)
-- {
-- printf ("getgroups: %m\n");
-- exit (1);
-- }
-- gid_t current = getgid ();
-- gid_t not_current = 0;
-- for (int i = 0; i < ret; ++i)
-- {
-- if (groups[i] != current)
-- {
-- not_current = groups[i];
-- break;
-- }
-- }
-- free (groups);
-- return not_current;
--}
--
--
--/* Copies the executable into a restricted directory, so that we can
-- safely make it SGID with the TARGET group ID. Then runs the
-- executable. */
--static int
--run_executable_sgid (gid_t target)
--{
-- char *dirname = xasprintf ("%s/secure-getenv.%jd",
-- test_dir, (intmax_t) getpid ());
-- char *execname = xasprintf ("%s/bin", dirname);
-- int infd = -1;
-- int outfd = -1;
-- int ret = -1;
-- if (mkdir (dirname, 0700) < 0)
-- {
-- printf ("mkdir: %m\n");
-- goto err;
-- }
-- infd = open ("/proc/self/exe", O_RDONLY);
-- if (infd < 0)
-- {
-- printf ("open (/proc/self/exe): %m\n");
-- goto err;
-- }
-- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
-- if (outfd < 0)
-- {
-- printf ("open (%s): %m\n", execname);
-- goto err;
-- }
-- char buf[4096];
-- for (;;)
-- {
-- ssize_t rdcount = read (infd, buf, sizeof (buf));
-- if (rdcount < 0)
-- {
-- printf ("read: %m\n");
-- goto err;
-- }
-- if (rdcount == 0)
-- break;
-- char *p = buf;
-- char *end = buf + rdcount;
-- while (p != end)
-- {
-- ssize_t wrcount = write (outfd, buf, end - p);
-- if (wrcount == 0)
-- errno = ENOSPC;
-- if (wrcount <= 0)
-- {
-- printf ("write: %m\n");
-- goto err;
-- }
-- p += wrcount;
-- }
-- }
-- if (fchown (outfd, getuid (), target) < 0)
-- {
-- printf ("fchown (%s): %m\n", execname);
-- goto err;
-- }
-- if (fchmod (outfd, 02750) < 0)
-- {
-- printf ("fchmod (%s): %m\n", execname);
-- goto err;
-- }
-- if (close (outfd) < 0)
-- {
-- printf ("close (outfd): %m\n");
-- goto err;
-- }
-- if (close (infd) < 0)
-- {
-- printf ("close (infd): %m\n");
-- goto err;
-- }
--
-- int kid = fork ();
-- if (kid < 0)
-- {
-- printf ("fork: %m\n");
-- goto err;
-- }
-- if (kid == 0)
-- {
-- /* Child process. */
-- char *args[] = { execname, MAGIC_ARGUMENT, NULL };
-- execve (execname, args, environ);
-- printf ("execve (%s): %m\n", execname);
-- _exit (1);
-- }
-- int status;
-- if (waitpid (kid, &status, 0) < 0)
-- {
-- printf ("waitpid: %m\n");
-- goto err;
-- }
-- if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS)
-- {
-- printf ("Unexpected exit status %d from child process\n",
-- status);
-- goto err;
-- }
-- ret = 0;
--
--err:
-- if (outfd >= 0)
-- close (outfd);
-- if (infd >= 0)
-- close (infd);
-- if (execname)
-- {
-- unlink (execname);
-- free (execname);
-- }
-- if (dirname)
-- {
-- rmdir (dirname);
-- free (dirname);
-- }
-- return ret;
--}
-
- static int
- do_test (void)
-@@ -212,15 +57,15 @@ do_test (void)
- exit (1);
- }
-
-- gid_t target = choose_gid ();
-- if (target == 0)
-- {
-- fprintf (stderr,
-- "Could not find a suitable GID for user %jd, skipping test\n",
-- (intmax_t) getuid ());
-- exit (0);
-- }
-- return run_executable_sgid (target);
-+ int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT);
-+
-+ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
-+ return EXIT_UNSUPPORTED;
-+
-+ if (!WIFEXITED (status))
-+ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
-+
-+ return 0;
- }
-
- static void
-@@ -229,23 +74,15 @@ alternative_main (int argc, char **argv)
- if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0)
- {
- if (getgid () == getegid ())
-- {
-- /* This can happen if the file system is mounted nosuid. */
-- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
-- (intmax_t) getgid ());
-- exit (MAGIC_STATUS);
-- }
-+ /* This can happen if the file system is mounted nosuid. */
-+ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
-+ (intmax_t) getgid ());
- if (getenv ("PATH") == NULL)
-- {
-- printf ("PATH variable not present\n");
-- exit (3);
-- }
-+ FAIL_EXIT (3, "PATH variable not present\n");
- if (secure_getenv ("PATH") != NULL)
-- {
-- printf ("PATH variable not filtered out\n");
-- exit (4);
-- }
-- exit (MAGIC_STATUS);
-+ FAIL_EXIT (4, "PATH variable not filtered out\n");
-+
-+ exit (EXIT_SUCCESS);
- }
- }
-
-diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h
-index 309d0f39b2..c8d3051af8 100644
---- a/string/bits/string_fortified.h
-+++ b/string/bits/string_fortified.h
-@@ -22,11 +22,6 @@
- # error "Never use <bits/string_fortified.h> directly; include <string.h> instead."
- #endif
-
--#if !__GNUC_PREREQ (5,0)
--__warndecl (__warn_memset_zero_len,
-- "memset used with constant zero length parameter; this could be due to transposed parameters");
--#endif
--
- __fortify_function void *
- __NTH (memcpy (void *__restrict __dest, const void *__restrict __src,
- size_t __len))
-@@ -58,16 +53,6 @@ __NTH (mempcpy (void *__restrict __dest, const void *__restrict __src,
- __fortify_function void *
- __NTH (memset (void *__dest, int __ch, size_t __len))
- {
-- /* GCC-5.0 and newer implements these checks in the compiler, so we don't
-- need them here. */
--#if !__GNUC_PREREQ (5,0)
-- if (__builtin_constant_p (__len) && __len == 0
-- && (!__builtin_constant_p (__ch) || __ch != 0))
-- {
-- __warn_memset_zero_len ();
-- return __dest;
-- }
--#endif
- return __builtin___memset_chk (__dest, __ch, __len, __bos0 (__dest));
- }
-
-diff --git a/support/Makefile b/support/Makefile
-index 93faafddf9..3d3aff5ff9 100644
---- a/support/Makefile
-+++ b/support/Makefile
-@@ -35,6 +35,8 @@ libsupport-routines = \
- ignore_stderr \
- next_to_fault \
- oom_error \
-+ resolv_response_context_duplicate \
-+ resolv_response_context_free \
- resolv_test \
- set_fortify_handler \
- support-xfstat \
-@@ -133,6 +135,7 @@ libsupport-routines = \
- xpthread_join \
- xpthread_key_create \
- xpthread_key_delete \
-+ xpthread_kill \
- xpthread_mutex_consistent \
- xpthread_mutex_destroy \
- xpthread_mutex_init \
-diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h
-index 9808750f80..421f657678 100644
---- a/support/capture_subprocess.h
-+++ b/support/capture_subprocess.h
-@@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess
- struct support_capture_subprocess support_capture_subprogram
- (const char *file, char *const argv[]);
-
-+/* Copy the running program into a setgid binary and run it with CHILD_ID
-+ argument. If execution is successful, return the exit status of the child
-+ program, otherwise return a non-zero failure exit code. */
-+int support_capture_subprogram_self_sgid
-+ (char *child_id);
-+
- /* Deallocate the subprocess data captured by
- support_capture_subprocess. */
- void support_capture_subprocess_free (struct support_capture_subprocess *);
-diff --git a/support/resolv_response_context_duplicate.c b/support/resolv_response_context_duplicate.c
-new file mode 100644
-index 0000000000..f9c5c3462a
---- /dev/null
-+++ b/support/resolv_response_context_duplicate.c
-@@ -0,0 +1,37 @@
-+/* Duplicate a response context used in DNS resolver tests.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <string.h>
-+#include <support/resolv_test.h>
-+#include <support/support.h>
-+
-+struct resolv_response_context *
-+resolv_response_context_duplicate (const struct resolv_response_context *ctx)
-+{
-+ struct resolv_response_context *result = xmalloc (sizeof (*result));
-+ memcpy (result, ctx, sizeof (*result));
-+ if (result->client_address != NULL)
-+ {
-+ result->client_address = xmalloc (result->client_address_length);
-+ memcpy (result->client_address, ctx->client_address,
-+ result->client_address_length);
-+ }
-+ result->query_buffer = xmalloc (result->query_length);
-+ memcpy (result->query_buffer, ctx->query_buffer, result->query_length);
-+ return result;
-+}
-diff --git a/support/resolv_response_context_free.c b/support/resolv_response_context_free.c
-new file mode 100644
-index 0000000000..b88c05ffd4
---- /dev/null
-+++ b/support/resolv_response_context_free.c
-@@ -0,0 +1,28 @@
-+/* Free a response context used in DNS resolver tests.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <stdlib.h>
-+#include <support/resolv_test.h>
-+
-+void
-+resolv_response_context_free (struct resolv_response_context *ctx)
-+{
-+ free (ctx->query_buffer);
-+ free (ctx->client_address);
-+ free (ctx);
-+}
-diff --git a/support/resolv_test.c b/support/resolv_test.c
-index 53b7fc41ab..9878a040a3 100644
---- a/support/resolv_test.c
-+++ b/support/resolv_test.c
-@@ -181,7 +181,9 @@ resolv_response_init (struct resolv_response_builder *b,
- b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */
- if (flags.tc)
- b->buffer[2] |= 0x02;
-- b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. */
-+ b->buffer[3] = flags.rcode;
-+ if (!flags.clear_ra)
-+ b->buffer[3] |= 0x80;
- if (flags.ad)
- b->buffer[3] |= 0x20;
-
-@@ -434,9 +436,9 @@ resolv_response_buffer (const struct resolv_response_builder *b)
- return result;
- }
-
--static struct resolv_response_builder *
--response_builder_allocate
-- (const unsigned char *query_buffer, size_t query_length)
-+struct resolv_response_builder *
-+resolv_response_builder_allocate (const unsigned char *query_buffer,
-+ size_t query_length)
- {
- struct resolv_response_builder *b = xmalloc (sizeof (*b));
- memset (b, 0, offsetof (struct resolv_response_builder, buffer));
-@@ -445,8 +447,8 @@ response_builder_allocate
- return b;
- }
-
--static void
--response_builder_free (struct resolv_response_builder *b)
-+void
-+resolv_response_builder_free (struct resolv_response_builder *b)
- {
- tdestroy (b->compression_offsets, free);
- free (b);
-@@ -661,13 +663,17 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
-
- struct resolv_response_context ctx =
- {
-+ .test = obj,
-+ .client_address = &peer,
-+ .client_address_length = peerlen,
- .query_buffer = query,
- .query_length = length,
- .server_index = server_index,
- .tcp = false,
- .edns = qinfo.edns,
- };
-- struct resolv_response_builder *b = response_builder_allocate (query, length);
-+ struct resolv_response_builder *b
-+ = resolv_response_builder_allocate (query, length);
- obj->config.response_callback
- (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
-
-@@ -684,7 +690,7 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
- if (b->offset >= 12)
- printf ("info: UDP server %d: sending response:"
- " %zu bytes, RCODE %d (for %s/%u/%u)\n",
-- server_index, b->offset, b->buffer[3] & 0x0f,
-+ ctx.server_index, b->offset, b->buffer[3] & 0x0f,
- qinfo.qname, qinfo.qclass, qinfo.qtype);
- else
- printf ("info: UDP server %d: sending response: %zu bytes"
-@@ -694,23 +700,31 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
- if (b->truncate_bytes > 0)
- printf ("info: truncated by %u bytes\n", b->truncate_bytes);
- }
-- size_t to_send = b->offset;
-- if (to_send < b->truncate_bytes)
-- to_send = 0;
-- else
-- to_send -= b->truncate_bytes;
--
-- /* Ignore most errors here because the other end may have closed
-- the socket. */
-- if (sendto (obj->servers[server_index].socket_udp,
-- b->buffer, to_send, 0,
-- (struct sockaddr *) &peer, peerlen) < 0)
-- TEST_VERIFY_EXIT (errno != EBADF);
-+ resolv_response_send_udp (&ctx, b);
- }
-- response_builder_free (b);
-+ resolv_response_builder_free (b);
- return true;
- }
-
-+void
-+resolv_response_send_udp (const struct resolv_response_context *ctx,
-+ struct resolv_response_builder *b)
-+{
-+ TEST_VERIFY_EXIT (!ctx->tcp);
-+ size_t to_send = b->offset;
-+ if (to_send < b->truncate_bytes)
-+ to_send = 0;
-+ else
-+ to_send -= b->truncate_bytes;
-+
-+ /* Ignore most errors here because the other end may have closed
-+ the socket. */
-+ if (sendto (ctx->test->servers[ctx->server_index].socket_udp,
-+ b->buffer, to_send, 0,
-+ ctx->client_address, ctx->client_address_length) < 0)
-+ TEST_VERIFY_EXIT (errno != EBADF);
-+}
-+
- /* UDP thread_callback function. Variant for one thread per
- server. */
- static void
-@@ -897,14 +911,15 @@ server_thread_tcp_client (void *arg)
-
- struct resolv_response_context ctx =
- {
-+ .test = closure->obj,
- .query_buffer = query_buffer,
- .query_length = query_length,
- .server_index = closure->server_index,
- .tcp = true,
- .edns = qinfo.edns,
- };
-- struct resolv_response_builder *b = response_builder_allocate
-- (query_buffer, query_length);
-+ struct resolv_response_builder *b
-+ = resolv_response_builder_allocate (query_buffer, query_length);
- closure->obj->config.response_callback
- (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
-
-@@ -936,7 +951,7 @@ server_thread_tcp_client (void *arg)
- writev_fully (closure->client_socket, buffers, 2);
- }
- bool close_flag = b->close;
-- response_builder_free (b);
-+ resolv_response_builder_free (b);
- free (query_buffer);
- if (close_flag)
- break;
-diff --git a/support/resolv_test.h b/support/resolv_test.h
-index 67819469a0..31a5c1c3e7 100644
---- a/support/resolv_test.h
-+++ b/support/resolv_test.h
-@@ -35,25 +35,36 @@ struct resolv_edns_info
- uint16_t payload_size;
- };
-
-+/* This opaque struct collects information about the resolver testing
-+ currently in progress. */
-+struct resolv_test;
-+
- /* This struct provides context information when the response callback
- specified in struct resolv_redirect_config is invoked. */
- struct resolv_response_context
- {
-- const unsigned char *query_buffer;
-+ struct resolv_test *test;
-+ void *client_address;
-+ size_t client_address_length;
-+ unsigned char *query_buffer;
- size_t query_length;
- int server_index;
- bool tcp;
- struct resolv_edns_info edns;
- };
-
-+/* Produces a deep copy of the context. */
-+struct resolv_response_context *
-+ resolv_response_context_duplicate (const struct resolv_response_context *);
-+
-+/* Frees the copy. For the context passed to the response function,
-+ this happens implicitly. */
-+void resolv_response_context_free (struct resolv_response_context *);
-+
- /* This opaque struct is used to construct responses from within the
- response callback function. */
- struct resolv_response_builder;
-
--/* This opaque struct collects information about the resolver testing
-- currently in progress. */
--struct resolv_test;
--
- enum
- {
- /* Maximum number of test servers supported by the framework. */
-@@ -137,6 +148,10 @@ struct resolv_response_flags
- /* If true, the AD (authenticated data) flag will be set. */
- bool ad;
-
-+ /* If true, do not set the RA (recursion available) flag in the
-+ response. */
-+ bool clear_ra;
-+
- /* Initial section count values. Can be used to artificially
- increase the counts, for malformed packet testing.*/
- unsigned short qdcount;
-@@ -188,6 +203,22 @@ void resolv_response_close (struct resolv_response_builder *);
- /* The size of the response packet built so far. */
- size_t resolv_response_length (const struct resolv_response_builder *);
-
-+/* Allocates a response builder tied to a specific query packet,
-+ starting at QUERY_BUFFER, containing QUERY_LENGTH bytes. */
-+struct resolv_response_builder *
-+ resolv_response_builder_allocate (const unsigned char *query_buffer,
-+ size_t query_length);
-+
-+/* Deallocates a response buffer. */
-+void resolv_response_builder_free (struct resolv_response_builder *);
-+
-+/* Sends a UDP response using a specific context. This can be used to
-+ reorder or duplicate responses, along with
-+ resolv_response_context_duplicate and
-+ response_builder_allocate. */
-+void resolv_response_send_udp (const struct resolv_response_context *,
-+ struct resolv_response_builder *);
-+
- __END_DECLS
-
- #endif /* SUPPORT_RESOLV_TEST_H */
-diff --git a/support/subprocess.h b/support/subprocess.h
-index 8b442fd5c0..34ffd02e8e 100644
---- a/support/subprocess.h
-+++ b/support/subprocess.h
-@@ -38,6 +38,11 @@ struct support_subprocess support_subprocess
- struct support_subprocess support_subprogram
- (const char *file, char *const argv[]);
-
-+/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it
-+ to complete. Return program exit status. */
-+int support_subprogram_wait
-+ (const char *file, char *const argv[]);
-+
- /* Wait for the subprocess indicated by PROC::PID. Return the status
- indicate by waitpid call. */
- int support_process_wait (struct support_subprocess *proc);
-diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c
-index eeed676e3d..28a37df67f 100644
---- a/support/support_capture_subprocess.c
-+++ b/support/support_capture_subprocess.c
-@@ -20,11 +20,14 @@
- #include <support/capture_subprocess.h>
-
- #include <errno.h>
-+#include <fcntl.h>
- #include <stdlib.h>
- #include <support/check.h>
- #include <support/xunistd.h>
- #include <support/xsocket.h>
- #include <support/xspawn.h>
-+#include <support/support.h>
-+#include <support/test-driver.h>
-
- static void
- transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
-@@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
- if (ret < 0)
- {
- support_record_failure ();
-- printf ("error: reading from subprocess %s: %m", what);
-+ printf ("error: reading from subprocess %s: %m\n", what);
- pfd->events = 0;
- pfd->revents = 0;
- }
-@@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[])
- return result;
- }
-
-+/* Copies the executable into a restricted directory, so that we can
-+ safely make it SGID with the TARGET group ID. Then runs the
-+ executable. */
-+static int
-+copy_and_spawn_sgid (char *child_id, gid_t gid)
-+{
-+ char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
-+ test_dir, (intmax_t) getpid ());
-+ char *execname = xasprintf ("%s/bin", dirname);
-+ int infd = -1;
-+ int outfd = -1;
-+ int ret = 1, status = 1;
-+
-+ TEST_VERIFY (mkdir (dirname, 0700) == 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+
-+ infd = open ("/proc/self/exe", O_RDONLY);
-+ if (infd < 0)
-+ FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n");
-+
-+ outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
-+ TEST_VERIFY (outfd >= 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+
-+ char buf[4096];
-+ for (;;)
-+ {
-+ ssize_t rdcount = read (infd, buf, sizeof (buf));
-+ TEST_VERIFY (rdcount >= 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+ if (rdcount == 0)
-+ break;
-+ char *p = buf;
-+ char *end = buf + rdcount;
-+ while (p != end)
-+ {
-+ ssize_t wrcount = write (outfd, buf, end - p);
-+ if (wrcount == 0)
-+ errno = ENOSPC;
-+ TEST_VERIFY (wrcount > 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+ p += wrcount;
-+ }
-+ }
-+ TEST_VERIFY (fchown (outfd, getuid (), gid) == 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+ TEST_VERIFY (fchmod (outfd, 02750) == 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+ TEST_VERIFY (close (outfd) == 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+ TEST_VERIFY (close (infd) == 0);
-+ if (support_record_failure_is_failed ())
-+ goto err;
-+
-+ /* We have the binary, now spawn the subprocess. Avoid using
-+ support_subprogram because we only want the program exit status, not the
-+ contents. */
-+ ret = 0;
-+
-+ char * const args[] = {execname, child_id, NULL};
-+
-+ status = support_subprogram_wait (args[0], args);
-+
-+err:
-+ if (outfd >= 0)
-+ close (outfd);
-+ if (infd >= 0)
-+ close (infd);
-+ if (execname != NULL)
-+ {
-+ unlink (execname);
-+ free (execname);
-+ }
-+ if (dirname != NULL)
-+ {
-+ rmdir (dirname);
-+ free (dirname);
-+ }
-+
-+ if (ret != 0)
-+ FAIL_EXIT1("Failed to make sgid executable for test\n");
-+
-+ return status;
-+}
-+
-+int
-+support_capture_subprogram_self_sgid (char *child_id)
-+{
-+ gid_t target = 0;
-+ const int count = 64;
-+ gid_t groups[count];
-+
-+ /* Get a GID which is not our current GID, but is present in the
-+ supplementary group list. */
-+ int ret = getgroups (count, groups);
-+ if (ret < 0)
-+ FAIL_UNSUPPORTED("Could not get group list for user %jd\n",
-+ (intmax_t) getuid ());
-+
-+ gid_t current = getgid ();
-+ for (int i = 0; i < ret; ++i)
-+ {
-+ if (groups[i] != current)
-+ {
-+ target = groups[i];
-+ break;
-+ }
-+ }
-+
-+ if (target == 0)
-+ FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n",
-+ (intmax_t) getuid ());
-+
-+ return copy_and_spawn_sgid (child_id, target);
-+}
-+
- void
- support_capture_subprocess_free (struct support_capture_subprocess *p)
- {
-diff --git a/support/support_subprocess.c b/support/support_subprocess.c
-index 36e3a77af2..4a25828111 100644
---- a/support/support_subprocess.c
-+++ b/support/support_subprocess.c
-@@ -27,7 +27,7 @@
- #include <support/subprocess.h>
-
- static struct support_subprocess
--support_suprocess_init (void)
-+support_subprocess_init (void)
- {
- struct support_subprocess result;
-
-@@ -48,7 +48,7 @@ support_suprocess_init (void)
- struct support_subprocess
- support_subprocess (void (*callback) (void *), void *closure)
- {
-- struct support_subprocess result = support_suprocess_init ();
-+ struct support_subprocess result = support_subprocess_init ();
-
- result.pid = xfork ();
- if (result.pid == 0)
-@@ -71,7 +71,7 @@ support_subprocess (void (*callback) (void *), void *closure)
- struct support_subprocess
- support_subprogram (const char *file, char *const argv[])
- {
-- struct support_subprocess result = support_suprocess_init ();
-+ struct support_subprocess result = support_subprocess_init ();
-
- posix_spawn_file_actions_t fa;
- /* posix_spawn_file_actions_init does not fail. */
-@@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[])
- xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]);
- xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]);
-
-- result.pid = xposix_spawn (file, &fa, NULL, argv, NULL);
-+ result.pid = xposix_spawn (file, &fa, NULL, argv, environ);
-
- xclose (result.stdout_pipe[1]);
- xclose (result.stderr_pipe[1]);
-@@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[])
- return result;
- }
-
-+int
-+support_subprogram_wait (const char *file, char *const argv[])
-+{
-+ posix_spawn_file_actions_t fa;
-+
-+ posix_spawn_file_actions_init (&fa);
-+ struct support_subprocess res = support_subprocess_init ();
-+
-+ res.pid = xposix_spawn (file, &fa, NULL, argv, environ);
-+
-+ return support_process_wait (&res);
-+}
-+
- int
- support_process_wait (struct support_subprocess *proc)
- {
-diff --git a/support/xpthread_kill.c b/support/xpthread_kill.c
-new file mode 100644
-index 0000000000..111a75d85e
---- /dev/null
-+++ b/support/xpthread_kill.c
-@@ -0,0 +1,26 @@
-+/* pthread_kill with error checking.
-+ Copyright (C) 2021 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <signal.h>
-+#include <support/xthread.h>
-+
-+void
-+xpthread_kill (pthread_t thr, int signo)
-+{
-+ xpthread_check_return ("pthread_kill", pthread_kill (thr, signo));
-+}
-diff --git a/support/xthread.h b/support/xthread.h
-index 05f8d4a7d9..cb1fc30da0 100644
---- a/support/xthread.h
-+++ b/support/xthread.h
-@@ -75,6 +75,8 @@ void xpthread_attr_setstacksize (pthread_attr_t *attr,
- void xpthread_attr_setguardsize (pthread_attr_t *attr,
- size_t guardsize);
-
-+void xpthread_kill (pthread_t thr, int signo);
-+
- /* Set the stack size in ATTR to a small value, but still large enough
- to cover most internal glibc stack usage. */
- void support_set_small_thread_stack_size (pthread_attr_t *attr);
-diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
-index 196e462520..cf7624aaa2 100644
---- a/sysdeps/aarch64/dl-bti.c
-+++ b/sysdeps/aarch64/dl-bti.c
-@@ -19,43 +19,76 @@
- #include <errno.h>
- #include <libintl.h>
- #include <ldsodefs.h>
-+#include <sys/mman.h>
-
--static int
--enable_bti (struct link_map *map, const char *program)
-+/* See elf/dl-load.h. */
-+#ifndef MAP_COPY
-+# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE)
-+#endif
-+
-+/* Enable BTI protection for MAP. */
-+
-+void
-+_dl_bti_protect (struct link_map *map, int fd)
- {
-+ const size_t pagesz = GLRO(dl_pagesize);
- const ElfW(Phdr) *phdr;
-- unsigned prot;
-
- for (phdr = map->l_phdr; phdr < &map->l_phdr[map->l_phnum]; ++phdr)
- if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X))
- {
-- void *start = (void *) (phdr->p_vaddr + map->l_addr);
-- size_t len = phdr->p_memsz;
-+ size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz);
-+ size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz);
-+ off_t off = ALIGN_DOWN (phdr->p_offset, pagesz);
-+ void *start = (void *) (vstart + map->l_addr);
-+ size_t len = vend - vstart;
-
-- prot = PROT_EXEC | PROT_BTI;
-+ unsigned prot = PROT_EXEC | PROT_BTI;
- if (phdr->p_flags & PF_R)
- prot |= PROT_READ;
- if (phdr->p_flags & PF_W)
- prot |= PROT_WRITE;
-
-- if (__mprotect (start, len, prot) < 0)
-- {
-- if (program)
-- _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n",
-- map->l_name);
-- else
-- _dl_signal_error (errno, map->l_name, "dlopen",
-- N_("mprotect failed to turn on BTI"));
-- }
-+ if (fd == -1)
-+ /* Ignore failures for kernel mapped binaries. */
-+ __mprotect (start, len, prot);
-+ else
-+ map->l_mach.bti_fail = __mmap (start, len, prot,
-+ MAP_FIXED|MAP_COPY|MAP_FILE,
-+ fd, off) == MAP_FAILED;
- }
-- return 0;
- }
-
--/* Enable BTI for L if required. */
-+
-+static void
-+bti_failed (struct link_map *l, const char *program)
-+{
-+ if (program)
-+ _dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n",
-+ program, l->l_name);
-+ else
-+ /* Note: the errno value is not available any more. */
-+ _dl_signal_error (0, l->l_name, "dlopen",
-+ N_("failed to turn on BTI protection"));
-+}
-+
-+
-+/* Enable BTI for L and its dependencies. */
-
- void
- _dl_bti_check (struct link_map *l, const char *program)
- {
-- if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti)
-- enable_bti (l, program);
-+ if (!GLRO(dl_aarch64_cpu_features).bti)
-+ return;
-+
-+ if (l->l_mach.bti_fail)
-+ bti_failed (l, program);
-+
-+ unsigned int i = l->l_searchlist.r_nlist;
-+ while (i-- > 0)
-+ {
-+ struct link_map *dep = l->l_initfini[i];
-+ if (dep->l_mach.bti_fail)
-+ bti_failed (dep, program);
-+ }
- }
-diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
-index 70b9ed3925..fde7cfd9e2 100644
---- a/sysdeps/aarch64/dl-machine.h
-+++ b/sysdeps/aarch64/dl-machine.h
-@@ -395,13 +395,6 @@ elf_machine_lazy_rel (struct link_map *map,
- /* Check for unexpected PLT reloc type. */
- if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1))
- {
-- if (map->l_mach.plt == 0)
-- {
-- /* Prelinking. */
-- *reloc_addr += l_addr;
-- return;
-- }
--
- if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
- {
- /* Check the symbol table for variant PCS symbols. */
-@@ -425,7 +418,10 @@ elf_machine_lazy_rel (struct link_map *map,
- }
- }
-
-- *reloc_addr = map->l_mach.plt;
-+ if (map->l_mach.plt == 0)
-+ *reloc_addr += l_addr;
-+ else
-+ *reloc_addr = map->l_mach.plt;
- }
- else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
- {
-diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
-index b0785bda83..e926e54984 100644
---- a/sysdeps/aarch64/dl-prop.h
-+++ b/sysdeps/aarch64/dl-prop.h
-@@ -19,6 +19,8 @@
- #ifndef _DL_PROP_H
- #define _DL_PROP_H
-
-+extern void _dl_bti_protect (struct link_map *, int) attribute_hidden;
-+
- extern void _dl_bti_check (struct link_map *, const char *)
- attribute_hidden;
-
-@@ -35,14 +37,18 @@ _dl_open_check (struct link_map *m)
- }
-
- static inline void __attribute__ ((always_inline))
--_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
-+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
- {
- }
-
- static inline int
--_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
-- void *data)
-+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
-+ uint32_t datasz, void *data)
- {
-+ if (!GLRO(dl_aarch64_cpu_features).bti)
-+ /* Skip note processing. */
-+ return 0;
-+
- if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
- {
- /* Stop if the property note is ill-formed. */
-@@ -51,7 +57,7 @@ _dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
-
- unsigned int feature_1 = *(unsigned int *) data;
- if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
-- l->l_mach.bti = true;
-+ _dl_bti_protect (l, fd);
-
- /* Stop if we processed the property note. */
- return 0;
-diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
-index 847a03ace2..b3f7663b07 100644
---- a/sysdeps/aarch64/linkmap.h
-+++ b/sysdeps/aarch64/linkmap.h
-@@ -22,5 +22,5 @@ struct link_map_machine
- {
- ElfW(Addr) plt; /* Address of .plt */
- void *tlsdesc_table; /* Address of TLS descriptor hash table. */
-- bool bti; /* Branch Target Identification is enabled. */
-+ bool bti_fail; /* Failed to enable Branch Target Identification. */
- };
-diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
-index 7cf5f033e8..799d60c98c 100644
---- a/sysdeps/aarch64/multiarch/memcpy.c
-+++ b/sysdeps/aarch64/multiarch/memcpy.c
-@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy,
- ? __memcpy_falkor
- : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
- ? __memcpy_thunderx2
-- : (IS_NEOVERSE_N1 (midr)
-+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
-+ || IS_NEOVERSE_V1 (midr)
- ? __memcpy_simd
- : __memcpy_generic)))));
-
-diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
-index d4ba747777..48bb6d7ca4 100644
---- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
-+++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
-@@ -223,12 +223,13 @@ L(copy_long_backwards):
- b.ls L(copy64_from_start)
-
- L(loop64_backwards):
-- stp A_q, B_q, [dstend, -32]
-+ str B_q, [dstend, -16]
-+ str A_q, [dstend, -32]
- ldp A_q, B_q, [srcend, -96]
-- stp C_q, D_q, [dstend, -64]
-+ str D_q, [dstend, -48]
-+ str C_q, [dstend, -64]!
- ldp C_q, D_q, [srcend, -128]
- sub srcend, srcend, 64
-- sub dstend, dstend, 64
- subs count, count, 64
- b.hi L(loop64_backwards)
-
-diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
-index ad10aa8ac6..46a4cb3a54 100644
---- a/sysdeps/aarch64/multiarch/memmove.c
-+++ b/sysdeps/aarch64/multiarch/memmove.c
-@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove,
- ? __memmove_falkor
- : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
- ? __memmove_thunderx2
-- : (IS_NEOVERSE_N1 (midr)
-+ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
-+ || IS_NEOVERSE_V1 (midr)
- ? __memmove_simd
- : __memmove_generic)))));
-
-diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
-index 75393e1c18..1998ea95d4 100644
---- a/sysdeps/aarch64/start.S
-+++ b/sysdeps/aarch64/start.S
-@@ -43,11 +43,9 @@
- */
-
- .text
-- .globl _start
-- .type _start,#function
--_start:
-- BTI_C
-+ENTRY(_start)
- /* Create an initial frame with 0 LR and FP */
-+ cfi_undefined (x30)
- mov x29, #0
- mov x30, #0
-
-@@ -101,8 +99,10 @@ _start:
- because crt1.o and rcrt1.o share code and the later must avoid the
- use of GOT relocations before __libc_start_main is called. */
- __wrap_main:
-+ BTI_C
- b main
- #endif
-+END(_start)
-
- /* Define a symbol for the first piece of initialized data. */
- .data
-diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h
-index f1cf576fe3..df27ff8e6a 100644
---- a/sysdeps/generic/dl-prop.h
-+++ b/sysdeps/generic/dl-prop.h
-@@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m)
- }
-
- static inline void __attribute__ ((always_inline))
--_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
-+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
- {
- }
-
- /* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L,
- processing of the properties continues until this returns 0. */
- static inline int __attribute__ ((always_inline))
--_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
-- void *data)
-+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
-+ uint32_t datasz, void *data)
- {
- return 0;
- }
-diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
-index ba114ab4b1..62ac40d81b 100644
---- a/sysdeps/generic/ldsodefs.h
-+++ b/sysdeps/generic/ldsodefs.h
-@@ -919,8 +919,9 @@ extern void _dl_rtld_di_serinfo (struct link_map *loader,
- Dl_serinfo *si, bool counting);
-
- /* Process PT_GNU_PROPERTY program header PH in module L after
-- PT_LOAD segments are mapped. */
--void _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph);
-+ PT_LOAD segments are mapped from file FD. */
-+void _dl_process_pt_gnu_property (struct link_map *l, int fd,
-+ const ElfW(Phdr) *ph);
-
-
- /* Search loaded objects' symbol tables for a definition of the symbol
-diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h
-index b667a5b652..c229603af3 100644
---- a/sysdeps/generic/unwind.h
-+++ b/sysdeps/generic/unwind.h
-@@ -75,15 +75,21 @@ typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
-
- struct _Unwind_Exception
- {
-- _Unwind_Exception_Class exception_class;
-- _Unwind_Exception_Cleanup_Fn exception_cleanup;
-- _Unwind_Word private_1;
-- _Unwind_Word private_2;
--
-- /* @@@ The IA-64 ABI says that this structure must be double-word aligned.
-- Taking that literally does not make much sense generically. Instead we
-- provide the maximum alignment required by any type for the machine. */
--} __attribute__((__aligned__));
-+ union
-+ {
-+ struct
-+ {
-+ _Unwind_Exception_Class exception_class;
-+ _Unwind_Exception_Cleanup_Fn exception_cleanup;
-+ _Unwind_Word private_1;
-+ _Unwind_Word private_2;
-+ };
-+
-+ /* The IA-64 ABI says that this structure must be double-word aligned. */
-+ _Unwind_Word unwind_exception_align[2]
-+ __attribute__ ((__aligned__ (2 * sizeof (_Unwind_Word))));
-+ };
-+};
-
-
- /* The ACTIONS argument to the personality routine is a bitwise OR of one
-diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h
-index 5d11ed723d..6329e5f393 100644
---- a/sysdeps/gnu/errlist.h
-+++ b/sysdeps/gnu/errlist.h
-@@ -1,24 +1,21 @@
--#ifndef ERR_MAP
--#define ERR_MAP(value) value
--#endif
--_S(ERR_MAP(0), N_("Success"))
-+_S(0, N_("Success"))
- #ifdef EPERM
- /*
- TRANS Only the owner of the file (or other resource)
- TRANS or processes with special privileges can perform the operation. */
--_S(ERR_MAP(EPERM), N_("Operation not permitted"))
-+_S(EPERM, N_("Operation not permitted"))
- #endif
- #ifdef ENOENT
- /*
- TRANS This is a ``file doesn't exist'' error
- TRANS for ordinary files that are referenced in contexts where they are
- TRANS expected to already exist. */
--_S(ERR_MAP(ENOENT), N_("No such file or directory"))
-+_S(ENOENT, N_("No such file or directory"))
- #endif
- #ifdef ESRCH
- /*
- TRANS No process matches the specified process ID. */
--_S(ERR_MAP(ESRCH), N_("No such process"))
-+_S(ESRCH, N_("No such process"))
- #endif
- #ifdef EINTR
- /*
-@@ -29,12 +26,12 @@ TRANS
- TRANS You can choose to have functions resume after a signal that is handled,
- TRANS rather than failing with @code{EINTR}; see @ref{Interrupted
- TRANS Primitives}. */
--_S(ERR_MAP(EINTR), N_("Interrupted system call"))
-+_S(EINTR, N_("Interrupted system call"))
- #endif
- #ifdef EIO
- /*
- TRANS Usually used for physical read or write errors. */
--_S(ERR_MAP(EIO), N_("Input/output error"))
-+_S(EIO, N_("Input/output error"))
- #endif
- #ifdef ENXIO
- /*
-@@ -43,7 +40,7 @@ TRANS represented by a file you specified, and it couldn't find the device.
- TRANS This can mean that the device file was installed incorrectly, or that
- TRANS the physical device is missing or not correctly attached to the
- TRANS computer. */
--_S(ERR_MAP(ENXIO), N_("No such device or address"))
-+_S(ENXIO, N_("No such device or address"))
- #endif
- #ifdef E2BIG
- /*
-@@ -51,27 +48,27 @@ TRANS Used when the arguments passed to a new program
- TRANS being executed with one of the @code{exec} functions (@pxref{Executing a
- TRANS File}) occupy too much memory space. This condition never arises on
- TRANS @gnuhurdsystems{}. */
--_S(ERR_MAP(E2BIG), N_("Argument list too long"))
-+_S(E2BIG, N_("Argument list too long"))
- #endif
- #ifdef ENOEXEC
- /*
- TRANS Invalid executable file format. This condition is detected by the
- TRANS @code{exec} functions; see @ref{Executing a File}. */
--_S(ERR_MAP(ENOEXEC), N_("Exec format error"))
-+_S(ENOEXEC, N_("Exec format error"))
- #endif
- #ifdef EBADF
- /*
- TRANS For example, I/O on a descriptor that has been
- TRANS closed or reading from a descriptor open only for writing (or vice
- TRANS versa). */
--_S(ERR_MAP(EBADF), N_("Bad file descriptor"))
-+_S(EBADF, N_("Bad file descriptor"))
- #endif
- #ifdef ECHILD
- /*
- TRANS This error happens on operations that are
- TRANS supposed to manipulate child processes, when there aren't any processes
- TRANS to manipulate. */
--_S(ERR_MAP(ECHILD), N_("No child processes"))
-+_S(ECHILD, N_("No child processes"))
- #endif
- #ifdef EDEADLK
- /*
-@@ -79,74 +76,74 @@ TRANS Allocating a system resource would have resulted in a
- TRANS deadlock situation. The system does not guarantee that it will notice
- TRANS all such situations. This error means you got lucky and the system
- TRANS noticed; it might just hang. @xref{File Locks}, for an example. */
--_S(ERR_MAP(EDEADLK), N_("Resource deadlock avoided"))
-+_S(EDEADLK, N_("Resource deadlock avoided"))
- #endif
- #ifdef ENOMEM
- /*
- TRANS The system cannot allocate more virtual memory
- TRANS because its capacity is full. */
--_S(ERR_MAP(ENOMEM), N_("Cannot allocate memory"))
-+_S(ENOMEM, N_("Cannot allocate memory"))
- #endif
- #ifdef EACCES
- /*
- TRANS The file permissions do not allow the attempted operation. */
--_S(ERR_MAP(EACCES), N_("Permission denied"))
-+_S(EACCES, N_("Permission denied"))
- #endif
- #ifdef EFAULT
- /*
- TRANS An invalid pointer was detected.
- TRANS On @gnuhurdsystems{}, this error never happens; you get a signal instead. */
--_S(ERR_MAP(EFAULT), N_("Bad address"))
-+_S(EFAULT, N_("Bad address"))
- #endif
- #ifdef ENOTBLK
- /*
- TRANS A file that isn't a block special file was given in a situation that
- TRANS requires one. For example, trying to mount an ordinary file as a file
- TRANS system in Unix gives this error. */
--_S(ERR_MAP(ENOTBLK), N_("Block device required"))
-+_S(ENOTBLK, N_("Block device required"))
- #endif
- #ifdef EBUSY
- /*
- TRANS A system resource that can't be shared is already in use.
- TRANS For example, if you try to delete a file that is the root of a currently
- TRANS mounted filesystem, you get this error. */
--_S(ERR_MAP(EBUSY), N_("Device or resource busy"))
-+_S(EBUSY, N_("Device or resource busy"))
- #endif
- #ifdef EEXIST
- /*
- TRANS An existing file was specified in a context where it only
- TRANS makes sense to specify a new file. */
--_S(ERR_MAP(EEXIST), N_("File exists"))
-+_S(EEXIST, N_("File exists"))
- #endif
- #ifdef EXDEV
- /*
- TRANS An attempt to make an improper link across file systems was detected.
- TRANS This happens not only when you use @code{link} (@pxref{Hard Links}) but
- TRANS also when you rename a file with @code{rename} (@pxref{Renaming Files}). */
--_S(ERR_MAP(EXDEV), N_("Invalid cross-device link"))
-+_S(EXDEV, N_("Invalid cross-device link"))
- #endif
- #ifdef ENODEV
- /*
- TRANS The wrong type of device was given to a function that expects a
- TRANS particular sort of device. */
--_S(ERR_MAP(ENODEV), N_("No such device"))
-+_S(ENODEV, N_("No such device"))
- #endif
- #ifdef ENOTDIR
- /*
- TRANS A file that isn't a directory was specified when a directory is required. */
--_S(ERR_MAP(ENOTDIR), N_("Not a directory"))
-+_S(ENOTDIR, N_("Not a directory"))
- #endif
- #ifdef EISDIR
- /*
- TRANS You cannot open a directory for writing,
- TRANS or create or remove hard links to it. */
--_S(ERR_MAP(EISDIR), N_("Is a directory"))
-+_S(EISDIR, N_("Is a directory"))
- #endif
- #ifdef EINVAL
- /*
- TRANS This is used to indicate various kinds of problems
- TRANS with passing the wrong argument to a library function. */
--_S(ERR_MAP(EINVAL), N_("Invalid argument"))
-+_S(EINVAL, N_("Invalid argument"))
- #endif
- #ifdef EMFILE
- /*
-@@ -157,20 +154,20 @@ TRANS In BSD and GNU, the number of open files is controlled by a resource
- TRANS limit that can usually be increased. If you get this error, you might
- TRANS want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited;
- TRANS @pxref{Limits on Resources}. */
--_S(ERR_MAP(EMFILE), N_("Too many open files"))
-+_S(EMFILE, N_("Too many open files"))
- #endif
- #ifdef ENFILE
- /*
- TRANS There are too many distinct file openings in the entire system. Note
- TRANS that any number of linked channels count as just one file opening; see
- TRANS @ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. */
--_S(ERR_MAP(ENFILE), N_("Too many open files in system"))
-+_S(ENFILE, N_("Too many open files in system"))
- #endif
- #ifdef ENOTTY
- /*
- TRANS Inappropriate I/O control operation, such as trying to set terminal
- TRANS modes on an ordinary file. */
--_S(ERR_MAP(ENOTTY), N_("Inappropriate ioctl for device"))
-+_S(ENOTTY, N_("Inappropriate ioctl for device"))
- #endif
- #ifdef ETXTBSY
- /*
-@@ -179,35 +176,35 @@ TRANS write to a file that is currently being executed. Often using a
- TRANS debugger to run a program is considered having it open for writing and
- TRANS will cause this error. (The name stands for ``text file busy''.) This
- TRANS is not an error on @gnuhurdsystems{}; the text is copied as necessary. */
--_S(ERR_MAP(ETXTBSY), N_("Text file busy"))
-+_S(ETXTBSY, N_("Text file busy"))
- #endif
- #ifdef EFBIG
- /*
- TRANS The size of a file would be larger than allowed by the system. */
--_S(ERR_MAP(EFBIG), N_("File too large"))
-+_S(EFBIG, N_("File too large"))
- #endif
- #ifdef ENOSPC
- /*
- TRANS Write operation on a file failed because the
- TRANS disk is full. */
--_S(ERR_MAP(ENOSPC), N_("No space left on device"))
-+_S(ENOSPC, N_("No space left on device"))
- #endif
- #ifdef ESPIPE
- /*
- TRANS Invalid seek operation (such as on a pipe). */
--_S(ERR_MAP(ESPIPE), N_("Illegal seek"))
-+_S(ESPIPE, N_("Illegal seek"))
- #endif
- #ifdef EROFS
- /*
- TRANS An attempt was made to modify something on a read-only file system. */
--_S(ERR_MAP(EROFS), N_("Read-only file system"))
-+_S(EROFS, N_("Read-only file system"))
- #endif
- #ifdef EMLINK
- /*
- TRANS The link count of a single file would become too large.
- TRANS @code{rename} can cause this error if the file being renamed already has
- TRANS as many links as it can take (@pxref{Renaming Files}). */
--_S(ERR_MAP(EMLINK), N_("Too many links"))
-+_S(EMLINK, N_("Too many links"))
- #endif
- #ifdef EPIPE
- /*
-@@ -216,19 +213,19 @@ TRANS Every library function that returns this error code also generates a
- TRANS @code{SIGPIPE} signal; this signal terminates the program if not handled
- TRANS or blocked. Thus, your program will never actually see @code{EPIPE}
- TRANS unless it has handled or blocked @code{SIGPIPE}. */
--_S(ERR_MAP(EPIPE), N_("Broken pipe"))
-+_S(EPIPE, N_("Broken pipe"))
- #endif
- #ifdef EDOM
- /*
- TRANS Used by mathematical functions when an argument value does
- TRANS not fall into the domain over which the function is defined. */
--_S(ERR_MAP(EDOM), N_("Numerical argument out of domain"))
-+_S(EDOM, N_("Numerical argument out of domain"))
- #endif
- #ifdef ERANGE
- /*
- TRANS Used by mathematical functions when the result value is
- TRANS not representable because of overflow or underflow. */
--_S(ERR_MAP(ERANGE), N_("Numerical result out of range"))
-+_S(ERANGE, N_("Numerical result out of range"))
- #endif
- #ifdef EAGAIN
- /*
-@@ -261,7 +258,7 @@ TRANS Such shortages are usually fairly serious and affect the whole system,
- TRANS so usually an interactive program should report the error to the user
- TRANS and return to its command loop.
- TRANS @end itemize */
--_S(ERR_MAP(EAGAIN), N_("Resource temporarily unavailable"))
-+_S(EAGAIN, N_("Resource temporarily unavailable"))
- #endif
- #ifdef EINPROGRESS
- /*
-@@ -273,47 +270,47 @@ TRANS the operation has begun and will take some time. Attempts to manipulate
- TRANS the object before the call completes return @code{EALREADY}. You can
- TRANS use the @code{select} function to find out when the pending operation
- TRANS has completed; @pxref{Waiting for I/O}. */
--_S(ERR_MAP(EINPROGRESS), N_("Operation now in progress"))
-+_S(EINPROGRESS, N_("Operation now in progress"))
- #endif
- #ifdef EALREADY
- /*
- TRANS An operation is already in progress on an object that has non-blocking
- TRANS mode selected. */
--_S(ERR_MAP(EALREADY), N_("Operation already in progress"))
-+_S(EALREADY, N_("Operation already in progress"))
- #endif
- #ifdef ENOTSOCK
- /*
- TRANS A file that isn't a socket was specified when a socket is required. */
--_S(ERR_MAP(ENOTSOCK), N_("Socket operation on non-socket"))
-+_S(ENOTSOCK, N_("Socket operation on non-socket"))
- #endif
- #ifdef EMSGSIZE
- /*
- TRANS The size of a message sent on a socket was larger than the supported
- TRANS maximum size. */
--_S(ERR_MAP(EMSGSIZE), N_("Message too long"))
-+_S(EMSGSIZE, N_("Message too long"))
- #endif
- #ifdef EPROTOTYPE
- /*
- TRANS The socket type does not support the requested communications protocol. */
--_S(ERR_MAP(EPROTOTYPE), N_("Protocol wrong type for socket"))
-+_S(EPROTOTYPE, N_("Protocol wrong type for socket"))
- #endif
- #ifdef ENOPROTOOPT
- /*
- TRANS You specified a socket option that doesn't make sense for the
- TRANS particular protocol being used by the socket. @xref{Socket Options}. */
--_S(ERR_MAP(ENOPROTOOPT), N_("Protocol not available"))
-+_S(ENOPROTOOPT, N_("Protocol not available"))
- #endif
- #ifdef EPROTONOSUPPORT
- /*
- TRANS The socket domain does not support the requested communications protocol
- TRANS (perhaps because the requested protocol is completely invalid).
- TRANS @xref{Creating a Socket}. */
--_S(ERR_MAP(EPROTONOSUPPORT), N_("Protocol not supported"))
-+_S(EPROTONOSUPPORT, N_("Protocol not supported"))
- #endif
- #ifdef ESOCKTNOSUPPORT
- /*
- TRANS The socket type is not supported. */
--_S(ERR_MAP(ESOCKTNOSUPPORT), N_("Socket type not supported"))
-+_S(ESOCKTNOSUPPORT, N_("Socket type not supported"))
- #endif
- #ifdef EOPNOTSUPP
- /*
-@@ -323,71 +320,71 @@ TRANS implemented for all communications protocols. On @gnuhurdsystems{}, this
- TRANS error can happen for many calls when the object does not support the
- TRANS particular operation; it is a generic indication that the server knows
- TRANS nothing to do for that call. */
--_S(ERR_MAP(EOPNOTSUPP), N_("Operation not supported"))
-+_S(EOPNOTSUPP, N_("Operation not supported"))
- #endif
- #ifdef EPFNOSUPPORT
- /*
- TRANS The socket communications protocol family you requested is not supported. */
--_S(ERR_MAP(EPFNOSUPPORT), N_("Protocol family not supported"))
-+_S(EPFNOSUPPORT, N_("Protocol family not supported"))
- #endif
- #ifdef EAFNOSUPPORT
- /*
- TRANS The address family specified for a socket is not supported; it is
- TRANS inconsistent with the protocol being used on the socket. @xref{Sockets}. */
--_S(ERR_MAP(EAFNOSUPPORT), N_("Address family not supported by protocol"))
-+_S(EAFNOSUPPORT, N_("Address family not supported by protocol"))
- #endif
- #ifdef EADDRINUSE
- /*
- TRANS The requested socket address is already in use. @xref{Socket Addresses}. */
--_S(ERR_MAP(EADDRINUSE), N_("Address already in use"))
-+_S(EADDRINUSE, N_("Address already in use"))
- #endif
- #ifdef EADDRNOTAVAIL
- /*
- TRANS The requested socket address is not available; for example, you tried
- TRANS to give a socket a name that doesn't match the local host name.
- TRANS @xref{Socket Addresses}. */
--_S(ERR_MAP(EADDRNOTAVAIL), N_("Cannot assign requested address"))
-+_S(EADDRNOTAVAIL, N_("Cannot assign requested address"))
- #endif
- #ifdef ENETDOWN
- /*
- TRANS A socket operation failed because the network was down. */
--_S(ERR_MAP(ENETDOWN), N_("Network is down"))
-+_S(ENETDOWN, N_("Network is down"))
- #endif
- #ifdef ENETUNREACH
- /*
- TRANS A socket operation failed because the subnet containing the remote host
- TRANS was unreachable. */
--_S(ERR_MAP(ENETUNREACH), N_("Network is unreachable"))
-+_S(ENETUNREACH, N_("Network is unreachable"))
- #endif
- #ifdef ENETRESET
- /*
- TRANS A network connection was reset because the remote host crashed. */
--_S(ERR_MAP(ENETRESET), N_("Network dropped connection on reset"))
-+_S(ENETRESET, N_("Network dropped connection on reset"))
- #endif
- #ifdef ECONNABORTED
- /*
- TRANS A network connection was aborted locally. */
--_S(ERR_MAP(ECONNABORTED), N_("Software caused connection abort"))
-+_S(ECONNABORTED, N_("Software caused connection abort"))
- #endif
- #ifdef ECONNRESET
- /*
- TRANS A network connection was closed for reasons outside the control of the
- TRANS local host, such as by the remote machine rebooting or an unrecoverable
- TRANS protocol violation. */
--_S(ERR_MAP(ECONNRESET), N_("Connection reset by peer"))
-+_S(ECONNRESET, N_("Connection reset by peer"))
- #endif
- #ifdef ENOBUFS
- /*
- TRANS The kernel's buffers for I/O operations are all in use. In GNU, this
- TRANS error is always synonymous with @code{ENOMEM}; you may get one or the
- TRANS other from network operations. */
--_S(ERR_MAP(ENOBUFS), N_("No buffer space available"))
-+_S(ENOBUFS, N_("No buffer space available"))
- #endif
- #ifdef EISCONN
- /*
- TRANS You tried to connect a socket that is already connected.
- TRANS @xref{Connecting}. */
--_S(ERR_MAP(EISCONN), N_("Transport endpoint is already connected"))
-+_S(EISCONN, N_("Transport endpoint is already connected"))
- #endif
- #ifdef ENOTCONN
- /*
-@@ -395,74 +392,74 @@ TRANS The socket is not connected to anything. You get this error when you
- TRANS try to transmit data over a socket, without first specifying a
- TRANS destination for the data. For a connectionless socket (for datagram
- TRANS protocols, such as UDP), you get @code{EDESTADDRREQ} instead. */
--_S(ERR_MAP(ENOTCONN), N_("Transport endpoint is not connected"))
-+_S(ENOTCONN, N_("Transport endpoint is not connected"))
- #endif
- #ifdef EDESTADDRREQ
- /*
- TRANS No default destination address was set for the socket. You get this
- TRANS error when you try to transmit data over a connectionless socket,
- TRANS without first specifying a destination for the data with @code{connect}. */
--_S(ERR_MAP(EDESTADDRREQ), N_("Destination address required"))
-+_S(EDESTADDRREQ, N_("Destination address required"))
- #endif
- #ifdef ESHUTDOWN
- /*
- TRANS The socket has already been shut down. */
--_S(ERR_MAP(ESHUTDOWN), N_("Cannot send after transport endpoint shutdown"))
-+_S(ESHUTDOWN, N_("Cannot send after transport endpoint shutdown"))
- #endif
- #ifdef ETOOMANYREFS
--_S(ERR_MAP(ETOOMANYREFS), N_("Too many references: cannot splice"))
-+_S(ETOOMANYREFS, N_("Too many references: cannot splice"))
- #endif
- #ifdef ETIMEDOUT
- /*
- TRANS A socket operation with a specified timeout received no response during
- TRANS the timeout period. */
--_S(ERR_MAP(ETIMEDOUT), N_("Connection timed out"))
-+_S(ETIMEDOUT, N_("Connection timed out"))
- #endif
- #ifdef ECONNREFUSED
- /*
- TRANS A remote host refused to allow the network connection (typically because
- TRANS it is not running the requested service). */
--_S(ERR_MAP(ECONNREFUSED), N_("Connection refused"))
-+_S(ECONNREFUSED, N_("Connection refused"))
- #endif
- #ifdef ELOOP
- /*
- TRANS Too many levels of symbolic links were encountered in looking up a file name.
- TRANS This often indicates a cycle of symbolic links. */
--_S(ERR_MAP(ELOOP), N_("Too many levels of symbolic links"))
-+_S(ELOOP, N_("Too many levels of symbolic links"))
- #endif
- #ifdef ENAMETOOLONG
- /*
- TRANS Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for
- TRANS Files}) or host name too long (in @code{gethostname} or
- TRANS @code{sethostname}; @pxref{Host Identification}). */
--_S(ERR_MAP(ENAMETOOLONG), N_("File name too long"))
-+_S(ENAMETOOLONG, N_("File name too long"))
- #endif
- #ifdef EHOSTDOWN
- /*
- TRANS The remote host for a requested network connection is down. */
--_S(ERR_MAP(EHOSTDOWN), N_("Host is down"))
-+_S(EHOSTDOWN, N_("Host is down"))
- #endif
- /*
- TRANS The remote host for a requested network connection is not reachable. */
- #ifdef EHOSTUNREACH
--_S(ERR_MAP(EHOSTUNREACH), N_("No route to host"))
-+_S(EHOSTUNREACH, N_("No route to host"))
- #endif
- #ifdef ENOTEMPTY
- /*
- TRANS Directory not empty, where an empty directory was expected. Typically,
- TRANS this error occurs when you are trying to delete a directory. */
--_S(ERR_MAP(ENOTEMPTY), N_("Directory not empty"))
-+_S(ENOTEMPTY, N_("Directory not empty"))
- #endif
- #ifdef EUSERS
- /*
- TRANS The file quota system is confused because there are too many users.
- TRANS @c This can probably happen in a GNU system when using NFS. */
--_S(ERR_MAP(EUSERS), N_("Too many users"))
-+_S(EUSERS, N_("Too many users"))
- #endif
- #ifdef EDQUOT
- /*
- TRANS The user's disk quota was exceeded. */
--_S(ERR_MAP(EDQUOT), N_("Disk quota exceeded"))
-+_S(EDQUOT, N_("Disk quota exceeded"))
- #endif
- #ifdef ESTALE
- /*
-@@ -471,7 +468,7 @@ TRANS file system which is due to file system rearrangements on the server host
- TRANS for NFS file systems or corruption in other file systems.
- TRANS Repairing this condition usually requires unmounting, possibly repairing
- TRANS and remounting the file system. */
--_S(ERR_MAP(ESTALE), N_("Stale file handle"))
-+_S(ESTALE, N_("Stale file handle"))
- #endif
- #ifdef EREMOTE
- /*
-@@ -479,7 +476,7 @@ TRANS An attempt was made to NFS-mount a remote file system with a file name tha
- TRANS already specifies an NFS-mounted file.
- TRANS (This is an error on some operating systems, but we expect it to work
- TRANS properly on @gnuhurdsystems{}, making this error code impossible.) */
--_S(ERR_MAP(EREMOTE), N_("Object is remote"))
-+_S(EREMOTE, N_("Object is remote"))
- #endif
- #ifdef ENOLCK
- /*
-@@ -487,7 +484,7 @@ TRANS This is used by the file locking facilities; see
- TRANS @ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but
- TRANS it can result from an operation to an NFS server running another
- TRANS operating system. */
--_S(ERR_MAP(ENOLCK), N_("No locks available"))
-+_S(ENOLCK, N_("No locks available"))
- #endif
- #ifdef ENOSYS
- /*
-@@ -496,46 +493,46 @@ TRANS not implemented at all, either in the C library itself or in the
- TRANS operating system. When you get this error, you can be sure that this
- TRANS particular function will always fail with @code{ENOSYS} unless you
- TRANS install a new version of the C library or the operating system. */
--_S(ERR_MAP(ENOSYS), N_("Function not implemented"))
-+_S(ENOSYS, N_("Function not implemented"))
- #endif
- #ifdef EILSEQ
- /*
- TRANS While decoding a multibyte character the function came along an invalid
- TRANS or an incomplete sequence of bytes or the given wide character is invalid. */
--_S(ERR_MAP(EILSEQ), N_("Invalid or incomplete multibyte or wide character"))
-+_S(EILSEQ, N_("Invalid or incomplete multibyte or wide character"))
- #endif
- #ifdef EBADMSG
--_S(ERR_MAP(EBADMSG), N_("Bad message"))
-+_S(EBADMSG, N_("Bad message"))
- #endif
- #ifdef EIDRM
--_S(ERR_MAP(EIDRM), N_("Identifier removed"))
-+_S(EIDRM, N_("Identifier removed"))
- #endif
- #ifdef EMULTIHOP
--_S(ERR_MAP(EMULTIHOP), N_("Multihop attempted"))
-+_S(EMULTIHOP, N_("Multihop attempted"))
- #endif
- #ifdef ENODATA
--_S(ERR_MAP(ENODATA), N_("No data available"))
-+_S(ENODATA, N_("No data available"))
- #endif
- #ifdef ENOLINK
--_S(ERR_MAP(ENOLINK), N_("Link has been severed"))
-+_S(ENOLINK, N_("Link has been severed"))
- #endif
- #ifdef ENOMSG
--_S(ERR_MAP(ENOMSG), N_("No message of desired type"))
-+_S(ENOMSG, N_("No message of desired type"))
- #endif
- #ifdef ENOSR
--_S(ERR_MAP(ENOSR), N_("Out of streams resources"))
-+_S(ENOSR, N_("Out of streams resources"))
- #endif
- #ifdef ENOSTR
--_S(ERR_MAP(ENOSTR), N_("Device not a stream"))
-+_S(ENOSTR, N_("Device not a stream"))
- #endif
- #ifdef EOVERFLOW
--_S(ERR_MAP(EOVERFLOW), N_("Value too large for defined data type"))
-+_S(EOVERFLOW, N_("Value too large for defined data type"))
- #endif
- #ifdef EPROTO
--_S(ERR_MAP(EPROTO), N_("Protocol error"))
-+_S(EPROTO, N_("Protocol error"))
- #endif
- #ifdef ETIME
--_S(ERR_MAP(ETIME), N_("Timer expired"))
-+_S(ETIME, N_("Timer expired"))
- #endif
- #ifdef ECANCELED
- /*
-@@ -543,148 +540,148 @@ TRANS An asynchronous operation was canceled before it
- TRANS completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel},
- TRANS the normal result is for the operations affected to complete with this
- TRANS error; @pxref{Cancel AIO Operations}. */
--_S(ERR_MAP(ECANCELED), N_("Operation canceled"))
-+_S(ECANCELED, N_("Operation canceled"))
- #endif
- #ifdef EOWNERDEAD
--_S(ERR_MAP(EOWNERDEAD), N_("Owner died"))
-+_S(EOWNERDEAD, N_("Owner died"))
- #endif
- #ifdef ENOTRECOVERABLE
--_S(ERR_MAP(ENOTRECOVERABLE), N_("State not recoverable"))
-+_S(ENOTRECOVERABLE, N_("State not recoverable"))
- #endif
- #ifdef ERESTART
--_S(ERR_MAP(ERESTART), N_("Interrupted system call should be restarted"))
-+_S(ERESTART, N_("Interrupted system call should be restarted"))
- #endif
- #ifdef ECHRNG
--_S(ERR_MAP(ECHRNG), N_("Channel number out of range"))
-+_S(ECHRNG, N_("Channel number out of range"))
- #endif
- #ifdef EL2NSYNC
--_S(ERR_MAP(EL2NSYNC), N_("Level 2 not synchronized"))
-+_S(EL2NSYNC, N_("Level 2 not synchronized"))
- #endif
- #ifdef EL3HLT
--_S(ERR_MAP(EL3HLT), N_("Level 3 halted"))
-+_S(EL3HLT, N_("Level 3 halted"))
- #endif
- #ifdef EL3RST
--_S(ERR_MAP(EL3RST), N_("Level 3 reset"))
-+_S(EL3RST, N_("Level 3 reset"))
- #endif
- #ifdef ELNRNG
--_S(ERR_MAP(ELNRNG), N_("Link number out of range"))
-+_S(ELNRNG, N_("Link number out of range"))
- #endif
- #ifdef EUNATCH
--_S(ERR_MAP(EUNATCH), N_("Protocol driver not attached"))
-+_S(EUNATCH, N_("Protocol driver not attached"))
- #endif
- #ifdef ENOCSI
--_S(ERR_MAP(ENOCSI), N_("No CSI structure available"))
-+_S(ENOCSI, N_("No CSI structure available"))
- #endif
- #ifdef EL2HLT
--_S(ERR_MAP(EL2HLT), N_("Level 2 halted"))
-+_S(EL2HLT, N_("Level 2 halted"))
- #endif
- #ifdef EBADE
--_S(ERR_MAP(EBADE), N_("Invalid exchange"))
-+_S(EBADE, N_("Invalid exchange"))
- #endif
- #ifdef EBADR
--_S(ERR_MAP(EBADR), N_("Invalid request descriptor"))
-+_S(EBADR, N_("Invalid request descriptor"))
- #endif
- #ifdef EXFULL
--_S(ERR_MAP(EXFULL), N_("Exchange full"))
-+_S(EXFULL, N_("Exchange full"))
- #endif
- #ifdef ENOANO
--_S(ERR_MAP(ENOANO), N_("No anode"))
-+_S(ENOANO, N_("No anode"))
- #endif
- #ifdef EBADRQC
--_S(ERR_MAP(EBADRQC), N_("Invalid request code"))
-+_S(EBADRQC, N_("Invalid request code"))
- #endif
- #ifdef EBADSLT
--_S(ERR_MAP(EBADSLT), N_("Invalid slot"))
-+_S(EBADSLT, N_("Invalid slot"))
- #endif
- #ifdef EBFONT
--_S(ERR_MAP(EBFONT), N_("Bad font file format"))
-+_S(EBFONT, N_("Bad font file format"))
- #endif
- #ifdef ENONET
--_S(ERR_MAP(ENONET), N_("Machine is not on the network"))
-+_S(ENONET, N_("Machine is not on the network"))
- #endif
- #ifdef ENOPKG
--_S(ERR_MAP(ENOPKG), N_("Package not installed"))
-+_S(ENOPKG, N_("Package not installed"))
- #endif
- #ifdef EADV
--_S(ERR_MAP(EADV), N_("Advertise error"))
-+_S(EADV, N_("Advertise error"))
- #endif
- #ifdef ESRMNT
--_S(ERR_MAP(ESRMNT), N_("Srmount error"))
-+_S(ESRMNT, N_("Srmount error"))
- #endif
- #ifdef ECOMM
--_S(ERR_MAP(ECOMM), N_("Communication error on send"))
-+_S(ECOMM, N_("Communication error on send"))
- #endif
- #ifdef EDOTDOT
--_S(ERR_MAP(EDOTDOT), N_("RFS specific error"))
-+_S(EDOTDOT, N_("RFS specific error"))
- #endif
- #ifdef ENOTUNIQ
--_S(ERR_MAP(ENOTUNIQ), N_("Name not unique on network"))
-+_S(ENOTUNIQ, N_("Name not unique on network"))
- #endif
- #ifdef EBADFD
--_S(ERR_MAP(EBADFD), N_("File descriptor in bad state"))
-+_S(EBADFD, N_("File descriptor in bad state"))
- #endif
- #ifdef EREMCHG
--_S(ERR_MAP(EREMCHG), N_("Remote address changed"))
-+_S(EREMCHG, N_("Remote address changed"))
- #endif
- #ifdef ELIBACC
--_S(ERR_MAP(ELIBACC), N_("Can not access a needed shared library"))
-+_S(ELIBACC, N_("Can not access a needed shared library"))
- #endif
- #ifdef ELIBBAD
--_S(ERR_MAP(ELIBBAD), N_("Accessing a corrupted shared library"))
-+_S(ELIBBAD, N_("Accessing a corrupted shared library"))
- #endif
- #ifdef ELIBSCN
--_S(ERR_MAP(ELIBSCN), N_(".lib section in a.out corrupted"))
-+_S(ELIBSCN, N_(".lib section in a.out corrupted"))
- #endif
- #ifdef ELIBMAX
--_S(ERR_MAP(ELIBMAX), N_("Attempting to link in too many shared libraries"))
-+_S(ELIBMAX, N_("Attempting to link in too many shared libraries"))
- #endif
- #ifdef ELIBEXEC
--_S(ERR_MAP(ELIBEXEC), N_("Cannot exec a shared library directly"))
-+_S(ELIBEXEC, N_("Cannot exec a shared library directly"))
- #endif
- #ifdef ESTRPIPE
--_S(ERR_MAP(ESTRPIPE), N_("Streams pipe error"))
-+_S(ESTRPIPE, N_("Streams pipe error"))
- #endif
- #ifdef EUCLEAN
--_S(ERR_MAP(EUCLEAN), N_("Structure needs cleaning"))
-+_S(EUCLEAN, N_("Structure needs cleaning"))
- #endif
- #ifdef ENOTNAM
--_S(ERR_MAP(ENOTNAM), N_("Not a XENIX named type file"))
-+_S(ENOTNAM, N_("Not a XENIX named type file"))
- #endif
- #ifdef ENAVAIL
--_S(ERR_MAP(ENAVAIL), N_("No XENIX semaphores available"))
-+_S(ENAVAIL, N_("No XENIX semaphores available"))
- #endif
- #ifdef EISNAM
--_S(ERR_MAP(EISNAM), N_("Is a named type file"))
-+_S(EISNAM, N_("Is a named type file"))
- #endif
- #ifdef EREMOTEIO
--_S(ERR_MAP(EREMOTEIO), N_("Remote I/O error"))
-+_S(EREMOTEIO, N_("Remote I/O error"))
- #endif
- #ifdef ENOMEDIUM
--_S(ERR_MAP(ENOMEDIUM), N_("No medium found"))
-+_S(ENOMEDIUM, N_("No medium found"))
- #endif
- #ifdef EMEDIUMTYPE
--_S(ERR_MAP(EMEDIUMTYPE), N_("Wrong medium type"))
-+_S(EMEDIUMTYPE, N_("Wrong medium type"))
- #endif
- #ifdef ENOKEY
--_S(ERR_MAP(ENOKEY), N_("Required key not available"))
-+_S(ENOKEY, N_("Required key not available"))
- #endif
- #ifdef EKEYEXPIRED
--_S(ERR_MAP(EKEYEXPIRED), N_("Key has expired"))
-+_S(EKEYEXPIRED, N_("Key has expired"))
- #endif
- #ifdef EKEYREVOKED
--_S(ERR_MAP(EKEYREVOKED), N_("Key has been revoked"))
-+_S(EKEYREVOKED, N_("Key has been revoked"))
- #endif
- #ifdef EKEYREJECTED
--_S(ERR_MAP(EKEYREJECTED), N_("Key was rejected by service"))
-+_S(EKEYREJECTED, N_("Key was rejected by service"))
- #endif
- #ifdef ERFKILL
--_S(ERR_MAP(ERFKILL), N_("Operation not possible due to RF-kill"))
-+_S(ERFKILL, N_("Operation not possible due to RF-kill"))
- #endif
- #ifdef EHWPOISON
--_S(ERR_MAP(EHWPOISON), N_("Memory page has hardware error"))
-+_S(EHWPOISON, N_("Memory page has hardware error"))
- #endif
- #ifdef EBADRPC
--_S(ERR_MAP(EBADRPC), N_("RPC struct is bad"))
-+_S(EBADRPC, N_("RPC struct is bad"))
- #endif
- #ifdef EFTYPE
- /*
-@@ -693,40 +690,40 @@ TRANS operation, or a data file had the wrong format.
- TRANS
- TRANS On some systems @code{chmod} returns this error if you try to set the
- TRANS sticky bit on a non-directory file; @pxref{Setting Permissions}. */
--_S(ERR_MAP(EFTYPE), N_("Inappropriate file type or format"))
-+_S(EFTYPE, N_("Inappropriate file type or format"))
- #endif
- #ifdef EPROCUNAVAIL
--_S(ERR_MAP(EPROCUNAVAIL), N_("RPC bad procedure for program"))
-+_S(EPROCUNAVAIL, N_("RPC bad procedure for program"))
- #endif
- #ifdef EAUTH
--_S(ERR_MAP(EAUTH), N_("Authentication error"))
-+_S(EAUTH, N_("Authentication error"))
- #endif
- #ifdef EDIED
- /*
- TRANS On @gnuhurdsystems{}, opening a file returns this error when the file is
- TRANS translated by a program and the translator program dies while starting
- TRANS up, before it has connected to the file. */
--_S(ERR_MAP(EDIED), N_("Translator died"))
-+_S(EDIED, N_("Translator died"))
- #endif
- #ifdef ERPCMISMATCH
--_S(ERR_MAP(ERPCMISMATCH), N_("RPC version wrong"))
-+_S(ERPCMISMATCH, N_("RPC version wrong"))
- #endif
- #ifdef EGREGIOUS
- /*
- TRANS You did @strong{what}? */
--_S(ERR_MAP(EGREGIOUS), N_("You really blew it this time"))
-+_S(EGREGIOUS, N_("You really blew it this time"))
- #endif
- #ifdef EPROCLIM
- /*
- TRANS This means that the per-user limit on new process would be exceeded by
- TRANS an attempted @code{fork}. @xref{Limits on Resources}, for details on
- TRANS the @code{RLIMIT_NPROC} limit. */
--_S(ERR_MAP(EPROCLIM), N_("Too many processes"))
-+_S(EPROCLIM, N_("Too many processes"))
- #endif
- #ifdef EGRATUITOUS
- /*
- TRANS This error code has no purpose. */
--_S(ERR_MAP(EGRATUITOUS), N_("Gratuitous error"))
-+_S(EGRATUITOUS, N_("Gratuitous error"))
- #endif
- #if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
- /*
-@@ -742,10 +739,10 @@ TRANS values.
- TRANS
- TRANS If the entire function is not available at all in the implementation,
- TRANS it returns @code{ENOSYS} instead. */
--_S(ERR_MAP(ENOTSUP), N_("Not supported"))
-+_S(ENOTSUP, N_("Not supported"))
- #endif
- #ifdef EPROGMISMATCH
--_S(ERR_MAP(EPROGMISMATCH), N_("RPC program version wrong"))
-+_S(EPROGMISMATCH, N_("RPC program version wrong"))
- #endif
- #ifdef EBACKGROUND
- /*
-@@ -755,7 +752,7 @@ TRANS foreground process group of the terminal. Users do not usually see this
- TRANS error because functions such as @code{read} and @code{write} translate
- TRANS it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control},
- TRANS for information on process groups and these signals. */
--_S(ERR_MAP(EBACKGROUND), N_("Inappropriate operation for background process"))
-+_S(EBACKGROUND, N_("Inappropriate operation for background process"))
- #endif
- #ifdef EIEIO
- /*
-@@ -773,7 +770,7 @@ TRANS @c "bought the farm" means "died". -jtobey
- TRANS @c
- TRANS @c Translators, please do not translate this litteraly, translate it into
- TRANS @c an idiomatic funny way of saying that the computer died. */
--_S(ERR_MAP(EIEIO), N_("Computer bought the farm"))
-+_S(EIEIO, N_("Computer bought the farm"))
- #endif
- #if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
- /*
-@@ -782,18 +779,18 @@ TRANS The values are always the same, on every operating system.
- TRANS
- TRANS C libraries in many older Unix systems have @code{EWOULDBLOCK} as a
- TRANS separate error code. */
--_S(ERR_MAP(EWOULDBLOCK), N_("Operation would block"))
-+_S(EWOULDBLOCK, N_("Operation would block"))
- #endif
- #ifdef ENEEDAUTH
--_S(ERR_MAP(ENEEDAUTH), N_("Need authenticator"))
-+_S(ENEEDAUTH, N_("Need authenticator"))
- #endif
- #ifdef ED
- /*
- TRANS The experienced user will know what is wrong.
- TRANS @c This error code is a joke. Its perror text is part of the joke.
- TRANS @c Don't change it. */
--_S(ERR_MAP(ED), N_("?"))
-+_S(ED, N_("?"))
- #endif
- #ifdef EPROGUNAVAIL
--_S(ERR_MAP(EPROGUNAVAIL), N_("RPC program not available"))
-+_S(EPROGUNAVAIL, N_("RPC program not available"))
- #endif
-diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
-index 0f08079e48..672d8f27ce 100644
---- a/sysdeps/i386/dl-machine.h
-+++ b/sysdeps/i386/dl-machine.h
-@@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
- {
- # ifndef RTLD_BOOTSTRAP
- if (sym_map != map
-- && sym_map->l_type != lt_executable
- && !sym_map->l_relocated)
- {
- const char *strtab
- = (const char *) D_PTR (map, l_info[DT_STRTAB]);
-- _dl_error_printf ("\
-+ if (sym_map->l_type == lt_executable)
-+ _dl_fatal_printf ("\
-+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
-+and creates an unsatisfiable circular dependency.\n",
-+ RTLD_PROGNAME, strtab + refsym->st_name,
-+ map->l_name);
-+ else
-+ _dl_error_printf ("\
- %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
-- RTLD_PROGNAME, map->l_name,
-- sym_map->l_name,
-- strtab + refsym->st_name);
-+ RTLD_PROGNAME, map->l_name,
-+ sym_map->l_name,
-+ strtab + refsym->st_name);
- }
- # endif
- value = ((Elf32_Addr (*) (void)) value) ();
-diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
-index 8a53a1088f..362a2b713c 100644
---- a/sysdeps/powerpc/powerpc64/backtrace.c
-+++ b/sysdeps/powerpc/powerpc64/backtrace.c
-@@ -54,11 +54,22 @@ struct signal_frame_64 {
- /* We don't care about the rest, since the IP value is at 'uc' field. */
- };
-
-+/* Test if the address match to the inside the trampoline code.
-+ Up to and including kernel 5.8, returning from an interrupt or syscall to a
-+ signal handler starts execution directly at the handler's entry point, with
-+ LR set to address of the sigreturn trampoline (the vDSO symbol).
-+ Newer kernels will branch to signal handler from the trampoline instead, so
-+ checking the stacktrace against the vDSO entrypoint does not work in such
-+ case.
-+ The vDSO branches with a 'bctrl' instruction, so checking either the
-+ vDSO address itself and the next instruction should cover all kernel
-+ versions. */
- static inline bool
- is_sigtramp_address (void *nip)
- {
- #ifdef HAVE_SIGTRAMP_RT64
-- if (nip == GLRO (dl_vdso_sigtramp_rt64))
-+ if (nip == GLRO (dl_vdso_sigtramp_rt64) ||
-+ nip == GLRO (dl_vdso_sigtramp_rt64) + 4)
- return true;
- #endif
- return false;
-diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
-index 920d875420..bf9b7f7223 100644
---- a/sysdeps/pthread/Makefile
-+++ b/sysdeps/pthread/Makefile
-@@ -107,6 +107,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
- tst-unload \
- tst-unwind-thread \
- tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
-+ tst-pthread-exit-signal \
-
-
- # Files which must not be linked with libpthread.
-diff --git a/sysdeps/pthread/tst-pthread-exit-signal.c b/sysdeps/pthread/tst-pthread-exit-signal.c
-new file mode 100644
-index 0000000000..b4526fe663
---- /dev/null
-+++ b/sysdeps/pthread/tst-pthread-exit-signal.c
-@@ -0,0 +1,45 @@
-+/* Test that pending signals are not delivered on thread exit (bug 28607).
-+ Copyright (C) 2021 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the
-+ signal mask during during thread exit, triggering the delivery of a
-+ blocked pending signal (SIGUSR1 in this test). */
-+
-+#include <support/xthread.h>
-+#include <support/xsignal.h>
-+
-+static void *
-+threadfunc (void *closure)
-+{
-+ sigset_t sigmask;
-+ sigfillset (&sigmask);
-+ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL);
-+ xpthread_kill (pthread_self (), SIGUSR1);
-+ pthread_exit (NULL);
-+ return NULL;
-+}
-+
-+static int
-+do_test (void)
-+{
-+ pthread_t thr = xpthread_create (NULL, threadfunc, NULL);
-+ xpthread_join (thr);
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
-index fa46e9e351..e7f576338d 100644
---- a/sysdeps/s390/configure
-+++ b/sysdeps/s390/configure
-@@ -123,7 +123,9 @@ void testinsn (char *buf)
- __asm__ (".machine \"arch13\" \n\t"
- ".machinemode \"zarch_nohighgprs\" \n\t"
- "lghi %%r0,16 \n\t"
-- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
-+ "mvcrl 0(%0),32(%0) \n\t"
-+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
-+ : : "a" (buf) : "memory", "r0");
- }
- EOF
- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
-@@ -271,7 +273,9 @@ else
- void testinsn (char *buf)
- {
- __asm__ ("lghi %%r0,16 \n\t"
-- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
-+ "mvcrl 0(%0),32(%0) \n\t"
-+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
-+ : : "a" (buf) : "memory", "r0");
- }
- EOF
- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
-diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
-index 3ed5a8ef87..5c3479e8cf 100644
---- a/sysdeps/s390/configure.ac
-+++ b/sysdeps/s390/configure.ac
-@@ -88,7 +88,9 @@ void testinsn (char *buf)
- __asm__ (".machine \"arch13\" \n\t"
- ".machinemode \"zarch_nohighgprs\" \n\t"
- "lghi %%r0,16 \n\t"
-- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
-+ "mvcrl 0(%0),32(%0) \n\t"
-+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
-+ : : "a" (buf) : "memory", "r0");
- }
- EOF
- dnl test, if assembler supports S390 arch13 instructions
-@@ -195,7 +197,9 @@ cat > conftest.c <<\EOF
- void testinsn (char *buf)
- {
- __asm__ ("lghi %%r0,16 \n\t"
-- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
-+ "mvcrl 0(%0),32(%0) \n\t"
-+ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
-+ : : "a" (buf) : "memory", "r0");
- }
- EOF
- dnl test, if assembler supports S390 arch13 zarch instructions as default
-diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
-index 5fc85e129f..ee59b5de14 100644
---- a/sysdeps/s390/memmove.c
-+++ b/sysdeps/s390/memmove.c
-@@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
- s390_libc_ifunc_expr (__redirect_memmove, memmove,
- ({
- s390_libc_ifunc_expr_stfle_init ();
-- (HAVE_MEMMOVE_ARCH13
-+ (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2)
- && S390_IS_ARCH13_MIE3 (stfle_bits))
- ? MEMMOVE_ARCH13
- : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
-diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
-index e6195c6e26..17c0cc3952 100644
---- a/sysdeps/s390/multiarch/ifunc-impl-list.c
-+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
-@@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
- IFUNC_IMPL (i, name, memmove,
- # if HAVE_MEMMOVE_ARCH13
- IFUNC_IMPL_ADD (array, i, memmove,
-- S390_IS_ARCH13_MIE3 (stfle_bits),
-+ ((dl_hwcap & HWCAP_S390_VXRS_EXT2)
-+ && S390_IS_ARCH13_MIE3 (stfle_bits)),
- MEMMOVE_ARCH13)
- # endif
- # if HAVE_MEMMOVE_Z13
-diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies
-new file mode 100644
-index 0000000000..71b28ee1a4
---- /dev/null
-+++ b/sysdeps/sh/be/sh4/fpu/Implies
-@@ -0,0 +1 @@
-+sh/sh4/fpu
-diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies
-new file mode 100644
-index 0000000000..71b28ee1a4
---- /dev/null
-+++ b/sysdeps/sh/le/sh4/fpu/Implies
-@@ -0,0 +1 @@
-+sh/sh4/fpu
-diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
-index 9b2a253032..34748ffcd1 100644
---- a/sysdeps/unix/sysv/linux/Makefile
-+++ b/sysdeps/unix/sysv/linux/Makefile
-@@ -100,7 +100,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
- tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
- test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \
- tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \
-- tst-tgkill
-+ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux
- tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc
-
- CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables
-diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
-index fc688450ee..00a4d0c8e7 100644
---- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
-+++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
-@@ -54,6 +54,10 @@
- && MIDR_PARTNUM(midr) == 0x000)
- #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
- && MIDR_PARTNUM(midr) == 0xd0c)
-+#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
-+ && MIDR_PARTNUM(midr) == 0xd49)
-+#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
-+ && MIDR_PARTNUM(midr) == 0xd40)
-
- #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \
- && MIDR_PARTNUM(midr) == 0x000)
-diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c
-index 61bbb03b64..2bb98172c8 100644
---- a/sysdeps/unix/sysv/linux/mq_notify.c
-+++ b/sysdeps/unix/sysv/linux/mq_notify.c
-@@ -132,9 +132,12 @@ helper_thread (void *arg)
- to wait until it is done with it. */
- (void) __pthread_barrier_wait (&notify_barrier);
- }
-- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED)
-- /* The only state we keep is the copy of the thread attributes. */
-- free (data.attr);
-+ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL)
-+ {
-+ /* The only state we keep is the copy of the thread attributes. */
-+ pthread_attr_destroy (data.attr);
-+ free (data.attr);
-+ }
- }
- return NULL;
- }
-@@ -255,8 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
- if (data.attr == NULL)
- return -1;
-
-- memcpy (data.attr, notification->sigev_notify_attributes,
-- sizeof (pthread_attr_t));
-+ int ret = __pthread_attr_copy (data.attr,
-+ notification->sigev_notify_attributes);
-+ if (ret != 0)
-+ {
-+ free (data.attr);
-+ __set_errno (ret);
-+ return -1;
-+ }
- }
-
- /* Construct the new request. */
-@@ -269,8 +278,11 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
- int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se);
-
- /* If it failed, free the allocated memory. */
-- if (__glibc_unlikely (retval != 0))
-- free (data.attr);
-+ if (retval != 0 && data.attr != NULL)
-+ {
-+ pthread_attr_destroy (data.attr);
-+ free (data.attr);
-+ }
-
- return retval;
- }
-diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c
-index 0776472d5e..a1f24ab242 100644
---- a/sysdeps/unix/sysv/linux/msgctl.c
-+++ b/sysdeps/unix/sysv/linux/msgctl.c
-@@ -90,8 +90,15 @@ __msgctl64 (int msqid, int cmd, struct __msqid64_ds *buf)
- struct kernel_msqid64_ds ksemid, *arg = NULL;
- if (buf != NULL)
- {
-- msqid64_to_kmsqid64 (buf, &ksemid);
-- arg = &ksemid;
-+ /* This is a Linux extension where kernel returns a 'struct msginfo'
-+ instead. */
-+ if (cmd == IPC_INFO || cmd == MSG_INFO)
-+ arg = (struct kernel_msqid64_ds *) buf;
-+ else
-+ {
-+ msqid64_to_kmsqid64 (buf, &ksemid);
-+ arg = &ksemid;
-+ }
- }
- # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
- if (cmd == IPC_SET)
-@@ -169,8 +176,15 @@ __msgctl (int msqid, int cmd, struct msqid_ds *buf)
- struct __msqid64_ds msqid64, *buf64 = NULL;
- if (buf != NULL)
- {
-- msqid_to_msqid64 (&msqid64, buf);
-- buf64 = &msqid64;
-+ /* This is a Linux extension where kernel returns a 'struct msginfo'
-+ instead. */
-+ if (cmd == IPC_INFO || cmd == MSG_INFO)
-+ buf64 = (struct __msqid64_ds *) buf;
-+ else
-+ {
-+ msqid_to_msqid64 (&msqid64, buf);
-+ buf64 = &msqid64;
-+ }
- }
-
- int ret = __msgctl64 (msqid, cmd, buf64);
-diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c
-index f131a26fc7..1cdabde8f2 100644
---- a/sysdeps/unix/sysv/linux/semctl.c
-+++ b/sysdeps/unix/sysv/linux/semctl.c
-@@ -102,6 +102,7 @@ semun64_to_ksemun64 (int cmd, union semun64 semun64,
- r.array = semun64.array;
- break;
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_STAT:
- case IPC_SET:
- r.buf = buf;
-@@ -150,6 +151,7 @@ __semctl64 (int semid, int semnum, int cmd, ...)
- case IPC_STAT: /* arg.buf */
- case IPC_SET:
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_INFO: /* arg.__buf */
- case SEM_INFO:
- va_start (ap, cmd);
-@@ -238,6 +240,7 @@ semun_to_semun64 (int cmd, union semun semun, struct __semid64_ds *semid64)
- r.array = semun.array;
- break;
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_STAT:
- case IPC_SET:
- r.buf = semid64;
-@@ -267,6 +270,7 @@ __semctl (int semid, int semnum, int cmd, ...)
- case IPC_STAT: /* arg.buf */
- case IPC_SET:
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_INFO: /* arg.__buf */
- case SEM_INFO:
- va_start (ap, cmd);
-@@ -321,6 +325,7 @@ __semctl_mode16 (int semid, int semnum, int cmd, ...)
- case IPC_STAT: /* arg.buf */
- case IPC_SET:
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_INFO: /* arg.__buf */
- case SEM_INFO:
- va_start (ap, cmd);
-@@ -354,6 +359,7 @@ __old_semctl (int semid, int semnum, int cmd, ...)
- case IPC_STAT: /* arg.buf */
- case IPC_SET:
- case SEM_STAT:
-+ case SEM_STAT_ANY:
- case IPC_INFO: /* arg.__buf */
- case SEM_INFO:
- va_start (ap, cmd);
-diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
-new file mode 100644
-index 0000000000..7eeaf15a5a
---- /dev/null
-+++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
-@@ -0,0 +1 @@
-+unix/sysv/linux/sh/sh4/fpu
-diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
-new file mode 100644
-index 0000000000..7eeaf15a5a
---- /dev/null
-+++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
-@@ -0,0 +1 @@
-+unix/sysv/linux/sh/sh4/fpu
-diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c
-index 76d88441f1..1d19a798b1 100644
---- a/sysdeps/unix/sysv/linux/shmctl.c
-+++ b/sysdeps/unix/sysv/linux/shmctl.c
-@@ -90,8 +90,15 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
- struct kernel_shmid64_ds kshmid, *arg = NULL;
- if (buf != NULL)
- {
-- shmid64_to_kshmid64 (buf, &kshmid);
-- arg = &kshmid;
-+ /* This is a Linux extension where kernel expects either a
-+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
-+ if (cmd == IPC_INFO || cmd == SHM_INFO)
-+ arg = (struct kernel_shmid64_ds *) buf;
-+ else
-+ {
-+ shmid64_to_kshmid64 (buf, &kshmid);
-+ arg = &kshmid;
-+ }
- }
- # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
- if (cmd == IPC_SET)
-@@ -107,7 +114,6 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
-
- switch (cmd)
- {
-- case IPC_INFO:
- case IPC_STAT:
- case SHM_STAT:
- case SHM_STAT_ANY:
-@@ -168,8 +174,15 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
- struct __shmid64_ds shmid64, *buf64 = NULL;
- if (buf != NULL)
- {
-- shmid_to_shmid64 (&shmid64, buf);
-- buf64 = &shmid64;
-+ /* This is a Linux extension where kernel expects either a
-+ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
-+ if (cmd == IPC_INFO || cmd == SHM_INFO)
-+ buf64 = (struct __shmid64_ds *) buf;
-+ else
-+ {
-+ shmid_to_shmid64 (&shmid64, buf);
-+ buf64 = &shmid64;
-+ }
- }
-
- int ret = __shmctl64 (shmid, cmd, buf64);
-@@ -178,7 +191,6 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
-
- switch (cmd)
- {
-- case IPC_INFO:
- case IPC_STAT:
- case SHM_STAT:
- case SHM_STAT_ANY:
-diff --git a/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
-new file mode 100644
-index 0000000000..630f4f792c
---- /dev/null
-+++ b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
-@@ -0,0 +1,177 @@
-+/* Basic tests for Linux SYSV message queue extensions.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <sys/ipc.h>
-+#include <sys/msg.h>
-+#include <errno.h>
-+#include <stdlib.h>
-+#include <stdbool.h>
-+#include <stdio.h>
-+
-+#include <support/check.h>
-+#include <support/temp_file.h>
-+
-+#define MSGQ_MODE 0644
-+
-+/* These are for the temporary file we generate. */
-+static char *name;
-+static int msqid;
-+
-+static void
-+remove_msq (void)
-+{
-+ /* Enforce message queue removal in case of early test failure.
-+ Ignore error since the msg may already have being removed. */
-+ msgctl (msqid, IPC_RMID, NULL);
-+}
-+
-+static void
-+do_prepare (int argc, char *argv[])
-+{
-+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvmsg.", &name) != -1);
-+}
-+
-+#define PREPARE do_prepare
-+
-+struct test_msginfo
-+{
-+ int msgmax;
-+ int msgmnb;
-+ int msgmni;
-+};
-+
-+/* It tries to obtain some system-wide SysV messsage queue information from
-+ /proc to check against IPC_INFO/MSG_INFO. The /proc only returns the
-+ tunables value of MSGMAX, MSGMNB, and MSGMNI.
-+
-+ The kernel also returns constant value for MSGSSZ, MSGSEG and also MSGMAP,
-+ MSGPOOL, and MSGTQL (for IPC_INFO). The issue to check them is they might
-+ change over kernel releases. */
-+
-+static int
-+read_proc_file (const char *file)
-+{
-+ FILE *f = fopen (file, "r");
-+ if (f == NULL)
-+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
-+
-+ int v;
-+ int r = fscanf (f, "%d", & v);
-+ TEST_VERIFY_EXIT (r == 1);
-+
-+ fclose (f);
-+ return v;
-+}
-+
-+
-+/* Check if the message queue with IDX (index into the kernel's internal
-+ array) matches the one with KEY. The CMD is either MSG_STAT or
-+ MSG_STAT_ANY. */
-+
-+static bool
-+check_msginfo (int idx, key_t key, int cmd)
-+{
-+ struct msqid_ds msginfo;
-+ int mid = msgctl (idx, cmd, &msginfo);
-+ /* Ignore unused array slot returned by the kernel or information from
-+ unknown message queue. */
-+ if ((mid == -1 && errno == EINVAL) || mid != msqid)
-+ return false;
-+
-+ if (mid == -1)
-+ FAIL_EXIT1 ("msgctl with %s failed: %m",
-+ cmd == MSG_STAT ? "MSG_STAT" : "MSG_STAT_ANY");
-+
-+ TEST_COMPARE (msginfo.msg_perm.__key, key);
-+ TEST_COMPARE (msginfo.msg_perm.mode, MSGQ_MODE);
-+ TEST_COMPARE (msginfo.msg_qnum, 0);
-+
-+ return true;
-+}
-+
-+static int
-+do_test (void)
-+{
-+ atexit (remove_msq);
-+
-+ key_t key = ftok (name, 'G');
-+ if (key == -1)
-+ FAIL_EXIT1 ("ftok failed: %m");
-+
-+ msqid = msgget (key, MSGQ_MODE | IPC_CREAT);
-+ if (msqid == -1)
-+ FAIL_EXIT1 ("msgget failed: %m");
-+
-+ struct test_msginfo tipcinfo;
-+ tipcinfo.msgmax = read_proc_file ("/proc/sys/kernel/msgmax");
-+ tipcinfo.msgmnb = read_proc_file ("/proc/sys/kernel/msgmnb");
-+ tipcinfo.msgmni = read_proc_file ("/proc/sys/kernel/msgmni");
-+
-+ int msqidx;
-+
-+ {
-+ struct msginfo ipcinfo;
-+ msqidx = msgctl (msqid, IPC_INFO, (struct msqid_ds *) &ipcinfo);
-+ if (msqidx == -1)
-+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m");
-+
-+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
-+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
-+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
-+ }
-+
-+ /* Same as before but with MSG_INFO. */
-+ {
-+ struct msginfo ipcinfo;
-+ msqidx = msgctl (msqid, MSG_INFO, (struct msqid_ds *) &ipcinfo);
-+ if (msqidx == -1)
-+ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m");
-+
-+ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
-+ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
-+ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
-+ }
-+
-+ /* We check if the created message queue shows in global list. */
-+ bool found = false;
-+ for (int i = 0; i <= msqidx; i++)
-+ {
-+ /* We can't tell apart if MSG_STAT_ANY is not supported (kernel older
-+ than 4.17) or if the index used is invalid. So it just check if the
-+ value returned from a valid call matches the created message
-+ queue. */
-+ check_msginfo (i, key, MSG_STAT_ANY);
-+
-+ if (check_msginfo (i, key, MSG_STAT))
-+ {
-+ found = true;
-+ break;
-+ }
-+ }
-+
-+ if (!found)
-+ FAIL_EXIT1 ("msgctl with MSG_STAT/MSG_STAT_ANY could not find the "
-+ "created message queue");
-+
-+ if (msgctl (msqid, IPC_RMID, NULL) == -1)
-+ FAIL_EXIT1 ("msgctl failed");
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
-new file mode 100644
-index 0000000000..45f19e2d37
---- /dev/null
-+++ b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
-@@ -0,0 +1,184 @@
-+/* Basic tests for Linux SYSV semaphore extensions.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <sys/ipc.h>
-+#include <sys/sem.h>
-+#include <errno.h>
-+#include <stdlib.h>
-+#include <stdbool.h>
-+#include <stdio.h>
-+
-+#include <support/check.h>
-+#include <support/temp_file.h>
-+
-+/* These are for the temporary file we generate. */
-+static char *name;
-+static int semid;
-+
-+static void
-+remove_sem (void)
-+{
-+ /* Enforce message queue removal in case of early test failure.
-+ Ignore error since the sem may already have being removed. */
-+ semctl (semid, 0, IPC_RMID, 0);
-+}
-+
-+static void
-+do_prepare (int argc, char *argv[])
-+{
-+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvsem.", &name) != -1);
-+}
-+
-+#define PREPARE do_prepare
-+
-+#define SEM_MODE 0644
-+
-+union semun
-+{
-+ int val;
-+ struct semid_ds *buf;
-+ unsigned short *array;
-+ struct seminfo *__buf;
-+};
-+
-+struct test_seminfo
-+{
-+ int semmsl;
-+ int semmns;
-+ int semopm;
-+ int semmni;
-+};
-+
-+/* It tries to obtain some system-wide SysV semaphore information from /proc
-+ to check against IPC_INFO/SEM_INFO. The /proc only returns the tunables
-+ value of SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
-+
-+ The kernel also returns constant value for SEMVMX, SEMMNU, SEMMAP, SEMUME,
-+ and also SEMUSZ and SEMAEM (for IPC_INFO). The issue to check them is they
-+ might change over kernel releases. */
-+
-+static void
-+read_sem_stat (struct test_seminfo *tseminfo)
-+{
-+ FILE *f = fopen ("/proc/sys/kernel/sem", "r");
-+ if (f == NULL)
-+ FAIL_UNSUPPORTED ("/proc is not mounted or /proc/sys/kernel/sem is not "
-+ "available");
-+
-+ int r = fscanf (f, "%d %d %d %d",
-+ &tseminfo->semmsl, &tseminfo->semmns, &tseminfo->semopm,
-+ &tseminfo->semmni);
-+ TEST_VERIFY_EXIT (r == 4);
-+
-+ fclose (f);
-+}
-+
-+
-+/* Check if the semaphore with IDX (index into the kernel's internal array)
-+ matches the one with KEY. The CMD is either SEM_STAT or SEM_STAT_ANY. */
-+
-+static bool
-+check_seminfo (int idx, key_t key, int cmd)
-+{
-+ struct semid_ds seminfo;
-+ int sid = semctl (idx, 0, cmd, (union semun) { .buf = &seminfo });
-+ /* Ignore unused array slot returned by the kernel or information from
-+ unknown semaphores. */
-+ if ((sid == -1 && errno == EINVAL) || sid != semid)
-+ return false;
-+
-+ if (sid == -1)
-+ FAIL_EXIT1 ("semctl with SEM_STAT failed (errno=%d)", errno);
-+
-+ TEST_COMPARE (seminfo.sem_perm.__key, key);
-+ TEST_COMPARE (seminfo.sem_perm.mode, SEM_MODE);
-+ TEST_COMPARE (seminfo.sem_nsems, 1);
-+
-+ return true;
-+}
-+
-+static int
-+do_test (void)
-+{
-+ atexit (remove_sem);
-+
-+ key_t key = ftok (name, 'G');
-+ if (key == -1)
-+ FAIL_EXIT1 ("ftok failed: %m");
-+
-+ semid = semget (key, 1, IPC_CREAT | IPC_EXCL | SEM_MODE);
-+ if (semid == -1)
-+ FAIL_EXIT1 ("semget failed: %m");
-+
-+ struct test_seminfo tipcinfo;
-+ read_sem_stat (&tipcinfo);
-+
-+ int semidx;
-+
-+ {
-+ struct seminfo ipcinfo;
-+ semidx = semctl (semid, 0, IPC_INFO, (union semun) { .__buf = &ipcinfo });
-+ if (semidx == -1)
-+ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m");
-+
-+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
-+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
-+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
-+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
-+ }
-+
-+ /* Same as before but with SEM_INFO. */
-+ {
-+ struct seminfo ipcinfo;
-+ semidx = semctl (semid, 0, SEM_INFO, (union semun) { .__buf = &ipcinfo });
-+ if (semidx == -1)
-+ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m");
-+
-+ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
-+ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
-+ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
-+ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
-+ }
-+
-+ /* We check if the created semaphore shows in the system-wide status. */
-+ bool found = false;
-+ for (int i = 0; i <= semidx; i++)
-+ {
-+ /* We can't tell apart if SEM_STAT_ANY is not supported (kernel older
-+ than 4.17) or if the index used is invalid. So it just check if
-+ value returned from a valid call matches the created semaphore. */
-+ check_seminfo (i, key, SEM_STAT_ANY);
-+
-+ if (check_seminfo (i, key, SEM_STAT))
-+ {
-+ found = true;
-+ break;
-+ }
-+ }
-+
-+ if (!found)
-+ FAIL_EXIT1 ("semctl with SEM_STAT/SEM_STAT_ANY could not find the "
-+ "created semaphore");
-+
-+ if (semctl (semid, 0, IPC_RMID, 0) == -1)
-+ FAIL_EXIT1 ("semctl failed: %m");
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
-new file mode 100644
-index 0000000000..bb154592a6
---- /dev/null
-+++ b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
-@@ -0,0 +1,188 @@
-+/* Basic tests for Linux SYSV shared memory extensions.
-+ Copyright (C) 2020 Free Software Foundation, Inc.
-+ This file is part of the GNU C Library.
-+
-+ The GNU C Library is free software; you can redistribute it and/or
-+ modify it under the terms of the GNU Lesser General Public
-+ License as published by the Free Software Foundation; either
-+ version 2.1 of the License, or (at your option) any later version.
-+
-+ The GNU C Library is distributed in the hope that it will be useful,
-+ but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ Lesser General Public License for more details.
-+
-+ You should have received a copy of the GNU Lesser General Public
-+ License along with the GNU C Library; if not, see
-+ <https://www.gnu.org/licenses/>. */
-+
-+#include <sys/ipc.h>
-+#include <sys/shm.h>
-+#include <errno.h>
-+#include <stdlib.h>
-+#include <stdbool.h>
-+#include <stdio.h>
-+#include <unistd.h>
-+#include <inttypes.h>
-+#include <limits.h>
-+
-+#include <support/check.h>
-+#include <support/temp_file.h>
-+
-+#define SHM_MODE 0644
-+
-+/* These are for the temporary file we generate. */
-+static char *name;
-+static int shmid;
-+static long int pgsz;
-+
-+static void
-+remove_shm (void)
-+{
-+ /* Enforce message queue removal in case of early test failure.
-+ Ignore error since the shm may already have being removed. */
-+ shmctl (shmid, IPC_RMID, NULL);
-+}
-+
-+static void
-+do_prepare (int argc, char *argv[])
-+{
-+ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvshm.", &name) != -1);
-+}
-+
-+#define PREPARE do_prepare
-+
-+struct test_shminfo
-+{
-+ __syscall_ulong_t shmall;
-+ __syscall_ulong_t shmmax;
-+ __syscall_ulong_t shmmni;
-+};
-+
-+/* It tries to obtain some system-wide SysV shared memory information from
-+ /proc to check against IPC_INFO/SHM_INFO. The /proc only returns the
-+ tunables value of SHMALL, SHMMAX, and SHMMNI. */
-+
-+static uint64_t
-+read_proc_file (const char *file)
-+{
-+ FILE *f = fopen (file, "r");
-+ if (f == NULL)
-+ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
-+
-+ /* Handle 32-bit binaries running on 64-bit kernels. */
-+ uint64_t v;
-+ int r = fscanf (f, "%" SCNu64, &v);
-+ TEST_VERIFY_EXIT (r == 1);
-+
-+ fclose (f);
-+ return v;
-+}
-+
-+
-+/* Check if the message queue with IDX (index into the kernel's internal
-+ array) matches the one with KEY. The CMD is either SHM_STAT or
-+ SHM_STAT_ANY. */
-+
-+static bool
-+check_shminfo (int idx, key_t key, int cmd)
-+{
-+ struct shmid_ds shminfo;
-+ int sid = shmctl (idx, cmd, &shminfo);
-+ /* Ignore unused array slot returned by the kernel or information from
-+ unknown message queue. */
-+ if ((sid == -1 && errno == EINVAL) || sid != shmid)
-+ return false;
-+
-+ if (sid == -1)
-+ FAIL_EXIT1 ("shmctl with %s failed: %m",
-+ cmd == SHM_STAT ? "SHM_STAT" : "SHM_STAT_ANY");
-+
-+ TEST_COMPARE (shminfo.shm_perm.__key, key);
-+ TEST_COMPARE (shminfo.shm_perm.mode, SHM_MODE);
-+ TEST_COMPARE (shminfo.shm_segsz, pgsz);
-+
-+ return true;
-+}
-+
-+static int
-+do_test (void)
-+{
-+ atexit (remove_shm);
-+
-+ pgsz = sysconf (_SC_PAGESIZE);
-+ if (pgsz == -1)
-+ FAIL_EXIT1 ("sysconf (_SC_PAGESIZE) failed: %m");
-+
-+ key_t key = ftok (name, 'G');
-+ if (key == -1)
-+ FAIL_EXIT1 ("ftok failed: %m");
-+
-+ shmid = shmget (key, pgsz, IPC_CREAT | IPC_EXCL | SHM_MODE);
-+ if (shmid == -1)
-+ FAIL_EXIT1 ("shmget failed: %m");
-+
-+ /* It does not check shmmax because kernel clamp its value to INT_MAX for:
-+
-+ 1. Compat symbols with IPC_64, i.e, 32-bit binaries running on 64-bit
-+ kernels.
-+
-+ 2. Default symbol without IPC_64 (defined as IPC_OLD within Linux) and
-+ glibc always use IPC_64 for 32-bit ABIs (to support 64-bit time_t).
-+ It means that 32-bit binaries running on 32-bit kernels will not see
-+ shmmax being clamped.
-+
-+ And finding out whether the compat symbol is used would require checking
-+ the underlying kernel against the current ABI. The shmall and shmmni
-+ already provided enough coverage. */
-+
-+ struct test_shminfo tipcinfo;
-+ tipcinfo.shmall = read_proc_file ("/proc/sys/kernel/shmall");
-+ tipcinfo.shmmni = read_proc_file ("/proc/sys/kernel/shmmni");
-+
-+ int shmidx;
-+
-+ /* Note: SHM_INFO does not return a shminfo, but rather a 'struct shm_info'.
-+ It is tricky to verify its values since the syscall returns system wide
-+ resources consumed by shared memory. The shmctl implementation handles
-+ SHM_INFO as IPC_INFO, so the IPC_INFO test should validate SHM_INFO as
-+ well. */
-+
-+ {
-+ struct shminfo ipcinfo;
-+ shmidx = shmctl (shmid, IPC_INFO, (struct shmid_ds *) &ipcinfo);
-+ if (shmidx == -1)
-+ FAIL_EXIT1 ("shmctl with IPC_INFO failed: %m");
-+
-+ TEST_COMPARE (ipcinfo.shmall, tipcinfo.shmall);
-+ TEST_COMPARE (ipcinfo.shmmni, tipcinfo.shmmni);
-+ }
-+
-+ /* We check if the created shared memory shows in the global list. */
-+ bool found = false;
-+ for (int i = 0; i <= shmidx; i++)
-+ {
-+ /* We can't tell apart if SHM_STAT_ANY is not supported (kernel older
-+ than 4.17) or if the index used is invalid. So it just check if
-+ value returned from a valid call matches the created message
-+ queue. */
-+ check_shminfo (i, key, SHM_STAT_ANY);
-+
-+ if (check_shminfo (i, key, SHM_STAT))
-+ {
-+ found = true;
-+ break;
-+ }
-+ }
-+
-+ if (!found)
-+ FAIL_EXIT1 ("shmctl with SHM_STAT/SHM_STAT_ANY could not find the "
-+ "created shared memory");
-+
-+ if (shmctl (shmid, IPC_RMID, NULL) == -1)
-+ FAIL_EXIT1 ("shmctl failed");
-+
-+ return 0;
-+}
-+
-+#include <support/test-driver.c>
-diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
-index a6736aef25..9736a13e7b 100644
---- a/sysdeps/x86/Makefile
-+++ b/sysdeps/x86/Makefile
-@@ -12,6 +12,12 @@ endif
- ifeq ($(subdir),setjmp)
- gen-as-const-headers += jmp_buf-ssp.sym
- sysdep_routines += __longjmp_cancel
-+ifneq ($(enable-cet),no)
-+ifneq ($(have-tunables),no)
-+tests += tst-setjmp-cet
-+tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on
-+endif
-+endif
- endif
-
- ifeq ($(subdir),string)
-diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
-index 217c21c34f..3fb4a028d8 100644
---- a/sysdeps/x86/cacheinfo.c
-+++ b/sysdeps/x86/cacheinfo.c
-@@ -808,7 +808,7 @@ init_cacheinfo (void)
- threads = 1 << ((ecx >> 12) & 0x0f);
- }
-
-- if (threads == 0)
-+ if (threads == 0 || cpu_features->basic.family >= 0x17)
- {
- /* If APIC ID width is not available, use logical
- processor count. */
-@@ -823,8 +823,22 @@ init_cacheinfo (void)
- if (threads > 0)
- shared /= threads;
-
-- /* Account for exclusive L2 and L3 caches. */
-- shared += core;
-+ /* Get shared cache per ccx for Zen architectures. */
-+ if (cpu_features->basic.family >= 0x17)
-+ {
-+ unsigned int eax;
-+
-+ /* Get number of threads share the L3 cache in CCX. */
-+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
-+
-+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
-+ shared *= threads_per_ccx;
-+ }
-+ else
-+ {
-+ /* Account for exclusive L2 and L3 caches. */
-+ shared += core;
-+ }
- }
- }
-
-@@ -854,14 +868,20 @@ init_cacheinfo (void)
- __x86_shared_cache_size = shared;
- }
-
-- /* The large memcpy micro benchmark in glibc shows that 6 times of
-- shared cache size is the approximate value above which non-temporal
-- store becomes faster on a 8-core processor. This is the 3/4 of the
-- total shared cache size. */
-+ /* The default setting for the non_temporal threshold is 3/4 of one
-+ thread's share of the chip's cache. For most Intel and AMD processors
-+ with an initial release date between 2017 and 2020, a thread's typical
-+ share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
-+ threshold leaves 125 KBytes to 500 KBytes of the thread's data
-+ in cache after a maximum temporal copy, which will maintain
-+ in cache a reasonable portion of the thread's stack and other
-+ active data. If the threshold is set higher than one thread's
-+ share of the cache, it has a substantial risk of negatively
-+ impacting the performance of other threads running on the chip. */
- __x86_shared_non_temporal_threshold
- = (cpu_features->non_temporal_threshold != 0
- ? cpu_features->non_temporal_threshold
-- : __x86_shared_cache_size * threads * 3 / 4);
-+ : __x86_shared_cache_size * 3 / 4);
-
- /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
- unsigned int minimum_rep_movsb_threshold;
-diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c
-index 03572f7af6..3cc54a8d53 100644
---- a/sysdeps/x86/dl-cet.c
-+++ b/sysdeps/x86/dl-cet.c
-@@ -47,7 +47,10 @@ dl_cet_check (struct link_map *m, const char *program)
- /* No legacy object check if both IBT and SHSTK are always on. */
- if (enable_ibt_type == cet_always_on
- && enable_shstk_type == cet_always_on)
-- return;
-+ {
-+ THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1));
-+ return;
-+ }
-
- /* Check if IBT is enabled by kernel. */
- bool ibt_enabled
-diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h
-index 89911e19e2..4eb3b85a7b 100644
---- a/sysdeps/x86/dl-prop.h
-+++ b/sysdeps/x86/dl-prop.h
-@@ -145,15 +145,15 @@ _dl_process_cet_property_note (struct link_map *l,
- }
-
- static inline void __attribute__ ((unused))
--_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
-+_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
- {
- const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
- _dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align);
- }
-
- static inline int __attribute__ ((always_inline))
--_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
-- void *data)
-+_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
-+ uint32_t datasz, void *data)
- {
- return 0;
- }
-diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c
-new file mode 100644
-index 0000000000..42c795d2a8
---- /dev/null
-+++ b/sysdeps/x86/tst-setjmp-cet.c
-@@ -0,0 +1 @@
-+#include <setjmp/tst-setjmp.c>
-diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
-old mode 100644
-new mode 100755
-index 84f82c2406..fc1840e23f
---- a/sysdeps/x86_64/configure
-+++ b/sysdeps/x86_64/configure
-@@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then
- build_mathvec=yes
- fi
-
--if test "$static_pie" = yes; then
-- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
--$as_echo_n "checking for linker static PIE support... " >&6; }
--if ${libc_cv_ld_static_pie+:} false; then :
-- $as_echo_n "(cached) " >&6
--else
-- cat > conftest.s <<\EOF
-- .text
-- .global _start
-- .weak foo
--_start:
-- leaq foo(%rip), %rax
--EOF
-- libc_cv_pie_option="-Wl,-pie"
-- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
-- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
-- (eval $ac_try) 2>&5
-- ac_status=$?
-- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-- test $ac_status = 0; }; }; then
-- libc_cv_ld_static_pie=yes
-- else
-- libc_cv_ld_static_pie=no
-- fi
--rm -f conftest*
--fi
--{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
--$as_echo "$libc_cv_ld_static_pie" >&6; }
-- if test "$libc_cv_ld_static_pie" != yes; then
-- as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
-- fi
--fi
--
- $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
-
-
-diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
-index cdaba0c075..611a7d9ba3 100644
---- a/sysdeps/x86_64/configure.ac
-+++ b/sysdeps/x86_64/configure.ac
-@@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then
- build_mathvec=yes
- fi
-
--dnl Check if linker supports static PIE with the fix for
--dnl
--dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
--dnl
--if test "$static_pie" = yes; then
-- AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
--cat > conftest.s <<\EOF
-- .text
-- .global _start
-- .weak foo
--_start:
-- leaq foo(%rip), %rax
--EOF
-- libc_cv_pie_option="-Wl,-pie"
-- if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
-- libc_cv_ld_static_pie=yes
-- else
-- libc_cv_ld_static_pie=no
-- fi
--rm -f conftest*])
-- if test "$libc_cv_ld_static_pie" != yes; then
-- AC_MSG_ERROR([linker support for static PIE needed])
-- fi
--fi
--
- dnl It is always possible to access static and hidden symbols in an
- dnl position independent way.
- AC_DEFINE(PI_STATIC_AND_HIDDEN)
-diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
-index ca73d8fef9..363a749cb2 100644
---- a/sysdeps/x86_64/dl-machine.h
-+++ b/sysdeps/x86_64/dl-machine.h
-@@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
- {
- # ifndef RTLD_BOOTSTRAP
- if (sym_map != map
-- && sym_map->l_type != lt_executable
- && !sym_map->l_relocated)
- {
- const char *strtab
- = (const char *) D_PTR (map, l_info[DT_STRTAB]);
-- _dl_error_printf ("\
-+ if (sym_map->l_type == lt_executable)
-+ _dl_fatal_printf ("\
-+%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
-+and creates an unsatisfiable circular dependency.\n",
-+ RTLD_PROGNAME, strtab + refsym->st_name,
-+ map->l_name);
-+ else
-+ _dl_error_printf ("\
- %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
-- RTLD_PROGNAME, map->l_name,
-- sym_map->l_name,
-- strtab + refsym->st_name);
-+ RTLD_PROGNAME, map->l_name,
-+ sym_map->l_name,
-+ strtab + refsym->st_name);
- }
- # endif
- value = ((ElfW(Addr) (*) (void)) value) ();
-diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
-index 7659758972..e5fd5ac9cb 100644
---- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
-+++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
-@@ -32,7 +32,7 @@ IFUNC_SELECTOR (void)
- && CPU_FEATURE_USABLE_P (cpu_features, AVX2))
- return OPTIMIZE (fma);
-
-- if (CPU_FEATURE_USABLE_P (cpu_features, FMA))
-+ if (CPU_FEATURE_USABLE_P (cpu_features, FMA4))
- return OPTIMIZE (fma4);
-
- return OPTIMIZE (sse2);
-diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
-index bd5dc1a3f3..092f364bb6 100644
---- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
-+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
-@@ -56,6 +56,13 @@
- # endif
- #endif
-
-+/* Avoid short distance rep movsb only with non-SSE vector. */
-+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
-+# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
-+#else
-+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
-+#endif
-+
- #ifndef PREFETCH
- # define PREFETCH(addr) prefetcht0 addr
- #endif
-@@ -243,7 +250,21 @@ L(movsb):
- cmpq %r9, %rdi
- /* Avoid slow backward REP MOVSB. */
- jb L(more_8x_vec_backward)
-+# if AVOID_SHORT_DISTANCE_REP_MOVSB
-+ movq %rdi, %rcx
-+ subq %rsi, %rcx
-+ jmp 2f
-+# endif
- 1:
-+# if AVOID_SHORT_DISTANCE_REP_MOVSB
-+ movq %rsi, %rcx
-+ subq %rdi, %rcx
-+2:
-+/* Avoid "rep movsb" if RCX, the distance between source and destination,
-+ is N*4GB + [1..63] with N >= 0. */
-+ cmpl $63, %ecx
-+ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
-+# endif
- mov %RDX_LP, %RCX_LP
- rep movsb
- L(nop):
-diff --git a/sysvipc/test-sysvsem.c b/sysvipc/test-sysvsem.c
-index 01dbff343a..b7284e0b48 100644
---- a/sysvipc/test-sysvsem.c
-+++ b/sysvipc/test-sysvsem.c
-@@ -20,6 +20,7 @@
- #include <stdlib.h>
- #include <errno.h>
- #include <string.h>
-+#include <stdbool.h>
- #include <sys/types.h>
- #include <sys/ipc.h>
- #include <sys/sem.h>
-diff --git a/version.h b/version.h
-index 83cd196798..e6ca7a8857 100644
---- a/version.h
-+++ b/version.h
-@@ -1,4 +1,4 @@
- /* This file just defines the current version number of libc. */
-
--#define RELEASE "release"
-+#define RELEASE "stable"
- #define VERSION "2.32"
-diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure
---- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:02:32.741186019 +1000
-+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:03:05.314302356 +1000
-@@ -4,10 +4,10 @@
- test -n "$libc_cv_slibdir" ||
- case "$prefix" in
- /usr | /usr/)
-- libc_cv_slibdir='/lib64'
-- libc_cv_rtlddir='/lib64'
-+ libc_cv_slibdir='/lib'
-+ libc_cv_rtlddir='/lib'
- if test "$libdir" = '${exec_prefix}/lib'; then
-- libdir='${exec_prefix}/lib64';
-+ libdir='${exec_prefix}/lib';
- # Locale data can be shared between 32-bit and 64-bit libraries.
- libc_cv_complocaledir='${exec_prefix}/lib/locale'
- fi
-diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h
---- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:02:32.742186053 +1000
-+++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:03:05.314302356 +1000
-@@ -18,9 +18,9 @@
- #include <sysdeps/generic/ldconfig.h>
-
- #define SYSDEP_KNOWN_INTERPRETER_NAMES \
-- { "/lib/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
-+ { "/lib32/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
- { "/libx32/ld-linux-x32.so.2", FLAG_ELF_LIBC6 }, \
-- { "/lib64/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
-+ { "/lib/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
- #define SYSDEP_KNOWN_LIBRARY_NAMES \
- { "libc.so.6", FLAG_ELF_LIBC6 }, \
- { "libm.so.6", FLAG_ELF_LIBC6 },
diff --git a/glibc/glibc-2.32-7.patch b/glibc/glibc-2.32-7.patch
new file mode 100644
index 00000000..d7949e11
--- /dev/null
+++ b/glibc/glibc-2.32-7.patch
@@ -0,0 +1,17967 @@
+diff --git a/NEWS b/NEWS
+index 485b8ddffa..d138a45519 100644
+--- a/NEWS
++++ b/NEWS
+@@ -5,6 +5,30 @@ See the end for copying conditions.
+ Please send GNU C library bug reports via <https://sourceware.org/bugzilla/>
+ using `glibc' in the "product" field.
+
++The following bugs are resolved with this release:
++
++ [20019] NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT
++ [26224] iconv hangs when converting some invalid inputs from several IBM
++ character sets (CVE-2020-27618)
++ [26534] libm.so 2.32 SIGILL in pow() due to FMA4 instruction on non-FMA4
++ system
++ [26555] string: strerrorname_np does not return the documented value
++ [26600] Transaction ID collisions cause slow DNS lookups in getaddrinfo
++ [26636] libc: 32-bit shmctl(IPC_INFO) crashes when shminfo struct is
++ at the end of a memory mapping
++ [26637] libc: semctl SEM_STAT_ANY fails to pass the buffer specified
++ by the caller to the kernel
++ [26639] libc: msgctl IPC_INFO and MSG_INFO return garbage
++ [26853] aarch64: Missing unwind information in statically linked startup code
++ [26932] libc: sh: Multiple floating point functions defined as stubs only
++ [27130] "rep movsb" performance issue
++ [27177] GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on doesn't work
++ [27457] vzeroupper use in AVX2 multiarch string functions cause HTM aborts
++ [27974] Overflow bug in some implementation of wcsnlen, wmemchr, and wcsncat
++ [28524] Conversion from ISO-2022-JP-3 with iconv may emit spurious NULs
++ [28607] Masked signals are delivered on thread exit
++ [28755] overflow bug in wcsncmp_avx2 and wcsncmp_evex
++
+ Version 2.32
+
+ Major new features:
+@@ -185,6 +209,14 @@ Security related changes:
+ Dytrych of the Cisco Security Assessment and Penetration Team (See
+ TALOS-2020-1019).
+
++ CVE-2020-27618: An infinite loop has been fixed in the iconv program when
++ invoked with input containing redundant shift sequences in the IBM1364,
++ IBM1371, IBM1388, IBM1390, or IBM1399 character sets.
++
++ CVE-2021-33574: The mq_notify function has a potential use-after-free
++ issue when using a notification type of SIGEV_THREAD and a thread
++ attribute with a non-default affinity mask.
++
+ The following bugs are resolved with this release:
+
+ [9809] localedata: ckb_IQ: new Kurdish Sorani locale
+diff --git a/Rules b/Rules
+index 8b771f6095..beab969fde 100644
+--- a/Rules
++++ b/Rules
+@@ -155,6 +155,7 @@ xtests: tests $(xtests-special)
+ else
+ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
+ $(tests-container:%=$(objpfx)%.out) \
++ $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
+ $(tests-special) $(tests-printers-out)
+ xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
+ endif
+@@ -165,7 +166,7 @@ ifeq ($(run-built-tests),no)
+ tests-expected =
+ else
+ tests-expected = $(tests) $(tests-internal) $(tests-printers) \
+- $(tests-container)
++ $(tests-container) $(tests-mcheck:%=%-mcheck)
+ endif
+ tests:
+ $(..)scripts/merge-test-results.sh -s $(objpfx) $(subdir) \
+@@ -191,6 +192,7 @@ else
+ binaries-pie-tests =
+ binaries-pie-notests =
+ endif
++binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
+ else
+ binaries-all-notests =
+ binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
+@@ -200,6 +202,7 @@ binaries-static-tests =
+ binaries-static =
+ binaries-pie-tests =
+ binaries-pie-notests =
++binaries-mcheck-tests =
+ endif
+
+ binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
+@@ -223,6 +226,14 @@ $(addprefix $(objpfx),$(binaries-shared-tests)): %: %.o \
+ $(+link-tests)
+ endif
+
++ifneq "$(strip $(binaries-mcheck-tests))" ""
++$(addprefix $(objpfx),$(binaries-mcheck-tests)): %-mcheck: %.o \
++ $(link-extra-libs-tests) \
++ $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
++ $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
++ $(+link-tests)
++endif
++
+ ifneq "$(strip $(binaries-pie-tests))" ""
+ $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
+ $(link-extra-libs-tests) \
+@@ -253,6 +264,12 @@ $(addprefix $(objpfx),$(binaries-static-tests)): %: %.o \
+ $(+link-static-tests)
+ endif
+
++# All mcheck tests will be run with MALLOC_CHECK_=3
++define mcheck-ENVS
++$(1)-mcheck-ENV = MALLOC_CHECK_=3
++endef
++$(foreach t,$(tests-mcheck),$(eval $(call mcheck-ENVS,$(t))))
++
+ ifneq "$(strip $(tests) $(tests-internal) $(xtests) $(test-srcs))" ""
+ # These are the implicit rules for making test outputs
+ # from the test programs and whatever input files are present.
+diff --git a/debug/Makefile b/debug/Makefile
+index 3a60d7af7a..0036edd187 100644
+--- a/debug/Makefile
++++ b/debug/Makefile
+@@ -51,7 +51,7 @@ routines = backtrace backtracesyms backtracesymsfd noophooks \
+ explicit_bzero_chk \
+ stack_chk_fail fortify_fail \
+ $(static-only-routines)
+-static-only-routines := warning-nop stack_chk_fail_local
++static-only-routines := stack_chk_fail_local
+
+ # Don't add stack_chk_fail_local.o to libc.a since __stack_chk_fail_local
+ # is an alias of __stack_chk_fail in stack_chk_fail.o.
+diff --git a/debug/warning-nop.c b/debug/warning-nop.c
+deleted file mode 100644
+index 4ab7e182b7..0000000000
+--- a/debug/warning-nop.c
++++ /dev/null
+@@ -1,70 +0,0 @@
+-/* Dummy nop functions to elicit link-time warnings.
+- Copyright (C) 2005-2020 Free Software Foundation, Inc.
+- This file is part of the GNU C Library.
+-
+- The GNU C Library is free software; you can redistribute it and/or
+- modify it under the terms of the GNU Lesser General Public
+- License as published by the Free Software Foundation; either
+- version 2.1 of the License, or (at your option) any later version.
+-
+- In addition to the permissions in the GNU Lesser General Public
+- License, the Free Software Foundation gives you unlimited
+- permission to link the compiled version of this file with other
+- programs, and to distribute those programs without any restriction
+- coming from the use of this file. (The GNU Lesser General Public
+- License restrictions do apply in other respects; for example, they
+- cover modification of the file, and distribution when not linked
+- into another program.)
+-
+- Note that people who make modified versions of this file are not
+- obligated to grant this special exception for their modified
+- versions; it is their choice whether to do so. The GNU Lesser
+- General Public License gives permission to release a modified
+- version without this exception; this exception also makes it
+- possible to release a modified version which carries forward this
+- exception.
+-
+- The GNU C Library is distributed in the hope that it will be useful,
+- but WITHOUT ANY WARRANTY; without even the implied warranty of
+- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- Lesser General Public License for more details.
+-
+- You should have received a copy of the GNU Lesser General Public
+- License along with the GNU C Library; if not, see
+- <https://www.gnu.org/licenses/>. */
+-
+-#include <sys/cdefs.h>
+-
+-static void
+-__attribute__ ((used))
+-nop (void)
+-{
+-}
+-
+-/* Don't insert any other #include's before this #undef! */
+-
+-#undef __warndecl
+-#define __warndecl(name, msg) \
+- extern void name (void) __attribute__ ((alias ("nop"))) attribute_hidden; \
+- link_warning (name, msg)
+-
+-#undef __USE_FORTIFY_LEVEL
+-#define __USE_FORTIFY_LEVEL 99
+-
+-/* Following here we need an #include for each public header file
+- that uses __warndecl. */
+-
+-/* Define away to avoid warnings with compilers that do not have these
+- builtins. */
+-#define __builtin___memcpy_chk(dest, src, len, bos) NULL
+-#define __builtin___memmove_chk(dest, src, len, bos) NULL
+-#define __builtin___mempcpy_chk(dest, src, len, bos) NULL
+-#define __builtin___memset_chk(dest, ch, len, bos) NULL
+-#define __builtin___stpcpy_chk(dest, src, bos) NULL
+-#define __builtin___strcat_chk(dest, src, bos) NULL
+-#define __builtin___strcpy_chk(dest, src, bos) NULL
+-#define __builtin___strncat_chk(dest, src, len, bos) NULL
+-#define __builtin___strncpy_chk(dest, src, len, bos) NULL
+-#define __builtin_object_size(bos, level) 0
+-
+-#include <string.h>
+diff --git a/elf/Makefile b/elf/Makefile
+index 0b78721848..3ba7f4ecfc 100644
+--- a/elf/Makefile
++++ b/elf/Makefile
+@@ -1381,6 +1381,8 @@ CFLAGS-ifuncmain7pie.c += $(pie-ccflag)
+ CFLAGS-ifuncmain9pie.c += $(pie-ccflag)
+ CFLAGS-tst-ifunc-textrel.c += $(pic-ccflag)
+
++LDFLAGS-ifuncmain6pie = -Wl,-z,lazy
++
+ $(objpfx)ifuncmain1pie: $(objpfx)ifuncmod1.so
+ $(objpfx)ifuncmain1staticpie: $(objpfx)ifuncdep1pic.o
+ $(objpfx)ifuncmain1vispie: $(objpfx)ifuncmod1.so
+@@ -1630,8 +1632,6 @@ $(objpfx)tst-nodelete-dlclose.out: $(objpfx)tst-nodelete-dlclose-dso.so \
+
+ tst-env-setuid-ENV = MALLOC_CHECK_=2 MALLOC_MMAP_THRESHOLD_=4096 \
+ LD_HWCAP_MASK=0x1
+-tst-env-setuid-tunables-ENV = \
+- GLIBC_TUNABLES=glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096
+
+ $(objpfx)tst-debug1: $(libdl)
+ $(objpfx)tst-debug1.out: $(objpfx)tst-debug1mod1.so
+diff --git a/elf/dl-load.c b/elf/dl-load.c
+index e39980fb19..71867e7c1a 100644
+--- a/elf/dl-load.c
++++ b/elf/dl-load.c
+@@ -855,10 +855,12 @@ lose (int code, int fd, const char *name, char *realname, struct link_map *l,
+
+ /* Process PT_GNU_PROPERTY program header PH in module L after
+ PT_LOAD segments are mapped. Only one NT_GNU_PROPERTY_TYPE_0
+- note is handled which contains processor specific properties. */
++ note is handled which contains processor specific properties.
++ FD is -1 for the kernel mapped main executable otherwise it is
++ the fd used for loading module L. */
+
+ void
+-_dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
++_dl_process_pt_gnu_property (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+ {
+ const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
+ const ElfW(Addr) size = ph->p_memsz;
+@@ -905,7 +907,7 @@ _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph)
+ last_type = type;
+
+ /* Target specific property processing. */
+- if (_dl_process_gnu_property (l, type, datasz, ptr) == 0)
++ if (_dl_process_gnu_property (l, fd, type, datasz, ptr) == 0)
+ return;
+
+ /* Check the next property item. */
+@@ -1251,21 +1253,6 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
+ maplength, has_holes, loader);
+ if (__glibc_unlikely (errstring != NULL))
+ goto call_lose;
+-
+- /* Process program headers again after load segments are mapped in
+- case processing requires accessing those segments. Scan program
+- headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
+- exits. */
+- for (ph = &phdr[l->l_phnum]; ph != phdr; --ph)
+- switch (ph[-1].p_type)
+- {
+- case PT_NOTE:
+- _dl_process_pt_note (l, &ph[-1]);
+- break;
+- case PT_GNU_PROPERTY:
+- _dl_process_pt_gnu_property (l, &ph[-1]);
+- break;
+- }
+ }
+
+ if (l->l_ld == 0)
+@@ -1377,6 +1364,21 @@ cannot enable executable stack as shared object requires");
+ if (l->l_tls_initimage != NULL)
+ l->l_tls_initimage = (char *) l->l_tls_initimage + l->l_addr;
+
++ /* Process program headers again after load segments are mapped in
++ case processing requires accessing those segments. Scan program
++ headers backward so that PT_NOTE can be skipped if PT_GNU_PROPERTY
++ exits. */
++ for (ph = &l->l_phdr[l->l_phnum]; ph != l->l_phdr; --ph)
++ switch (ph[-1].p_type)
++ {
++ case PT_NOTE:
++ _dl_process_pt_note (l, fd, &ph[-1]);
++ break;
++ case PT_GNU_PROPERTY:
++ _dl_process_pt_gnu_property (l, fd, &ph[-1]);
++ break;
++ }
++
+ /* We are done mapping in the file. We no longer need the descriptor. */
+ if (__glibc_unlikely (__close_nocancel (fd) != 0))
+ {
+diff --git a/elf/dl-open.c b/elf/dl-open.c
+index 8769e47051..55b39e1bbe 100644
+--- a/elf/dl-open.c
++++ b/elf/dl-open.c
+@@ -887,7 +887,7 @@ no more namespaces available for dlmopen()"));
+ /* Avoid keeping around a dangling reference to the libc.so link
+ map in case it has been cached in libc_map. */
+ if (!args.libc_already_loaded)
+- GL(dl_ns)[nsid].libc_map = NULL;
++ GL(dl_ns)[args.nsid].libc_map = NULL;
+
+ /* Remove the object from memory. It may be in an inconsistent
+ state if relocation failed, for example. */
+diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c
+index 26e6e26612..15b29bcb90 100644
+--- a/elf/dl-tunables.c
++++ b/elf/dl-tunables.c
+@@ -177,6 +177,7 @@ parse_tunables (char *tunestr, char *valstring)
+ return;
+
+ char *p = tunestr;
++ size_t off = 0;
+
+ while (true)
+ {
+@@ -190,7 +191,11 @@ parse_tunables (char *tunestr, char *valstring)
+ /* If we reach the end of the string before getting a valid name-value
+ pair, bail out. */
+ if (p[len] == '\0')
+- return;
++ {
++ if (__libc_enable_secure)
++ tunestr[off] = '\0';
++ return;
++ }
+
+ /* We did not find a valid name-value pair before encountering the
+ colon. */
+@@ -216,35 +221,28 @@ parse_tunables (char *tunestr, char *valstring)
+
+ if (tunable_is_name (cur->name, name))
+ {
+- /* If we are in a secure context (AT_SECURE) then ignore the tunable
+- unless it is explicitly marked as secure. Tunable values take
+- precedence over their envvar aliases. */
++ /* If we are in a secure context (AT_SECURE) then ignore the
++ tunable unless it is explicitly marked as secure. Tunable
++ values take precedence over their envvar aliases. We write
++ the tunables that are not SXID_ERASE back to TUNESTR, thus
++ dropping all SXID_ERASE tunables and any invalid or
++ unrecognized tunables. */
+ if (__libc_enable_secure)
+ {
+- if (cur->security_level == TUNABLE_SECLEVEL_SXID_ERASE)
++ if (cur->security_level != TUNABLE_SECLEVEL_SXID_ERASE)
+ {
+- if (p[len] == '\0')
+- {
+- /* Last tunable in the valstring. Null-terminate and
+- return. */
+- *name = '\0';
+- return;
+- }
+- else
+- {
+- /* Remove the current tunable from the string. We do
+- this by overwriting the string starting from NAME
+- (which is where the current tunable begins) with
+- the remainder of the string. We then have P point
+- to NAME so that we continue in the correct
+- position in the valstring. */
+- char *q = &p[len + 1];
+- p = name;
+- while (*q != '\0')
+- *name++ = *q++;
+- name[0] = '\0';
+- len = 0;
+- }
++ if (off > 0)
++ tunestr[off++] = ':';
++
++ const char *n = cur->name;
++
++ while (*n != '\0')
++ tunestr[off++] = *n++;
++
++ tunestr[off++] = '=';
++
++ for (size_t j = 0; j < len; j++)
++ tunestr[off++] = value[j];
+ }
+
+ if (cur->security_level != TUNABLE_SECLEVEL_NONE)
+@@ -257,9 +255,7 @@ parse_tunables (char *tunestr, char *valstring)
+ }
+ }
+
+- if (p[len] == '\0')
+- return;
+- else
++ if (p[len] != '\0')
+ p += len + 1;
+ }
+ }
+diff --git a/elf/ifuncmain6pie.c b/elf/ifuncmain6pie.c
+index 04faeb86ef..4a01906836 100644
+--- a/elf/ifuncmain6pie.c
++++ b/elf/ifuncmain6pie.c
+@@ -9,7 +9,6 @@
+ #include "ifunc-sel.h"
+
+ typedef int (*foo_p) (void);
+-extern foo_p foo_ptr;
+
+ static int
+ one (void)
+@@ -28,20 +27,17 @@ foo_ifunc (void)
+ }
+
+ extern int foo (void);
+-extern foo_p get_foo (void);
++extern int call_foo (void);
+ extern foo_p get_foo_p (void);
+
+-foo_p my_foo_ptr = foo;
++foo_p foo_ptr = foo;
+
+ int
+ main (void)
+ {
+ foo_p p;
+
+- p = get_foo ();
+- if (p != foo)
+- abort ();
+- if ((*p) () != -30)
++ if (call_foo () != -30)
+ abort ();
+
+ p = get_foo_p ();
+@@ -52,12 +48,8 @@ main (void)
+
+ if (foo_ptr != foo)
+ abort ();
+- if (my_foo_ptr != foo)
+- abort ();
+ if ((*foo_ptr) () != -30)
+ abort ();
+- if ((*my_foo_ptr) () != -30)
+- abort ();
+ if (foo () != -30)
+ abort ();
+
+diff --git a/elf/ifuncmod6.c b/elf/ifuncmod6.c
+index 2e16c1d06d..2f6d0715e6 100644
+--- a/elf/ifuncmod6.c
++++ b/elf/ifuncmod6.c
+@@ -4,7 +4,7 @@ extern int foo (void);
+
+ typedef int (*foo_p) (void);
+
+-foo_p foo_ptr = foo;
++extern foo_p foo_ptr;
+
+ foo_p
+ get_foo_p (void)
+@@ -12,8 +12,8 @@ get_foo_p (void)
+ return foo_ptr;
+ }
+
+-foo_p
+-get_foo (void)
++int
++call_foo (void)
+ {
+- return foo;
++ return foo ();
+ }
+diff --git a/elf/rtld.c b/elf/rtld.c
+index 5b882163fa..14a42ed00a 100644
+--- a/elf/rtld.c
++++ b/elf/rtld.c
+@@ -1534,10 +1534,10 @@ of this helper program; chances are you did not intend to run this program.\n\
+ switch (ph[-1].p_type)
+ {
+ case PT_NOTE:
+- _dl_process_pt_note (main_map, &ph[-1]);
++ _dl_process_pt_note (main_map, -1, &ph[-1]);
+ break;
+ case PT_GNU_PROPERTY:
+- _dl_process_pt_gnu_property (main_map, &ph[-1]);
++ _dl_process_pt_gnu_property (main_map, -1, &ph[-1]);
+ break;
+ }
+
+diff --git a/elf/tst-env-setuid-tunables.c b/elf/tst-env-setuid-tunables.c
+index 971d5892b1..ca0c8c245c 100644
+--- a/elf/tst-env-setuid-tunables.c
++++ b/elf/tst-env-setuid-tunables.c
+@@ -25,35 +25,76 @@
+ #include "config.h"
+ #undef _LIBC
+
+-#define test_parent test_parent_tunables
+-#define test_child test_child_tunables
+-
+-static int test_child_tunables (void);
+-static int test_parent_tunables (void);
+-
+-#include "tst-env-setuid.c"
+-
+-#define CHILD_VALSTRING_VALUE "glibc.malloc.mmap_threshold=4096"
+-#define PARENT_VALSTRING_VALUE \
+- "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096"
++#include <errno.h>
++#include <fcntl.h>
++#include <stdlib.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++#include <sys/stat.h>
++#include <sys/wait.h>
++#include <unistd.h>
++#include <intprops.h>
++#include <array_length.h>
++
++#include <support/check.h>
++#include <support/support.h>
++#include <support/test-driver.h>
++#include <support/capture_subprocess.h>
++
++const char *teststrings[] =
++{
++ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.check=2:glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.check=2:glibc.malloc.mmap_threshold=4096:glibc.malloc.check=2",
++ "glibc.malloc.perturb=0x800",
++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.perturb=0x800:not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
++ "glibc.not_valid.check=2:glibc.malloc.mmap_threshold=4096",
++ "not_valid.malloc.check=2:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096:glibc.malloc.check=2",
++ "glibc.malloc.check=4:glibc.malloc.garbage=2:glibc.maoc.mmap_threshold=4096",
++ ":glibc.malloc.garbage=2:glibc.malloc.check=1",
++ "glibc.malloc.check=1:glibc.malloc.check=2",
++ "not_valid.malloc.check=2",
++ "glibc.not_valid.check=2",
++};
++
++const char *resultstrings[] =
++{
++ "glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.perturb=0x800",
++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.perturb=0x800:glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.mmap_threshold=4096",
++ "glibc.malloc.mmap_threshold=4096",
++ "",
++ "",
++ "",
++ "",
++ "",
++ "",
++};
+
+ static int
+-test_child_tunables (void)
++test_child (int off)
+ {
+ const char *val = getenv ("GLIBC_TUNABLES");
+
+ #if HAVE_TUNABLES
+- if (val != NULL && strcmp (val, CHILD_VALSTRING_VALUE) == 0)
++ if (val != NULL && strcmp (val, resultstrings[off]) == 0)
+ return 0;
+
+ if (val != NULL)
+- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
++ printf ("[%d] Unexpected GLIBC_TUNABLES VALUE %s\n", off, val);
+
+ return 1;
+ #else
+ if (val != NULL)
+ {
+- printf ("GLIBC_TUNABLES not cleared\n");
++ printf ("[%d] GLIBC_TUNABLES not cleared\n", off);
+ return 1;
+ }
+ return 0;
+@@ -61,15 +102,48 @@ test_child_tunables (void)
+ }
+
+ static int
+-test_parent_tunables (void)
++do_test (int argc, char **argv)
+ {
+- const char *val = getenv ("GLIBC_TUNABLES");
++ /* Setgid child process. */
++ if (argc == 2)
++ {
++ if (getgid () == getegid ())
++ /* This can happen if the file system is mounted nosuid. */
++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
++ (intmax_t) getgid ());
+
+- if (val != NULL && strcmp (val, PARENT_VALSTRING_VALUE) == 0)
+- return 0;
++ int ret = test_child (atoi (argv[1]));
+
+- if (val != NULL)
+- printf ("Unexpected GLIBC_TUNABLES VALUE %s\n", val);
++ if (ret != 0)
++ exit (1);
+
+- return 1;
++ exit (EXIT_SUCCESS);
++ }
++ else
++ {
++ int ret = 0;
++
++ /* Spawn tests. */
++ for (int i = 0; i < array_length (teststrings); i++)
++ {
++ char buf[INT_BUFSIZE_BOUND (int)];
++
++ printf ("Spawned test for %s (%d)\n", teststrings[i], i);
++ snprintf (buf, sizeof (buf), "%d\n", i);
++ if (setenv ("GLIBC_TUNABLES", teststrings[i], 1) != 0)
++ exit (1);
++
++ int status = support_capture_subprogram_self_sgid (buf);
++
++ /* Bail out early if unsupported. */
++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
++ return EXIT_UNSUPPORTED;
++
++ ret |= status;
++ }
++ return ret;
++ }
+ }
++
++#define TEST_FUNCTION_ARGV do_test
++#include <support/test-driver.c>
+diff --git a/elf/tst-env-setuid.c b/elf/tst-env-setuid.c
+index 41dc79e83a..2dbccdb69e 100644
+--- a/elf/tst-env-setuid.c
++++ b/elf/tst-env-setuid.c
+@@ -29,173 +29,12 @@
+ #include <sys/wait.h>
+ #include <unistd.h>
+
++#include <support/check.h>
+ #include <support/support.h>
+ #include <support/test-driver.h>
++#include <support/capture_subprocess.h>
+
+ static char SETGID_CHILD[] = "setgid-child";
+-#define CHILD_STATUS 42
+-
+-/* Return a GID which is not our current GID, but is present in the
+- supplementary group list. */
+-static gid_t
+-choose_gid (void)
+-{
+- const int count = 64;
+- gid_t groups[count];
+- int ret = getgroups (count, groups);
+- if (ret < 0)
+- {
+- printf ("getgroups: %m\n");
+- exit (1);
+- }
+- gid_t current = getgid ();
+- for (int i = 0; i < ret; ++i)
+- {
+- if (groups[i] != current)
+- return groups[i];
+- }
+- return 0;
+-}
+-
+-/* Spawn and execute a program and verify that it returns the CHILD_STATUS. */
+-static pid_t
+-do_execve (char **args)
+-{
+- pid_t kid = vfork ();
+-
+- if (kid < 0)
+- {
+- printf ("vfork: %m\n");
+- return -1;
+- }
+-
+- if (kid == 0)
+- {
+- /* Child process. */
+- execve (args[0], args, environ);
+- _exit (-errno);
+- }
+-
+- if (kid < 0)
+- return 1;
+-
+- int status;
+-
+- if (waitpid (kid, &status, 0) < 0)
+- {
+- printf ("waitpid: %m\n");
+- return 1;
+- }
+-
+- if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
+- return EXIT_UNSUPPORTED;
+-
+- if (!WIFEXITED (status) || WEXITSTATUS (status) != CHILD_STATUS)
+- {
+- printf ("Unexpected exit status %d from child process\n",
+- WEXITSTATUS (status));
+- return 1;
+- }
+- return 0;
+-}
+-
+-/* Copies the executable into a restricted directory, so that we can
+- safely make it SGID with the TARGET group ID. Then runs the
+- executable. */
+-static int
+-run_executable_sgid (gid_t target)
+-{
+- char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
+- test_dir, (intmax_t) getpid ());
+- char *execname = xasprintf ("%s/bin", dirname);
+- int infd = -1;
+- int outfd = -1;
+- int ret = 0;
+- if (mkdir (dirname, 0700) < 0)
+- {
+- printf ("mkdir: %m\n");
+- goto err;
+- }
+- infd = open ("/proc/self/exe", O_RDONLY);
+- if (infd < 0)
+- {
+- printf ("open (/proc/self/exe): %m\n");
+- goto err;
+- }
+- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
+- if (outfd < 0)
+- {
+- printf ("open (%s): %m\n", execname);
+- goto err;
+- }
+- char buf[4096];
+- for (;;)
+- {
+- ssize_t rdcount = read (infd, buf, sizeof (buf));
+- if (rdcount < 0)
+- {
+- printf ("read: %m\n");
+- goto err;
+- }
+- if (rdcount == 0)
+- break;
+- char *p = buf;
+- char *end = buf + rdcount;
+- while (p != end)
+- {
+- ssize_t wrcount = write (outfd, buf, end - p);
+- if (wrcount == 0)
+- errno = ENOSPC;
+- if (wrcount <= 0)
+- {
+- printf ("write: %m\n");
+- goto err;
+- }
+- p += wrcount;
+- }
+- }
+- if (fchown (outfd, getuid (), target) < 0)
+- {
+- printf ("fchown (%s): %m\n", execname);
+- goto err;
+- }
+- if (fchmod (outfd, 02750) < 0)
+- {
+- printf ("fchmod (%s): %m\n", execname);
+- goto err;
+- }
+- if (close (outfd) < 0)
+- {
+- printf ("close (outfd): %m\n");
+- goto err;
+- }
+- if (close (infd) < 0)
+- {
+- printf ("close (infd): %m\n");
+- goto err;
+- }
+-
+- char *args[] = {execname, SETGID_CHILD, NULL};
+-
+- ret = do_execve (args);
+-
+-err:
+- if (outfd >= 0)
+- close (outfd);
+- if (infd >= 0)
+- close (infd);
+- if (execname)
+- {
+- unlink (execname);
+- free (execname);
+- }
+- if (dirname)
+- {
+- rmdir (dirname);
+- free (dirname);
+- }
+- return ret;
+-}
+
+ #ifndef test_child
+ static int
+@@ -256,40 +95,32 @@ do_test (int argc, char **argv)
+ if (argc == 2 && strcmp (argv[1], SETGID_CHILD) == 0)
+ {
+ if (getgid () == getegid ())
+- {
+- /* This can happen if the file system is mounted nosuid. */
+- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
+- (intmax_t) getgid ());
+- exit (EXIT_UNSUPPORTED);
+- }
++ /* This can happen if the file system is mounted nosuid. */
++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
++ (intmax_t) getgid ());
+
+ int ret = test_child ();
+
+ if (ret != 0)
+ exit (1);
+
+- exit (CHILD_STATUS);
++ exit (EXIT_SUCCESS);
+ }
+ else
+ {
+ if (test_parent () != 0)
+ exit (1);
+
+- /* Try running a setgid program. */
+- gid_t target = choose_gid ();
+- if (target == 0)
+- {
+- fprintf (stderr,
+- "Could not find a suitable GID for user %jd, skipping test\n",
+- (intmax_t) getuid ());
+- exit (0);
+- }
++ int status = support_capture_subprogram_self_sgid (SETGID_CHILD);
+
+- return run_executable_sgid (target);
+- }
++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
++ return EXIT_UNSUPPORTED;
++
++ if (!WIFEXITED (status))
++ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
+
+- /* Something went wrong and our argv was corrupted. */
+- _exit (1);
++ return 0;
++ }
+ }
+
+ #define TEST_FUNCTION_ARGV do_test
+diff --git a/iconv/Versions b/iconv/Versions
+index 8a5f4cf780..d51af52fa3 100644
+--- a/iconv/Versions
++++ b/iconv/Versions
+@@ -6,7 +6,9 @@ libc {
+ GLIBC_PRIVATE {
+ # functions shared with iconv program
+ __gconv_get_alias_db; __gconv_get_cache; __gconv_get_modules_db;
+- __gconv_open; __gconv_create_spec;
++
++ # functions used elsewhere in glibc
++ __gconv_open; __gconv_create_spec; __gconv_destroy_spec;
+
+ # function used by the gconv modules
+ __gconv_transliterate;
+diff --git a/iconv/gconv_charset.c b/iconv/gconv_charset.c
+index 6ccd0773cc..4ba0aa99f5 100644
+--- a/iconv/gconv_charset.c
++++ b/iconv/gconv_charset.c
+@@ -216,3 +216,13 @@ out:
+ return ret;
+ }
+ libc_hidden_def (__gconv_create_spec)
++
++
++void
++__gconv_destroy_spec (struct gconv_spec *conv_spec)
++{
++ free (conv_spec->fromcode);
++ free (conv_spec->tocode);
++ return;
++}
++libc_hidden_def (__gconv_destroy_spec)
+diff --git a/iconv/gconv_charset.h b/iconv/gconv_charset.h
+index b39b09aea1..e9c122cf7e 100644
+--- a/iconv/gconv_charset.h
++++ b/iconv/gconv_charset.h
+@@ -48,33 +48,6 @@
+ #define GCONV_IGNORE_ERRORS_SUFFIX "IGNORE"
+
+
+-/* This function accepts the charset names of the source and destination of the
+- conversion and populates *conv_spec with an equivalent conversion
+- specification that may later be used by __gconv_open. The charset names
+- might contain options in the form of suffixes that alter the conversion,
+- e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
+- and truncating any suffix options in fromcode, and processing and truncating
+- any suffix options in tocode. Supported suffix options ("TRANSLIT" or
+- "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
+- to be set to true. Unrecognized suffix options are silently discarded. If
+- the function succeeds, it returns conv_spec back to the caller. It returns
+- NULL upon failure. */
+-struct gconv_spec *
+-__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
+- const char *tocode);
+-libc_hidden_proto (__gconv_create_spec)
+-
+-
+-/* This function frees all heap memory allocated by __gconv_create_spec. */
+-static void __attribute__ ((unused))
+-gconv_destroy_spec (struct gconv_spec *conv_spec)
+-{
+- free (conv_spec->fromcode);
+- free (conv_spec->tocode);
+- return;
+-}
+-
+-
+ /* This function copies in-order, characters from the source 's' that are
+ either alpha-numeric or one in one of these: "_-.,:/" - into the destination
+ 'wp' while dropping all other characters. In the process, it converts all
+diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
+index e86938dae7..f721ce30ff 100644
+--- a/iconv/gconv_int.h
++++ b/iconv/gconv_int.h
+@@ -152,6 +152,27 @@ extern int __gconv_open (struct gconv_spec *conv_spec,
+ __gconv_t *handle, int flags);
+ libc_hidden_proto (__gconv_open)
+
++/* This function accepts the charset names of the source and destination of the
++ conversion and populates *conv_spec with an equivalent conversion
++ specification that may later be used by __gconv_open. The charset names
++ might contain options in the form of suffixes that alter the conversion,
++ e.g. "ISO-10646/UTF-8/TRANSLIT". It processes the charset names, ignoring
++ and truncating any suffix options in fromcode, and processing and truncating
++ any suffix options in tocode. Supported suffix options ("TRANSLIT" or
++ "IGNORE") when found in tocode lead to the corresponding flag in *conv_spec
++ to be set to true. Unrecognized suffix options are silently discarded. If
++ the function succeeds, it returns conv_spec back to the caller. It returns
++ NULL upon failure. */
++extern struct gconv_spec *
++__gconv_create_spec (struct gconv_spec *conv_spec, const char *fromcode,
++ const char *tocode);
++libc_hidden_proto (__gconv_create_spec)
++
++/* This function frees all heap memory allocated by __gconv_create_spec. */
++extern void
++__gconv_destroy_spec (struct gconv_spec *conv_spec);
++libc_hidden_proto (__gconv_destroy_spec)
++
+ /* Free resources associated with transformation descriptor CD. */
+ extern int __gconv_close (__gconv_t cd)
+ attribute_hidden;
+diff --git a/iconv/iconv_open.c b/iconv/iconv_open.c
+index dd54bc12e0..5b30055c04 100644
+--- a/iconv/iconv_open.c
++++ b/iconv/iconv_open.c
+@@ -39,7 +39,7 @@ iconv_open (const char *tocode, const char *fromcode)
+
+ int res = __gconv_open (&conv_spec, &cd, 0);
+
+- gconv_destroy_spec (&conv_spec);
++ __gconv_destroy_spec (&conv_spec);
+
+ if (__builtin_expect (res, __GCONV_OK) != __GCONV_OK)
+ {
+diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
+index b4334faa57..d59979759c 100644
+--- a/iconv/iconv_prog.c
++++ b/iconv/iconv_prog.c
+@@ -184,7 +184,7 @@ main (int argc, char *argv[])
+ /* Let's see whether we have these coded character sets. */
+ res = __gconv_open (&conv_spec, &cd, 0);
+
+- gconv_destroy_spec (&conv_spec);
++ __gconv_destroy_spec (&conv_spec);
+
+ if (res != __GCONV_OK)
+ {
+diff --git a/iconv/tst-iconv_prog.sh b/iconv/tst-iconv_prog.sh
+index 8298136b7f..d8db7b335c 100644
+--- a/iconv/tst-iconv_prog.sh
++++ b/iconv/tst-iconv_prog.sh
+@@ -102,12 +102,16 @@ hangarray=(
+ "\x00\x80;-c;IBM1161;UTF-8//TRANSLIT//IGNORE"
+ "\x00\xdb;-c;IBM1162;UTF-8//TRANSLIT//IGNORE"
+ "\x00\x70;-c;IBM12712;UTF-8//TRANSLIT//IGNORE"
+-# These are known hangs that are yet to be fixed:
+-# "\x00\x0f;-c;IBM1364;UTF-8"
+-# "\x00\x0f;-c;IBM1371;UTF-8"
+-# "\x00\x0f;-c;IBM1388;UTF-8"
+-# "\x00\x0f;-c;IBM1390;UTF-8"
+-# "\x00\x0f;-c;IBM1399;UTF-8"
++"\x00\x0f;-c;IBM1364;UTF-8"
++"\x0e\x0e;-c;IBM1364;UTF-8"
++"\x00\x0f;-c;IBM1371;UTF-8"
++"\x0e\x0e;-c;IBM1371;UTF-8"
++"\x00\x0f;-c;IBM1388;UTF-8"
++"\x0e\x0e;-c;IBM1388;UTF-8"
++"\x00\x0f;-c;IBM1390;UTF-8"
++"\x0e\x0e;-c;IBM1390;UTF-8"
++"\x00\x0f;-c;IBM1399;UTF-8"
++"\x0e\x0e;-c;IBM1399;UTF-8"
+ "\x00\x53;-c;IBM16804;UTF-8//TRANSLIT//IGNORE"
+ "\x00\x41;-c;IBM274;UTF-8//TRANSLIT//IGNORE"
+ "\x00\x41;-c;IBM275;UTF-8//TRANSLIT//IGNORE"
+diff --git a/iconvdata/Makefile b/iconvdata/Makefile
+index 4ec2741cdc..b67b4feeb4 100644
+--- a/iconvdata/Makefile
++++ b/iconvdata/Makefile
+@@ -1,4 +1,5 @@
+ # Copyright (C) 1997-2020 Free Software Foundation, Inc.
++# Copyright (C) The GNU Toolchain Authors.
+ # This file is part of the GNU C Library.
+
+ # The GNU C Library is free software; you can redistribute it and/or
+@@ -73,7 +74,8 @@ modules.so := $(addsuffix .so, $(modules))
+ ifeq (yes,$(build-shared))
+ tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
+ tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
+- bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4
++ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \
++ bug-iconv13 bug-iconv14 bug-iconv15
+ ifeq ($(have-thread-library),yes)
+ tests += bug-iconv3
+ endif
+@@ -321,6 +323,10 @@ $(objpfx)bug-iconv10.out: $(objpfx)gconv-modules \
+ $(addprefix $(objpfx),$(modules.so))
+ $(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
+ $(addprefix $(objpfx),$(modules.so))
++$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
++ $(addprefix $(objpfx),$(modules.so))
++$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
++ $(addprefix $(objpfx),$(modules.so))
+
+ $(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
+ $(addprefix $(objpfx),$(modules.so)) \
+diff --git a/iconvdata/bug-iconv13.c b/iconvdata/bug-iconv13.c
+new file mode 100644
+index 0000000000..87aaff398e
+--- /dev/null
++++ b/iconvdata/bug-iconv13.c
+@@ -0,0 +1,53 @@
++/* bug 24973: Test EUC-KR module
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <errno.h>
++#include <iconv.h>
++#include <stdio.h>
++#include <support/check.h>
++
++static int
++do_test (void)
++{
++ iconv_t cd = iconv_open ("UTF-8//IGNORE", "EUC-KR");
++ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
++
++ /* 0xfe (->0x7e : row 94) and 0xc9 (->0x49 : row 41) are user-defined
++ areas, which are not allowed and should be skipped over due to
++ //IGNORE. The trailing 0xfe is also an incomplete sequence, which
++ should be checked first. */
++ char input[4] = { '\xc9', '\xa1', '\0', '\xfe' };
++ char *inptr = input;
++ size_t insize = sizeof (input);
++ char output[4];
++ char *outptr = output;
++ size_t outsize = sizeof (output);
++
++ /* This used to crash due to a buffer overrun (bug 24973). */
++ TEST_VERIFY (iconv (cd, &inptr, &insize, &outptr, &outsize) == (size_t) -1);
++ TEST_VERIFY (errno == EINVAL); /* Incomplete trailing input. */
++ /* The conversion should produce one character, the converted null
++ character. */
++ TEST_VERIFY (sizeof (output) - outsize == 1);
++
++ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/iconvdata/bug-iconv14.c b/iconvdata/bug-iconv14.c
+new file mode 100644
+index 0000000000..902f140fa9
+--- /dev/null
++++ b/iconvdata/bug-iconv14.c
+@@ -0,0 +1,127 @@
++/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <iconv.h>
++#include <string.h>
++#include <errno.h>
++#include <support/check.h>
++
++/* Use an escape sequence to return to the initial state. */
++static void
++with_escape_sequence (void)
++{
++ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
++ TEST_VERIFY_EXIT (c != (iconv_t) -1);
++
++ char in[] = "\e$(O+D\e(B";
++ char *inbuf = in;
++ size_t inleft = strlen (in);
++ char out[3]; /* Room for only one 2-byte UTF-8 character. */
++ char *outbuf;
++ size_t outleft;
++
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
++ TEST_COMPARE (errno, E2BIG);
++ TEST_COMPARE (inleft, 3); /* The trailing "\e(B" is still unread. */
++ TEST_COMPARE (inbuf - in, strlen (in) - 3);
++ TEST_COMPARE (outleft, sizeof (out) - 2);
++ TEST_COMPARE (outbuf - out, 2);
++ TEST_COMPARE (out[0] & 0xff, 0xc3); /* c3 a6 is UTF-8 for U+00E6. */
++ TEST_COMPARE (out[1] & 0xff, 0xa6);
++
++ /* Return to the initial shift state, producing the pending
++ character. */
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
++ TEST_COMPARE (inleft, 0);
++ TEST_COMPARE (inbuf - in, strlen (in));
++ TEST_COMPARE (outleft, sizeof (out) - 2);
++ TEST_COMPARE (outbuf - out, 2);
++ TEST_COMPARE (out[0] & 0xff, 0xcc); /* cc 80 is UTF-8 for U+0300. */
++ TEST_COMPARE (out[1] & 0xff, 0x80);
++
++ /* Nothing should be flushed the second time. */
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
++ TEST_COMPARE (outleft, sizeof (out));
++ TEST_COMPARE (outbuf - out, 0);
++ TEST_COMPARE (out[0] & 0xff, 0xcc); /* Buffer contents unchanged. */
++ TEST_COMPARE (out[1] & 0xff, 0x80);
++
++ TEST_COMPARE (iconv_close (c), 0);
++}
++
++/* Use an explicit flush to return to the initial state. */
++static void
++with_flush (void)
++{
++ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
++ TEST_VERIFY_EXIT (c != (iconv_t) -1);
++
++ char in[] = "\e$(O+D";
++ char *inbuf = in;
++ size_t inleft = strlen (in);
++ char out[3]; /* Room for only one 2-byte UTF-8 character. */
++ char *outbuf;
++ size_t outleft;
++
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
++ TEST_COMPARE (errno, E2BIG);
++ TEST_COMPARE (inleft, 0); /* Input is fully consumed despite E2BIG. */
++ TEST_COMPARE (inbuf - in, strlen (in));
++ TEST_COMPARE (outleft, sizeof (out) - 2);
++ TEST_COMPARE (outbuf - out, 2);
++ TEST_COMPARE (out[0] & 0xff, 0xc3); /* c3 a6 is UTF-8 for U+00E6. */
++ TEST_COMPARE (out[1] & 0xff, 0xa6);
++
++ /* Flush the pending character. */
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
++ TEST_COMPARE (outleft, sizeof (out) - 2);
++ TEST_COMPARE (outbuf - out, 2);
++ TEST_COMPARE (out[0] & 0xff, 0xcc); /* cc 80 is UTF-8 for U+0300. */
++ TEST_COMPARE (out[1] & 0xff, 0x80);
++
++ /* Nothing should be flushed the second time. */
++ outbuf = out;
++ outleft = sizeof (out);
++ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
++ TEST_COMPARE (outleft, sizeof (out));
++ TEST_COMPARE (outbuf - out, 0);
++ TEST_COMPARE (out[0] & 0xff, 0xcc); /* Buffer contents unchanged. */
++ TEST_COMPARE (out[1] & 0xff, 0x80);
++
++ TEST_COMPARE (iconv_close (c), 0);
++}
++
++static int
++do_test (void)
++{
++ with_escape_sequence (); /* Pending character drained by "\e(B". */
++ with_flush (); /* Pending character drained by a NULL-input call. */
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/iconvdata/bug-iconv15.c b/iconvdata/bug-iconv15.c
+new file mode 100644
+index 0000000000..cc04bd0313
+--- /dev/null
++++ b/iconvdata/bug-iconv15.c
+@@ -0,0 +1,60 @@
++/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
++ may emit spurious NUL character on state reset.
++ Copyright (C) The GNU Toolchain Authors.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <stddef.h>
++#include <iconv.h>
++#include <support/check.h>
++
++static int
++do_test (void)
++{
++ char in[] = "\x1b(I"; /* An escape sequence only, no text. */
++ char *inbuf = in;
++ size_t inleft = sizeof (in) - 1;
++ char out[1];
++ char *outbuf = out;
++ size_t outleft = sizeof (out);
++ iconv_t cd;
++
++ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
++ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
++
++ /* First call to iconv should alter internal state.
++ Now, JISX0201_Kana_set is selected and
++ state value != ASCII_set. */
++ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
++
++ /* No bytes should have been added to
++ the output buffer at this point. */
++ TEST_VERIFY (outbuf == out);
++ TEST_VERIFY (outleft == sizeof (out));
++
++ /* This reset call emitted a spurious NUL character in unpatched glibc. */
++ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
++
++ /* No characters are expected to be produced. */
++ TEST_VERIFY (outbuf == out);
++ TEST_VERIFY (outleft == sizeof (out));
++
++ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/iconvdata/euc-kr.c b/iconvdata/euc-kr.c
+index b0d56cf3ee..1045bae926 100644
+--- a/iconvdata/euc-kr.c
++++ b/iconvdata/euc-kr.c
+@@ -80,11 +80,7 @@ euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
+ \
+ if (ch <= 0x9f) \
+ ++inptr; \
+- /* 0xfe(->0x7e : row 94) and 0xc9(->0x59 : row 41) are \
+- user-defined areas. */ \
+- else if (__builtin_expect (ch == 0xa0, 0) \
+- || __builtin_expect (ch > 0xfe, 0) \
+- || __builtin_expect (ch == 0xc9, 0)) \
++ else if (__glibc_unlikely (ch == 0xa0)) \
+ { \
+ /* This is illegal. */ \
+ STANDARD_FROM_LOOP_ERR_HANDLER (1); \
+diff --git a/iconvdata/ibm1364.c b/iconvdata/ibm1364.c
+index 49e7267ab4..521f0825b7 100644
+--- a/iconvdata/ibm1364.c
++++ b/iconvdata/ibm1364.c
+@@ -158,24 +158,14 @@ enum
+ \
+ if (__builtin_expect (ch, 0) == SO) \
+ { \
+- /* Shift OUT, change to DBCS converter. */ \
+- if (curcs == db) \
+- { \
+- result = __GCONV_ILLEGAL_INPUT; \
+- break; \
+- } \
++ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
+ curcs = db; \
+ ++inptr; \
+ continue; \
+ } \
+ if (__builtin_expect (ch, 0) == SI) \
+ { \
+- /* Shift IN, change to SBCS converter. */ \
+- if (curcs == sb) \
+- { \
+- result = __GCONV_ILLEGAL_INPUT; \
+- break; \
+- } \
++ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
+ curcs = sb; \
+ ++inptr; \
+ continue; \
+diff --git a/iconvdata/iso-2022-jp-3.c b/iconvdata/iso-2022-jp-3.c
+index 8c3b7e627e..c7b470db61 100644
+--- a/iconvdata/iso-2022-jp-3.c
++++ b/iconvdata/iso-2022-jp-3.c
+@@ -1,5 +1,6 @@
+ /* Conversion module for ISO-2022-JP-3.
+ Copyright (C) 1998-2020 Free Software Foundation, Inc.
++ Copyright (C) The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998,
+ and Bruno Haible <bruno@clisp.org>, 2002.
+@@ -67,10 +68,15 @@ enum
+ CURRENT_SEL_MASK = 7 << 3
+ };
+
+-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
+- also contains the last two bytes to be output, shifted by 6 bits, and a
+- one-bit indicator whether they must be preceded by the shift sequence,
+- in bit 22. */
++/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
++ state also contains the last two bytes to be output, shifted by 6
++ bits, and a one-bit indicator whether they must be preceded by the
++ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
++ conversion, COUNT may also contain a non-zero pending wide
++ character, shifted by six bits. This happens for certain inputs in
++ JISX0213_1_2004_set and JISX0213_2_set if the second wide character
++ in a combining sequence cannot be written because the buffer is
++ full. */
+
+ /* Since this is a stateful encoding we have to provide code which resets
+ the output state to the initial state. This has to be done during the
+@@ -80,10 +86,27 @@ enum
+ { \
+ if (FROM_DIRECTION) \
+ { \
+- /* It's easy, we don't have to emit anything, we just reset the \
+- state for the input. */ \
+- data->__statep->__count &= 7; \
+- data->__statep->__count |= ASCII_set; \
++ uint32_t ch = data->__statep->__count >> 6; \
++ \
++ if (__glibc_unlikely (ch != 0)) \
++ { \
++ if (__glibc_likely (outbuf + 4 <= outend)) \
++ { \
++ /* Write out the last character. */ \
++ put32u (outbuf, ch); \
++ outbuf += 4; \
++ data->__statep->__count &= 7; \
++ data->__statep->__count |= ASCII_set; \
++ } \
++ else \
++ /* We don't have enough room in the output buffer. */ \
++ status = __GCONV_FULL_OUTPUT; \
++ } \
++ else \
++ { \
++ data->__statep->__count &= 7; \
++ data->__statep->__count |= ASCII_set; \
++ } \
+ } \
+ else \
+ { \
+@@ -151,7 +174,21 @@ enum
+ #define LOOPFCT FROM_LOOP
+ #define BODY \
+ { \
+- uint32_t ch = *inptr; \
++ uint32_t ch; \
++ \
++ /* Output any pending character. */ \
++ ch = set >> 6; \
++ if (__glibc_unlikely (ch != 0)) \
++ { \
++ put32 (outptr, ch); \
++ outptr += 4; \
++ /* Remove the pending character, but preserve state bits. */ \
++ set &= (1 << 6) - 1; \
++ continue; \
++ } \
++ \
++ /* Otherwise read the next input byte. */ \
++ ch = *inptr; \
+ \
+ /* Recognize escape sequences. */ \
+ if (__glibc_unlikely (ch == ESC)) \
+@@ -297,21 +334,25 @@ enum
+ uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
+ uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
+ \
++ inptr += 2; \
++ \
++ put32 (outptr, u1); \
++ outptr += 4; \
++ \
+ /* See whether we have room for two characters. */ \
+- if (outptr + 8 <= outend) \
++ if (outptr + 4 <= outend) \
+ { \
+- inptr += 2; \
+- put32 (outptr, u1); \
+- outptr += 4; \
+ put32 (outptr, u2); \
+ outptr += 4; \
+ continue; \
+ } \
+- else \
+- { \
+- result = __GCONV_FULL_OUTPUT; \
+- break; \
+- } \
++ \
++ /* Otherwise store only the first character now, and \
++ put the second one into the queue. */ \
++ set |= u2 << 6; \
++ /* Tell the caller why we terminate the loop. */ \
++ result = __GCONV_FULL_OUTPUT; \
++ break; \
+ } \
+ \
+ inptr += 2; \
+diff --git a/iconvdata/ksc5601.h b/iconvdata/ksc5601.h
+index d3eb3a4ff8..f5cdc72797 100644
+--- a/iconvdata/ksc5601.h
++++ b/iconvdata/ksc5601.h
+@@ -50,15 +50,15 @@ ksc5601_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
+ unsigned char ch2;
+ int idx;
+
++ if (avail < 2)
++ return 0;
++
+ /* row 94(0x7e) and row 41(0x49) are user-defined area in KS C 5601 */
+
+ if (ch < offset || (ch - offset) <= 0x20 || (ch - offset) >= 0x7e
+ || (ch - offset) == 0x49)
+ return __UNKNOWN_10646_CHAR;
+
+- if (avail < 2)
+- return 0;
+-
+ ch2 = (*s)[1];
+ if (ch2 < offset || (ch2 - offset) <= 0x20 || (ch2 - offset) >= 0x7f)
+ return __UNKNOWN_10646_CHAR;
+diff --git a/intl/dcigettext.c b/intl/dcigettext.c
+index 2e7c662bc7..bd332e71da 100644
+--- a/intl/dcigettext.c
++++ b/intl/dcigettext.c
+@@ -1120,15 +1120,18 @@ _nl_find_msg (struct loaded_l10nfile *domain_file,
+
+ # ifdef _LIBC
+
+- struct gconv_spec conv_spec
+- = { .fromcode = norm_add_slashes (charset, ""),
+- .tocode = norm_add_slashes (outcharset, ""),
+- /* We always want to use transliteration. */
+- .translit = true,
+- .ignore = false
+- };
++ struct gconv_spec conv_spec;
++
++ __gconv_create_spec (&conv_spec, charset, outcharset);
++
++ /* We always want to use transliteration. */
++ conv_spec.translit = true;
++
+ int r = __gconv_open (&conv_spec, &convd->conv,
+ GCONV_AVOID_NOCONV);
++
++ __gconv_destroy_spec (&conv_spec);
++
+ if (__builtin_expect (r != __GCONV_OK, 0))
+ {
+ /* If the output encoding is the same there is
+diff --git a/intl/tst-codeset.c b/intl/tst-codeset.c
+index fd70432eca..e9f6e5e09f 100644
+--- a/intl/tst-codeset.c
++++ b/intl/tst-codeset.c
+@@ -22,13 +22,11 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
++#include <support/check.h>
+
+ static int
+ do_test (void)
+ {
+- char *s;
+- int result = 0;
+-
+ unsetenv ("LANGUAGE");
+ unsetenv ("OUTPUT_CHARSET");
+ setlocale (LC_ALL, "de_DE.ISO-8859-1");
+@@ -36,25 +34,21 @@ do_test (void)
+ bindtextdomain ("codeset", OBJPFX "domaindir");
+
+ /* Here we expect output in ISO-8859-1. */
+- s = gettext ("cheese");
+- if (strcmp (s, "K\344se"))
+- {
+- printf ("call 1 returned: %s\n", s);
+- result = 1;
+- }
++ TEST_COMPARE_STRING (gettext ("cheese"), "K\344se");
+
++ /* Here we expect output in UTF-8. */
+ bind_textdomain_codeset ("codeset", "UTF-8");
++ TEST_COMPARE_STRING (gettext ("cheese"), "K\303\244se");
+
+- /* Here we expect output in UTF-8. */
+- s = gettext ("cheese");
+- if (strcmp (s, "K\303\244se"))
+- {
+- printf ("call 2 returned: %s\n", s);
+- result = 1;
+- }
+-
+- return result;
++ /* `a with umlaut' is transliterated to `ae'. */
++ bind_textdomain_codeset ("codeset", "ASCII//TRANSLIT");
++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
++
++ /* Transliteration also works by default even if not set. */
++ bind_textdomain_codeset ("codeset", "ASCII");
++ TEST_COMPARE_STRING (gettext ("cheese"), "Kaese");
++
++ return 0;
+ }
+
+-#define TEST_FUNCTION do_test ()
+-#include "../test-skeleton.c"
++#include <support/test-driver.c>
+diff --git a/malloc/Makefile b/malloc/Makefile
+index e22cbde22d..5093e8730e 100644
+--- a/malloc/Makefile
++++ b/malloc/Makefile
+@@ -62,6 +62,16 @@ endif
+ tests += $(tests-static)
+ test-srcs = tst-mtrace
+
++# These tests either are run with MALLOC_CHECK_=3 by default or do not work
++# with MALLOC_CHECK_=3 because they expect a specific failure.
++tests-exclude-mcheck = tst-mcheck tst-malloc-usable \
++ tst-interpose-nothread tst-interpose-static-nothread \
++ tst-interpose-static-thread tst-malloc-too-large \
++ tst-mxfast tst-safe-linking
++
++# Run all tests with MALLOC_CHECK_=3
++tests-mcheck = $(filter-out $(tests-exclude-mcheck),$(tests))
++
+ routines = malloc morecore mcheck mtrace obstack reallocarray \
+ scratch_buffer_grow scratch_buffer_grow_preserve \
+ scratch_buffer_set_array_size \
+@@ -100,6 +110,11 @@ $(objpfx)tst-malloc-thread-exit: $(shared-thread-library)
+ $(objpfx)tst-malloc-thread-fail: $(shared-thread-library)
+ $(objpfx)tst-malloc-fork-deadlock: $(shared-thread-library)
+ $(objpfx)tst-malloc-stats-cancellation: $(shared-thread-library)
++$(objpfx)tst-malloc-backtrace-mcheck: $(shared-thread-library)
++$(objpfx)tst-malloc-thread-exit-mcheck: $(shared-thread-library)
++$(objpfx)tst-malloc-thread-fail-mcheck: $(shared-thread-library)
++$(objpfx)tst-malloc-fork-deadlock-mcheck: $(shared-thread-library)
++$(objpfx)tst-malloc-stats-cancellation-mcheck: $(shared-thread-library)
+
+ # Export the __malloc_initialize_hook variable to libc.so.
+ LDFLAGS-tst-mallocstate = -rdynamic
+@@ -239,6 +254,8 @@ $(tests:%=$(objpfx)%.o): CPPFLAGS += -DTEST_NO_MALLOPT
+ $(objpfx)tst-interpose-nothread: $(objpfx)tst-interpose-aux-nothread.o
+ $(objpfx)tst-interpose-thread: \
+ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
++$(objpfx)tst-interpose-thread-mcheck: \
++ $(objpfx)tst-interpose-aux-thread.o $(shared-thread-library)
+ $(objpfx)tst-interpose-static-nothread: $(objpfx)tst-interpose-aux-nothread.o
+ $(objpfx)tst-interpose-static-thread: \
+ $(objpfx)tst-interpose-aux-thread.o $(static-thread-library)
+@@ -256,3 +273,6 @@ $(objpfx)tst-dynarray-fail-mem.out: $(objpfx)tst-dynarray-fail.out
+ $(objpfx)tst-malloc-tcache-leak: $(shared-thread-library)
+ $(objpfx)tst-malloc_info: $(shared-thread-library)
+ $(objpfx)tst-mallocfork2: $(shared-thread-library)
++$(objpfx)tst-malloc-tcache-leak-mcheck: $(shared-thread-library)
++$(objpfx)tst-malloc_info-mcheck: $(shared-thread-library)
++$(objpfx)tst-mallocfork2-mcheck: $(shared-thread-library)
+diff --git a/manual/tunables.texi b/manual/tunables.texi
+index 23ef0d40e7..d72d7a5ec0 100644
+--- a/manual/tunables.texi
++++ b/manual/tunables.texi
+@@ -432,7 +432,11 @@ set shared cache size in bytes for use in memory and string routines.
+
+ @deftp Tunable glibc.cpu.x86_non_temporal_threshold
+ The @code{glibc.cpu.x86_non_temporal_threshold} tunable allows the user
+-to set threshold in bytes for non temporal store.
++to set threshold in bytes for non temporal store. Non temporal stores
++give a hint to the hardware to move data directly to memory without
++displacing other data from the cache. This tunable is used by some
++platforms to determine when to use non temporal stores in operations
++like memmove and memcpy.
+
+ This tunable is specific to i386 and x86-64.
+ @end deftp
+diff --git a/misc/sys/cdefs.h b/misc/sys/cdefs.h
+index 19d9cc5cfe..38221d0b2a 100644
+--- a/misc/sys/cdefs.h
++++ b/misc/sys/cdefs.h
+@@ -124,13 +124,10 @@
+ #define __bos0(ptr) __builtin_object_size (ptr, 0)
+
+ #if __GNUC_PREREQ (4,3)
+-# define __warndecl(name, msg) \
+- extern void name (void) __attribute__((__warning__ (msg)))
+ # define __warnattr(msg) __attribute__((__warning__ (msg)))
+ # define __errordecl(name, msg) \
+ extern void name (void) __attribute__((__error__ (msg)))
+ #else
+-# define __warndecl(name, msg) extern void name (void)
+ # define __warnattr(msg)
+ # define __errordecl(name, msg) extern void name (void)
+ #endif
+diff --git a/nptl/pthread_create.c b/nptl/pthread_create.c
+index 2cba3da38c..c217cda608 100644
+--- a/nptl/pthread_create.c
++++ b/nptl/pthread_create.c
+@@ -416,8 +416,6 @@ START_THREAD_DEFN
+ unwind_buf.priv.data.prev = NULL;
+ unwind_buf.priv.data.cleanup = NULL;
+
+- __libc_signal_restore_set (&pd->sigmask);
+-
+ /* Allow setxid from now onwards. */
+ if (__glibc_unlikely (atomic_exchange_acq (&pd->setxid_futex, 0) == -2))
+ futex_wake (&pd->setxid_futex, 1, FUTEX_PRIVATE);
+@@ -427,6 +425,8 @@ START_THREAD_DEFN
+ /* Store the new cleanup handler info. */
+ THREAD_SETMEM (pd, cleanup_jmp_buf, &unwind_buf);
+
++ __libc_signal_restore_set (&pd->sigmask);
++
+ /* We are either in (a) or (b), and in either case we either own
+ PD already (2) or are about to own PD (1), and so our only
+ restriction would be that we can't free PD until we know we
+diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c
+index 88c69d1e9c..381aa721ef 100644
+--- a/nscd/netgroupcache.c
++++ b/nscd/netgroupcache.c
+@@ -248,7 +248,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ : NULL);
+ ndomain = (ndomain ? newbuf + ndomaindiff
+ : NULL);
+- buffer = newbuf;
++ *tofreep = buffer = newbuf;
+ }
+
+ nhost = memcpy (buffer + bufused,
+@@ -319,7 +319,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req,
+ else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE)
+ {
+ buflen *= 2;
+- buffer = xrealloc (buffer, buflen);
++ *tofreep = buffer = xrealloc (buffer, buflen);
+ }
+ else if (status == NSS_STATUS_RETURN
+ || status == NSS_STATUS_NOTFOUND
+diff --git a/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
+new file mode 100644
+index 0000000000..5b0c6a4199
+--- /dev/null
++++ b/nss/tst-nss-files-hosts-long.root/etc/nsswitch.conf
+@@ -0,0 +1 @@
++hosts: files
+diff --git a/posix/bits/unistd.h b/posix/bits/unistd.h
+index 725a83eb0d..7e5bb6fb1e 100644
+--- a/posix/bits/unistd.h
++++ b/posix/bits/unistd.h
+@@ -193,10 +193,9 @@ __NTH (readlinkat (int __fd, const char *__restrict __path,
+ #endif
+
+ extern char *__getcwd_chk (char *__buf, size_t __size, size_t __buflen)
+- __THROW __wur __attr_access ((__write_only__, 1, 2));
++ __THROW __wur;
+ extern char *__REDIRECT_NTH (__getcwd_alias,
+- (char *__buf, size_t __size), getcwd)
+- __wur __attr_access ((__write_only__, 1, 2));
++ (char *__buf, size_t __size), getcwd) __wur;
+ extern char *__REDIRECT_NTH (__getcwd_chk_warn,
+ (char *__buf, size_t __size, size_t __buflen),
+ __getcwd_chk)
+diff --git a/posix/unistd.h b/posix/unistd.h
+index 32b8161619..acf9ee7e79 100644
+--- a/posix/unistd.h
++++ b/posix/unistd.h
+@@ -517,8 +517,7 @@ extern int fchdir (int __fd) __THROW __wur;
+ an array is allocated with `malloc'; the array is SIZE
+ bytes long, unless SIZE == 0, in which case it is as
+ big as necessary. */
+-extern char *getcwd (char *__buf, size_t __size) __THROW __wur
+- __attr_access ((__write_only__, 1, 2));
++extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
+
+ #ifdef __USE_GNU
+ /* Return a malloc'd string containing the current directory name.
+@@ -831,7 +830,7 @@ extern int symlinkat (const char *__from, int __tofd,
+ /* Like readlink but a relative PATH is interpreted relative to FD. */
+ extern ssize_t readlinkat (int __fd, const char *__restrict __path,
+ char *__restrict __buf, size_t __len)
+- __THROW __nonnull ((2, 3)) __wur __attr_access ((__read_only__, 3, 4));
++ __THROW __nonnull ((2, 3)) __wur __attr_access ((__write_only__, 3, 4));
+ #endif
+
+ /* Remove the link NAME. */
+diff --git a/posix/wordexp-test.c b/posix/wordexp-test.c
+index ed1b22308e..cb3f989cba 100644
+--- a/posix/wordexp-test.c
++++ b/posix/wordexp-test.c
+@@ -183,6 +183,7 @@ struct test_case_struct
+ { 0, NULL, "$var", 0, 0, { NULL, }, IFS },
+ { 0, NULL, "\"\\n\"", 0, 1, { "\\n", }, IFS },
+ { 0, NULL, "", 0, 0, { NULL, }, IFS },
++ { 0, NULL, "${1234567890123456789012}", 0, 0, { NULL, }, IFS },
+
+ /* Flags not already covered (testit() has special handling for these) */
+ { 0, NULL, "one two", WRDE_DOOFFS, 2, { "one", "two", }, IFS },
+diff --git a/posix/wordexp.c b/posix/wordexp.c
+index e082d94895..56289503a1 100644
+--- a/posix/wordexp.c
++++ b/posix/wordexp.c
+@@ -1399,7 +1399,7 @@ envsubst:
+ /* Is it a numeric parameter? */
+ else if (isdigit (env[0]))
+ {
+- int n = atoi (env);
++ unsigned long n = strtoul (env, NULL, 10);
+
+ if (n >= __libc_argc)
+ /* Substitute NULL. */
+diff --git a/resolv/Makefile b/resolv/Makefile
+index b61c0c3e0c..dbd8f8bf4f 100644
+--- a/resolv/Makefile
++++ b/resolv/Makefile
+@@ -61,6 +61,11 @@ tests += \
+ tst-resolv-search \
+ tst-resolv-trailing \
+
++# This test calls __res_context_send directly, which is not exported
++# from libresolv.
++tests-internal += tst-resolv-txnid-collision
++tests-static += tst-resolv-txnid-collision
++
+ # These tests need libdl.
+ ifeq (yes,$(build-shared))
+ tests += \
+@@ -191,6 +196,8 @@ $(objpfx)tst-resolv-search: $(objpfx)libresolv.so $(shared-thread-library)
+ $(objpfx)tst-resolv-trailing: $(objpfx)libresolv.so $(shared-thread-library)
+ $(objpfx)tst-resolv-threads: \
+ $(libdl) $(objpfx)libresolv.so $(shared-thread-library)
++$(objpfx)tst-resolv-txnid-collision: $(objpfx)libresolv.a \
++ $(static-thread-library)
+ $(objpfx)tst-resolv-canonname: \
+ $(libdl) $(objpfx)libresolv.so $(shared-thread-library)
+ $(objpfx)tst-resolv-trustad: $(objpfx)libresolv.so $(shared-thread-library)
+diff --git a/resolv/res_send.c b/resolv/res_send.c
+index 7e5fec6646..70e5066031 100644
+--- a/resolv/res_send.c
++++ b/resolv/res_send.c
+@@ -1342,15 +1342,6 @@ send_dg(res_state statp,
+ *terrno = EMSGSIZE;
+ return close_and_return_error (statp, resplen2);
+ }
+- if ((recvresp1 || hp->id != anhp->id)
+- && (recvresp2 || hp2->id != anhp->id)) {
+- /*
+- * response from old query, ignore it.
+- * XXX - potential security hazard could
+- * be detected here.
+- */
+- goto wait;
+- }
+
+ /* Paranoia check. Due to the connected UDP socket,
+ the kernel has already filtered invalid addresses
+@@ -1360,15 +1351,24 @@ send_dg(res_state statp,
+
+ /* Check for the correct header layout and a matching
+ question. */
+- if ((recvresp1 || !res_queriesmatch(buf, buf + buflen,
+- *thisansp,
+- *thisansp
+- + *thisanssizp))
+- && (recvresp2 || !res_queriesmatch(buf2, buf2 + buflen2,
+- *thisansp,
+- *thisansp
+- + *thisanssizp)))
+- goto wait;
++ int matching_query = 0; /* Default to no matching query. */
++ if (!recvresp1
++ && anhp->id == hp->id
++ && res_queriesmatch (buf, buf + buflen,
++ *thisansp, *thisansp + *thisanssizp))
++ matching_query = 1;
++ if (!recvresp2
++ && anhp->id == hp2->id
++ && res_queriesmatch (buf2, buf2 + buflen2,
++ *thisansp, *thisansp + *thisanssizp))
++ matching_query = 2;
++ if (matching_query == 0)
++ /* Spurious UDP packet. Drop it and continue
++ waiting. */
++ {
++ need_recompute = 1;
++ goto wait;
++ }
+
+ if (anhp->rcode == SERVFAIL ||
+ anhp->rcode == NOTIMP ||
+@@ -1383,7 +1383,7 @@ send_dg(res_state statp,
+ /* No data from the first reply. */
+ resplen = 0;
+ /* We are waiting for a possible second reply. */
+- if (hp->id == anhp->id)
++ if (matching_query == 1)
+ recvresp1 = 1;
+ else
+ recvresp2 = 1;
+@@ -1414,7 +1414,7 @@ send_dg(res_state statp,
+ return (1);
+ }
+ /* Mark which reply we received. */
+- if (recvresp1 == 0 && hp->id == anhp->id)
++ if (matching_query == 1)
+ recvresp1 = 1;
+ else
+ recvresp2 = 1;
+diff --git a/resolv/tst-resolv-txnid-collision.c b/resolv/tst-resolv-txnid-collision.c
+new file mode 100644
+index 0000000000..189b76f126
+--- /dev/null
++++ b/resolv/tst-resolv-txnid-collision.c
+@@ -0,0 +1,334 @@
++/* Test parallel queries with transaction ID collisions.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <arpa/nameser.h>
++#include <array_length.h>
++#include <resolv-internal.h>
++#include <resolv_context.h>
++#include <stdbool.h>
++#include <stdio.h>
++#include <string.h>
++#include <support/check.h>
++#include <support/check_nss.h>
++#include <support/resolv_test.h>
++#include <support/support.h>
++#include <support/test-driver.h>
++
++/* Result of parsing a DNS question name.
++
++ A question name has the form reorder-N-M.rcode-C.example.net, where
++ N and M are each either 0 or 1, corresponding to the reorder member,
++ and C is a number that will be stored in the rcode field.
++
++ Also see parse_qname below. */
++struct parsed_qname
++{
++ /* The DNS response code requested from the first server. The
++ second server always responds with RCODE zero. */
++ int rcode;
++
++ /* Indicates whether to perform reordering in the responses from the
++ respective server. */
++ bool reorder[2];
++};
++
++/* Fills *PARSED based on QNAME; unexpected names fail via FAIL_EXIT1. */
++static void
++parse_qname (struct parsed_qname *parsed, const char *qname)
++{
++ int reorder0;
++ int reorder1;
++ int rcode;
++ char *suffix;
++ if (sscanf (qname, "reorder-%d-%d.rcode-%d.%ms",
++ &reorder0, &reorder1, &rcode, &suffix) == 4)
++ {
++ if (reorder0 != 0)
++ TEST_COMPARE (reorder0, 1);
++ if (reorder1 != 0)
++ TEST_COMPARE (reorder1, 1);
++ TEST_VERIFY (rcode >= 0 && rcode <= 15);
++ TEST_COMPARE_STRING (suffix, "example.net");
++ free (suffix);
++
++ parsed->rcode = rcode;
++ parsed->reorder[0] = reorder0;
++ parsed->reorder[1] = reorder1;
++ }
++ else
++ FAIL_EXIT1 ("unexpected query: %s", qname);
++}
++
++/* Used to construct a response. The first server responds with the
++ requested RCODE (or a delegation), the second server succeeds. */
++static void
++build_response (const struct resolv_response_context *ctx,
++ struct resolv_response_builder *b,
++ const char *qname, uint16_t qclass, uint16_t qtype)
++{
++ struct parsed_qname parsed;
++ parse_qname (&parsed, qname);
++
++ switch (ctx->server_index)
++ {
++ case 0:
++ {
++ struct resolv_response_flags flags = { 0 };
++ if (parsed.rcode == 0)
++ /* Simulate a delegation in case a NODATA (RCODE zero)
++ response is requested. */
++ flags.clear_ra = true;
++ else
++ flags.rcode = parsed.rcode;
++
++ resolv_response_init (b, flags);
++ resolv_response_add_question (b, qname, qclass, qtype);
++ }
++ break;
++
++ case 1:
++ {
++ struct resolv_response_flags flags = { 0, };
++ resolv_response_init (b, flags);
++ resolv_response_add_question (b, qname, qclass, qtype);
++
++ resolv_response_section (b, ns_s_an);
++ resolv_response_open_record (b, qname, qclass, qtype, 0);
++ if (qtype == T_A)
++ {
++ char ipv4[4] = { 192, 0, 2, 1 };
++ resolv_response_add_data (b, &ipv4, sizeof (ipv4));
++ }
++ else
++ {
++ char ipv6[16]
++ = { 0x20, 0x01, 0xd, 0xb8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
++ resolv_response_add_data (b, &ipv6, sizeof (ipv6));
++ }
++ resolv_response_close_record (b);
++ }
++ break;
++ }
++}
++
++/* Buffered context of the first query of a pair; NULL if none. */
++static struct resolv_response_context *previous_query;
++
++/* Used to keep track of the queries received. */
++static int previous_server_index = -1;
++static uint16_t previous_qtype;
++
++/* For each server, buffer the first query and then send both answers
++ to the second query, reordered if requested. */
++static void
++response (const struct resolv_response_context *ctx,
++ struct resolv_response_builder *b,
++ const char *qname, uint16_t qclass, uint16_t qtype)
++{
++ TEST_VERIFY (qtype == T_A || qtype == T_AAAA);
++ if (ctx->server_index != 0)
++ TEST_COMPARE (ctx->server_index, 1);
++
++ struct parsed_qname parsed;
++ parse_qname (&parsed, qname);
++
++ if (previous_query == NULL)
++ {
++ /* No buffered query. Record this query and do not send a
++ response. */
++ TEST_COMPARE (previous_qtype, 0);
++ previous_query = resolv_response_context_duplicate (ctx);
++ previous_qtype = qtype;
++ resolv_response_drop (b);
++ previous_server_index = ctx->server_index;
++
++ if (test_verbose)
++ printf ("info: buffering first query for: %s\n", qname);
++ }
++ else
++ {
++ TEST_VERIFY (previous_query != 0);
++ TEST_COMPARE (ctx->server_index, previous_server_index);
++ TEST_VERIFY (previous_qtype != qtype); /* Not a duplicate. */
++
++ /* If reordering, send a response for this query explicitly, and
++ then skip the implicit send. */
++ if (parsed.reorder[ctx->server_index])
++ {
++ if (test_verbose)
++ printf ("info: sending reordered second response for: %s\n",
++ qname);
++ build_response (ctx, b, qname, qclass, qtype);
++ resolv_response_send_udp (ctx, b);
++ resolv_response_drop (b);
++ }
++
++ /* Build a response for the buffered first query and send it. If the
++ second response was already sent above, this reorders the two. */
++ {
++ if (test_verbose)
++ printf ("info: sending first response for: %s\n", qname);
++ struct resolv_response_builder *btmp
++ = resolv_response_builder_allocate (previous_query->query_buffer,
++ previous_query->query_length);
++ build_response (ctx, btmp, qname, qclass, previous_qtype);
++ resolv_response_send_udp (ctx, btmp);
++ resolv_response_builder_free (btmp);
++ }
++
++ /* If not reordering, send the reply as usual. */
++ if (!parsed.reorder[ctx->server_index])
++ {
++ if (test_verbose)
++ printf ("info: sending non-reordered second response for: %s\n",
++ qname);
++ build_response (ctx, b, qname, qclass, qtype);
++ }
++
++ /* Free the buffered query context and reset for the next pair. */
++ resolv_response_context_free (previous_query);
++ previous_query = NULL;
++ previous_qtype = 0;
++ previous_server_index = -1;
++ }
++}
++
++/* Runs a parallel A/AAAA query for QNAME and checks the replies, given the
++ RCODE requested from server 0. See struct parsed_qname for QNAME. */
++static void
++test_qname (const char *qname, int rcode)
++{
++ struct resolv_context *ctx = __resolv_context_get ();
++ TEST_VERIFY_EXIT (ctx != NULL);
++
++ unsigned char q1[512];
++ int q1len = res_mkquery (QUERY, qname, C_IN, T_A, NULL, 0, NULL,
++ q1, sizeof (q1));
++ TEST_VERIFY_EXIT (q1len > 12);
++
++ unsigned char q2[512];
++ int q2len = res_mkquery (QUERY, qname, C_IN, T_AAAA, NULL, 0, NULL,
++ q2, sizeof (q2));
++ TEST_VERIFY_EXIT (q2len > 12);
++
++ /* Produce a transaction ID collision. */
++ memcpy (q2, q1, 2);
++
++ unsigned char ans1[512];
++ unsigned char *ans1p = ans1;
++ unsigned char *ans2p = NULL;
++ int nans2p = 0;
++ int resplen2 = 0;
++ int ans2p_malloced = 0;
++
++ /* Perform a parallel A/AAAA query. */
++ int resplen1 = __res_context_send (ctx, q1, q1len, q2, q2len,
++ ans1, sizeof (ans1), &ans1p,
++ &ans2p, &nans2p,
++ &resplen2, &ans2p_malloced);
++
++ TEST_VERIFY (resplen1 > 12);
++ TEST_VERIFY (resplen2 > 12);
++ if (resplen1 <= 12 || resplen2 <= 12)
++ goto out;
++
++ if (rcode == 1 || rcode == 3)
++ {
++ /* Format Error and Name Error responses do not trigger
++ switching to the next server. */
++ TEST_COMPARE (ans1p[3] & 0x0f, rcode);
++ TEST_COMPARE (ans2p[3] & 0x0f, rcode);
++ goto out;
++ }
++
++ /* The response should be successful. */
++ TEST_COMPARE (ans1p[3] & 0x0f, 0);
++ TEST_COMPARE (ans2p[3] & 0x0f, 0);
++
++ /* Due to bug 19691, the answer may not be in the slot matching the
++ query. Assume that the AAAA response is the longer one. */
++ unsigned char *a_answer;
++ int a_answer_length;
++ unsigned char *aaaa_answer;
++ int aaaa_answer_length;
++ if (resplen2 > resplen1)
++ {
++ a_answer = ans1p;
++ a_answer_length = resplen1;
++ aaaa_answer = ans2p;
++ aaaa_answer_length = resplen2;
++ }
++ else
++ {
++ a_answer = ans2p;
++ a_answer_length = resplen2;
++ aaaa_answer = ans1p;
++ aaaa_answer_length = resplen1;
++ }
++
++ {
++ char *expected = xasprintf ("name: %s\n"
++ "address: 192.0.2.1\n",
++ qname);
++ check_dns_packet (qname, a_answer, a_answer_length, expected);
++ free (expected);
++ }
++ {
++ char *expected = xasprintf ("name: %s\n"
++ "address: 2001:db8::1\n",
++ qname);
++ check_dns_packet (qname, aaaa_answer, aaaa_answer_length, expected);
++ free (expected);
++ }
++ out: /* Shared cleanup, also reached from the early exits above. */
++ if (ans2p_malloced)
++ free (ans2p);
++
++ __resolv_context_put (ctx);
++}
++
++static int
++do_test (void)
++{
++ struct resolv_test *aux = resolv_test_start
++ ((struct resolv_redirect_config)
++ {
++ .response_callback = response,
++
++ /* The response callback uses global state (the previous_*
++ variables), and query processing must therefore be
++ serialized. */
++ .single_thread_udp = true,
++ });
++
++ for (int rcode = 0; rcode <= 5; ++rcode)
++ for (int do_reorder_0 = 0; do_reorder_0 < 2; ++do_reorder_0)
++ for (int do_reorder_1 = 0; do_reorder_1 < 2; ++do_reorder_1)
++ {
++ char *qname = xasprintf ("reorder-%d-%d.rcode-%d.example.net",
++ do_reorder_0, do_reorder_1, rcode);
++ test_qname (qname, rcode);
++ free (qname);
++ }
++
++ resolv_test_end (aux);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/rt/Makefile b/rt/Makefile
+index dab5d62a57..93502cfaa7 100644
+--- a/rt/Makefile
++++ b/rt/Makefile
+@@ -44,6 +44,7 @@ tests := tst-shm tst-timer tst-timer2 \
+ tst-aio7 tst-aio8 tst-aio9 tst-aio10 \
+ tst-mqueue1 tst-mqueue2 tst-mqueue3 tst-mqueue4 \
+ tst-mqueue5 tst-mqueue6 tst-mqueue7 tst-mqueue8 tst-mqueue9 \
++ tst-bz28213 \
+ tst-timer3 tst-timer4 tst-timer5 \
+ tst-cpuclock2 tst-cputimer1 tst-cputimer2 tst-cputimer3 \
+ tst-shm-cancel
+diff --git a/rt/tst-bz28213.c b/rt/tst-bz28213.c
+new file mode 100644
+index 0000000000..0c096b5a0a
+--- /dev/null
++++ b/rt/tst-bz28213.c
+@@ -0,0 +1,101 @@
++/* Bug 28213: test for NULL pointer dereference in mq_notify.
++ Copyright (C) The GNU Toolchain Authors.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <errno.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <mqueue.h>
++#include <signal.h>
++#include <stdlib.h>
++#include <string.h>
++#include <support/check.h>
++
++static mqd_t m = -1; /* Queue descriptor, opened in do_test. */
++static const char msg[] = "hello"; /* Payload sent and expected back. */
++
++static void
++check_bz28213_cb (union sigval sv)
++{
++ char buf[sizeof (msg)];
++
++ (void) sv; /* Unused. */
++
++ TEST_VERIFY_EXIT ((size_t) mq_receive (m, buf, sizeof (buf), NULL)
++ == sizeof (buf));
++ TEST_VERIFY_EXIT (memcmp (buf, msg, sizeof (buf)) == 0);
++
++ exit (0); /* Message received intact: the test passes. */
++}
++
++static void
++check_bz28213 (void)
++{
++ struct sigevent sev;
++
++ memset (&sev, '\0', sizeof (sev));
++ sev.sigev_notify = SIGEV_THREAD;
++ sev.sigev_notify_function = check_bz28213_cb;
++
++ /* Step 1: Register & unregister notifier.
++ Helper thread should receive NOTIFY_REMOVED notification.
++ In a vulnerable version of glibc, NULL pointer dereference follows. */
++ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
++ TEST_VERIFY_EXIT (mq_notify (m, NULL) == 0);
++
++ /* Step 2: Once again, register notification.
++ Try to send one message.
++ Test is considered successful, if the callback does exit (0). */
++ TEST_VERIFY_EXIT (mq_notify (m, &sev) == 0);
++ TEST_VERIFY_EXIT (mq_send (m, msg, sizeof (msg), 1) == 0);
++
++ /* Wait for check_bz28213_cb to end the process via exit (0). */
++ pause ();
++}
++
++static int
++do_test (void)
++{
++ static const char m_name[] = "/bz28213_queue";
++ struct mq_attr m_attr;
++
++ memset (&m_attr, '\0', sizeof (m_attr));
++ m_attr.mq_maxmsg = 1;
++ m_attr.mq_msgsize = sizeof (msg);
++
++ m = mq_open (m_name,
++ O_RDWR | O_CREAT | O_EXCL,
++ 0600,
++ &m_attr);
++
++ if (m < 0)
++ {
++ if (errno == ENOSYS)
++ FAIL_UNSUPPORTED ("POSIX message queues are not implemented\n");
++ FAIL_EXIT1 ("Failed to create POSIX message queue: %m\n");
++ }
++
++ TEST_VERIFY_EXIT (mq_unlink (m_name) == 0); /* M_NAME is not used again. */
++
++ check_bz28213 ();
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/stdio-common/Makefile b/stdio-common/Makefile
+index 8475fd1f09..eff0c98d82 100644
+--- a/stdio-common/Makefile
++++ b/stdio-common/Makefile
+@@ -69,7 +69,8 @@ tests := tstscanf test_rdwr test-popen tstgetln test-fseek \
+ tst-printf-bz25691 \
+ tst-vfprintf-width-prec-alloc \
+ tst-printf-fp-free \
+- tst-printf-fp-leak
++ tst-printf-fp-leak \
++ test-strerr
+
+
+ test-srcs = tst-unbputc tst-printf tst-printfsz-islongdouble
+diff --git a/stdio-common/errlist.c b/stdio-common/errlist.c
+index d15f13a22a..2ecf121674 100644
+--- a/stdio-common/errlist.c
++++ b/stdio-common/errlist.c
+@@ -20,9 +20,13 @@
+ #include <libintl.h>
+ #include <array_length.h>
+
++#ifndef ERR_MAP
++# define ERR_MAP(n) n
++#endif
++
+ const char *const _sys_errlist_internal[] =
+ {
+-#define _S(n, str) [n] = str,
++#define _S(n, str) [ERR_MAP(n)] = str,
+ #include <errlist.h>
+ #undef _S
+ };
+@@ -41,20 +45,21 @@ static const union sys_errname_t
+ {
+ #define MSGSTRFIELD1(line) str##line
+ #define MSGSTRFIELD(line) MSGSTRFIELD1(line)
+-#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(str)];
++#define _S(n, str) char MSGSTRFIELD(__LINE__)[sizeof(#n)];
+ #include <errlist.h>
+ #undef _S
+ };
+ char str[0];
+ } _sys_errname = { {
+-#define _S(n, s) s,
++#define _S(n, s) #n,
+ #include <errlist.h>
+ #undef _S
+ } };
+
+ static const unsigned short _sys_errnameidx[] =
+ {
+-#define _S(n, s) [n] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
++#define _S(n, s) \
++ [ERR_MAP(n)] = offsetof(union sys_errname_t, MSGSTRFIELD(__LINE__)),
+ #include <errlist.h>
+ #undef _S
+ };
+diff --git a/stdio-common/test-strerr.c b/stdio-common/test-strerr.c
+index fded208118..d77b81d507 100644
+--- a/stdio-common/test-strerr.c
++++ b/stdio-common/test-strerr.c
+@@ -18,46 +18,672 @@
+
+ #include <string.h>
+ #include <errno.h>
+-#include <array_length.h>
+
+ #include <support/support.h>
+ #include <support/check.h>
+
+-#define N_(name) name
+-
+-static const char *const errlist[] =
+- {
+-/* This file is auto-generated from errlist.def. */
+-#include <errlist.h>
+- };
+-
+-#define MSGSTR_T errname_t
+-#define MSGSTR errname
+-#define MSGIDX errnameidx
+-#include <errlist-name.h>
+-#undef MSGSTR
+-#undef MSGIDX
+-
+ static int
+ do_test (void)
+ {
+- TEST_VERIFY (strerrordesc_np (-1) == NULL);
+- TEST_VERIFY (strerrordesc_np (array_length (errlist)) == NULL);
+- for (size_t i = 0; i < array_length (errlist); i++)
+- {
+- if (errlist[i] == NULL)
+- continue;
+- TEST_COMPARE_STRING (strerrordesc_np (i), errlist[i]);
+- }
++ TEST_COMPARE_STRING (strerrordesc_np (0), "Success");
++ TEST_COMPARE_STRING (strerrorname_np (0), "0");
+
+- TEST_VERIFY (strerrorname_np (-1) == NULL);
+- TEST_VERIFY (strerrorname_np (array_length (errlist)) == NULL);
+- for (size_t i = 0; i < array_length (errlist); i++)
+- {
+- if (errlist[i] == NULL)
+- continue;
+- TEST_COMPARE_STRING (strerrorname_np (i), errname.str + errnameidx[i]);
+- }
++#ifdef EPERM
++ TEST_COMPARE_STRING (strerrordesc_np (EPERM), "Operation not permitted");
++ TEST_COMPARE_STRING (strerrorname_np (EPERM), "EPERM");
++#endif
++#ifdef ENOENT
++ TEST_COMPARE_STRING (strerrordesc_np (ENOENT),
++ "No such file or directory");
++ TEST_COMPARE_STRING (strerrorname_np (ENOENT), "ENOENT");
++#endif
++#ifdef ESRCH
++ TEST_COMPARE_STRING (strerrordesc_np (ESRCH), "No such process");
++ TEST_COMPARE_STRING (strerrorname_np (ESRCH), "ESRCH");
++#endif
++#ifdef EINTR
++ TEST_COMPARE_STRING (strerrordesc_np (EINTR), "Interrupted system call");
++ TEST_COMPARE_STRING (strerrorname_np (EINTR), "EINTR");
++#endif
++#ifdef EIO
++ TEST_COMPARE_STRING (strerrordesc_np (EIO), "Input/output error");
++ TEST_COMPARE_STRING (strerrorname_np (EIO), "EIO");
++#endif
++#ifdef ENXIO
++ TEST_COMPARE_STRING (strerrordesc_np (ENXIO), "No such device or address");
++ TEST_COMPARE_STRING (strerrorname_np (ENXIO), "ENXIO");
++#endif
++#ifdef E2BIG
++ TEST_COMPARE_STRING (strerrordesc_np (E2BIG), "Argument list too long");
++ TEST_COMPARE_STRING (strerrorname_np (E2BIG), "E2BIG");
++#endif
++#ifdef ENOEXEC
++ TEST_COMPARE_STRING (strerrordesc_np (ENOEXEC), "Exec format error");
++ TEST_COMPARE_STRING (strerrorname_np (ENOEXEC), "ENOEXEC");
++#endif
++#ifdef EBADF
++ TEST_COMPARE_STRING (strerrordesc_np (EBADF), "Bad file descriptor");
++ TEST_COMPARE_STRING (strerrorname_np (EBADF), "EBADF");
++#endif
++#ifdef ECHILD
++ TEST_COMPARE_STRING (strerrordesc_np (ECHILD), "No child processes");
++ TEST_COMPARE_STRING (strerrorname_np (ECHILD), "ECHILD");
++#endif
++#ifdef EDEADLK
++ TEST_COMPARE_STRING (strerrordesc_np (EDEADLK),
++ "Resource deadlock avoided");
++ TEST_COMPARE_STRING (strerrorname_np (EDEADLK), "EDEADLK");
++#endif
++#ifdef ENOMEM
++ TEST_COMPARE_STRING (strerrordesc_np (ENOMEM), "Cannot allocate memory");
++ TEST_COMPARE_STRING (strerrorname_np (ENOMEM), "ENOMEM");
++#endif
++#ifdef EACCES
++ TEST_COMPARE_STRING (strerrordesc_np (EACCES), "Permission denied");
++ TEST_COMPARE_STRING (strerrorname_np (EACCES), "EACCES");
++#endif
++#ifdef EFAULT
++ TEST_COMPARE_STRING (strerrordesc_np (EFAULT), "Bad address");
++ TEST_COMPARE_STRING (strerrorname_np (EFAULT), "EFAULT");
++#endif
++#ifdef ENOTBLK
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTBLK), "Block device required");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTBLK), "ENOTBLK");
++#endif
++#ifdef EBUSY
++ TEST_COMPARE_STRING (strerrordesc_np (EBUSY), "Device or resource busy");
++ TEST_COMPARE_STRING (strerrorname_np (EBUSY), "EBUSY");
++#endif
++#ifdef EEXIST
++ TEST_COMPARE_STRING (strerrordesc_np (EEXIST), "File exists");
++ TEST_COMPARE_STRING (strerrorname_np (EEXIST), "EEXIST");
++#endif
++#ifdef EXDEV
++ TEST_COMPARE_STRING (strerrordesc_np (EXDEV), "Invalid cross-device link");
++ TEST_COMPARE_STRING (strerrorname_np (EXDEV), "EXDEV");
++#endif
++#ifdef ENODEV
++ TEST_COMPARE_STRING (strerrordesc_np (ENODEV), "No such device");
++ TEST_COMPARE_STRING (strerrorname_np (ENODEV), "ENODEV");
++#endif
++#ifdef ENOTDIR
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTDIR), "Not a directory");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTDIR), "ENOTDIR");
++#endif
++#ifdef EISDIR
++ TEST_COMPARE_STRING (strerrordesc_np (EISDIR), "Is a directory");
++ TEST_COMPARE_STRING (strerrorname_np (EISDIR), "EISDIR");
++#endif
++#ifdef EINVAL
++ TEST_COMPARE_STRING (strerrordesc_np (EINVAL), "Invalid argument");
++ TEST_COMPARE_STRING (strerrorname_np (EINVAL), "EINVAL");
++#endif
++#ifdef EMFILE
++ TEST_COMPARE_STRING (strerrordesc_np (EMFILE), "Too many open files");
++ TEST_COMPARE_STRING (strerrorname_np (EMFILE), "EMFILE");
++#endif
++#ifdef ENFILE
++ TEST_COMPARE_STRING (strerrordesc_np (ENFILE),
++ "Too many open files in system");
++ TEST_COMPARE_STRING (strerrorname_np (ENFILE), "ENFILE");
++#endif
++#ifdef ENOTTY
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTTY),
++ "Inappropriate ioctl for device");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTTY), "ENOTTY");
++#endif
++#ifdef ETXTBSY
++ TEST_COMPARE_STRING (strerrordesc_np (ETXTBSY), "Text file busy");
++ TEST_COMPARE_STRING (strerrorname_np (ETXTBSY), "ETXTBSY");
++#endif
++#ifdef EFBIG
++ TEST_COMPARE_STRING (strerrordesc_np (EFBIG), "File too large");
++ TEST_COMPARE_STRING (strerrorname_np (EFBIG), "EFBIG");
++#endif
++#ifdef ENOSPC
++ TEST_COMPARE_STRING (strerrordesc_np (ENOSPC), "No space left on device");
++ TEST_COMPARE_STRING (strerrorname_np (ENOSPC), "ENOSPC");
++#endif
++#ifdef ESPIPE
++ TEST_COMPARE_STRING (strerrordesc_np (ESPIPE), "Illegal seek");
++ TEST_COMPARE_STRING (strerrorname_np (ESPIPE), "ESPIPE");
++#endif
++#ifdef EROFS
++ TEST_COMPARE_STRING (strerrordesc_np (EROFS), "Read-only file system");
++ TEST_COMPARE_STRING (strerrorname_np (EROFS), "EROFS");
++#endif
++#ifdef EMLINK
++ TEST_COMPARE_STRING (strerrordesc_np (EMLINK), "Too many links");
++ TEST_COMPARE_STRING (strerrorname_np (EMLINK), "EMLINK");
++#endif
++#ifdef EPIPE
++ TEST_COMPARE_STRING (strerrordesc_np (EPIPE), "Broken pipe");
++ TEST_COMPARE_STRING (strerrorname_np (EPIPE), "EPIPE");
++#endif
++#ifdef EDOM
++ TEST_COMPARE_STRING (strerrordesc_np (EDOM),
++ "Numerical argument out of domain");
++ TEST_COMPARE_STRING (strerrorname_np (EDOM), "EDOM");
++#endif
++#ifdef ERANGE
++ TEST_COMPARE_STRING (strerrordesc_np (ERANGE),
++ "Numerical result out of range");
++ TEST_COMPARE_STRING (strerrorname_np (ERANGE), "ERANGE");
++#endif
++#ifdef EAGAIN
++ TEST_COMPARE_STRING (strerrordesc_np (EAGAIN),
++ "Resource temporarily unavailable");
++ TEST_COMPARE_STRING (strerrorname_np (EAGAIN), "EAGAIN");
++#endif
++#ifdef EINPROGRESS
++ TEST_COMPARE_STRING (strerrordesc_np (EINPROGRESS),
++ "Operation now in progress");
++ TEST_COMPARE_STRING (strerrorname_np (EINPROGRESS), "EINPROGRESS");
++#endif
++#ifdef EALREADY
++ TEST_COMPARE_STRING (strerrordesc_np (EALREADY),
++ "Operation already in progress");
++ TEST_COMPARE_STRING (strerrorname_np (EALREADY), "EALREADY");
++#endif
++#ifdef ENOTSOCK
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTSOCK),
++ "Socket operation on non-socket");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTSOCK), "ENOTSOCK");
++#endif
++#ifdef EMSGSIZE
++ TEST_COMPARE_STRING (strerrordesc_np (EMSGSIZE), "Message too long");
++ TEST_COMPARE_STRING (strerrorname_np (EMSGSIZE), "EMSGSIZE");
++#endif
++#ifdef EPROTOTYPE
++ TEST_COMPARE_STRING (strerrordesc_np (EPROTOTYPE),
++ "Protocol wrong type for socket");
++ TEST_COMPARE_STRING (strerrorname_np (EPROTOTYPE), "EPROTOTYPE");
++#endif
++#ifdef ENOPROTOOPT
++ TEST_COMPARE_STRING (strerrordesc_np (ENOPROTOOPT),
++ "Protocol not available");
++ TEST_COMPARE_STRING (strerrorname_np (ENOPROTOOPT), "ENOPROTOOPT");
++#endif
++#ifdef EPROTONOSUPPORT
++ TEST_COMPARE_STRING (strerrordesc_np (EPROTONOSUPPORT),
++ "Protocol not supported");
++ TEST_COMPARE_STRING (strerrorname_np (EPROTONOSUPPORT), "EPROTONOSUPPORT");
++#endif
++#ifdef ESOCKTNOSUPPORT
++ TEST_COMPARE_STRING (strerrordesc_np (ESOCKTNOSUPPORT),
++ "Socket type not supported");
++ TEST_COMPARE_STRING (strerrorname_np (ESOCKTNOSUPPORT), "ESOCKTNOSUPPORT");
++#endif
++#ifdef EOPNOTSUPP
++ TEST_COMPARE_STRING (strerrordesc_np (EOPNOTSUPP),
++ "Operation not supported");
++ TEST_COMPARE_STRING (strerrorname_np (EOPNOTSUPP), "EOPNOTSUPP");
++#endif
++#ifdef EPFNOSUPPORT
++ TEST_COMPARE_STRING (strerrordesc_np (EPFNOSUPPORT),
++ "Protocol family not supported");
++ TEST_COMPARE_STRING (strerrorname_np (EPFNOSUPPORT), "EPFNOSUPPORT");
++#endif
++#ifdef EAFNOSUPPORT
++ TEST_COMPARE_STRING (strerrordesc_np (EAFNOSUPPORT),
++ "Address family not supported by protocol");
++ TEST_COMPARE_STRING (strerrorname_np (EAFNOSUPPORT), "EAFNOSUPPORT");
++#endif
++#ifdef EADDRINUSE
++ TEST_COMPARE_STRING (strerrordesc_np (EADDRINUSE),
++ "Address already in use");
++ TEST_COMPARE_STRING (strerrorname_np (EADDRINUSE), "EADDRINUSE");
++#endif
++#ifdef EADDRNOTAVAIL
++ TEST_COMPARE_STRING (strerrordesc_np (EADDRNOTAVAIL),
++ "Cannot assign requested address");
++ TEST_COMPARE_STRING (strerrorname_np (EADDRNOTAVAIL), "EADDRNOTAVAIL");
++#endif
++#ifdef ENETDOWN
++ TEST_COMPARE_STRING (strerrordesc_np (ENETDOWN), "Network is down");
++ TEST_COMPARE_STRING (strerrorname_np (ENETDOWN), "ENETDOWN");
++#endif
++#ifdef ENETUNREACH
++ TEST_COMPARE_STRING (strerrordesc_np (ENETUNREACH),
++ "Network is unreachable");
++ TEST_COMPARE_STRING (strerrorname_np (ENETUNREACH), "ENETUNREACH");
++#endif
++#ifdef ENETRESET
++ TEST_COMPARE_STRING (strerrordesc_np (ENETRESET),
++ "Network dropped connection on reset");
++ TEST_COMPARE_STRING (strerrorname_np (ENETRESET), "ENETRESET");
++#endif
++#ifdef ECONNABORTED
++ TEST_COMPARE_STRING (strerrordesc_np (ECONNABORTED),
++ "Software caused connection abort");
++ TEST_COMPARE_STRING (strerrorname_np (ECONNABORTED), "ECONNABORTED");
++#endif
++#ifdef ECONNRESET
++ TEST_COMPARE_STRING (strerrordesc_np (ECONNRESET),
++ "Connection reset by peer");
++ TEST_COMPARE_STRING (strerrorname_np (ECONNRESET), "ECONNRESET");
++#endif
++#ifdef ENOBUFS
++ TEST_COMPARE_STRING (strerrordesc_np (ENOBUFS),
++ "No buffer space available");
++ TEST_COMPARE_STRING (strerrorname_np (ENOBUFS), "ENOBUFS");
++#endif
++#ifdef EISCONN
++ TEST_COMPARE_STRING (strerrordesc_np (EISCONN),
++ "Transport endpoint is already connected");
++ TEST_COMPARE_STRING (strerrorname_np (EISCONN), "EISCONN");
++#endif
++#ifdef ENOTCONN
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTCONN),
++ "Transport endpoint is not connected");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTCONN), "ENOTCONN");
++#endif
++#ifdef EDESTADDRREQ
++ TEST_COMPARE_STRING (strerrordesc_np (EDESTADDRREQ),
++ "Destination address required");
++ TEST_COMPARE_STRING (strerrorname_np (EDESTADDRREQ), "EDESTADDRREQ");
++#endif
++#ifdef ESHUTDOWN
++ TEST_COMPARE_STRING (strerrordesc_np (ESHUTDOWN),
++ "Cannot send after transport endpoint shutdown");
++ TEST_COMPARE_STRING (strerrorname_np (ESHUTDOWN), "ESHUTDOWN");
++#endif
++#ifdef ETOOMANYREFS
++ TEST_COMPARE_STRING (strerrordesc_np (ETOOMANYREFS),
++ "Too many references: cannot splice");
++ TEST_COMPARE_STRING (strerrorname_np (ETOOMANYREFS), "ETOOMANYREFS");
++#endif
++#ifdef ETIMEDOUT
++ TEST_COMPARE_STRING (strerrordesc_np (ETIMEDOUT), "Connection timed out");
++ TEST_COMPARE_STRING (strerrorname_np (ETIMEDOUT), "ETIMEDOUT");
++#endif
++#ifdef ECONNREFUSED
++ TEST_COMPARE_STRING (strerrordesc_np (ECONNREFUSED), "Connection refused");
++ TEST_COMPARE_STRING (strerrorname_np (ECONNREFUSED), "ECONNREFUSED");
++#endif
++#ifdef ELOOP
++ TEST_COMPARE_STRING (strerrordesc_np (ELOOP),
++ "Too many levels of symbolic links");
++ TEST_COMPARE_STRING (strerrorname_np (ELOOP), "ELOOP");
++#endif
++#ifdef ENAMETOOLONG
++ TEST_COMPARE_STRING (strerrordesc_np (ENAMETOOLONG), "File name too long");
++ TEST_COMPARE_STRING (strerrorname_np (ENAMETOOLONG), "ENAMETOOLONG");
++#endif
++#ifdef EHOSTDOWN
++ TEST_COMPARE_STRING (strerrordesc_np (EHOSTDOWN), "Host is down");
++ TEST_COMPARE_STRING (strerrorname_np (EHOSTDOWN), "EHOSTDOWN");
++#endif
++#ifdef EHOSTUNREACH
++ TEST_COMPARE_STRING (strerrordesc_np (EHOSTUNREACH), "No route to host");
++ TEST_COMPARE_STRING (strerrorname_np (EHOSTUNREACH), "EHOSTUNREACH");
++#endif
++#ifdef ENOTEMPTY
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTEMPTY), "Directory not empty");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTEMPTY), "ENOTEMPTY");
++#endif
++#ifdef EUSERS
++ TEST_COMPARE_STRING (strerrordesc_np (EUSERS), "Too many users");
++ TEST_COMPARE_STRING (strerrorname_np (EUSERS), "EUSERS");
++#endif
++#ifdef EDQUOT
++ TEST_COMPARE_STRING (strerrordesc_np (EDQUOT), "Disk quota exceeded");
++ TEST_COMPARE_STRING (strerrorname_np (EDQUOT), "EDQUOT");
++#endif
++#ifdef ESTALE
++ TEST_COMPARE_STRING (strerrordesc_np (ESTALE), "Stale file handle");
++ TEST_COMPARE_STRING (strerrorname_np (ESTALE), "ESTALE");
++#endif
++#ifdef EREMOTE
++ TEST_COMPARE_STRING (strerrordesc_np (EREMOTE), "Object is remote");
++ TEST_COMPARE_STRING (strerrorname_np (EREMOTE), "EREMOTE");
++#endif
++#ifdef ENOLCK
++ TEST_COMPARE_STRING (strerrordesc_np (ENOLCK), "No locks available");
++ TEST_COMPARE_STRING (strerrorname_np (ENOLCK), "ENOLCK");
++#endif
++#ifdef ENOSYS
++ TEST_COMPARE_STRING (strerrordesc_np (ENOSYS), "Function not implemented");
++ TEST_COMPARE_STRING (strerrorname_np (ENOSYS), "ENOSYS");
++#endif
++#ifdef EILSEQ
++ TEST_COMPARE_STRING (strerrordesc_np (EILSEQ),
++ "Invalid or incomplete multibyte or wide character");
++ TEST_COMPARE_STRING (strerrorname_np (EILSEQ), "EILSEQ");
++#endif
++#ifdef EBADMSG
++ TEST_COMPARE_STRING (strerrordesc_np (EBADMSG), "Bad message");
++ TEST_COMPARE_STRING (strerrorname_np (EBADMSG), "EBADMSG");
++#endif
++#ifdef EIDRM
++ TEST_COMPARE_STRING (strerrordesc_np (EIDRM), "Identifier removed");
++ TEST_COMPARE_STRING (strerrorname_np (EIDRM), "EIDRM");
++#endif
++#ifdef EMULTIHOP
++ TEST_COMPARE_STRING (strerrordesc_np (EMULTIHOP), "Multihop attempted");
++ TEST_COMPARE_STRING (strerrorname_np (EMULTIHOP), "EMULTIHOP");
++#endif
++#ifdef ENODATA
++ TEST_COMPARE_STRING (strerrordesc_np (ENODATA), "No data available");
++ TEST_COMPARE_STRING (strerrorname_np (ENODATA), "ENODATA");
++#endif
++#ifdef ENOLINK
++ TEST_COMPARE_STRING (strerrordesc_np (ENOLINK), "Link has been severed");
++ TEST_COMPARE_STRING (strerrorname_np (ENOLINK), "ENOLINK");
++#endif
++#ifdef ENOMSG
++ TEST_COMPARE_STRING (strerrordesc_np (ENOMSG),
++ "No message of desired type");
++ TEST_COMPARE_STRING (strerrorname_np (ENOMSG), "ENOMSG");
++#endif
++#ifdef ENOSR
++ TEST_COMPARE_STRING (strerrordesc_np (ENOSR), "Out of streams resources");
++ TEST_COMPARE_STRING (strerrorname_np (ENOSR), "ENOSR");
++#endif
++#ifdef ENOSTR
++ TEST_COMPARE_STRING (strerrordesc_np (ENOSTR), "Device not a stream");
++ TEST_COMPARE_STRING (strerrorname_np (ENOSTR), "ENOSTR");
++#endif
++#ifdef EOVERFLOW
++ TEST_COMPARE_STRING (strerrordesc_np (EOVERFLOW),
++ "Value too large for defined data type");
++ TEST_COMPARE_STRING (strerrorname_np (EOVERFLOW), "EOVERFLOW");
++#endif
++#ifdef EPROTO
++ TEST_COMPARE_STRING (strerrordesc_np (EPROTO), "Protocol error");
++ TEST_COMPARE_STRING (strerrorname_np (EPROTO), "EPROTO");
++#endif
++#ifdef ETIME
++ TEST_COMPARE_STRING (strerrordesc_np (ETIME), "Timer expired");
++ TEST_COMPARE_STRING (strerrorname_np (ETIME), "ETIME");
++#endif
++#ifdef ECANCELED
++ TEST_COMPARE_STRING (strerrordesc_np (ECANCELED), "Operation canceled");
++ TEST_COMPARE_STRING (strerrorname_np (ECANCELED), "ECANCELED");
++#endif
++#ifdef EOWNERDEAD
++ TEST_COMPARE_STRING (strerrordesc_np (EOWNERDEAD), "Owner died");
++ TEST_COMPARE_STRING (strerrorname_np (EOWNERDEAD), "EOWNERDEAD");
++#endif
++#ifdef ENOTRECOVERABLE
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTRECOVERABLE),
++ "State not recoverable");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTRECOVERABLE), "ENOTRECOVERABLE");
++#endif
++#ifdef ERESTART
++ TEST_COMPARE_STRING (strerrordesc_np (ERESTART),
++ "Interrupted system call should be restarted");
++ TEST_COMPARE_STRING (strerrorname_np (ERESTART), "ERESTART");
++#endif
++#ifdef ECHRNG
++ TEST_COMPARE_STRING (strerrordesc_np (ECHRNG),
++ "Channel number out of range");
++ TEST_COMPARE_STRING (strerrorname_np (ECHRNG), "ECHRNG");
++#endif
++#ifdef EL2NSYNC
++ TEST_COMPARE_STRING (strerrordesc_np (EL2NSYNC),
++ "Level 2 not synchronized");
++ TEST_COMPARE_STRING (strerrorname_np (EL2NSYNC), "EL2NSYNC");
++#endif
++#ifdef EL3HLT
++ TEST_COMPARE_STRING (strerrordesc_np (EL3HLT), "Level 3 halted");
++ TEST_COMPARE_STRING (strerrorname_np (EL3HLT), "EL3HLT");
++#endif
++#ifdef EL3RST
++ TEST_COMPARE_STRING (strerrordesc_np (EL3RST), "Level 3 reset");
++ TEST_COMPARE_STRING (strerrorname_np (EL3RST), "EL3RST");
++#endif
++#ifdef ELNRNG
++ TEST_COMPARE_STRING (strerrordesc_np (ELNRNG), "Link number out of range");
++ TEST_COMPARE_STRING (strerrorname_np (ELNRNG), "ELNRNG");
++#endif
++#ifdef EUNATCH
++ TEST_COMPARE_STRING (strerrordesc_np (EUNATCH),
++ "Protocol driver not attached");
++ TEST_COMPARE_STRING (strerrorname_np (EUNATCH), "EUNATCH");
++#endif
++#ifdef ENOCSI
++ TEST_COMPARE_STRING (strerrordesc_np (ENOCSI),
++ "No CSI structure available");
++ TEST_COMPARE_STRING (strerrorname_np (ENOCSI), "ENOCSI");
++#endif
++#ifdef EL2HLT
++ TEST_COMPARE_STRING (strerrordesc_np (EL2HLT), "Level 2 halted");
++ TEST_COMPARE_STRING (strerrorname_np (EL2HLT), "EL2HLT");
++#endif
++#ifdef EBADE
++ TEST_COMPARE_STRING (strerrordesc_np (EBADE), "Invalid exchange");
++ TEST_COMPARE_STRING (strerrorname_np (EBADE), "EBADE");
++#endif
++#ifdef EBADR
++ TEST_COMPARE_STRING (strerrordesc_np (EBADR),
++ "Invalid request descriptor");
++ TEST_COMPARE_STRING (strerrorname_np (EBADR), "EBADR");
++#endif
++#ifdef EXFULL
++ TEST_COMPARE_STRING (strerrordesc_np (EXFULL), "Exchange full");
++ TEST_COMPARE_STRING (strerrorname_np (EXFULL), "EXFULL");
++#endif
++#ifdef ENOANO
++ TEST_COMPARE_STRING (strerrordesc_np (ENOANO), "No anode");
++ TEST_COMPARE_STRING (strerrorname_np (ENOANO), "ENOANO");
++#endif
++#ifdef EBADRQC
++ TEST_COMPARE_STRING (strerrordesc_np (EBADRQC), "Invalid request code");
++ TEST_COMPARE_STRING (strerrorname_np (EBADRQC), "EBADRQC");
++#endif
++#ifdef EBADSLT
++ TEST_COMPARE_STRING (strerrordesc_np (EBADSLT), "Invalid slot");
++ TEST_COMPARE_STRING (strerrorname_np (EBADSLT), "EBADSLT");
++#endif
++#ifdef EBFONT
++ TEST_COMPARE_STRING (strerrordesc_np (EBFONT), "Bad font file format");
++ TEST_COMPARE_STRING (strerrorname_np (EBFONT), "EBFONT");
++#endif
++#ifdef ENONET
++ TEST_COMPARE_STRING (strerrordesc_np (ENONET),
++ "Machine is not on the network");
++ TEST_COMPARE_STRING (strerrorname_np (ENONET), "ENONET");
++#endif
++#ifdef ENOPKG
++ TEST_COMPARE_STRING (strerrordesc_np (ENOPKG), "Package not installed");
++ TEST_COMPARE_STRING (strerrorname_np (ENOPKG), "ENOPKG");
++#endif
++#ifdef EADV
++ TEST_COMPARE_STRING (strerrordesc_np (EADV), "Advertise error");
++ TEST_COMPARE_STRING (strerrorname_np (EADV), "EADV");
++#endif
++#ifdef ESRMNT
++ TEST_COMPARE_STRING (strerrordesc_np (ESRMNT), "Srmount error");
++ TEST_COMPARE_STRING (strerrorname_np (ESRMNT), "ESRMNT");
++#endif
++#ifdef ECOMM
++ TEST_COMPARE_STRING (strerrordesc_np (ECOMM),
++ "Communication error on send");
++ TEST_COMPARE_STRING (strerrorname_np (ECOMM), "ECOMM");
++#endif
++#ifdef EDOTDOT
++ TEST_COMPARE_STRING (strerrordesc_np (EDOTDOT), "RFS specific error");
++ TEST_COMPARE_STRING (strerrorname_np (EDOTDOT), "EDOTDOT");
++#endif
++#ifdef ENOTUNIQ
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTUNIQ),
++ "Name not unique on network");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTUNIQ), "ENOTUNIQ");
++#endif
++#ifdef EBADFD
++ TEST_COMPARE_STRING (strerrordesc_np (EBADFD),
++ "File descriptor in bad state");
++ TEST_COMPARE_STRING (strerrorname_np (EBADFD), "EBADFD");
++#endif
++#ifdef EREMCHG
++ TEST_COMPARE_STRING (strerrordesc_np (EREMCHG), "Remote address changed");
++ TEST_COMPARE_STRING (strerrorname_np (EREMCHG), "EREMCHG");
++#endif
++#ifdef ELIBACC
++ TEST_COMPARE_STRING (strerrordesc_np (ELIBACC),
++ "Can not access a needed shared library");
++ TEST_COMPARE_STRING (strerrorname_np (ELIBACC), "ELIBACC");
++#endif
++#ifdef ELIBBAD
++ TEST_COMPARE_STRING (strerrordesc_np (ELIBBAD),
++ "Accessing a corrupted shared library");
++ TEST_COMPARE_STRING (strerrorname_np (ELIBBAD), "ELIBBAD");
++#endif
++#ifdef ELIBSCN
++ TEST_COMPARE_STRING (strerrordesc_np (ELIBSCN),
++ ".lib section in a.out corrupted");
++ TEST_COMPARE_STRING (strerrorname_np (ELIBSCN), "ELIBSCN");
++#endif
++#ifdef ELIBMAX
++ TEST_COMPARE_STRING (strerrordesc_np (ELIBMAX),
++ "Attempting to link in too many shared libraries");
++ TEST_COMPARE_STRING (strerrorname_np (ELIBMAX), "ELIBMAX");
++#endif
++#ifdef ELIBEXEC
++ TEST_COMPARE_STRING (strerrordesc_np (ELIBEXEC),
++ "Cannot exec a shared library directly");
++ TEST_COMPARE_STRING (strerrorname_np (ELIBEXEC), "ELIBEXEC");
++#endif
++#ifdef ESTRPIPE
++ TEST_COMPARE_STRING (strerrordesc_np (ESTRPIPE), "Streams pipe error");
++ TEST_COMPARE_STRING (strerrorname_np (ESTRPIPE), "ESTRPIPE");
++#endif
++#ifdef EUCLEAN
++ TEST_COMPARE_STRING (strerrordesc_np (EUCLEAN),
++ "Structure needs cleaning");
++ TEST_COMPARE_STRING (strerrorname_np (EUCLEAN), "EUCLEAN");
++#endif
++#ifdef ENOTNAM
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTNAM),
++ "Not a XENIX named type file");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTNAM), "ENOTNAM");
++#endif
++#ifdef ENAVAIL
++ TEST_COMPARE_STRING (strerrordesc_np (ENAVAIL),
++ "No XENIX semaphores available");
++ TEST_COMPARE_STRING (strerrorname_np (ENAVAIL), "ENAVAIL");
++#endif
++#ifdef EISNAM
++ TEST_COMPARE_STRING (strerrordesc_np (EISNAM), "Is a named type file");
++ TEST_COMPARE_STRING (strerrorname_np (EISNAM), "EISNAM");
++#endif
++#ifdef EREMOTEIO
++ TEST_COMPARE_STRING (strerrordesc_np (EREMOTEIO), "Remote I/O error");
++ TEST_COMPARE_STRING (strerrorname_np (EREMOTEIO), "EREMOTEIO");
++#endif
++#ifdef ENOMEDIUM
++ TEST_COMPARE_STRING (strerrordesc_np (ENOMEDIUM), "No medium found");
++ TEST_COMPARE_STRING (strerrorname_np (ENOMEDIUM), "ENOMEDIUM");
++#endif
++#ifdef EMEDIUMTYPE
++ TEST_COMPARE_STRING (strerrordesc_np (EMEDIUMTYPE), "Wrong medium type");
++ TEST_COMPARE_STRING (strerrorname_np (EMEDIUMTYPE), "EMEDIUMTYPE");
++#endif
++#ifdef ENOKEY
++ TEST_COMPARE_STRING (strerrordesc_np (ENOKEY),
++ "Required key not available");
++ TEST_COMPARE_STRING (strerrorname_np (ENOKEY), "ENOKEY");
++#endif
++#ifdef EKEYEXPIRED
++ TEST_COMPARE_STRING (strerrordesc_np (EKEYEXPIRED), "Key has expired");
++ TEST_COMPARE_STRING (strerrorname_np (EKEYEXPIRED), "EKEYEXPIRED");
++#endif
++#ifdef EKEYREVOKED
++ TEST_COMPARE_STRING (strerrordesc_np (EKEYREVOKED),
++ "Key has been revoked");
++ TEST_COMPARE_STRING (strerrorname_np (EKEYREVOKED), "EKEYREVOKED");
++#endif
++#ifdef EKEYREJECTED
++ TEST_COMPARE_STRING (strerrordesc_np (EKEYREJECTED),
++ "Key was rejected by service");
++ TEST_COMPARE_STRING (strerrorname_np (EKEYREJECTED), "EKEYREJECTED");
++#endif
++#ifdef ERFKILL
++ TEST_COMPARE_STRING (strerrordesc_np (ERFKILL),
++ "Operation not possible due to RF-kill");
++ TEST_COMPARE_STRING (strerrorname_np (ERFKILL), "ERFKILL");
++#endif
++#ifdef EHWPOISON
++ TEST_COMPARE_STRING (strerrordesc_np (EHWPOISON),
++ "Memory page has hardware error");
++ TEST_COMPARE_STRING (strerrorname_np (EHWPOISON), "EHWPOISON");
++#endif
++#ifdef EBADRPC
++ TEST_COMPARE_STRING (strerrordesc_np (EBADRPC), "RPC struct is bad");
++ TEST_COMPARE_STRING (strerrorname_np (EBADRPC), "EBADRPC");
++#endif
++#ifdef EFTYPE
++ TEST_COMPARE_STRING (strerrordesc_np (EFTYPE),
++ "Inappropriate file type or format");
++ TEST_COMPARE_STRING (strerrorname_np (EFTYPE), "EFTYPE");
++#endif
++#ifdef EPROCUNAVAIL
++ TEST_COMPARE_STRING (strerrordesc_np (EPROCUNAVAIL),
++ "RPC bad procedure for program");
++ TEST_COMPARE_STRING (strerrorname_np (EPROCUNAVAIL), "EPROCUNAVAIL");
++#endif
++#ifdef EAUTH
++ TEST_COMPARE_STRING (strerrordesc_np (EAUTH), "Authentication error");
++ TEST_COMPARE_STRING (strerrorname_np (EAUTH), "EAUTH");
++#endif
++#ifdef EDIED
++ TEST_COMPARE_STRING (strerrordesc_np (EDIED), "Translator died");
++ TEST_COMPARE_STRING (strerrorname_np (EDIED), "EDIED");
++#endif
++#ifdef ERPCMISMATCH
++ TEST_COMPARE_STRING (strerrordesc_np (ERPCMISMATCH), "RPC version wrong");
++ TEST_COMPARE_STRING (strerrorname_np (ERPCMISMATCH), "ERPCMISMATCH");
++#endif
++#ifdef EGREGIOUS
++ TEST_COMPARE_STRING (strerrordesc_np (EGREGIOUS),
++ "You really blew it this time");
++ TEST_COMPARE_STRING (strerrorname_np (EGREGIOUS), "EGREGIOUS");
++#endif
++#ifdef EPROCLIM
++ TEST_COMPARE_STRING (strerrordesc_np (EPROCLIM), "Too many processes");
++ TEST_COMPARE_STRING (strerrorname_np (EPROCLIM), "EPROCLIM");
++#endif
++#ifdef EGRATUITOUS
++ TEST_COMPARE_STRING (strerrordesc_np (EGRATUITOUS), "Gratuitous error");
++ TEST_COMPARE_STRING (strerrorname_np (EGRATUITOUS), "EGRATUITOUS");
++#endif
++#if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
++ TEST_COMPARE_STRING (strerrordesc_np (ENOTSUP), "Not supported");
++ TEST_COMPARE_STRING (strerrorname_np (ENOTSUP), "ENOTSUP");
++#endif
++#ifdef EPROGMISMATCH
++ TEST_COMPARE_STRING (strerrordesc_np (EPROGMISMATCH),
++ "RPC program version wrong");
++ TEST_COMPARE_STRING (strerrorname_np (EPROGMISMATCH), "EPROGMISMATCH");
++#endif
++#ifdef EBACKGROUND
++ TEST_COMPARE_STRING (strerrordesc_np (EBACKGROUND),
++ "Inappropriate operation for background process");
++ TEST_COMPARE_STRING (strerrorname_np (EBACKGROUND), "EBACKGROUND");
++#endif
++#ifdef EIEIO
++ TEST_COMPARE_STRING (strerrordesc_np (EIEIO), "Computer bought the farm");
++ TEST_COMPARE_STRING (strerrorname_np (EIEIO), "EIEIO");
++#endif
++#if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
++ TEST_COMPARE_STRING (strerrordesc_np (EWOULDBLOCK),
++ "Operation would block");
++ TEST_COMPARE_STRING (strerrorname_np (EWOULDBLOCK), "EWOULDBLOCK");
++#endif
++#ifdef ENEEDAUTH
++ TEST_COMPARE_STRING (strerrordesc_np (ENEEDAUTH), "Need authenticator");
++ TEST_COMPARE_STRING (strerrorname_np (ENEEDAUTH), "ENEEDAUTH");
++#endif
++#ifdef ED
++ TEST_COMPARE_STRING (strerrordesc_np (ED), "?");
++ TEST_COMPARE_STRING (strerrorname_np (ED), "ED");
++#endif
++#ifdef EPROGUNAVAIL
++ TEST_COMPARE_STRING (strerrordesc_np (EPROGUNAVAIL),
++ "RPC program not available");
++ TEST_COMPARE_STRING (strerrorname_np (EPROGUNAVAIL), "EPROGUNAVAIL");
++#endif
+
+ return 0;
+ }
+diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c
+index 95b46dcbeb..3a323547f9 100644
+--- a/stdio-common/vfscanf-internal.c
++++ b/stdio-common/vfscanf-internal.c
+@@ -277,7 +277,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+ #endif
+ {
+ va_list arg;
+- const CHAR_T *f = format;
++ const UCHAR_T *f = (const UCHAR_T *) format;
+ UCHAR_T fc; /* Current character of the format. */
+ WINT_T done = 0; /* Assignments done. */
+ size_t read_in = 0; /* Chars read in. */
+@@ -415,10 +415,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+ #endif
+
+ #ifndef COMPILE_WSCANF
+- if (!isascii ((unsigned char) *f))
++ if (!isascii (*f))
+ {
+ /* Non-ASCII, may be a multibyte. */
+- int len = __mbrlen (f, strlen (f), &state);
++ int len = __mbrlen ((const char *) f, strlen ((const char *) f),
++ &state);
+ if (len > 0)
+ {
+ do
+@@ -426,7 +427,7 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+ c = inchar ();
+ if (__glibc_unlikely (c == EOF))
+ input_error ();
+- else if (c != (unsigned char) *f++)
++ else if (c != *f++)
+ {
+ ungetc_not_eof (c, s);
+ conv_error ();
+@@ -484,9 +485,9 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+ char_buffer_rewind (&charbuf);
+
+ /* Check for a positional parameter specification. */
+- if (ISDIGIT ((UCHAR_T) *f))
++ if (ISDIGIT (*f))
+ {
+- argpos = read_int ((const UCHAR_T **) &f);
++ argpos = read_int (&f);
+ if (*f == L_('$'))
+ ++f;
+ else
+@@ -521,8 +522,8 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+
+ /* Find the maximum field width. */
+ width = 0;
+- if (ISDIGIT ((UCHAR_T) *f))
+- width = read_int ((const UCHAR_T **) &f);
++ if (ISDIGIT (*f))
++ width = read_int (&f);
+ got_width:
+ if (width == 0)
+ width = -1;
+@@ -2522,12 +2523,11 @@ __vfscanf_internal (FILE *s, const char *format, va_list argptr,
+ }
+
+ while ((fc = *f++) != '\0' && fc != ']')
+- if (fc == '-' && *f != '\0' && *f != ']'
+- && (unsigned char) f[-2] <= (unsigned char) *f)
++ if (fc == '-' && *f != '\0' && *f != ']' && f[-2] <= *f)
+ {
+ /* Add all characters from the one before the '-'
+ up to (but not including) the next format char. */
+- for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
++ for (fc = f[-2]; fc < *f; ++fc)
+ ((char *)charbuf.scratch.data)[fc] = 1;
+ }
+ else
+diff --git a/stdlib/tst-secure-getenv.c b/stdlib/tst-secure-getenv.c
+index 3cfe9a05c3..d4b1139c5e 100644
+--- a/stdlib/tst-secure-getenv.c
++++ b/stdlib/tst-secure-getenv.c
+@@ -30,167 +30,12 @@
+ #include <sys/wait.h>
+ #include <unistd.h>
+
++#include <support/check.h>
+ #include <support/support.h>
++#include <support/capture_subprocess.h>
+ #include <support/test-driver.h>
+
+ static char MAGIC_ARGUMENT[] = "run-actual-test";
+-#define MAGIC_STATUS 19
+-
+-/* Return a GID which is not our current GID, but is present in the
+- supplementary group list. */
+-static gid_t
+-choose_gid (void)
+-{
+- int count = getgroups (0, NULL);
+- if (count < 0)
+- {
+- printf ("getgroups: %m\n");
+- exit (1);
+- }
+- gid_t *groups;
+- groups = xcalloc (count, sizeof (*groups));
+- int ret = getgroups (count, groups);
+- if (ret < 0)
+- {
+- printf ("getgroups: %m\n");
+- exit (1);
+- }
+- gid_t current = getgid ();
+- gid_t not_current = 0;
+- for (int i = 0; i < ret; ++i)
+- {
+- if (groups[i] != current)
+- {
+- not_current = groups[i];
+- break;
+- }
+- }
+- free (groups);
+- return not_current;
+-}
+-
+-
+-/* Copies the executable into a restricted directory, so that we can
+- safely make it SGID with the TARGET group ID. Then runs the
+- executable. */
+-static int
+-run_executable_sgid (gid_t target)
+-{
+- char *dirname = xasprintf ("%s/secure-getenv.%jd",
+- test_dir, (intmax_t) getpid ());
+- char *execname = xasprintf ("%s/bin", dirname);
+- int infd = -1;
+- int outfd = -1;
+- int ret = -1;
+- if (mkdir (dirname, 0700) < 0)
+- {
+- printf ("mkdir: %m\n");
+- goto err;
+- }
+- infd = open ("/proc/self/exe", O_RDONLY);
+- if (infd < 0)
+- {
+- printf ("open (/proc/self/exe): %m\n");
+- goto err;
+- }
+- outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
+- if (outfd < 0)
+- {
+- printf ("open (%s): %m\n", execname);
+- goto err;
+- }
+- char buf[4096];
+- for (;;)
+- {
+- ssize_t rdcount = read (infd, buf, sizeof (buf));
+- if (rdcount < 0)
+- {
+- printf ("read: %m\n");
+- goto err;
+- }
+- if (rdcount == 0)
+- break;
+- char *p = buf;
+- char *end = buf + rdcount;
+- while (p != end)
+- {
+- ssize_t wrcount = write (outfd, buf, end - p);
+- if (wrcount == 0)
+- errno = ENOSPC;
+- if (wrcount <= 0)
+- {
+- printf ("write: %m\n");
+- goto err;
+- }
+- p += wrcount;
+- }
+- }
+- if (fchown (outfd, getuid (), target) < 0)
+- {
+- printf ("fchown (%s): %m\n", execname);
+- goto err;
+- }
+- if (fchmod (outfd, 02750) < 0)
+- {
+- printf ("fchmod (%s): %m\n", execname);
+- goto err;
+- }
+- if (close (outfd) < 0)
+- {
+- printf ("close (outfd): %m\n");
+- goto err;
+- }
+- if (close (infd) < 0)
+- {
+- printf ("close (infd): %m\n");
+- goto err;
+- }
+-
+- int kid = fork ();
+- if (kid < 0)
+- {
+- printf ("fork: %m\n");
+- goto err;
+- }
+- if (kid == 0)
+- {
+- /* Child process. */
+- char *args[] = { execname, MAGIC_ARGUMENT, NULL };
+- execve (execname, args, environ);
+- printf ("execve (%s): %m\n", execname);
+- _exit (1);
+- }
+- int status;
+- if (waitpid (kid, &status, 0) < 0)
+- {
+- printf ("waitpid: %m\n");
+- goto err;
+- }
+- if (!WIFEXITED (status) || WEXITSTATUS (status) != MAGIC_STATUS)
+- {
+- printf ("Unexpected exit status %d from child process\n",
+- status);
+- goto err;
+- }
+- ret = 0;
+-
+-err:
+- if (outfd >= 0)
+- close (outfd);
+- if (infd >= 0)
+- close (infd);
+- if (execname)
+- {
+- unlink (execname);
+- free (execname);
+- }
+- if (dirname)
+- {
+- rmdir (dirname);
+- free (dirname);
+- }
+- return ret;
+-}
+
+ static int
+ do_test (void)
+@@ -212,15 +57,15 @@ do_test (void)
+ exit (1);
+ }
+
+- gid_t target = choose_gid ();
+- if (target == 0)
+- {
+- fprintf (stderr,
+- "Could not find a suitable GID for user %jd, skipping test\n",
+- (intmax_t) getuid ());
+- exit (0);
+- }
+- return run_executable_sgid (target);
++ int status = support_capture_subprogram_self_sgid (MAGIC_ARGUMENT);
++ /* WEXITSTATUS is only meaningful if WIFEXITED holds, so check that first. */
++ if (!WIFEXITED (status))
++ FAIL_EXIT1 ("Unexpected exit status %d from child process\n", status);
++ if (WEXITSTATUS (status) == EXIT_UNSUPPORTED)
++ return EXIT_UNSUPPORTED;
++ /* The child exits with 3 or 4 when a getenv check fails; report that. */
++ TEST_COMPARE (WEXITSTATUS (status), 0);
++ return 0;
+ }
+
+ static void
+@@ -229,23 +74,15 @@ alternative_main (int argc, char **argv)
+ if (argc == 2 && strcmp (argv[1], MAGIC_ARGUMENT) == 0)
+ {
+ if (getgid () == getegid ())
+- {
+- /* This can happen if the file system is mounted nosuid. */
+- fprintf (stderr, "SGID failed: GID and EGID match (%jd)\n",
+- (intmax_t) getgid ());
+- exit (MAGIC_STATUS);
+- }
++ /* This can happen if the file system is mounted nosuid. */
++ FAIL_UNSUPPORTED ("SGID failed: GID and EGID match (%jd)\n",
++ (intmax_t) getgid ());
+ if (getenv ("PATH") == NULL)
+- {
+- printf ("PATH variable not present\n");
+- exit (3);
+- }
++ FAIL_EXIT (3, "PATH variable not present\n");
+ if (secure_getenv ("PATH") != NULL)
+- {
+- printf ("PATH variable not filtered out\n");
+- exit (4);
+- }
+- exit (MAGIC_STATUS);
++ FAIL_EXIT (4, "PATH variable not filtered out\n");
++
++ exit (EXIT_SUCCESS);
+ }
+ }
+
+diff --git a/string/bits/string_fortified.h b/string/bits/string_fortified.h
+index 309d0f39b2..c8d3051af8 100644
+--- a/string/bits/string_fortified.h
++++ b/string/bits/string_fortified.h
+@@ -22,11 +22,6 @@
+ # error "Never use <bits/string_fortified.h> directly; include <string.h> instead."
+ #endif
+
+-#if !__GNUC_PREREQ (5,0)
+-__warndecl (__warn_memset_zero_len,
+- "memset used with constant zero length parameter; this could be due to transposed parameters");
+-#endif
+-
+ __fortify_function void *
+ __NTH (memcpy (void *__restrict __dest, const void *__restrict __src,
+ size_t __len))
+@@ -58,16 +53,6 @@ __NTH (mempcpy (void *__restrict __dest, const void *__restrict __src,
+ __fortify_function void *
+ __NTH (memset (void *__dest, int __ch, size_t __len))
+ {
+- /* GCC-5.0 and newer implements these checks in the compiler, so we don't
+- need them here. */
+-#if !__GNUC_PREREQ (5,0)
+- if (__builtin_constant_p (__len) && __len == 0
+- && (!__builtin_constant_p (__ch) || __ch != 0))
+- {
+- __warn_memset_zero_len ();
+- return __dest;
+- }
+-#endif
+ return __builtin___memset_chk (__dest, __ch, __len, __bos0 (__dest));
+ }
+
+diff --git a/string/test-memchr.c b/string/test-memchr.c
+index 5dd0aa5470..de70e794d9 100644
+--- a/string/test-memchr.c
++++ b/string/test-memchr.c
+@@ -65,8 +65,8 @@ do_one_test (impl_t *impl, const CHAR *s, int c, size_t n, CHAR *exp_res)
+ CHAR *res = CALL (impl, s, c, n);
+ if (res != exp_res)
+ {
+- error (0, 0, "Wrong result in function %s %p %p", impl->name,
+- res, exp_res);
++ error (0, 0, "Wrong result in function %s (%p, %d, %zu) -> %p != %p",
++ impl->name, s, c, n, res, exp_res);
+ ret = 1;
+ return;
+ }
+@@ -91,7 +91,7 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
+ }
+ buf[align + len] = 0;
+
+- if (pos < len)
++ if (pos < MIN(n, len))
+ {
+ buf[align + pos] = seek_char;
+ buf[align + len] = -seek_char;
+@@ -107,6 +107,38 @@ do_test (size_t align, size_t pos, size_t len, size_t n, int seek_char)
+ do_one_test (impl, (CHAR *) (buf + align), seek_char, n, result);
+ }
+
++static void
++do_overflow_tests (void)
++{
++ size_t i, j, len;
++ const size_t one = 1; /* A size_t 1, so that `one << j' is a size_t shift. */
++ uintptr_t buf_addr = (uintptr_t) buf1;
++ /* Run do_test with huge N, including N for which buf_addr + N wraps around. */
++ for (i = 0; i < 750; ++i)
++ {
++ do_test (0, i, 751, SIZE_MAX - i, BIG_CHAR);
++ do_test (0, i, 751, i - buf_addr, BIG_CHAR); /* buf_addr + N == i (modular). */
++ do_test (0, i, 751, -buf_addr - i, BIG_CHAR); /* buf_addr + N == -i (modular). */
++ do_test (0, i, 751, SIZE_MAX - buf_addr - i, BIG_CHAR);
++ do_test (0, i, 751, SIZE_MAX - buf_addr + i, BIG_CHAR);
++ /* LEN gains one more high bit per pass: top bit, top two bits, and so on. */
++ len = 0;
++ for (j = 8 * sizeof(size_t) - 1; j ; --j)
++ {
++ len |= one << j;
++ do_test (0, i, 751, len - i, BIG_CHAR);
++ do_test (0, i, 751, len + i, BIG_CHAR);
++ do_test (0, i, 751, len - buf_addr - i, BIG_CHAR);
++ do_test (0, i, 751, len - buf_addr + i, BIG_CHAR);
++ /* ~LEN is the complementary mask made of the remaining low bits. */
++ do_test (0, i, 751, ~len - i, BIG_CHAR);
++ do_test (0, i, 751, ~len + i, BIG_CHAR);
++ do_test (0, i, 751, ~len - buf_addr - i, BIG_CHAR);
++ do_test (0, i, 751, ~len - buf_addr + i, BIG_CHAR);
++ }
++ }
++}
++
+ static void
+ do_random_tests (void)
+ {
+@@ -221,6 +253,7 @@ test_main (void)
+ do_test (page_size / 2 - i, i, i, 1, 0x9B);
+
+ do_random_tests ();
++ do_overflow_tests ();
+ return ret;
+ }
+
+diff --git a/string/test-strncat.c b/string/test-strncat.c
+index abbacb95c6..0c7f68d086 100644
+--- a/string/test-strncat.c
++++ b/string/test-strncat.c
+@@ -134,6 +134,66 @@ do_test (size_t align1, size_t align2, size_t len1, size_t len2,
+ }
+ }
+
++static void
++do_overflow_tests (void)
++{
++ size_t i, j, len;
++ const size_t one = 1; /* A size_t 1, so that `one << j' is a size_t shift. */
++ CHAR *s1, *s2;
++ uintptr_t s1_addr;
++ s1 = (CHAR *) buf1;
++ s2 = (CHAR *) buf2;
++ s1_addr = (uintptr_t)s1;
++ for (j = 0; j < 200; ++j) /* Fill s2 with 200 characters, each >= 32. */
++ s2[j] = 32 + 23 * j % (BIG_CHAR - 32);
++ s2[200] = 0; /* s2 is now a string of length 200. */
++ for (i = 0; i < 750; ++i) {
++ for (j = 0; j < i; ++j)
++ s1[j] = 32 + 23 * j % (BIG_CHAR - 32);
++ s1[i] = '\0'; /* s1 is now a string of length i. */
++ /* s2 is cut back to length 200 before each call; presumably do_one_test appends to it -- TODO confirm. */
++ FOR_EACH_IMPL (impl, 0)
++ {
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, SIZE_MAX - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, i - s1_addr); /* s1_addr + N == i (modular). */
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, -s1_addr - i); /* s1_addr + N == -i (modular). */
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, SIZE_MAX - s1_addr - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, SIZE_MAX - s1_addr + i);
++ }
++ /* LEN gains one more high bit per pass: top bit, top two bits, and so on. */
++ len = 0;
++ for (j = 8 * sizeof(size_t) - 1; j ; --j)
++ {
++ len |= one << j;
++ FOR_EACH_IMPL (impl, 0)
++ {
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, len - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, len + i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, len - s1_addr - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, len - s1_addr + i);
++ /* ~LEN is the complementary mask made of the remaining low bits. */
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, ~len - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, ~len + i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, ~len - s1_addr - i);
++ s2[200] = '\0';
++ do_one_test (impl, s2, s1, ~len - s1_addr + i);
++ }
++ }
++ }
++}
++
+ static void
+ do_random_tests (void)
+ {
+@@ -316,6 +376,7 @@ test_main (void)
+ }
+
+ do_random_tests ();
++ do_overflow_tests ();
+ return ret;
+ }
+
+diff --git a/string/test-strnlen.c b/string/test-strnlen.c
+index 80ac9e8602..a1a6746cc9 100644
+--- a/string/test-strnlen.c
++++ b/string/test-strnlen.c
+@@ -27,6 +27,7 @@
+
+ #ifndef WIDE
+ # define STRNLEN strnlen
++# define MEMSET memset
+ # define CHAR char
+ # define BIG_CHAR CHAR_MAX
+ # define MIDDLE_CHAR 127
+@@ -34,6 +35,7 @@
+ #else
+ # include <wchar.h>
+ # define STRNLEN wcsnlen
++# define MEMSET wmemset
+ # define CHAR wchar_t
+ # define BIG_CHAR WCHAR_MAX
+ # define MIDDLE_CHAR 1121
+@@ -87,6 +89,38 @@ do_test (size_t align, size_t len, size_t maxlen, int max_char)
+ do_one_test (impl, (CHAR *) (buf + align), maxlen, MIN (len, maxlen));
+ }
+
++static void
++do_overflow_tests (void)
++{
++ size_t i, j, len;
++ const size_t one = 1; /* A size_t 1, so that `one << j' is a size_t shift. */
++ uintptr_t buf_addr = (uintptr_t) buf1;
++ /* Run do_test on strings of length i with huge MAXLEN, including values where buf_addr + MAXLEN wraps. */
++ for (i = 0; i < 750; ++i)
++ {
++ do_test (0, i, SIZE_MAX - i, BIG_CHAR);
++ do_test (0, i, i - buf_addr, BIG_CHAR); /* buf_addr + MAXLEN == i (modular). */
++ do_test (0, i, -buf_addr - i, BIG_CHAR); /* buf_addr + MAXLEN == -i (modular). */
++ do_test (0, i, SIZE_MAX - buf_addr - i, BIG_CHAR);
++ do_test (0, i, SIZE_MAX - buf_addr + i, BIG_CHAR);
++ /* LEN gains one more high bit per pass: top bit, top two bits, and so on. */
++ len = 0;
++ for (j = 8 * sizeof(size_t) - 1; j ; --j)
++ {
++ len |= one << j;
++ do_test (0, i, len - i, BIG_CHAR);
++ do_test (0, i, len + i, BIG_CHAR);
++ do_test (0, i, len - buf_addr - i, BIG_CHAR);
++ do_test (0, i, len - buf_addr + i, BIG_CHAR);
++ /* ~LEN is the complementary mask made of the remaining low bits. */
++ do_test (0, i, ~len - i, BIG_CHAR);
++ do_test (0, i, ~len + i, BIG_CHAR);
++ do_test (0, i, ~len - buf_addr - i, BIG_CHAR);
++ do_test (0, i, ~len - buf_addr + i, BIG_CHAR);
++ }
++ }
++}
++
+ static void
+ do_random_tests (void)
+ {
+@@ -153,7 +187,7 @@ do_page_tests (void)
+ size_t last_offset = (page_size / sizeof (CHAR)) - 1;
+
+ CHAR *s = (CHAR *) buf2;
+- memset (s, 65, (last_offset - 1));
++ MEMSET (s, 65, (last_offset - 1));
+ s[last_offset] = 0;
+
+ /* Place short strings ending at page boundary. */
+@@ -196,6 +230,35 @@ do_page_tests (void)
+ }
+ }
+
++/* Tests meant to expose implementations that access bytes beyond
++ the maximum length. */
++
++static void
++do_page_2_tests (void)
++{
++ size_t i, exp_len, offset;
++ size_t last_offset = page_size / sizeof (CHAR); /* Number of CHARs in one page. */
++
++ CHAR *s = (CHAR *) buf2;
++ MEMSET (s, 65, last_offset); /* Fill all LAST_OFFSET CHARs; no terminator is stored. */
++
++ /* Place short strings ending at page boundary without the null
++ byte. */
++ offset = last_offset;
++ for (i = 0; i < 128; i++)
++ {
++ /* Decrease offset to stress several sizes and alignments. */
++ offset--;
++ exp_len = last_offset - offset; /* CHARs left between OFFSET and the page end. */
++ FOR_EACH_IMPL (impl, 0)
++ {
++ /* If an implementation goes beyond EXP_LEN, it will trigger a segfault
++ (assumes the memory following this page of buf2 is inaccessible -- TODO confirm). */
++ do_one_test (impl, (CHAR *) (s + offset), exp_len, exp_len);
++ }
++ }
++}
++
+ int
+ test_main (void)
+ {
+@@ -242,6 +305,8 @@ test_main (void)
+
+ do_random_tests ();
+ do_page_tests ();
++ do_page_2_tests ();
++ do_overflow_tests ();
+ return ret;
+ }
+
+diff --git a/support/Makefile b/support/Makefile
+index 93faafddf9..3d3aff5ff9 100644
+--- a/support/Makefile
++++ b/support/Makefile
+@@ -35,6 +35,8 @@ libsupport-routines = \
+ ignore_stderr \
+ next_to_fault \
+ oom_error \
++ resolv_response_context_duplicate \
++ resolv_response_context_free \
+ resolv_test \
+ set_fortify_handler \
+ support-xfstat \
+@@ -133,6 +135,7 @@ libsupport-routines = \
+ xpthread_join \
+ xpthread_key_create \
+ xpthread_key_delete \
++ xpthread_kill \
+ xpthread_mutex_consistent \
+ xpthread_mutex_destroy \
+ xpthread_mutex_init \
+diff --git a/support/capture_subprocess.h b/support/capture_subprocess.h
+index 9808750f80..421f657678 100644
+--- a/support/capture_subprocess.h
++++ b/support/capture_subprocess.h
+@@ -41,6 +41,12 @@ struct support_capture_subprocess support_capture_subprocess
+ struct support_capture_subprocess support_capture_subprogram
+ (const char *file, char *const argv[]);
+
++/* Copy the running program into a setgid binary and run it with CHILD_ID
++ argument. If execution is successful, return the exit status of the child
++ program, otherwise return a non-zero failure exit code. */
++int support_capture_subprogram_self_sgid
++ (char *child_id);
++
+ /* Deallocate the subprocess data captured by
+ support_capture_subprocess. */
+ void support_capture_subprocess_free (struct support_capture_subprocess *);
+diff --git a/support/resolv_response_context_duplicate.c b/support/resolv_response_context_duplicate.c
+new file mode 100644
+index 0000000000..f9c5c3462a
+--- /dev/null
++++ b/support/resolv_response_context_duplicate.c
+@@ -0,0 +1,37 @@
++/* Duplicate a response context used in DNS resolver tests.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <string.h>
++#include <support/resolv_test.h>
++#include <support/support.h>
++
++struct resolv_response_context *
++resolv_response_context_duplicate (const struct resolv_response_context *ctx)
++{
++ struct resolv_response_context *result = xmalloc (sizeof (*result));
++ memcpy (result, ctx, sizeof (*result)); /* Shallow copy; the test pointer stays shared. */
++ if (result->client_address != NULL) /* A null client address is copied as null. */
++ {
++ result->client_address = xmalloc (result->client_address_length); /* Private copy of the address. */
++ memcpy (result->client_address, ctx->client_address,
++ result->client_address_length);
++ }
++ result->query_buffer = xmalloc (result->query_length); /* Private copy of the query packet. */
++ memcpy (result->query_buffer, ctx->query_buffer, result->query_length);
++ return result; /* The caller owns RESULT and both private buffers. */
++}
+diff --git a/support/resolv_response_context_free.c b/support/resolv_response_context_free.c
+new file mode 100644
+index 0000000000..b88c05ffd4
+--- /dev/null
++++ b/support/resolv_response_context_free.c
+@@ -0,0 +1,28 @@
++/* Free a response context used in DNS resolver tests.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <stdlib.h>
++#include <support/resolv_test.h>
++
++void
++resolv_response_context_free (struct resolv_response_context *ctx)
++{ /* CTX must not be null: its members are read before CTX itself is freed. */
++ free (ctx->query_buffer);
++ free (ctx->client_address); /* May be a null pointer, which free accepts. */
++ free (ctx);
++}
+diff --git a/support/resolv_test.c b/support/resolv_test.c
+index 53b7fc41ab..9878a040a3 100644
+--- a/support/resolv_test.c
++++ b/support/resolv_test.c
+@@ -181,7 +181,9 @@ resolv_response_init (struct resolv_response_builder *b,
+ b->buffer[2] |= b->query_buffer[2] & 0x01; /* Copy the RD bit. */
+ if (flags.tc)
+ b->buffer[2] |= 0x02;
+- b->buffer[3] = 0x80 | flags.rcode; /* Always set RA. */
++ b->buffer[3] = flags.rcode;
++ if (!flags.clear_ra)
++ b->buffer[3] |= 0x80;
+ if (flags.ad)
+ b->buffer[3] |= 0x20;
+
+@@ -434,9 +436,9 @@ resolv_response_buffer (const struct resolv_response_builder *b)
+ return result;
+ }
+
+-static struct resolv_response_builder *
+-response_builder_allocate
+- (const unsigned char *query_buffer, size_t query_length)
++struct resolv_response_builder *
++resolv_response_builder_allocate (const unsigned char *query_buffer,
++ size_t query_length)
+ {
+ struct resolv_response_builder *b = xmalloc (sizeof (*b));
+ memset (b, 0, offsetof (struct resolv_response_builder, buffer));
+@@ -445,8 +447,8 @@ response_builder_allocate
+ return b;
+ }
+
+-static void
+-response_builder_free (struct resolv_response_builder *b)
++void
++resolv_response_builder_free (struct resolv_response_builder *b)
+ {
+ tdestroy (b->compression_offsets, free);
+ free (b);
+@@ -661,13 +663,17 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
+
+ struct resolv_response_context ctx =
+ {
++ .test = obj,
++ .client_address = &peer,
++ .client_address_length = peerlen,
+ .query_buffer = query,
+ .query_length = length,
+ .server_index = server_index,
+ .tcp = false,
+ .edns = qinfo.edns,
+ };
+- struct resolv_response_builder *b = response_builder_allocate (query, length);
++ struct resolv_response_builder *b
++ = resolv_response_builder_allocate (query, length);
+ obj->config.response_callback
+ (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
+
+@@ -684,7 +690,7 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
+ if (b->offset >= 12)
+ printf ("info: UDP server %d: sending response:"
+ " %zu bytes, RCODE %d (for %s/%u/%u)\n",
+- server_index, b->offset, b->buffer[3] & 0x0f,
++ ctx.server_index, b->offset, b->buffer[3] & 0x0f,
+ qinfo.qname, qinfo.qclass, qinfo.qtype);
+ else
+ printf ("info: UDP server %d: sending response: %zu bytes"
+@@ -694,23 +700,31 @@ server_thread_udp_process_one (struct resolv_test *obj, int server_index)
+ if (b->truncate_bytes > 0)
+ printf ("info: truncated by %u bytes\n", b->truncate_bytes);
+ }
+- size_t to_send = b->offset;
+- if (to_send < b->truncate_bytes)
+- to_send = 0;
+- else
+- to_send -= b->truncate_bytes;
+-
+- /* Ignore most errors here because the other end may have closed
+- the socket. */
+- if (sendto (obj->servers[server_index].socket_udp,
+- b->buffer, to_send, 0,
+- (struct sockaddr *) &peer, peerlen) < 0)
+- TEST_VERIFY_EXIT (errno != EBADF);
++ resolv_response_send_udp (&ctx, b);
+ }
+- response_builder_free (b);
++ resolv_response_builder_free (b);
+ return true;
+ }
+
++void
++resolv_response_send_udp (const struct resolv_response_context *ctx,
++ struct resolv_response_builder *b)
++{
++ TEST_VERIFY_EXIT (!ctx->tcp); /* Terminates the test if CTX describes a TCP query. */
++ size_t to_send = b->offset; /* Start from the number of bytes in B's buffer. */
++ if (to_send < b->truncate_bytes)
++ to_send = 0; /* The truncation exceeds the response: send zero bytes. */
++ else
++ to_send -= b->truncate_bytes;
++ /* Send from the UDP socket of server CTX->server_index to the client address stored in CTX. */
++ /* Ignore most errors here because the other end may have closed
++ the socket. */
++ if (sendto (ctx->test->servers[ctx->server_index].socket_udp,
++ b->buffer, to_send, 0,
++ ctx->client_address, ctx->client_address_length) < 0)
++ TEST_VERIFY_EXIT (errno != EBADF);
++}
++
+ /* UDP thread_callback function. Variant for one thread per
+ server. */
+ static void
+@@ -897,14 +911,15 @@ server_thread_tcp_client (void *arg)
+
+ struct resolv_response_context ctx =
+ {
++ .test = closure->obj,
+ .query_buffer = query_buffer,
+ .query_length = query_length,
+ .server_index = closure->server_index,
+ .tcp = true,
+ .edns = qinfo.edns,
+ };
+- struct resolv_response_builder *b = response_builder_allocate
+- (query_buffer, query_length);
++ struct resolv_response_builder *b
++ = resolv_response_builder_allocate (query_buffer, query_length);
+ closure->obj->config.response_callback
+ (&ctx, b, qinfo.qname, qinfo.qclass, qinfo.qtype);
+
+@@ -936,7 +951,7 @@ server_thread_tcp_client (void *arg)
+ writev_fully (closure->client_socket, buffers, 2);
+ }
+ bool close_flag = b->close;
+- response_builder_free (b);
++ resolv_response_builder_free (b);
+ free (query_buffer);
+ if (close_flag)
+ break;
+diff --git a/support/resolv_test.h b/support/resolv_test.h
+index 67819469a0..31a5c1c3e7 100644
+--- a/support/resolv_test.h
++++ b/support/resolv_test.h
+@@ -35,25 +35,36 @@ struct resolv_edns_info
+ uint16_t payload_size;
+ };
+
++/* This opaque struct collects information about the resolver testing
++ currently in progress. */
++struct resolv_test;
++
+ /* This struct provides context information when the response callback
+ specified in struct resolv_redirect_config is invoked. */
+ struct resolv_response_context
+ {
+- const unsigned char *query_buffer;
++ struct resolv_test *test;
++ void *client_address;
++ size_t client_address_length;
++ unsigned char *query_buffer;
+ size_t query_length;
+ int server_index;
+ bool tcp;
+ struct resolv_edns_info edns;
+ };
+
++/* Produces a deep copy of the context. */
++struct resolv_response_context *
++ resolv_response_context_duplicate (const struct resolv_response_context *);
++
++/* Frees the copy. For the context passed to the response function,
++ this happens implicitly. */
++void resolv_response_context_free (struct resolv_response_context *);
++
+ /* This opaque struct is used to construct responses from within the
+ response callback function. */
+ struct resolv_response_builder;
+
+-/* This opaque struct collects information about the resolver testing
+- currently in progress. */
+-struct resolv_test;
+-
+ enum
+ {
+ /* Maximum number of test servers supported by the framework. */
+@@ -137,6 +148,10 @@ struct resolv_response_flags
+ /* If true, the AD (authenticated data) flag will be set. */
+ bool ad;
+
++ /* If true, do not set the RA (recursion available) flag in the
++ response. */
++ bool clear_ra;
++
+ /* Initial section count values. Can be used to artificially
+ increase the counts, for malformed packet testing.*/
+ unsigned short qdcount;
+@@ -188,6 +203,22 @@ void resolv_response_close (struct resolv_response_builder *);
+ /* The size of the response packet built so far. */
+ size_t resolv_response_length (const struct resolv_response_builder *);
+
++/* Allocates a response builder tied to a specific query packet,
++ starting at QUERY_BUFFER, containing QUERY_LENGTH bytes. */
++struct resolv_response_builder *
++ resolv_response_builder_allocate (const unsigned char *query_buffer,
++ size_t query_length);
++
++/* Deallocates a response builder.  */
++void resolv_response_builder_free (struct resolv_response_builder *);
++
++/* Sends a UDP response using a specific context.  This can be used to
++   reorder or duplicate responses, along with
++   resolv_response_context_duplicate and
++   resolv_response_builder_allocate.  */
++void resolv_response_send_udp (const struct resolv_response_context *,
++ struct resolv_response_builder *);
++
+ __END_DECLS
+
+ #endif /* SUPPORT_RESOLV_TEST_H */
+diff --git a/support/subprocess.h b/support/subprocess.h
+index 8b442fd5c0..34ffd02e8e 100644
+--- a/support/subprocess.h
++++ b/support/subprocess.h
+@@ -38,6 +38,11 @@ struct support_subprocess support_subprocess
+ struct support_subprocess support_subprogram
+ (const char *file, char *const argv[]);
+
++/* Invoke program FILE with ARGV arguments by using posix_spawn and wait for it
++   to complete.  Return the wait status reported by support_process_wait.  */
++int support_subprogram_wait
++ (const char *file, char *const argv[]);
++
+ /* Wait for the subprocess indicated by PROC::PID. Return the status
+ indicate by waitpid call. */
+ int support_process_wait (struct support_subprocess *proc);
+diff --git a/support/support_capture_subprocess.c b/support/support_capture_subprocess.c
+index eeed676e3d..28a37df67f 100644
+--- a/support/support_capture_subprocess.c
++++ b/support/support_capture_subprocess.c
+@@ -20,11 +20,14 @@
+ #include <support/capture_subprocess.h>
+
+ #include <errno.h>
++#include <fcntl.h>
+ #include <stdlib.h>
+ #include <support/check.h>
+ #include <support/xunistd.h>
+ #include <support/xsocket.h>
+ #include <support/xspawn.h>
++#include <support/support.h>
++#include <support/test-driver.h>
+
+ static void
+ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
+@@ -36,7 +39,7 @@ transfer (const char *what, struct pollfd *pfd, struct xmemstream *stream)
+ if (ret < 0)
+ {
+ support_record_failure ();
+- printf ("error: reading from subprocess %s: %m", what);
++ printf ("error: reading from subprocess %s: %m\n", what);
+ pfd->events = 0;
+ pfd->revents = 0;
+ }
+@@ -102,6 +105,129 @@ support_capture_subprogram (const char *file, char *const argv[])
+ return result;
+ }
+
++/* Copies the running executable (/proc/self/exe) into a directory only
++   this user can access, so that the copy can safely be made SGID to
++   GID.  Then runs the copy with CHILD_ID as its sole argument and
++   returns the status reported by support_subprogram_wait.  */
++static int
++copy_and_spawn_sgid (char *child_id, gid_t gid)
++{
++  char *dirname = xasprintf ("%s/tst-tunables-setuid.%jd",
++                             test_dir, (intmax_t) getpid ());
++  char *execname = xasprintf ("%s/bin", dirname);
++  int infd = -1, outfd = -1;
++  int ret = 1, status = 1;
++
++  TEST_VERIFY (mkdir (dirname, 0700) == 0);
++  if (support_record_failure_is_failed ())
++    goto err;
++
++  infd = open ("/proc/self/exe", O_RDONLY);
++  if (infd < 0)
++    FAIL_UNSUPPORTED ("unsupported: Cannot read binary from procfs\n");
++
++  outfd = open (execname, O_WRONLY | O_CREAT | O_EXCL, 0700);
++  TEST_VERIFY (outfd >= 0);
++  if (support_record_failure_is_failed ())
++    goto err;
++
++  char buf[4096];
++  for (;;)
++    {
++      ssize_t rdcount = read (infd, buf, sizeof (buf));
++      TEST_VERIFY (rdcount >= 0);
++      if (support_record_failure_is_failed ())
++        goto err;
++      if (rdcount == 0)
++        break;
++      char *p = buf;
++      char *end = buf + rdcount;
++      while (p != end)
++        {
++          /* Write from P so a short write resumes where it stopped.  */
++          ssize_t wrcount = write (outfd, p, end - p);
++          if (wrcount == 0)
++            errno = ENOSPC;
++          TEST_VERIFY (wrcount > 0);
++          if (support_record_failure_is_failed ())
++            goto err;
++          p += wrcount;
++        }
++    }
++  TEST_VERIFY (fchown (outfd, getuid (), gid) == 0);
++  if (support_record_failure_is_failed ())
++    goto err;
++  TEST_VERIFY (fchmod (outfd, 02750) == 0);
++  if (support_record_failure_is_failed ())
++    goto err;
++  /* Reset closed descriptors so cleanup cannot close them twice.  */
++  TEST_VERIFY (close (outfd) == 0);
++  outfd = -1;
++  if (support_record_failure_is_failed ())
++    goto err;
++  TEST_VERIFY (close (infd) == 0);
++  infd = -1;
++  if (support_record_failure_is_failed ())
++    goto err;
++
++  /* We have the binary, now spawn the subprocess.  Only its wait status
++     is needed, not its output, so support_subprogram is not used.  */
++  ret = 0;
++  char * const args[] = {execname, child_id, NULL};
++  status = support_subprogram_wait (args[0], args);
++
++err:
++  if (outfd >= 0)
++    close (outfd);
++  if (infd >= 0)
++    close (infd);
++  if (execname != NULL)
++    {
++      unlink (execname);
++      free (execname);
++    }
++  if (dirname != NULL)
++    {
++      rmdir (dirname);
++      free (dirname);
++    }
++  if (ret != 0)
++    FAIL_EXIT1("Failed to make sgid executable for test\n");
++
++  return status;
++}
++
++int
++support_capture_subprogram_self_sgid (char *child_id)
++{
++  const int max_groups = 64;
++  gid_t supplementary[max_groups];
++  gid_t sgid = 0;
++
++  /* Fetch the supplementary group list; one of its entries that differs
++     from the current GID becomes the SGID target.  */
++  int ngroups = getgroups (max_groups, supplementary);
++  if (ngroups < 0)
++    FAIL_UNSUPPORTED("Could not get group list for user %jd\n",
++                     (intmax_t) getuid ());
++
++  /* Stop at the first entry that is not the current GID.  */
++  const gid_t own_gid = getgid ();
++  int idx = 0;
++  while (idx < ngroups && supplementary[idx] == own_gid)
++    ++idx;
++  if (idx < ngroups)
++    sgid = supplementary[idx];
++
++  /* Zero doubles as the "nothing found" marker, so a GID of 0 is never
++     used as the target.  */
++  if (sgid == 0)
++    FAIL_UNSUPPORTED("Could not find a suitable GID for user %jd\n",
++                     (intmax_t) getuid ());
++
++  return copy_and_spawn_sgid (child_id, sgid);
++}
++
+ void
+ support_capture_subprocess_free (struct support_capture_subprocess *p)
+ {
+diff --git a/support/support_subprocess.c b/support/support_subprocess.c
+index 36e3a77af2..4a25828111 100644
+--- a/support/support_subprocess.c
++++ b/support/support_subprocess.c
+@@ -27,7 +27,7 @@
+ #include <support/subprocess.h>
+
+ static struct support_subprocess
+-support_suprocess_init (void)
++support_subprocess_init (void)
+ {
+ struct support_subprocess result;
+
+@@ -48,7 +48,7 @@ support_suprocess_init (void)
+ struct support_subprocess
+ support_subprocess (void (*callback) (void *), void *closure)
+ {
+- struct support_subprocess result = support_suprocess_init ();
++ struct support_subprocess result = support_subprocess_init ();
+
+ result.pid = xfork ();
+ if (result.pid == 0)
+@@ -71,7 +71,7 @@ support_subprocess (void (*callback) (void *), void *closure)
+ struct support_subprocess
+ support_subprogram (const char *file, char *const argv[])
+ {
+- struct support_subprocess result = support_suprocess_init ();
++ struct support_subprocess result = support_subprocess_init ();
+
+ posix_spawn_file_actions_t fa;
+ /* posix_spawn_file_actions_init does not fail. */
+@@ -84,7 +84,7 @@ support_subprogram (const char *file, char *const argv[])
+ xposix_spawn_file_actions_addclose (&fa, result.stdout_pipe[1]);
+ xposix_spawn_file_actions_addclose (&fa, result.stderr_pipe[1]);
+
+- result.pid = xposix_spawn (file, &fa, NULL, argv, NULL);
++ result.pid = xposix_spawn (file, &fa, NULL, argv, environ);
+
+ xclose (result.stdout_pipe[1]);
+ xclose (result.stderr_pipe[1]);
+@@ -92,6 +92,19 @@ support_subprogram (const char *file, char *const argv[])
+ return result;
+ }
+
++int
++support_subprogram_wait (const char *file, char *const argv[])
++{
++  /* Spawn FILE with an empty set of file actions and the current
++     environment, then hand the process to support_process_wait.  */
++  posix_spawn_file_actions_t fa;
++  posix_spawn_file_actions_init (&fa);
++  struct support_subprocess res = support_subprocess_init ();
++  res.pid = xposix_spawn (file, &fa, NULL, argv, environ);
++  posix_spawn_file_actions_destroy (&fa);
++  return support_process_wait (&res);
++}
++
+ int
+ support_process_wait (struct support_subprocess *proc)
+ {
+diff --git a/support/xpthread_kill.c b/support/xpthread_kill.c
+new file mode 100644
+index 0000000000..111a75d85e
+--- /dev/null
++++ b/support/xpthread_kill.c
+@@ -0,0 +1,26 @@
++/* pthread_kill with error checking.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <signal.h>
++#include <support/xthread.h>
++
++void
++xpthread_kill (pthread_t thr, int signo)
++{
++ xpthread_check_return ("pthread_kill", pthread_kill (thr, signo));
++}
+diff --git a/support/xthread.h b/support/xthread.h
+index 05f8d4a7d9..cb1fc30da0 100644
+--- a/support/xthread.h
++++ b/support/xthread.h
+@@ -75,6 +75,8 @@ void xpthread_attr_setstacksize (pthread_attr_t *attr,
+ void xpthread_attr_setguardsize (pthread_attr_t *attr,
+ size_t guardsize);
+
++void xpthread_kill (pthread_t thr, int signo);
++
+ /* Set the stack size in ATTR to a small value, but still large enough
+ to cover most internal glibc stack usage. */
+ void support_set_small_thread_stack_size (pthread_attr_t *attr);
+diff --git a/sysdeps/aarch64/dl-bti.c b/sysdeps/aarch64/dl-bti.c
+index 196e462520..cf7624aaa2 100644
+--- a/sysdeps/aarch64/dl-bti.c
++++ b/sysdeps/aarch64/dl-bti.c
+@@ -19,43 +19,76 @@
+ #include <errno.h>
+ #include <libintl.h>
+ #include <ldsodefs.h>
++#include <sys/mman.h>
+
+-static int
+-enable_bti (struct link_map *map, const char *program)
++/* See elf/dl-load.h. */
++#ifndef MAP_COPY
++# define MAP_COPY (MAP_PRIVATE | MAP_DENYWRITE)
++#endif
++
++/* Enable BTI protection for MAP. */
++
++void
++_dl_bti_protect (struct link_map *map, int fd)
+ {
++ const size_t pagesz = GLRO(dl_pagesize);
+ const ElfW(Phdr) *phdr;
+- unsigned prot;
+
+ for (phdr = map->l_phdr; phdr < &map->l_phdr[map->l_phnum]; ++phdr)
+ if (phdr->p_type == PT_LOAD && (phdr->p_flags & PF_X))
+ {
+- void *start = (void *) (phdr->p_vaddr + map->l_addr);
+- size_t len = phdr->p_memsz;
++ size_t vstart = ALIGN_DOWN (phdr->p_vaddr, pagesz);
++ size_t vend = ALIGN_UP (phdr->p_vaddr + phdr->p_filesz, pagesz);
++ off_t off = ALIGN_DOWN (phdr->p_offset, pagesz);
++ void *start = (void *) (vstart + map->l_addr);
++ size_t len = vend - vstart;
+
+- prot = PROT_EXEC | PROT_BTI;
++ unsigned prot = PROT_EXEC | PROT_BTI;
+ if (phdr->p_flags & PF_R)
+ prot |= PROT_READ;
+ if (phdr->p_flags & PF_W)
+ prot |= PROT_WRITE;
+
+- if (__mprotect (start, len, prot) < 0)
+- {
+- if (program)
+- _dl_fatal_printf ("%s: mprotect failed to turn on BTI\n",
+- map->l_name);
+- else
+- _dl_signal_error (errno, map->l_name, "dlopen",
+- N_("mprotect failed to turn on BTI"));
+- }
++ if (fd == -1)
++ /* Ignore failures for kernel mapped binaries. */
++ __mprotect (start, len, prot);
++ else
++ map->l_mach.bti_fail = __mmap (start, len, prot,
++ MAP_FIXED|MAP_COPY|MAP_FILE,
++ fd, off) == MAP_FAILED;
+ }
+- return 0;
+ }
+
+-/* Enable BTI for L if required. */
++
++static void
++bti_failed (struct link_map *l, const char *program)
++{
++  if (program == NULL)
++    /* No program name given.  Note: errno is not available any more.  */
++    _dl_signal_error (0, l->l_name, "dlopen",
++                      N_("failed to turn on BTI protection"));
++  else
++    _dl_fatal_printf ("%s: %s: failed to turn on BTI protection\n",
++                      program, l->l_name);
++}
++
++
++/* Enable BTI for L and its dependencies. */
+
+ void
+ _dl_bti_check (struct link_map *l, const char *program)
+ {
+- if (GLRO(dl_aarch64_cpu_features).bti && l->l_mach.bti)
+- enable_bti (l, program);
++ if (!GLRO(dl_aarch64_cpu_features).bti)
++ return;
++
++ if (l->l_mach.bti_fail)
++ bti_failed (l, program);
++
++ unsigned int i = l->l_searchlist.r_nlist;
++ while (i-- > 0)
++ {
++ struct link_map *dep = l->l_initfini[i];
++ if (dep->l_mach.bti_fail)
++ bti_failed (dep, program);
++ }
+ }
+diff --git a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
+index 70b9ed3925..fde7cfd9e2 100644
+--- a/sysdeps/aarch64/dl-machine.h
++++ b/sysdeps/aarch64/dl-machine.h
+@@ -395,13 +395,6 @@ elf_machine_lazy_rel (struct link_map *map,
+ /* Check for unexpected PLT reloc type. */
+ if (__builtin_expect (r_type == AARCH64_R(JUMP_SLOT), 1))
+ {
+- if (map->l_mach.plt == 0)
+- {
+- /* Prelinking. */
+- *reloc_addr += l_addr;
+- return;
+- }
+-
+ if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
+ {
+ /* Check the symbol table for variant PCS symbols. */
+@@ -425,7 +418,10 @@ elf_machine_lazy_rel (struct link_map *map,
+ }
+ }
+
+- *reloc_addr = map->l_mach.plt;
++ if (map->l_mach.plt == 0)
++ *reloc_addr += l_addr;
++ else
++ *reloc_addr = map->l_mach.plt;
+ }
+ else if (__builtin_expect (r_type == AARCH64_R(TLSDESC), 1))
+ {
+diff --git a/sysdeps/aarch64/dl-prop.h b/sysdeps/aarch64/dl-prop.h
+index b0785bda83..e926e54984 100644
+--- a/sysdeps/aarch64/dl-prop.h
++++ b/sysdeps/aarch64/dl-prop.h
+@@ -19,6 +19,8 @@
+ #ifndef _DL_PROP_H
+ #define _DL_PROP_H
+
++extern void _dl_bti_protect (struct link_map *, int) attribute_hidden;
++
+ extern void _dl_bti_check (struct link_map *, const char *)
+ attribute_hidden;
+
+@@ -35,14 +37,18 @@ _dl_open_check (struct link_map *m)
+ }
+
+ static inline void __attribute__ ((always_inline))
+-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+ {
+ }
+
+ static inline int
+-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
+- void *data)
++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
++ uint32_t datasz, void *data)
+ {
++ if (!GLRO(dl_aarch64_cpu_features).bti)
++ /* Skip note processing. */
++ return 0;
++
+ if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND)
+ {
+ /* Stop if the property note is ill-formed. */
+@@ -51,7 +57,7 @@ _dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
+
+ unsigned int feature_1 = *(unsigned int *) data;
+ if (feature_1 & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)
+- l->l_mach.bti = true;
++ _dl_bti_protect (l, fd);
+
+ /* Stop if we processed the property note. */
+ return 0;
+diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
+index 847a03ace2..b3f7663b07 100644
+--- a/sysdeps/aarch64/linkmap.h
++++ b/sysdeps/aarch64/linkmap.h
+@@ -22,5 +22,5 @@ struct link_map_machine
+ {
+ ElfW(Addr) plt; /* Address of .plt */
+ void *tlsdesc_table; /* Address of TLS descriptor hash table. */
+- bool bti; /* Branch Target Identification is enabled. */
++ bool bti_fail; /* Failed to enable Branch Target Identification. */
+ };
+diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c
+index 7cf5f033e8..799d60c98c 100644
+--- a/sysdeps/aarch64/multiarch/memcpy.c
++++ b/sysdeps/aarch64/multiarch/memcpy.c
+@@ -41,7 +41,8 @@ libc_ifunc (__libc_memcpy,
+ ? __memcpy_falkor
+ : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+ ? __memcpy_thunderx2
+- : (IS_NEOVERSE_N1 (midr)
++ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
++ || IS_NEOVERSE_V1 (midr)
+ ? __memcpy_simd
+ : __memcpy_generic)))));
+
+diff --git a/sysdeps/aarch64/multiarch/memcpy_advsimd.S b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
+index d4ba747777..48bb6d7ca4 100644
+--- a/sysdeps/aarch64/multiarch/memcpy_advsimd.S
++++ b/sysdeps/aarch64/multiarch/memcpy_advsimd.S
+@@ -223,12 +223,13 @@ L(copy_long_backwards):
+ b.ls L(copy64_from_start)
+
+ L(loop64_backwards):
+- stp A_q, B_q, [dstend, -32]
++ str B_q, [dstend, -16]
++ str A_q, [dstend, -32]
+ ldp A_q, B_q, [srcend, -96]
+- stp C_q, D_q, [dstend, -64]
++ str D_q, [dstend, -48]
++ str C_q, [dstend, -64]!
+ ldp C_q, D_q, [srcend, -128]
+ sub srcend, srcend, 64
+- sub dstend, dstend, 64
+ subs count, count, 64
+ b.hi L(loop64_backwards)
+
+diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
+index ad10aa8ac6..46a4cb3a54 100644
+--- a/sysdeps/aarch64/multiarch/memmove.c
++++ b/sysdeps/aarch64/multiarch/memmove.c
+@@ -41,7 +41,8 @@ libc_ifunc (__libc_memmove,
+ ? __memmove_falkor
+ : (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+ ? __memmove_thunderx2
+- : (IS_NEOVERSE_N1 (midr)
++ : (IS_NEOVERSE_N1 (midr) || IS_NEOVERSE_N2 (midr)
++ || IS_NEOVERSE_V1 (midr)
+ ? __memmove_simd
+ : __memmove_generic)))));
+
+diff --git a/sysdeps/aarch64/start.S b/sysdeps/aarch64/start.S
+index 75393e1c18..1998ea95d4 100644
+--- a/sysdeps/aarch64/start.S
++++ b/sysdeps/aarch64/start.S
+@@ -43,11 +43,9 @@
+ */
+
+ .text
+- .globl _start
+- .type _start,#function
+-_start:
+- BTI_C
++ENTRY(_start)
+ /* Create an initial frame with 0 LR and FP */
++ cfi_undefined (x30)
+ mov x29, #0
+ mov x30, #0
+
+@@ -101,8 +99,10 @@ _start:
+ because crt1.o and rcrt1.o share code and the later must avoid the
+ use of GOT relocations before __libc_start_main is called. */
+ __wrap_main:
++ BTI_C
+ b main
+ #endif
++END(_start)
+
+ /* Define a symbol for the first piece of initialized data. */
+ .data
+diff --git a/sysdeps/generic/dl-prop.h b/sysdeps/generic/dl-prop.h
+index f1cf576fe3..df27ff8e6a 100644
+--- a/sysdeps/generic/dl-prop.h
++++ b/sysdeps/generic/dl-prop.h
+@@ -37,15 +37,15 @@ _dl_open_check (struct link_map *m)
+ }
+
+ static inline void __attribute__ ((always_inline))
+-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+ {
+ }
+
+ /* Called for each property in the NT_GNU_PROPERTY_TYPE_0 note of L,
+ processing of the properties continues until this returns 0. */
+ static inline int __attribute__ ((always_inline))
+-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
+- void *data)
++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
++ uint32_t datasz, void *data)
+ {
+ return 0;
+ }
+diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
+index ba114ab4b1..62ac40d81b 100644
+--- a/sysdeps/generic/ldsodefs.h
++++ b/sysdeps/generic/ldsodefs.h
+@@ -919,8 +919,9 @@ extern void _dl_rtld_di_serinfo (struct link_map *loader,
+ Dl_serinfo *si, bool counting);
+
+ /* Process PT_GNU_PROPERTY program header PH in module L after
+- PT_LOAD segments are mapped. */
+-void _dl_process_pt_gnu_property (struct link_map *l, const ElfW(Phdr) *ph);
++ PT_LOAD segments are mapped from file FD. */
++void _dl_process_pt_gnu_property (struct link_map *l, int fd,
++ const ElfW(Phdr) *ph);
+
+
+ /* Search loaded objects' symbol tables for a definition of the symbol
+diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h
+index b667a5b652..c229603af3 100644
+--- a/sysdeps/generic/unwind.h
++++ b/sysdeps/generic/unwind.h
+@@ -75,15 +75,21 @@ typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
+
+ struct _Unwind_Exception
+ {
+- _Unwind_Exception_Class exception_class;
+- _Unwind_Exception_Cleanup_Fn exception_cleanup;
+- _Unwind_Word private_1;
+- _Unwind_Word private_2;
+-
+- /* @@@ The IA-64 ABI says that this structure must be double-word aligned.
+- Taking that literally does not make much sense generically. Instead we
+- provide the maximum alignment required by any type for the machine. */
+-} __attribute__((__aligned__));
++ union
++ {
++ struct
++ {
++ _Unwind_Exception_Class exception_class;
++ _Unwind_Exception_Cleanup_Fn exception_cleanup;
++ _Unwind_Word private_1;
++ _Unwind_Word private_2;
++ };
++
++ /* The IA-64 ABI says that this structure must be double-word aligned. */
++ _Unwind_Word unwind_exception_align[2]
++ __attribute__ ((__aligned__ (2 * sizeof (_Unwind_Word))));
++ };
++};
+
+
+ /* The ACTIONS argument to the personality routine is a bitwise OR of one
+diff --git a/sysdeps/gnu/errlist.h b/sysdeps/gnu/errlist.h
+index 5d11ed723d..6329e5f393 100644
+--- a/sysdeps/gnu/errlist.h
++++ b/sysdeps/gnu/errlist.h
+@@ -1,24 +1,21 @@
+-#ifndef ERR_MAP
+-#define ERR_MAP(value) value
+-#endif
+-_S(ERR_MAP(0), N_("Success"))
++_S(0, N_("Success"))
+ #ifdef EPERM
+ /*
+ TRANS Only the owner of the file (or other resource)
+ TRANS or processes with special privileges can perform the operation. */
+-_S(ERR_MAP(EPERM), N_("Operation not permitted"))
++_S(EPERM, N_("Operation not permitted"))
+ #endif
+ #ifdef ENOENT
+ /*
+ TRANS This is a ``file doesn't exist'' error
+ TRANS for ordinary files that are referenced in contexts where they are
+ TRANS expected to already exist. */
+-_S(ERR_MAP(ENOENT), N_("No such file or directory"))
++_S(ENOENT, N_("No such file or directory"))
+ #endif
+ #ifdef ESRCH
+ /*
+ TRANS No process matches the specified process ID. */
+-_S(ERR_MAP(ESRCH), N_("No such process"))
++_S(ESRCH, N_("No such process"))
+ #endif
+ #ifdef EINTR
+ /*
+@@ -29,12 +26,12 @@ TRANS
+ TRANS You can choose to have functions resume after a signal that is handled,
+ TRANS rather than failing with @code{EINTR}; see @ref{Interrupted
+ TRANS Primitives}. */
+-_S(ERR_MAP(EINTR), N_("Interrupted system call"))
++_S(EINTR, N_("Interrupted system call"))
+ #endif
+ #ifdef EIO
+ /*
+ TRANS Usually used for physical read or write errors. */
+-_S(ERR_MAP(EIO), N_("Input/output error"))
++_S(EIO, N_("Input/output error"))
+ #endif
+ #ifdef ENXIO
+ /*
+@@ -43,7 +40,7 @@ TRANS represented by a file you specified, and it couldn't find the device.
+ TRANS This can mean that the device file was installed incorrectly, or that
+ TRANS the physical device is missing or not correctly attached to the
+ TRANS computer. */
+-_S(ERR_MAP(ENXIO), N_("No such device or address"))
++_S(ENXIO, N_("No such device or address"))
+ #endif
+ #ifdef E2BIG
+ /*
+@@ -51,27 +48,27 @@ TRANS Used when the arguments passed to a new program
+ TRANS being executed with one of the @code{exec} functions (@pxref{Executing a
+ TRANS File}) occupy too much memory space. This condition never arises on
+ TRANS @gnuhurdsystems{}. */
+-_S(ERR_MAP(E2BIG), N_("Argument list too long"))
++_S(E2BIG, N_("Argument list too long"))
+ #endif
+ #ifdef ENOEXEC
+ /*
+ TRANS Invalid executable file format. This condition is detected by the
+ TRANS @code{exec} functions; see @ref{Executing a File}. */
+-_S(ERR_MAP(ENOEXEC), N_("Exec format error"))
++_S(ENOEXEC, N_("Exec format error"))
+ #endif
+ #ifdef EBADF
+ /*
+ TRANS For example, I/O on a descriptor that has been
+ TRANS closed or reading from a descriptor open only for writing (or vice
+ TRANS versa). */
+-_S(ERR_MAP(EBADF), N_("Bad file descriptor"))
++_S(EBADF, N_("Bad file descriptor"))
+ #endif
+ #ifdef ECHILD
+ /*
+ TRANS This error happens on operations that are
+ TRANS supposed to manipulate child processes, when there aren't any processes
+ TRANS to manipulate. */
+-_S(ERR_MAP(ECHILD), N_("No child processes"))
++_S(ECHILD, N_("No child processes"))
+ #endif
+ #ifdef EDEADLK
+ /*
+@@ -79,74 +76,74 @@ TRANS Allocating a system resource would have resulted in a
+ TRANS deadlock situation. The system does not guarantee that it will notice
+ TRANS all such situations. This error means you got lucky and the system
+ TRANS noticed; it might just hang. @xref{File Locks}, for an example. */
+-_S(ERR_MAP(EDEADLK), N_("Resource deadlock avoided"))
++_S(EDEADLK, N_("Resource deadlock avoided"))
+ #endif
+ #ifdef ENOMEM
+ /*
+ TRANS The system cannot allocate more virtual memory
+ TRANS because its capacity is full. */
+-_S(ERR_MAP(ENOMEM), N_("Cannot allocate memory"))
++_S(ENOMEM, N_("Cannot allocate memory"))
+ #endif
+ #ifdef EACCES
+ /*
+ TRANS The file permissions do not allow the attempted operation. */
+-_S(ERR_MAP(EACCES), N_("Permission denied"))
++_S(EACCES, N_("Permission denied"))
+ #endif
+ #ifdef EFAULT
+ /*
+ TRANS An invalid pointer was detected.
+ TRANS On @gnuhurdsystems{}, this error never happens; you get a signal instead. */
+-_S(ERR_MAP(EFAULT), N_("Bad address"))
++_S(EFAULT, N_("Bad address"))
+ #endif
+ #ifdef ENOTBLK
+ /*
+ TRANS A file that isn't a block special file was given in a situation that
+ TRANS requires one. For example, trying to mount an ordinary file as a file
+ TRANS system in Unix gives this error. */
+-_S(ERR_MAP(ENOTBLK), N_("Block device required"))
++_S(ENOTBLK, N_("Block device required"))
+ #endif
+ #ifdef EBUSY
+ /*
+ TRANS A system resource that can't be shared is already in use.
+ TRANS For example, if you try to delete a file that is the root of a currently
+ TRANS mounted filesystem, you get this error. */
+-_S(ERR_MAP(EBUSY), N_("Device or resource busy"))
++_S(EBUSY, N_("Device or resource busy"))
+ #endif
+ #ifdef EEXIST
+ /*
+ TRANS An existing file was specified in a context where it only
+ TRANS makes sense to specify a new file. */
+-_S(ERR_MAP(EEXIST), N_("File exists"))
++_S(EEXIST, N_("File exists"))
+ #endif
+ #ifdef EXDEV
+ /*
+ TRANS An attempt to make an improper link across file systems was detected.
+ TRANS This happens not only when you use @code{link} (@pxref{Hard Links}) but
+ TRANS also when you rename a file with @code{rename} (@pxref{Renaming Files}). */
+-_S(ERR_MAP(EXDEV), N_("Invalid cross-device link"))
++_S(EXDEV, N_("Invalid cross-device link"))
+ #endif
+ #ifdef ENODEV
+ /*
+ TRANS The wrong type of device was given to a function that expects a
+ TRANS particular sort of device. */
+-_S(ERR_MAP(ENODEV), N_("No such device"))
++_S(ENODEV, N_("No such device"))
+ #endif
+ #ifdef ENOTDIR
+ /*
+ TRANS A file that isn't a directory was specified when a directory is required. */
+-_S(ERR_MAP(ENOTDIR), N_("Not a directory"))
++_S(ENOTDIR, N_("Not a directory"))
+ #endif
+ #ifdef EISDIR
+ /*
+ TRANS You cannot open a directory for writing,
+ TRANS or create or remove hard links to it. */
+-_S(ERR_MAP(EISDIR), N_("Is a directory"))
++_S(EISDIR, N_("Is a directory"))
+ #endif
+ #ifdef EINVAL
+ /*
+ TRANS This is used to indicate various kinds of problems
+ TRANS with passing the wrong argument to a library function. */
+-_S(ERR_MAP(EINVAL), N_("Invalid argument"))
++_S(EINVAL, N_("Invalid argument"))
+ #endif
+ #ifdef EMFILE
+ /*
+@@ -157,20 +154,20 @@ TRANS In BSD and GNU, the number of open files is controlled by a resource
+ TRANS limit that can usually be increased. If you get this error, you might
+ TRANS want to increase the @code{RLIMIT_NOFILE} limit or make it unlimited;
+ TRANS @pxref{Limits on Resources}. */
+-_S(ERR_MAP(EMFILE), N_("Too many open files"))
++_S(EMFILE, N_("Too many open files"))
+ #endif
+ #ifdef ENFILE
+ /*
+ TRANS There are too many distinct file openings in the entire system. Note
+ TRANS that any number of linked channels count as just one file opening; see
+ TRANS @ref{Linked Channels}. This error never occurs on @gnuhurdsystems{}. */
+-_S(ERR_MAP(ENFILE), N_("Too many open files in system"))
++_S(ENFILE, N_("Too many open files in system"))
+ #endif
+ #ifdef ENOTTY
+ /*
+ TRANS Inappropriate I/O control operation, such as trying to set terminal
+ TRANS modes on an ordinary file. */
+-_S(ERR_MAP(ENOTTY), N_("Inappropriate ioctl for device"))
++_S(ENOTTY, N_("Inappropriate ioctl for device"))
+ #endif
+ #ifdef ETXTBSY
+ /*
+@@ -179,35 +176,35 @@ TRANS write to a file that is currently being executed. Often using a
+ TRANS debugger to run a program is considered having it open for writing and
+ TRANS will cause this error. (The name stands for ``text file busy''.) This
+ TRANS is not an error on @gnuhurdsystems{}; the text is copied as necessary. */
+-_S(ERR_MAP(ETXTBSY), N_("Text file busy"))
++_S(ETXTBSY, N_("Text file busy"))
+ #endif
+ #ifdef EFBIG
+ /*
+ TRANS The size of a file would be larger than allowed by the system. */
+-_S(ERR_MAP(EFBIG), N_("File too large"))
++_S(EFBIG, N_("File too large"))
+ #endif
+ #ifdef ENOSPC
+ /*
+ TRANS Write operation on a file failed because the
+ TRANS disk is full. */
+-_S(ERR_MAP(ENOSPC), N_("No space left on device"))
++_S(ENOSPC, N_("No space left on device"))
+ #endif
+ #ifdef ESPIPE
+ /*
+ TRANS Invalid seek operation (such as on a pipe). */
+-_S(ERR_MAP(ESPIPE), N_("Illegal seek"))
++_S(ESPIPE, N_("Illegal seek"))
+ #endif
+ #ifdef EROFS
+ /*
+ TRANS An attempt was made to modify something on a read-only file system. */
+-_S(ERR_MAP(EROFS), N_("Read-only file system"))
++_S(EROFS, N_("Read-only file system"))
+ #endif
+ #ifdef EMLINK
+ /*
+ TRANS The link count of a single file would become too large.
+ TRANS @code{rename} can cause this error if the file being renamed already has
+ TRANS as many links as it can take (@pxref{Renaming Files}). */
+-_S(ERR_MAP(EMLINK), N_("Too many links"))
++_S(EMLINK, N_("Too many links"))
+ #endif
+ #ifdef EPIPE
+ /*
+@@ -216,19 +213,19 @@ TRANS Every library function that returns this error code also generates a
+ TRANS @code{SIGPIPE} signal; this signal terminates the program if not handled
+ TRANS or blocked. Thus, your program will never actually see @code{EPIPE}
+ TRANS unless it has handled or blocked @code{SIGPIPE}. */
+-_S(ERR_MAP(EPIPE), N_("Broken pipe"))
++_S(EPIPE, N_("Broken pipe"))
+ #endif
+ #ifdef EDOM
+ /*
+ TRANS Used by mathematical functions when an argument value does
+ TRANS not fall into the domain over which the function is defined. */
+-_S(ERR_MAP(EDOM), N_("Numerical argument out of domain"))
++_S(EDOM, N_("Numerical argument out of domain"))
+ #endif
+ #ifdef ERANGE
+ /*
+ TRANS Used by mathematical functions when the result value is
+ TRANS not representable because of overflow or underflow. */
+-_S(ERR_MAP(ERANGE), N_("Numerical result out of range"))
++_S(ERANGE, N_("Numerical result out of range"))
+ #endif
+ #ifdef EAGAIN
+ /*
+@@ -261,7 +258,7 @@ TRANS Such shortages are usually fairly serious and affect the whole system,
+ TRANS so usually an interactive program should report the error to the user
+ TRANS and return to its command loop.
+ TRANS @end itemize */
+-_S(ERR_MAP(EAGAIN), N_("Resource temporarily unavailable"))
++_S(EAGAIN, N_("Resource temporarily unavailable"))
+ #endif
+ #ifdef EINPROGRESS
+ /*
+@@ -273,47 +270,47 @@ TRANS the operation has begun and will take some time. Attempts to manipulate
+ TRANS the object before the call completes return @code{EALREADY}. You can
+ TRANS use the @code{select} function to find out when the pending operation
+ TRANS has completed; @pxref{Waiting for I/O}. */
+-_S(ERR_MAP(EINPROGRESS), N_("Operation now in progress"))
++_S(EINPROGRESS, N_("Operation now in progress"))
+ #endif
+ #ifdef EALREADY
+ /*
+ TRANS An operation is already in progress on an object that has non-blocking
+ TRANS mode selected. */
+-_S(ERR_MAP(EALREADY), N_("Operation already in progress"))
++_S(EALREADY, N_("Operation already in progress"))
+ #endif
+ #ifdef ENOTSOCK
+ /*
+ TRANS A file that isn't a socket was specified when a socket is required. */
+-_S(ERR_MAP(ENOTSOCK), N_("Socket operation on non-socket"))
++_S(ENOTSOCK, N_("Socket operation on non-socket"))
+ #endif
+ #ifdef EMSGSIZE
+ /*
+ TRANS The size of a message sent on a socket was larger than the supported
+ TRANS maximum size. */
+-_S(ERR_MAP(EMSGSIZE), N_("Message too long"))
++_S(EMSGSIZE, N_("Message too long"))
+ #endif
+ #ifdef EPROTOTYPE
+ /*
+ TRANS The socket type does not support the requested communications protocol. */
+-_S(ERR_MAP(EPROTOTYPE), N_("Protocol wrong type for socket"))
++_S(EPROTOTYPE, N_("Protocol wrong type for socket"))
+ #endif
+ #ifdef ENOPROTOOPT
+ /*
+ TRANS You specified a socket option that doesn't make sense for the
+ TRANS particular protocol being used by the socket. @xref{Socket Options}. */
+-_S(ERR_MAP(ENOPROTOOPT), N_("Protocol not available"))
++_S(ENOPROTOOPT, N_("Protocol not available"))
+ #endif
+ #ifdef EPROTONOSUPPORT
+ /*
+ TRANS The socket domain does not support the requested communications protocol
+ TRANS (perhaps because the requested protocol is completely invalid).
+ TRANS @xref{Creating a Socket}. */
+-_S(ERR_MAP(EPROTONOSUPPORT), N_("Protocol not supported"))
++_S(EPROTONOSUPPORT, N_("Protocol not supported"))
+ #endif
+ #ifdef ESOCKTNOSUPPORT
+ /*
+ TRANS The socket type is not supported. */
+-_S(ERR_MAP(ESOCKTNOSUPPORT), N_("Socket type not supported"))
++_S(ESOCKTNOSUPPORT, N_("Socket type not supported"))
+ #endif
+ #ifdef EOPNOTSUPP
+ /*
+@@ -323,71 +320,71 @@ TRANS implemented for all communications protocols. On @gnuhurdsystems{}, this
+ TRANS error can happen for many calls when the object does not support the
+ TRANS particular operation; it is a generic indication that the server knows
+ TRANS nothing to do for that call. */
+-_S(ERR_MAP(EOPNOTSUPP), N_("Operation not supported"))
++_S(EOPNOTSUPP, N_("Operation not supported"))
+ #endif
+ #ifdef EPFNOSUPPORT
+ /*
+ TRANS The socket communications protocol family you requested is not supported. */
+-_S(ERR_MAP(EPFNOSUPPORT), N_("Protocol family not supported"))
++_S(EPFNOSUPPORT, N_("Protocol family not supported"))
+ #endif
+ #ifdef EAFNOSUPPORT
+ /*
+ TRANS The address family specified for a socket is not supported; it is
+ TRANS inconsistent with the protocol being used on the socket. @xref{Sockets}. */
+-_S(ERR_MAP(EAFNOSUPPORT), N_("Address family not supported by protocol"))
++_S(EAFNOSUPPORT, N_("Address family not supported by protocol"))
+ #endif
+ #ifdef EADDRINUSE
+ /*
+ TRANS The requested socket address is already in use. @xref{Socket Addresses}. */
+-_S(ERR_MAP(EADDRINUSE), N_("Address already in use"))
++_S(EADDRINUSE, N_("Address already in use"))
+ #endif
+ #ifdef EADDRNOTAVAIL
+ /*
+ TRANS The requested socket address is not available; for example, you tried
+ TRANS to give a socket a name that doesn't match the local host name.
+ TRANS @xref{Socket Addresses}. */
+-_S(ERR_MAP(EADDRNOTAVAIL), N_("Cannot assign requested address"))
++_S(EADDRNOTAVAIL, N_("Cannot assign requested address"))
+ #endif
+ #ifdef ENETDOWN
+ /*
+ TRANS A socket operation failed because the network was down. */
+-_S(ERR_MAP(ENETDOWN), N_("Network is down"))
++_S(ENETDOWN, N_("Network is down"))
+ #endif
+ #ifdef ENETUNREACH
+ /*
+ TRANS A socket operation failed because the subnet containing the remote host
+ TRANS was unreachable. */
+-_S(ERR_MAP(ENETUNREACH), N_("Network is unreachable"))
++_S(ENETUNREACH, N_("Network is unreachable"))
+ #endif
+ #ifdef ENETRESET
+ /*
+ TRANS A network connection was reset because the remote host crashed. */
+-_S(ERR_MAP(ENETRESET), N_("Network dropped connection on reset"))
++_S(ENETRESET, N_("Network dropped connection on reset"))
+ #endif
+ #ifdef ECONNABORTED
+ /*
+ TRANS A network connection was aborted locally. */
+-_S(ERR_MAP(ECONNABORTED), N_("Software caused connection abort"))
++_S(ECONNABORTED, N_("Software caused connection abort"))
+ #endif
+ #ifdef ECONNRESET
+ /*
+ TRANS A network connection was closed for reasons outside the control of the
+ TRANS local host, such as by the remote machine rebooting or an unrecoverable
+ TRANS protocol violation. */
+-_S(ERR_MAP(ECONNRESET), N_("Connection reset by peer"))
++_S(ECONNRESET, N_("Connection reset by peer"))
+ #endif
+ #ifdef ENOBUFS
+ /*
+ TRANS The kernel's buffers for I/O operations are all in use. In GNU, this
+ TRANS error is always synonymous with @code{ENOMEM}; you may get one or the
+ TRANS other from network operations. */
+-_S(ERR_MAP(ENOBUFS), N_("No buffer space available"))
++_S(ENOBUFS, N_("No buffer space available"))
+ #endif
+ #ifdef EISCONN
+ /*
+ TRANS You tried to connect a socket that is already connected.
+ TRANS @xref{Connecting}. */
+-_S(ERR_MAP(EISCONN), N_("Transport endpoint is already connected"))
++_S(EISCONN, N_("Transport endpoint is already connected"))
+ #endif
+ #ifdef ENOTCONN
+ /*
+@@ -395,74 +392,74 @@ TRANS The socket is not connected to anything. You get this error when you
+ TRANS try to transmit data over a socket, without first specifying a
+ TRANS destination for the data. For a connectionless socket (for datagram
+ TRANS protocols, such as UDP), you get @code{EDESTADDRREQ} instead. */
+-_S(ERR_MAP(ENOTCONN), N_("Transport endpoint is not connected"))
++_S(ENOTCONN, N_("Transport endpoint is not connected"))
+ #endif
+ #ifdef EDESTADDRREQ
+ /*
+ TRANS No default destination address was set for the socket. You get this
+ TRANS error when you try to transmit data over a connectionless socket,
+ TRANS without first specifying a destination for the data with @code{connect}. */
+-_S(ERR_MAP(EDESTADDRREQ), N_("Destination address required"))
++_S(EDESTADDRREQ, N_("Destination address required"))
+ #endif
+ #ifdef ESHUTDOWN
+ /*
+ TRANS The socket has already been shut down. */
+-_S(ERR_MAP(ESHUTDOWN), N_("Cannot send after transport endpoint shutdown"))
++_S(ESHUTDOWN, N_("Cannot send after transport endpoint shutdown"))
+ #endif
+ #ifdef ETOOMANYREFS
+-_S(ERR_MAP(ETOOMANYREFS), N_("Too many references: cannot splice"))
++_S(ETOOMANYREFS, N_("Too many references: cannot splice"))
+ #endif
+ #ifdef ETIMEDOUT
+ /*
+ TRANS A socket operation with a specified timeout received no response during
+ TRANS the timeout period. */
+-_S(ERR_MAP(ETIMEDOUT), N_("Connection timed out"))
++_S(ETIMEDOUT, N_("Connection timed out"))
+ #endif
+ #ifdef ECONNREFUSED
+ /*
+ TRANS A remote host refused to allow the network connection (typically because
+ TRANS it is not running the requested service). */
+-_S(ERR_MAP(ECONNREFUSED), N_("Connection refused"))
++_S(ECONNREFUSED, N_("Connection refused"))
+ #endif
+ #ifdef ELOOP
+ /*
+ TRANS Too many levels of symbolic links were encountered in looking up a file name.
+ TRANS This often indicates a cycle of symbolic links. */
+-_S(ERR_MAP(ELOOP), N_("Too many levels of symbolic links"))
++_S(ELOOP, N_("Too many levels of symbolic links"))
+ #endif
+ #ifdef ENAMETOOLONG
+ /*
+ TRANS Filename too long (longer than @code{PATH_MAX}; @pxref{Limits for
+ TRANS Files}) or host name too long (in @code{gethostname} or
+ TRANS @code{sethostname}; @pxref{Host Identification}). */
+-_S(ERR_MAP(ENAMETOOLONG), N_("File name too long"))
++_S(ENAMETOOLONG, N_("File name too long"))
+ #endif
+ #ifdef EHOSTDOWN
+ /*
+ TRANS The remote host for a requested network connection is down. */
+-_S(ERR_MAP(EHOSTDOWN), N_("Host is down"))
++_S(EHOSTDOWN, N_("Host is down"))
+ #endif
+ /*
+ TRANS The remote host for a requested network connection is not reachable. */
+ #ifdef EHOSTUNREACH
+-_S(ERR_MAP(EHOSTUNREACH), N_("No route to host"))
++_S(EHOSTUNREACH, N_("No route to host"))
+ #endif
+ #ifdef ENOTEMPTY
+ /*
+ TRANS Directory not empty, where an empty directory was expected. Typically,
+ TRANS this error occurs when you are trying to delete a directory. */
+-_S(ERR_MAP(ENOTEMPTY), N_("Directory not empty"))
++_S(ENOTEMPTY, N_("Directory not empty"))
+ #endif
+ #ifdef EUSERS
+ /*
+ TRANS The file quota system is confused because there are too many users.
+ TRANS @c This can probably happen in a GNU system when using NFS. */
+-_S(ERR_MAP(EUSERS), N_("Too many users"))
++_S(EUSERS, N_("Too many users"))
+ #endif
+ #ifdef EDQUOT
+ /*
+ TRANS The user's disk quota was exceeded. */
+-_S(ERR_MAP(EDQUOT), N_("Disk quota exceeded"))
++_S(EDQUOT, N_("Disk quota exceeded"))
+ #endif
+ #ifdef ESTALE
+ /*
+@@ -471,7 +468,7 @@ TRANS file system which is due to file system rearrangements on the server host
+ TRANS for NFS file systems or corruption in other file systems.
+ TRANS Repairing this condition usually requires unmounting, possibly repairing
+ TRANS and remounting the file system. */
+-_S(ERR_MAP(ESTALE), N_("Stale file handle"))
++_S(ESTALE, N_("Stale file handle"))
+ #endif
+ #ifdef EREMOTE
+ /*
+@@ -479,7 +476,7 @@ TRANS An attempt was made to NFS-mount a remote file system with a file name tha
+ TRANS already specifies an NFS-mounted file.
+ TRANS (This is an error on some operating systems, but we expect it to work
+ TRANS properly on @gnuhurdsystems{}, making this error code impossible.) */
+-_S(ERR_MAP(EREMOTE), N_("Object is remote"))
++_S(EREMOTE, N_("Object is remote"))
+ #endif
+ #ifdef ENOLCK
+ /*
+@@ -487,7 +484,7 @@ TRANS This is used by the file locking facilities; see
+ TRANS @ref{File Locks}. This error is never generated by @gnuhurdsystems{}, but
+ TRANS it can result from an operation to an NFS server running another
+ TRANS operating system. */
+-_S(ERR_MAP(ENOLCK), N_("No locks available"))
++_S(ENOLCK, N_("No locks available"))
+ #endif
+ #ifdef ENOSYS
+ /*
+@@ -496,46 +493,46 @@ TRANS not implemented at all, either in the C library itself or in the
+ TRANS operating system. When you get this error, you can be sure that this
+ TRANS particular function will always fail with @code{ENOSYS} unless you
+ TRANS install a new version of the C library or the operating system. */
+-_S(ERR_MAP(ENOSYS), N_("Function not implemented"))
++_S(ENOSYS, N_("Function not implemented"))
+ #endif
+ #ifdef EILSEQ
+ /*
+ TRANS While decoding a multibyte character the function came along an invalid
+ TRANS or an incomplete sequence of bytes or the given wide character is invalid. */
+-_S(ERR_MAP(EILSEQ), N_("Invalid or incomplete multibyte or wide character"))
++_S(EILSEQ, N_("Invalid or incomplete multibyte or wide character"))
+ #endif
+ #ifdef EBADMSG
+-_S(ERR_MAP(EBADMSG), N_("Bad message"))
++_S(EBADMSG, N_("Bad message"))
+ #endif
+ #ifdef EIDRM
+-_S(ERR_MAP(EIDRM), N_("Identifier removed"))
++_S(EIDRM, N_("Identifier removed"))
+ #endif
+ #ifdef EMULTIHOP
+-_S(ERR_MAP(EMULTIHOP), N_("Multihop attempted"))
++_S(EMULTIHOP, N_("Multihop attempted"))
+ #endif
+ #ifdef ENODATA
+-_S(ERR_MAP(ENODATA), N_("No data available"))
++_S(ENODATA, N_("No data available"))
+ #endif
+ #ifdef ENOLINK
+-_S(ERR_MAP(ENOLINK), N_("Link has been severed"))
++_S(ENOLINK, N_("Link has been severed"))
+ #endif
+ #ifdef ENOMSG
+-_S(ERR_MAP(ENOMSG), N_("No message of desired type"))
++_S(ENOMSG, N_("No message of desired type"))
+ #endif
+ #ifdef ENOSR
+-_S(ERR_MAP(ENOSR), N_("Out of streams resources"))
++_S(ENOSR, N_("Out of streams resources"))
+ #endif
+ #ifdef ENOSTR
+-_S(ERR_MAP(ENOSTR), N_("Device not a stream"))
++_S(ENOSTR, N_("Device not a stream"))
+ #endif
+ #ifdef EOVERFLOW
+-_S(ERR_MAP(EOVERFLOW), N_("Value too large for defined data type"))
++_S(EOVERFLOW, N_("Value too large for defined data type"))
+ #endif
+ #ifdef EPROTO
+-_S(ERR_MAP(EPROTO), N_("Protocol error"))
++_S(EPROTO, N_("Protocol error"))
+ #endif
+ #ifdef ETIME
+-_S(ERR_MAP(ETIME), N_("Timer expired"))
++_S(ETIME, N_("Timer expired"))
+ #endif
+ #ifdef ECANCELED
+ /*
+@@ -543,148 +540,148 @@ TRANS An asynchronous operation was canceled before it
+ TRANS completed. @xref{Asynchronous I/O}. When you call @code{aio_cancel},
+ TRANS the normal result is for the operations affected to complete with this
+ TRANS error; @pxref{Cancel AIO Operations}. */
+-_S(ERR_MAP(ECANCELED), N_("Operation canceled"))
++_S(ECANCELED, N_("Operation canceled"))
+ #endif
+ #ifdef EOWNERDEAD
+-_S(ERR_MAP(EOWNERDEAD), N_("Owner died"))
++_S(EOWNERDEAD, N_("Owner died"))
+ #endif
+ #ifdef ENOTRECOVERABLE
+-_S(ERR_MAP(ENOTRECOVERABLE), N_("State not recoverable"))
++_S(ENOTRECOVERABLE, N_("State not recoverable"))
+ #endif
+ #ifdef ERESTART
+-_S(ERR_MAP(ERESTART), N_("Interrupted system call should be restarted"))
++_S(ERESTART, N_("Interrupted system call should be restarted"))
+ #endif
+ #ifdef ECHRNG
+-_S(ERR_MAP(ECHRNG), N_("Channel number out of range"))
++_S(ECHRNG, N_("Channel number out of range"))
+ #endif
+ #ifdef EL2NSYNC
+-_S(ERR_MAP(EL2NSYNC), N_("Level 2 not synchronized"))
++_S(EL2NSYNC, N_("Level 2 not synchronized"))
+ #endif
+ #ifdef EL3HLT
+-_S(ERR_MAP(EL3HLT), N_("Level 3 halted"))
++_S(EL3HLT, N_("Level 3 halted"))
+ #endif
+ #ifdef EL3RST
+-_S(ERR_MAP(EL3RST), N_("Level 3 reset"))
++_S(EL3RST, N_("Level 3 reset"))
+ #endif
+ #ifdef ELNRNG
+-_S(ERR_MAP(ELNRNG), N_("Link number out of range"))
++_S(ELNRNG, N_("Link number out of range"))
+ #endif
+ #ifdef EUNATCH
+-_S(ERR_MAP(EUNATCH), N_("Protocol driver not attached"))
++_S(EUNATCH, N_("Protocol driver not attached"))
+ #endif
+ #ifdef ENOCSI
+-_S(ERR_MAP(ENOCSI), N_("No CSI structure available"))
++_S(ENOCSI, N_("No CSI structure available"))
+ #endif
+ #ifdef EL2HLT
+-_S(ERR_MAP(EL2HLT), N_("Level 2 halted"))
++_S(EL2HLT, N_("Level 2 halted"))
+ #endif
+ #ifdef EBADE
+-_S(ERR_MAP(EBADE), N_("Invalid exchange"))
++_S(EBADE, N_("Invalid exchange"))
+ #endif
+ #ifdef EBADR
+-_S(ERR_MAP(EBADR), N_("Invalid request descriptor"))
++_S(EBADR, N_("Invalid request descriptor"))
+ #endif
+ #ifdef EXFULL
+-_S(ERR_MAP(EXFULL), N_("Exchange full"))
++_S(EXFULL, N_("Exchange full"))
+ #endif
+ #ifdef ENOANO
+-_S(ERR_MAP(ENOANO), N_("No anode"))
++_S(ENOANO, N_("No anode"))
+ #endif
+ #ifdef EBADRQC
+-_S(ERR_MAP(EBADRQC), N_("Invalid request code"))
++_S(EBADRQC, N_("Invalid request code"))
+ #endif
+ #ifdef EBADSLT
+-_S(ERR_MAP(EBADSLT), N_("Invalid slot"))
++_S(EBADSLT, N_("Invalid slot"))
+ #endif
+ #ifdef EBFONT
+-_S(ERR_MAP(EBFONT), N_("Bad font file format"))
++_S(EBFONT, N_("Bad font file format"))
+ #endif
+ #ifdef ENONET
+-_S(ERR_MAP(ENONET), N_("Machine is not on the network"))
++_S(ENONET, N_("Machine is not on the network"))
+ #endif
+ #ifdef ENOPKG
+-_S(ERR_MAP(ENOPKG), N_("Package not installed"))
++_S(ENOPKG, N_("Package not installed"))
+ #endif
+ #ifdef EADV
+-_S(ERR_MAP(EADV), N_("Advertise error"))
++_S(EADV, N_("Advertise error"))
+ #endif
+ #ifdef ESRMNT
+-_S(ERR_MAP(ESRMNT), N_("Srmount error"))
++_S(ESRMNT, N_("Srmount error"))
+ #endif
+ #ifdef ECOMM
+-_S(ERR_MAP(ECOMM), N_("Communication error on send"))
++_S(ECOMM, N_("Communication error on send"))
+ #endif
+ #ifdef EDOTDOT
+-_S(ERR_MAP(EDOTDOT), N_("RFS specific error"))
++_S(EDOTDOT, N_("RFS specific error"))
+ #endif
+ #ifdef ENOTUNIQ
+-_S(ERR_MAP(ENOTUNIQ), N_("Name not unique on network"))
++_S(ENOTUNIQ, N_("Name not unique on network"))
+ #endif
+ #ifdef EBADFD
+-_S(ERR_MAP(EBADFD), N_("File descriptor in bad state"))
++_S(EBADFD, N_("File descriptor in bad state"))
+ #endif
+ #ifdef EREMCHG
+-_S(ERR_MAP(EREMCHG), N_("Remote address changed"))
++_S(EREMCHG, N_("Remote address changed"))
+ #endif
+ #ifdef ELIBACC
+-_S(ERR_MAP(ELIBACC), N_("Can not access a needed shared library"))
++_S(ELIBACC, N_("Can not access a needed shared library"))
+ #endif
+ #ifdef ELIBBAD
+-_S(ERR_MAP(ELIBBAD), N_("Accessing a corrupted shared library"))
++_S(ELIBBAD, N_("Accessing a corrupted shared library"))
+ #endif
+ #ifdef ELIBSCN
+-_S(ERR_MAP(ELIBSCN), N_(".lib section in a.out corrupted"))
++_S(ELIBSCN, N_(".lib section in a.out corrupted"))
+ #endif
+ #ifdef ELIBMAX
+-_S(ERR_MAP(ELIBMAX), N_("Attempting to link in too many shared libraries"))
++_S(ELIBMAX, N_("Attempting to link in too many shared libraries"))
+ #endif
+ #ifdef ELIBEXEC
+-_S(ERR_MAP(ELIBEXEC), N_("Cannot exec a shared library directly"))
++_S(ELIBEXEC, N_("Cannot exec a shared library directly"))
+ #endif
+ #ifdef ESTRPIPE
+-_S(ERR_MAP(ESTRPIPE), N_("Streams pipe error"))
++_S(ESTRPIPE, N_("Streams pipe error"))
+ #endif
+ #ifdef EUCLEAN
+-_S(ERR_MAP(EUCLEAN), N_("Structure needs cleaning"))
++_S(EUCLEAN, N_("Structure needs cleaning"))
+ #endif
+ #ifdef ENOTNAM
+-_S(ERR_MAP(ENOTNAM), N_("Not a XENIX named type file"))
++_S(ENOTNAM, N_("Not a XENIX named type file"))
+ #endif
+ #ifdef ENAVAIL
+-_S(ERR_MAP(ENAVAIL), N_("No XENIX semaphores available"))
++_S(ENAVAIL, N_("No XENIX semaphores available"))
+ #endif
+ #ifdef EISNAM
+-_S(ERR_MAP(EISNAM), N_("Is a named type file"))
++_S(EISNAM, N_("Is a named type file"))
+ #endif
+ #ifdef EREMOTEIO
+-_S(ERR_MAP(EREMOTEIO), N_("Remote I/O error"))
++_S(EREMOTEIO, N_("Remote I/O error"))
+ #endif
+ #ifdef ENOMEDIUM
+-_S(ERR_MAP(ENOMEDIUM), N_("No medium found"))
++_S(ENOMEDIUM, N_("No medium found"))
+ #endif
+ #ifdef EMEDIUMTYPE
+-_S(ERR_MAP(EMEDIUMTYPE), N_("Wrong medium type"))
++_S(EMEDIUMTYPE, N_("Wrong medium type"))
+ #endif
+ #ifdef ENOKEY
+-_S(ERR_MAP(ENOKEY), N_("Required key not available"))
++_S(ENOKEY, N_("Required key not available"))
+ #endif
+ #ifdef EKEYEXPIRED
+-_S(ERR_MAP(EKEYEXPIRED), N_("Key has expired"))
++_S(EKEYEXPIRED, N_("Key has expired"))
+ #endif
+ #ifdef EKEYREVOKED
+-_S(ERR_MAP(EKEYREVOKED), N_("Key has been revoked"))
++_S(EKEYREVOKED, N_("Key has been revoked"))
+ #endif
+ #ifdef EKEYREJECTED
+-_S(ERR_MAP(EKEYREJECTED), N_("Key was rejected by service"))
++_S(EKEYREJECTED, N_("Key was rejected by service"))
+ #endif
+ #ifdef ERFKILL
+-_S(ERR_MAP(ERFKILL), N_("Operation not possible due to RF-kill"))
++_S(ERFKILL, N_("Operation not possible due to RF-kill"))
+ #endif
+ #ifdef EHWPOISON
+-_S(ERR_MAP(EHWPOISON), N_("Memory page has hardware error"))
++_S(EHWPOISON, N_("Memory page has hardware error"))
+ #endif
+ #ifdef EBADRPC
+-_S(ERR_MAP(EBADRPC), N_("RPC struct is bad"))
++_S(EBADRPC, N_("RPC struct is bad"))
+ #endif
+ #ifdef EFTYPE
+ /*
+@@ -693,40 +690,40 @@ TRANS operation, or a data file had the wrong format.
+ TRANS
+ TRANS On some systems @code{chmod} returns this error if you try to set the
+ TRANS sticky bit on a non-directory file; @pxref{Setting Permissions}. */
+-_S(ERR_MAP(EFTYPE), N_("Inappropriate file type or format"))
++_S(EFTYPE, N_("Inappropriate file type or format"))
+ #endif
+ #ifdef EPROCUNAVAIL
+-_S(ERR_MAP(EPROCUNAVAIL), N_("RPC bad procedure for program"))
++_S(EPROCUNAVAIL, N_("RPC bad procedure for program"))
+ #endif
+ #ifdef EAUTH
+-_S(ERR_MAP(EAUTH), N_("Authentication error"))
++_S(EAUTH, N_("Authentication error"))
+ #endif
+ #ifdef EDIED
+ /*
+ TRANS On @gnuhurdsystems{}, opening a file returns this error when the file is
+ TRANS translated by a program and the translator program dies while starting
+ TRANS up, before it has connected to the file. */
+-_S(ERR_MAP(EDIED), N_("Translator died"))
++_S(EDIED, N_("Translator died"))
+ #endif
+ #ifdef ERPCMISMATCH
+-_S(ERR_MAP(ERPCMISMATCH), N_("RPC version wrong"))
++_S(ERPCMISMATCH, N_("RPC version wrong"))
+ #endif
+ #ifdef EGREGIOUS
+ /*
+ TRANS You did @strong{what}? */
+-_S(ERR_MAP(EGREGIOUS), N_("You really blew it this time"))
++_S(EGREGIOUS, N_("You really blew it this time"))
+ #endif
+ #ifdef EPROCLIM
+ /*
+ TRANS This means that the per-user limit on new process would be exceeded by
+ TRANS an attempted @code{fork}. @xref{Limits on Resources}, for details on
+ TRANS the @code{RLIMIT_NPROC} limit. */
+-_S(ERR_MAP(EPROCLIM), N_("Too many processes"))
++_S(EPROCLIM, N_("Too many processes"))
+ #endif
+ #ifdef EGRATUITOUS
+ /*
+ TRANS This error code has no purpose. */
+-_S(ERR_MAP(EGRATUITOUS), N_("Gratuitous error"))
++_S(EGRATUITOUS, N_("Gratuitous error"))
+ #endif
+ #if defined (ENOTSUP) && ENOTSUP != EOPNOTSUPP
+ /*
+@@ -742,10 +739,10 @@ TRANS values.
+ TRANS
+ TRANS If the entire function is not available at all in the implementation,
+ TRANS it returns @code{ENOSYS} instead. */
+-_S(ERR_MAP(ENOTSUP), N_("Not supported"))
++_S(ENOTSUP, N_("Not supported"))
+ #endif
+ #ifdef EPROGMISMATCH
+-_S(ERR_MAP(EPROGMISMATCH), N_("RPC program version wrong"))
++_S(EPROGMISMATCH, N_("RPC program version wrong"))
+ #endif
+ #ifdef EBACKGROUND
+ /*
+@@ -755,7 +752,7 @@ TRANS foreground process group of the terminal. Users do not usually see this
+ TRANS error because functions such as @code{read} and @code{write} translate
+ TRANS it into a @code{SIGTTIN} or @code{SIGTTOU} signal. @xref{Job Control},
+ TRANS for information on process groups and these signals. */
+-_S(ERR_MAP(EBACKGROUND), N_("Inappropriate operation for background process"))
++_S(EBACKGROUND, N_("Inappropriate operation for background process"))
+ #endif
+ #ifdef EIEIO
+ /*
+@@ -773,7 +770,7 @@ TRANS @c "bought the farm" means "died". -jtobey
+ TRANS @c
+ TRANS @c Translators, please do not translate this litteraly, translate it into
+ TRANS @c an idiomatic funny way of saying that the computer died. */
+-_S(ERR_MAP(EIEIO), N_("Computer bought the farm"))
++_S(EIEIO, N_("Computer bought the farm"))
+ #endif
+ #if defined (EWOULDBLOCK) && EWOULDBLOCK != EAGAIN
+ /*
+@@ -782,18 +779,18 @@ TRANS The values are always the same, on every operating system.
+ TRANS
+ TRANS C libraries in many older Unix systems have @code{EWOULDBLOCK} as a
+ TRANS separate error code. */
+-_S(ERR_MAP(EWOULDBLOCK), N_("Operation would block"))
++_S(EWOULDBLOCK, N_("Operation would block"))
+ #endif
+ #ifdef ENEEDAUTH
+-_S(ERR_MAP(ENEEDAUTH), N_("Need authenticator"))
++_S(ENEEDAUTH, N_("Need authenticator"))
+ #endif
+ #ifdef ED
+ /*
+ TRANS The experienced user will know what is wrong.
+ TRANS @c This error code is a joke. Its perror text is part of the joke.
+ TRANS @c Don't change it. */
+-_S(ERR_MAP(ED), N_("?"))
++_S(ED, N_("?"))
+ #endif
+ #ifdef EPROGUNAVAIL
+-_S(ERR_MAP(EPROGUNAVAIL), N_("RPC program not available"))
++_S(EPROGUNAVAIL, N_("RPC program not available"))
+ #endif
+diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h
+index 0f08079e48..672d8f27ce 100644
+--- a/sysdeps/i386/dl-machine.h
++++ b/sysdeps/i386/dl-machine.h
+@@ -338,16 +338,22 @@ elf_machine_rel (struct link_map *map, const Elf32_Rel *reloc,
+ {
+ # ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+- && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+- _dl_error_printf ("\
++ if (sym_map->l_type == lt_executable)
++ _dl_fatal_printf ("\
++%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
++and creates an unsatisfiable circular dependency.\n",
++ RTLD_PROGNAME, strtab + refsym->st_name,
++ map->l_name);
++ else
++ _dl_error_printf ("\
+ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+- RTLD_PROGNAME, map->l_name,
+- sym_map->l_name,
+- strtab + refsym->st_name);
++ RTLD_PROGNAME, map->l_name,
++ sym_map->l_name,
++ strtab + refsym->st_name);
+ }
+ # endif
+ value = ((Elf32_Addr (*) (void)) value) ();
+diff --git a/sysdeps/powerpc/powerpc64/backtrace.c b/sysdeps/powerpc/powerpc64/backtrace.c
+index 8a53a1088f..362a2b713c 100644
+--- a/sysdeps/powerpc/powerpc64/backtrace.c
++++ b/sysdeps/powerpc/powerpc64/backtrace.c
+@@ -54,11 +54,22 @@ struct signal_frame_64 {
+ /* We don't care about the rest, since the IP value is at 'uc' field. */
+ };
+
++/* Test whether the address lies within the sigreturn trampoline code.
++ Up to and including kernel 5.8, returning from an interrupt or syscall to a
++ signal handler starts execution directly at the handler's entry point, with
++ LR set to address of the sigreturn trampoline (the vDSO symbol).
++ Newer kernels will branch to signal handler from the trampoline instead, so
++ checking the stacktrace against the vDSO entrypoint does not work in such
++ case.
++ The vDSO branches with a 'bctrl' instruction, so checking both the
++ vDSO address itself and the next instruction should cover all kernel
++ versions. */
+ static inline bool
+ is_sigtramp_address (void *nip)
+ {
+ #ifdef HAVE_SIGTRAMP_RT64
+- if (nip == GLRO (dl_vdso_sigtramp_rt64))
++ if (nip == GLRO (dl_vdso_sigtramp_rt64) ||
++ nip == GLRO (dl_vdso_sigtramp_rt64) + 4)
+ return true;
+ #endif
+ return false;
+diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
+index 920d875420..bf9b7f7223 100644
+--- a/sysdeps/pthread/Makefile
++++ b/sysdeps/pthread/Makefile
+@@ -107,6 +107,7 @@ tests += tst-cnd-basic tst-mtx-trylock tst-cnd-broadcast \
+ tst-unload \
+ tst-unwind-thread \
+ tst-pt-vfork1 tst-pt-vfork2 tst-vfork1x tst-vfork2x \
++ tst-pthread-exit-signal \
+
+
+ # Files which must not be linked with libpthread.
+diff --git a/sysdeps/pthread/tst-pthread-exit-signal.c b/sysdeps/pthread/tst-pthread-exit-signal.c
+new file mode 100644
+index 0000000000..b4526fe663
+--- /dev/null
++++ b/sysdeps/pthread/tst-pthread-exit-signal.c
+@@ -0,0 +1,45 @@
++/* Test that pending signals are not delivered on thread exit (bug 28607).
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++/* Due to bug 28607, pthread_kill (or pthread_cancel) restored the
++ signal mask during thread exit, triggering the delivery of a
++ blocked pending signal (SIGUSR1 in this test). */
++
++#include <support/xthread.h>
++#include <support/xsignal.h>
++
++static void *
++threadfunc (void *closure) /* Thread body; CLOSURE is not used. */
++{
++ sigset_t sigmask;
++ sigfillset (&sigmask); /* Build a set containing every signal. */
++ xpthread_sigmask (SIG_SETMASK, &sigmask, NULL); /* Install it as this thread's mask. */
++ xpthread_kill (pthread_self (), SIGUSR1); /* Sent to self while masked, so it should stay pending. */
++ pthread_exit (NULL); /* Exit path under test (bug 28607). */
++ return NULL;
++}
++
++static int
++do_test (void) /* Test entry point: run threadfunc once in a new thread. */
++{
++ pthread_t thr = xpthread_create (NULL, threadfunc, NULL); /* NULL attributes, NULL closure. */
++ xpthread_join (thr); /* Presumably blocks until the thread has exited. */
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/s390/configure b/sysdeps/s390/configure
+index fa46e9e351..e7f576338d 100644
+--- a/sysdeps/s390/configure
++++ b/sysdeps/s390/configure
+@@ -123,7 +123,9 @@ void testinsn (char *buf)
+ __asm__ (".machine \"arch13\" \n\t"
+ ".machinemode \"zarch_nohighgprs\" \n\t"
+ "lghi %%r0,16 \n\t"
+- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
++ "mvcrl 0(%0),32(%0) \n\t"
++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
++ : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+@@ -271,7 +273,9 @@ else
+ void testinsn (char *buf)
+ {
+ __asm__ ("lghi %%r0,16 \n\t"
+- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
++ "mvcrl 0(%0),32(%0) \n\t"
++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
++ : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS --shared conftest.c
+diff --git a/sysdeps/s390/configure.ac b/sysdeps/s390/configure.ac
+index 3ed5a8ef87..5c3479e8cf 100644
+--- a/sysdeps/s390/configure.ac
++++ b/sysdeps/s390/configure.ac
+@@ -88,7 +88,9 @@ void testinsn (char *buf)
+ __asm__ (".machine \"arch13\" \n\t"
+ ".machinemode \"zarch_nohighgprs\" \n\t"
+ "lghi %%r0,16 \n\t"
+- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
++ "mvcrl 0(%0),32(%0) \n\t"
++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
++ : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ dnl test, if assembler supports S390 arch13 instructions
+@@ -195,7 +197,9 @@ cat > conftest.c <<\EOF
+ void testinsn (char *buf)
+ {
+ __asm__ ("lghi %%r0,16 \n\t"
+- "mvcrl 0(%0),32(%0)" : : "a" (buf) : "memory", "r0");
++ "mvcrl 0(%0),32(%0) \n\t"
++ "vstrs %%v20,%%v20,%%v20,%%v20,0,2"
++ : : "a" (buf) : "memory", "r0");
+ }
+ EOF
+ dnl test, if assembler supports S390 arch13 zarch instructions as default
+diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
+index 5fc85e129f..ee59b5de14 100644
+--- a/sysdeps/s390/memmove.c
++++ b/sysdeps/s390/memmove.c
+@@ -43,7 +43,7 @@ extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
+ s390_libc_ifunc_expr (__redirect_memmove, memmove,
+ ({
+ s390_libc_ifunc_expr_stfle_init ();
+- (HAVE_MEMMOVE_ARCH13
++ (HAVE_MEMMOVE_ARCH13 && (hwcap & HWCAP_S390_VXRS_EXT2)
+ && S390_IS_ARCH13_MIE3 (stfle_bits))
+ ? MEMMOVE_ARCH13
+ : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
+diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
+index e6195c6e26..17c0cc3952 100644
+--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
++++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
+@@ -171,7 +171,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, memmove,
+ # if HAVE_MEMMOVE_ARCH13
+ IFUNC_IMPL_ADD (array, i, memmove,
+- S390_IS_ARCH13_MIE3 (stfle_bits),
++ ((dl_hwcap & HWCAP_S390_VXRS_EXT2)
++ && S390_IS_ARCH13_MIE3 (stfle_bits)),
+ MEMMOVE_ARCH13)
+ # endif
+ # if HAVE_MEMMOVE_Z13
+diff --git a/sysdeps/sh/be/sh4/fpu/Implies b/sysdeps/sh/be/sh4/fpu/Implies
+new file mode 100644
+index 0000000000..71b28ee1a4
+--- /dev/null
++++ b/sysdeps/sh/be/sh4/fpu/Implies
+@@ -0,0 +1 @@
++sh/sh4/fpu
+diff --git a/sysdeps/sh/le/sh4/fpu/Implies b/sysdeps/sh/le/sh4/fpu/Implies
+new file mode 100644
+index 0000000000..71b28ee1a4
+--- /dev/null
++++ b/sysdeps/sh/le/sh4/fpu/Implies
+@@ -0,0 +1 @@
++sh/sh4/fpu
+diff --git a/sysdeps/unix/sysv/linux/Makefile b/sysdeps/unix/sysv/linux/Makefile
+index 9b2a253032..34748ffcd1 100644
+--- a/sysdeps/unix/sysv/linux/Makefile
++++ b/sysdeps/unix/sysv/linux/Makefile
+@@ -100,7 +100,7 @@ tests += tst-clone tst-clone2 tst-clone3 tst-fanotify tst-personality \
+ tst-quota tst-sync_file_range tst-sysconf-iov_max tst-ttyname \
+ test-errno-linux tst-memfd_create tst-mlock2 tst-pkey \
+ tst-rlimit-infinity tst-ofdlocks tst-gettid tst-gettid-kill \
+- tst-tgkill
++ tst-tgkill tst-sysvsem-linux tst-sysvmsg-linux tst-sysvshm-linux
+ tests-internal += tst-ofdlocks-compat tst-sigcontext-get_pc
+
+ CFLAGS-tst-sigcontext-get_pc.c = -fasynchronous-unwind-tables
+diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+index fc688450ee..00a4d0c8e7 100644
+--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.h
+@@ -54,6 +54,10 @@
+ && MIDR_PARTNUM(midr) == 0x000)
+ #define IS_NEOVERSE_N1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
+ && MIDR_PARTNUM(midr) == 0xd0c)
++#define IS_NEOVERSE_N2(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
++ && MIDR_PARTNUM(midr) == 0xd49)
++#define IS_NEOVERSE_V1(midr) (MIDR_IMPLEMENTOR(midr) == 'A' \
++ && MIDR_PARTNUM(midr) == 0xd40)
+
+ #define IS_EMAG(midr) (MIDR_IMPLEMENTOR(midr) == 'P' \
+ && MIDR_PARTNUM(midr) == 0x000)
+diff --git a/sysdeps/unix/sysv/linux/mq_notify.c b/sysdeps/unix/sysv/linux/mq_notify.c
+index 61bbb03b64..2bb98172c8 100644
+--- a/sysdeps/unix/sysv/linux/mq_notify.c
++++ b/sysdeps/unix/sysv/linux/mq_notify.c
+@@ -132,9 +132,12 @@ helper_thread (void *arg)
+ to wait until it is done with it. */
+ (void) __pthread_barrier_wait (&notify_barrier);
+ }
+- else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED)
+- /* The only state we keep is the copy of the thread attributes. */
+- free (data.attr);
++ else if (data.raw[NOTIFY_COOKIE_LEN - 1] == NOTIFY_REMOVED && data.attr != NULL)
++ {
++ /* The only state we keep is the copy of the thread attributes. */
++ pthread_attr_destroy (data.attr);
++ free (data.attr);
++ }
+ }
+ return NULL;
+ }
+@@ -255,8 +258,14 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
+ if (data.attr == NULL)
+ return -1;
+
+- memcpy (data.attr, notification->sigev_notify_attributes,
+- sizeof (pthread_attr_t));
++ int ret = __pthread_attr_copy (data.attr,
++ notification->sigev_notify_attributes);
++ if (ret != 0)
++ {
++ free (data.attr);
++ __set_errno (ret);
++ return -1;
++ }
+ }
+
+ /* Construct the new request. */
+@@ -269,8 +278,11 @@ mq_notify (mqd_t mqdes, const struct sigevent *notification)
+ int retval = INLINE_SYSCALL (mq_notify, 2, mqdes, &se);
+
+ /* If it failed, free the allocated memory. */
+- if (__glibc_unlikely (retval != 0))
+- free (data.attr);
++ if (retval != 0 && data.attr != NULL)
++ {
++ pthread_attr_destroy (data.attr);
++ free (data.attr);
++ }
+
+ return retval;
+ }
+diff --git a/sysdeps/unix/sysv/linux/msgctl.c b/sysdeps/unix/sysv/linux/msgctl.c
+index 0776472d5e..a1f24ab242 100644
+--- a/sysdeps/unix/sysv/linux/msgctl.c
++++ b/sysdeps/unix/sysv/linux/msgctl.c
+@@ -90,8 +90,15 @@ __msgctl64 (int msqid, int cmd, struct __msqid64_ds *buf)
+ struct kernel_msqid64_ds ksemid, *arg = NULL;
+ if (buf != NULL)
+ {
+- msqid64_to_kmsqid64 (buf, &ksemid);
+- arg = &ksemid;
++ /* This is a Linux extension where kernel returns a 'struct msginfo'
++ instead. */
++ if (cmd == IPC_INFO || cmd == MSG_INFO)
++ arg = (struct kernel_msqid64_ds *) buf;
++ else
++ {
++ msqid64_to_kmsqid64 (buf, &ksemid);
++ arg = &ksemid;
++ }
+ }
+ # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ if (cmd == IPC_SET)
+@@ -169,8 +176,15 @@ __msgctl (int msqid, int cmd, struct msqid_ds *buf)
+ struct __msqid64_ds msqid64, *buf64 = NULL;
+ if (buf != NULL)
+ {
+- msqid_to_msqid64 (&msqid64, buf);
+- buf64 = &msqid64;
++ /* This is a Linux extension where kernel returns a 'struct msginfo'
++ instead. */
++ if (cmd == IPC_INFO || cmd == MSG_INFO)
++ buf64 = (struct __msqid64_ds *) buf;
++ else
++ {
++ msqid_to_msqid64 (&msqid64, buf);
++ buf64 = &msqid64;
++ }
+ }
+
+ int ret = __msgctl64 (msqid, cmd, buf64);
+diff --git a/sysdeps/unix/sysv/linux/semctl.c b/sysdeps/unix/sysv/linux/semctl.c
+index f131a26fc7..1cdabde8f2 100644
+--- a/sysdeps/unix/sysv/linux/semctl.c
++++ b/sysdeps/unix/sysv/linux/semctl.c
+@@ -102,6 +102,7 @@ semun64_to_ksemun64 (int cmd, union semun64 semun64,
+ r.array = semun64.array;
+ break;
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_STAT:
+ case IPC_SET:
+ r.buf = buf;
+@@ -150,6 +151,7 @@ __semctl64 (int semid, int semnum, int cmd, ...)
+ case IPC_STAT: /* arg.buf */
+ case IPC_SET:
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_INFO: /* arg.__buf */
+ case SEM_INFO:
+ va_start (ap, cmd);
+@@ -238,6 +240,7 @@ semun_to_semun64 (int cmd, union semun semun, struct __semid64_ds *semid64)
+ r.array = semun.array;
+ break;
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_STAT:
+ case IPC_SET:
+ r.buf = semid64;
+@@ -267,6 +270,7 @@ __semctl (int semid, int semnum, int cmd, ...)
+ case IPC_STAT: /* arg.buf */
+ case IPC_SET:
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_INFO: /* arg.__buf */
+ case SEM_INFO:
+ va_start (ap, cmd);
+@@ -321,6 +325,7 @@ __semctl_mode16 (int semid, int semnum, int cmd, ...)
+ case IPC_STAT: /* arg.buf */
+ case IPC_SET:
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_INFO: /* arg.__buf */
+ case SEM_INFO:
+ va_start (ap, cmd);
+@@ -354,6 +359,7 @@ __old_semctl (int semid, int semnum, int cmd, ...)
+ case IPC_STAT: /* arg.buf */
+ case IPC_SET:
+ case SEM_STAT:
++ case SEM_STAT_ANY:
+ case IPC_INFO: /* arg.__buf */
+ case SEM_INFO:
+ va_start (ap, cmd);
+diff --git a/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
+new file mode 100644
+index 0000000000..7eeaf15a5a
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/sh/be/sh4/fpu/Implies
+@@ -0,0 +1 @@
++unix/sysv/linux/sh/sh4/fpu
+diff --git a/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
+new file mode 100644
+index 0000000000..7eeaf15a5a
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/sh/le/sh4/fpu/Implies
+@@ -0,0 +1 @@
++unix/sysv/linux/sh/sh4/fpu
+diff --git a/sysdeps/unix/sysv/linux/shmctl.c b/sysdeps/unix/sysv/linux/shmctl.c
+index 76d88441f1..1d19a798b1 100644
+--- a/sysdeps/unix/sysv/linux/shmctl.c
++++ b/sysdeps/unix/sysv/linux/shmctl.c
+@@ -90,8 +90,15 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
+ struct kernel_shmid64_ds kshmid, *arg = NULL;
+ if (buf != NULL)
+ {
+- shmid64_to_kshmid64 (buf, &kshmid);
+- arg = &kshmid;
++ /* This is a Linux extension where kernel expects either a
++ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
++ if (cmd == IPC_INFO || cmd == SHM_INFO)
++ arg = (struct kernel_shmid64_ds *) buf;
++ else
++ {
++ shmid64_to_kshmid64 (buf, &kshmid);
++ arg = &kshmid;
++ }
+ }
+ # ifdef __ASSUME_SYSVIPC_BROKEN_MODE_T
+ if (cmd == IPC_SET)
+@@ -107,7 +114,6 @@ __shmctl64 (int shmid, int cmd, struct __shmid64_ds *buf)
+
+ switch (cmd)
+ {
+- case IPC_INFO:
+ case IPC_STAT:
+ case SHM_STAT:
+ case SHM_STAT_ANY:
+@@ -168,8 +174,15 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
+ struct __shmid64_ds shmid64, *buf64 = NULL;
+ if (buf != NULL)
+ {
+- shmid_to_shmid64 (&shmid64, buf);
+- buf64 = &shmid64;
++ /* This is a Linux extension where kernel expects either a
++ 'struct shminfo' (IPC_INFO) or 'struct shm_info' (SHM_INFO). */
++ if (cmd == IPC_INFO || cmd == SHM_INFO)
++ buf64 = (struct __shmid64_ds *) buf;
++ else
++ {
++ shmid_to_shmid64 (&shmid64, buf);
++ buf64 = &shmid64;
++ }
+ }
+
+ int ret = __shmctl64 (shmid, cmd, buf64);
+@@ -178,7 +191,6 @@ __shmctl (int shmid, int cmd, struct shmid_ds *buf)
+
+ switch (cmd)
+ {
+- case IPC_INFO:
+ case IPC_STAT:
+ case SHM_STAT:
+ case SHM_STAT_ANY:
+diff --git a/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
+new file mode 100644
+index 0000000000..630f4f792c
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/tst-sysvmsg-linux.c
+@@ -0,0 +1,177 @@
++/* Basic tests for Linux SYSV message queue extensions.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/ipc.h>
++#include <sys/msg.h>
++#include <errno.h>
++#include <stdlib.h>
++#include <stdbool.h>
++#include <stdio.h>
++
++#include <support/check.h>
++#include <support/temp_file.h>
++
++#define MSGQ_MODE 0644
++
++/* These are for the temporary file we generate. */
++static char *name;
++static int msqid;
++
++static void
++remove_msq (void)
++{
++ /* Enforce message queue removal in case of early test failure.
++ Ignore error since the queue may already have been removed. */
++ msgctl (msqid, IPC_RMID, NULL);
++}
++
++static void
++do_prepare (int argc, char *argv[])
++{
++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvmsg.", &name) != -1);
++}
++
++#define PREPARE do_prepare
++
++struct test_msginfo
++{
++ int msgmax;
++ int msgmnb;
++ int msgmni;
++};
++
++/* It tries to obtain some system-wide SysV message queue information from
++ /proc to check against IPC_INFO/MSG_INFO. The /proc only returns the
++ tunable values of MSGMAX, MSGMNB, and MSGMNI.
++
++ The kernel also returns constant values for MSGSSZ, MSGSEG and also MSGMAP,
++ MSGPOOL, and MSGTQL (for IPC_INFO). They are not checked because they
++ might change across kernel releases. */
++
++static int
++read_proc_file (const char *file)
++{
++ FILE *f = fopen (file, "r");
++ if (f == NULL)
++ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
++
++ int v;
++ int r = fscanf (f, "%d", & v);
++ TEST_VERIFY_EXIT (r == 1);
++
++ fclose (f);
++ return v;
++}
++
++
++/* Check if the message queue with IDX (index into the kernel's internal
++ array) matches the one with KEY. The CMD is either MSG_STAT or
++ MSG_STAT_ANY. */
++
++static bool
++check_msginfo (int idx, key_t key, int cmd)
++{
++ struct msqid_ds msginfo;
++ int mid = msgctl (idx, cmd, &msginfo);
++ /* Ignore unused array slot returned by the kernel or information from
++ unknown message queue. */
++ if ((mid == -1 && errno == EINVAL) || mid != msqid)
++ return false;
++
++ if (mid == -1)
++ FAIL_EXIT1 ("msgctl with %s failed: %m",
++ cmd == MSG_STAT ? "MSG_STAT" : "MSG_STAT_ANY");
++
++ TEST_COMPARE (msginfo.msg_perm.__key, key);
++ TEST_COMPARE (msginfo.msg_perm.mode, MSGQ_MODE);
++ TEST_COMPARE (msginfo.msg_qnum, 0);
++
++ return true;
++}
++
++static int
++do_test (void)
++{
++ key_t key = ftok (name, 'G');
++ if (key == -1)
++ FAIL_EXIT1 ("ftok failed: %m");
++
++ msqid = msgget (key, MSGQ_MODE | IPC_CREAT);
++ if (msqid == -1)
++ FAIL_EXIT1 ("msgget failed: %m");
++
++ atexit (remove_msq); /* Register cleanup only once msqid is valid. */
++
++ struct test_msginfo tipcinfo;
++ tipcinfo.msgmax = read_proc_file ("/proc/sys/kernel/msgmax");
++ tipcinfo.msgmnb = read_proc_file ("/proc/sys/kernel/msgmnb");
++ tipcinfo.msgmni = read_proc_file ("/proc/sys/kernel/msgmni");
++
++ int msqidx;
++
++ {
++ struct msginfo ipcinfo;
++ msqidx = msgctl (msqid, IPC_INFO, (struct msqid_ds *) &ipcinfo);
++ if (msqidx == -1)
++ FAIL_EXIT1 ("msgctl with IPC_INFO failed: %m");
++
++ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
++ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
++ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
++ }
++
++ /* Same as before but with MSG_INFO. */
++ {
++ struct msginfo ipcinfo;
++ msqidx = msgctl (msqid, MSG_INFO, (struct msqid_ds *) &ipcinfo);
++ if (msqidx == -1)
++ FAIL_EXIT1 ("msgctl with MSG_INFO failed: %m");
++
++ TEST_COMPARE (ipcinfo.msgmax, tipcinfo.msgmax);
++ TEST_COMPARE (ipcinfo.msgmnb, tipcinfo.msgmnb);
++ TEST_COMPARE (ipcinfo.msgmni, tipcinfo.msgmni);
++ }
++
++ /* We check if the created message queue shows in the global list. */
++ bool found = false;
++ for (int i = 0; i <= msqidx; i++)
++ {
++ /* We can't tell apart if MSG_STAT_ANY is not supported (kernel older
++ than 4.17) or if the index used is invalid. So it just checks if the
++ value returned from a valid call matches the created message
++ queue. */
++ check_msginfo (i, key, MSG_STAT_ANY);
++
++ if (check_msginfo (i, key, MSG_STAT))
++ {
++ found = true;
++ break;
++ }
++ }
++
++ if (!found)
++ FAIL_EXIT1 ("msgctl with MSG_STAT/MSG_STAT_ANY could not find the "
++ "created message queue");
++
++ if (msgctl (msqid, IPC_RMID, NULL) == -1)
++ FAIL_EXIT1 ("msgctl failed: %m");
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
+new file mode 100644
+index 0000000000..45f19e2d37
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/tst-sysvsem-linux.c
+@@ -0,0 +1,184 @@
++/* Basic tests for Linux SYSV semaphore extensions.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/ipc.h>
++#include <sys/sem.h>
++#include <errno.h>
++#include <stdlib.h>
++#include <stdbool.h>
++#include <stdio.h>
++
++#include <support/check.h>
++#include <support/temp_file.h>
++
++/* These are for the temporary file we generate. */
++static char *name;
++static int semid;
++
++static void
++remove_sem (void)
++{
++ /* Enforce semaphore removal in case of early test failure.
++ Ignore error since the sem may already have been removed. */
++ semctl (semid, 0, IPC_RMID, 0);
++}
++
++static void
++do_prepare (int argc, char *argv[])
++{
++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvsem.", &name) != -1);
++}
++
++#define PREPARE do_prepare
++
++#define SEM_MODE 0644
++
++union semun
++{
++ int val;
++ struct semid_ds *buf;
++ unsigned short *array;
++ struct seminfo *__buf;
++};
++
++struct test_seminfo
++{
++ int semmsl;
++ int semmns;
++ int semopm;
++ int semmni;
++};
++
++/* It tries to obtain some system-wide SysV semaphore information from /proc
++ to check against IPC_INFO/SEM_INFO. The /proc only returns the tunable
++ values of SEMMSL, SEMMNS, SEMOPM, and SEMMNI.
++
++ The kernel also returns constant values for SEMVMX, SEMMNU, SEMMAP, SEMUME,
++ and also SEMUSZ and SEMAEM (for IPC_INFO). They are not checked because
++ they might change across kernel releases. */
++
++static void
++read_sem_stat (struct test_seminfo *tseminfo)
++{
++ FILE *f = fopen ("/proc/sys/kernel/sem", "r");
++ if (f == NULL)
++ FAIL_UNSUPPORTED ("/proc is not mounted or /proc/sys/kernel/sem is not "
++ "available");
++
++ int r = fscanf (f, "%d %d %d %d",
++ &tseminfo->semmsl, &tseminfo->semmns, &tseminfo->semopm,
++ &tseminfo->semmni);
++ TEST_VERIFY_EXIT (r == 4);
++
++ fclose (f);
++}
++
++
++/* Check if the semaphore with IDX (index into the kernel's internal array)
++ matches the one with KEY. The CMD is either SEM_STAT or SEM_STAT_ANY. */
++
++static bool
++check_seminfo (int idx, key_t key, int cmd)
++{
++ struct semid_ds seminfo;
++ int sid = semctl (idx, 0, cmd, (union semun) { .buf = &seminfo });
++ /* Ignore unused array slot returned by the kernel or information from
++ unknown semaphores. */
++ if ((sid == -1 && errno == EINVAL) || sid != semid)
++ return false;
++
++ if (sid == -1)
++ FAIL_EXIT1 ("semctl with SEM_STAT failed (errno=%d)", errno);
++
++ TEST_COMPARE (seminfo.sem_perm.__key, key);
++ TEST_COMPARE (seminfo.sem_perm.mode, SEM_MODE);
++ TEST_COMPARE (seminfo.sem_nsems, 1);
++
++ return true;
++}
++
++static int
++do_test (void)
++{
++ key_t key = ftok (name, 'G');
++ if (key == -1)
++ FAIL_EXIT1 ("ftok failed: %m");
++
++ semid = semget (key, 1, IPC_CREAT | IPC_EXCL | SEM_MODE);
++ if (semid == -1)
++ FAIL_EXIT1 ("semget failed: %m");
++
++ atexit (remove_sem); /* Register cleanup only once semid is valid. */
++
++ struct test_seminfo tipcinfo;
++ read_sem_stat (&tipcinfo);
++
++ int semidx;
++
++ {
++ struct seminfo ipcinfo;
++ semidx = semctl (semid, 0, IPC_INFO, (union semun) { .__buf = &ipcinfo });
++ if (semidx == -1)
++ FAIL_EXIT1 ("semctl with IPC_INFO failed: %m");
++
++ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
++ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
++ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
++ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
++ }
++
++ /* Same as before but with SEM_INFO. */
++ {
++ struct seminfo ipcinfo;
++ semidx = semctl (semid, 0, SEM_INFO, (union semun) { .__buf = &ipcinfo });
++ if (semidx == -1)
++ FAIL_EXIT1 ("semctl with SEM_INFO failed: %m");
++
++ TEST_COMPARE (ipcinfo.semmsl, tipcinfo.semmsl);
++ TEST_COMPARE (ipcinfo.semmns, tipcinfo.semmns);
++ TEST_COMPARE (ipcinfo.semopm, tipcinfo.semopm);
++ TEST_COMPARE (ipcinfo.semmni, tipcinfo.semmni);
++ }
++
++ /* We check if the created semaphore shows in the system-wide status. */
++ bool found = false;
++ for (int i = 0; i <= semidx; i++)
++ {
++ /* We can't tell apart if SEM_STAT_ANY is not supported (kernel older
++ than 4.17) or if the index used is invalid. So it just checks if the
++ value returned from a valid call matches the created semaphore. */
++ check_seminfo (i, key, SEM_STAT_ANY);
++
++ if (check_seminfo (i, key, SEM_STAT))
++ {
++ found = true;
++ break;
++ }
++ }
++
++ if (!found)
++ FAIL_EXIT1 ("semctl with SEM_STAT/SEM_STAT_ANY could not find the "
++ "created semaphore");
++
++ if (semctl (semid, 0, IPC_RMID, 0) == -1)
++ FAIL_EXIT1 ("semctl failed: %m");
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
+new file mode 100644
+index 0000000000..bb154592a6
+--- /dev/null
++++ b/sysdeps/unix/sysv/linux/tst-sysvshm-linux.c
+@@ -0,0 +1,188 @@
++/* Basic tests for Linux SYSV shared memory extensions.
++ Copyright (C) 2020 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sys/ipc.h>
++#include <sys/shm.h>
++#include <errno.h>
++#include <stdlib.h>
++#include <stdbool.h>
++#include <stdio.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <limits.h>
++
++#include <support/check.h>
++#include <support/temp_file.h>
++
++#define SHM_MODE 0644
++
++/* These are for the temporary file we generate. */
++static char *name;
++static int shmid;
++static long int pgsz;
++
++static void
++remove_shm (void)
++{
++ /* Enforce shared memory removal in case of early test failure.
++ Ignore error since the shm may already have been removed. */
++ shmctl (shmid, IPC_RMID, NULL);
++}
++
++static void
++do_prepare (int argc, char *argv[])
++{
++ TEST_VERIFY_EXIT (create_temp_file ("tst-sysvshm.", &name) != -1);
++}
++
++#define PREPARE do_prepare
++
++struct test_shminfo
++{
++ __syscall_ulong_t shmall;
++ __syscall_ulong_t shmmax;
++ __syscall_ulong_t shmmni;
++};
++
++/* It tries to obtain some system-wide SysV shared memory information from
++ /proc to check against IPC_INFO/SHM_INFO. The /proc only returns the
++ tunable values of SHMALL, SHMMAX, and SHMMNI. */
++
++static uint64_t
++read_proc_file (const char *file)
++{
++ FILE *f = fopen (file, "r");
++ if (f == NULL)
++ FAIL_UNSUPPORTED ("/proc is not mounted or %s is not available", file);
++
++ /* Handle 32-bit binaries running on 64-bit kernels. */
++ uint64_t v;
++ int r = fscanf (f, "%" SCNu64, &v);
++ TEST_VERIFY_EXIT (r == 1);
++
++ fclose (f);
++ return v;
++}
++
++
++/* Check if the shared memory segment with IDX (index into the kernel's
++ internal array) matches the one with KEY. The CMD is either SHM_STAT or
++ SHM_STAT_ANY. */
++
++static bool
++check_shminfo (int idx, key_t key, int cmd)
++{
++ struct shmid_ds shminfo;
++ int sid = shmctl (idx, cmd, &shminfo);
++ /* Ignore unused array slot returned by the kernel or information from
++ unknown shared memory segment. */
++ if ((sid == -1 && errno == EINVAL) || sid != shmid)
++ return false;
++
++ if (sid == -1)
++ FAIL_EXIT1 ("shmctl with %s failed: %m",
++ cmd == SHM_STAT ? "SHM_STAT" : "SHM_STAT_ANY");
++
++ TEST_COMPARE (shminfo.shm_perm.__key, key);
++ TEST_COMPARE (shminfo.shm_perm.mode, SHM_MODE);
++ TEST_COMPARE (shminfo.shm_segsz, pgsz);
++
++ return true;
++}
++
++static int
++do_test (void)
++{
++ pgsz = sysconf (_SC_PAGESIZE);
++ if (pgsz == -1)
++ FAIL_EXIT1 ("sysconf (_SC_PAGESIZE) failed: %m");
++
++ key_t key = ftok (name, 'G');
++ if (key == -1)
++ FAIL_EXIT1 ("ftok failed: %m");
++
++ shmid = shmget (key, pgsz, IPC_CREAT | IPC_EXCL | SHM_MODE);
++ if (shmid == -1)
++ FAIL_EXIT1 ("shmget failed: %m");
++
++ atexit (remove_shm); /* Register cleanup only once shmid is valid. */
++
++ /* It does not check shmmax because the kernel clamps its value to INT_MAX for:
++
++ 1. Compat symbols with IPC_64, i.e, 32-bit binaries running on 64-bit
++ kernels.
++
++ 2. Default symbol without IPC_64 (defined as IPC_OLD within Linux) and
++ glibc always use IPC_64 for 32-bit ABIs (to support 64-bit time_t).
++ It means that 32-bit binaries running on 32-bit kernels will not see
++ shmmax being clamped.
++
++ And finding out whether the compat symbol is used would require checking
++ the underlying kernel against the current ABI. The shmall and shmmni
++ already provided enough coverage. */
++
++ struct test_shminfo tipcinfo;
++ tipcinfo.shmall = read_proc_file ("/proc/sys/kernel/shmall");
++ tipcinfo.shmmni = read_proc_file ("/proc/sys/kernel/shmmni");
++
++ int shmidx;
++
++ /* Note: SHM_INFO does not return a shminfo, but rather a 'struct shm_info'.
++ It is tricky to verify its values since the syscall returns system wide
++ resources consumed by shared memory. The shmctl implementation handles
++ SHM_INFO as IPC_INFO, so the IPC_INFO test should validate SHM_INFO as
++ well. */
++
++ {
++ struct shminfo ipcinfo;
++ shmidx = shmctl (shmid, IPC_INFO, (struct shmid_ds *) &ipcinfo);
++ if (shmidx == -1)
++ FAIL_EXIT1 ("shmctl with IPC_INFO failed: %m");
++
++ TEST_COMPARE (ipcinfo.shmall, tipcinfo.shmall);
++ TEST_COMPARE (ipcinfo.shmmni, tipcinfo.shmmni);
++ }
++
++ /* We check if the created shared memory shows in the global list. */
++ bool found = false;
++ for (int i = 0; i <= shmidx; i++)
++ {
++ /* We can't tell apart if SHM_STAT_ANY is not supported (kernel older
++ than 4.17) or if the index used is invalid. So it just checks if the
++ value returned from a valid call matches the created shared memory
++ segment. */
++ check_shminfo (i, key, SHM_STAT_ANY);
++
++ if (check_shminfo (i, key, SHM_STAT))
++ {
++ found = true;
++ break;
++ }
++ }
++
++ if (!found)
++ FAIL_EXIT1 ("shmctl with SHM_STAT/SHM_STAT_ANY could not find the "
++ "created shared memory");
++
++ if (shmctl (shmid, IPC_RMID, NULL) == -1)
++ FAIL_EXIT1 ("shmctl failed: %m");
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile
+index a6736aef25..e821d95fa3 100644
+--- a/sysdeps/x86/Makefile
++++ b/sysdeps/x86/Makefile
+@@ -12,10 +12,39 @@ endif
+ ifeq ($(subdir),setjmp)
+ gen-as-const-headers += jmp_buf-ssp.sym
+ sysdep_routines += __longjmp_cancel
++ifneq ($(enable-cet),no)
++ifneq ($(have-tunables),no)
++tests += tst-setjmp-cet
++tst-setjmp-cet-ENV = GLIBC_TUNABLES=glibc.cpu.x86_ibt=on:glibc.cpu.x86_shstk=on
++endif
++endif
+ endif
+
+ ifeq ($(subdir),string)
+ sysdep_routines += cacheinfo
++
++tests += \
++ tst-memchr-rtm \
++ tst-memcmp-rtm \
++ tst-memmove-rtm \
++ tst-memrchr-rtm \
++ tst-memset-rtm \
++ tst-strchr-rtm \
++ tst-strcpy-rtm \
++ tst-strlen-rtm \
++ tst-strncmp-rtm \
++ tst-strrchr-rtm
++
++CFLAGS-tst-memchr-rtm.c += -mrtm
++CFLAGS-tst-memcmp-rtm.c += -mrtm
++CFLAGS-tst-memmove-rtm.c += -mrtm
++CFLAGS-tst-memrchr-rtm.c += -mrtm
++CFLAGS-tst-memset-rtm.c += -mrtm
++CFLAGS-tst-strchr-rtm.c += -mrtm
++CFLAGS-tst-strcpy-rtm.c += -mrtm
++CFLAGS-tst-strlen-rtm.c += -mrtm
++CFLAGS-tst-strncmp-rtm.c += -mrtm
++CFLAGS-tst-strrchr-rtm.c += -mrtm
+ endif
+
+ ifneq ($(enable-cet),no)
+diff --git a/sysdeps/x86/cacheinfo.c b/sysdeps/x86/cacheinfo.c
+index 217c21c34f..3fb4a028d8 100644
+--- a/sysdeps/x86/cacheinfo.c
++++ b/sysdeps/x86/cacheinfo.c
+@@ -808,7 +808,7 @@ init_cacheinfo (void)
+ threads = 1 << ((ecx >> 12) & 0x0f);
+ }
+
+- if (threads == 0)
++ if (threads == 0 || cpu_features->basic.family >= 0x17)
+ {
+ /* If APIC ID width is not available, use logical
+ processor count. */
+@@ -823,8 +823,22 @@ init_cacheinfo (void)
+ if (threads > 0)
+ shared /= threads;
+
+- /* Account for exclusive L2 and L3 caches. */
+- shared += core;
++ /* Get shared cache per ccx for Zen architectures. */
++ if (cpu_features->basic.family >= 0x17)
++ {
++ unsigned int eax;
++
++ /* Get the number of threads sharing the L3 cache in the CCX. */
++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
++
++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
++ shared *= threads_per_ccx;
++ }
++ else
++ {
++ /* Account for exclusive L2 and L3 caches. */
++ shared += core;
++ }
+ }
+ }
+
+@@ -854,14 +868,20 @@ init_cacheinfo (void)
+ __x86_shared_cache_size = shared;
+ }
+
+- /* The large memcpy micro benchmark in glibc shows that 6 times of
+- shared cache size is the approximate value above which non-temporal
+- store becomes faster on a 8-core processor. This is the 3/4 of the
+- total shared cache size. */
++ /* The default setting for the non_temporal threshold is 3/4 of one
++ thread's share of the chip's cache. For most Intel and AMD processors
++ with an initial release date between 2017 and 2020, a thread's typical
++ share of the cache is from 500 KBytes to 2 MBytes. Using the 3/4
++ threshold leaves 125 KBytes to 500 KBytes of the thread's data
++ in cache after a maximum temporal copy, which will maintain
++ in cache a reasonable portion of the thread's stack and other
++ active data. If the threshold is set higher than one thread's
++ share of the cache, it has a substantial risk of negatively
++ impacting the performance of other threads running on the chip. */
+ __x86_shared_non_temporal_threshold
+ = (cpu_features->non_temporal_threshold != 0
+ ? cpu_features->non_temporal_threshold
+- : __x86_shared_cache_size * threads * 3 / 4);
++ : __x86_shared_cache_size * 3 / 4);
+
+ /* NB: The REP MOVSB threshold must be greater than VEC_SIZE * 8. */
+ unsigned int minimum_rep_movsb_threshold;
+diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
+index 4c24ba7c31..484efe7a0f 100644
+--- a/sysdeps/x86/cpu-features.c
++++ b/sysdeps/x86/cpu-features.c
+@@ -71,7 +71,6 @@ update_usable (struct cpu_features *cpu_features)
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_6);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_7);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_9);
+- CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_11);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_12);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_13);
+ CPU_FEATURE_UNSET (cpu_features, INDEX_7_EDX_17);
+@@ -318,6 +317,9 @@ update_usable (struct cpu_features *cpu_features)
+ /* Determine if PKU is usable. */
+ if (CPU_FEATURES_CPU_P (cpu_features, OSPKE))
+ CPU_FEATURE_SET (cpu_features, PKU);
++
++ if (CPU_FEATURES_CPU_P (cpu_features, RTM_ALWAYS_ABORT))
++ CPU_FEATURE_UNSET (cpu_features, RTM);
+ }
+
+ static void
+@@ -516,11 +518,39 @@ init_cpu_features (struct cpu_features *cpu_features)
+ break;
+ }
+
+- /* Disable TSX on some Haswell processors to avoid TSX on kernels that
+- weren't updated with the latest microcode package (which disables
+- broken feature by default). */
++ /* Disable TSX on some processors to avoid TSX on kernels that
++ weren't updated with the latest microcode package (which
++ disables broken feature by default). */
+ switch (model)
+ {
++ case 0x55:
++ if (stepping <= 5)
++ goto disable_tsx;
++ break;
++ case 0x8e:
++ /* NB: Although the errata documents that for model == 0x8e,
++ only 0xb stepping or lower are impacted, the intention of
++ the errata was to disable TSX on all client processors on
++ all steppings. Include 0xc stepping which is an Intel
++ Core i7-8665U, a client mobile processor. */
++ case 0x9e:
++ if (stepping > 0xc)
++ break;
++ /* Fall through. */
++ case 0x4e:
++ case 0x5e:
++ {
++ /* Disable Intel TSX and enable RTM_ALWAYS_ABORT for
++ processors listed in:
++
++https://www.intel.com/content/www/us/en/support/articles/000059422/processors.html
++ */
++disable_tsx:
++ CPU_FEATURE_UNSET (cpu_features, HLE);
++ CPU_FEATURE_UNSET (cpu_features, RTM);
++ CPU_FEATURE_SET (cpu_features, RTM_ALWAYS_ABORT);
++ }
++ break;
+ case 0x3f:
+ /* Xeon E7 v3 with stepping >= 4 has working TSX. */
+ if (stepping >= 4)
+@@ -546,8 +576,24 @@ init_cpu_features (struct cpu_features *cpu_features)
+ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+ |= bit_arch_Prefer_No_VZEROUPPER;
+ else
+- cpu_features->preferred[index_arch_Prefer_No_AVX512]
+- |= bit_arch_Prefer_No_AVX512;
++ {
++ cpu_features->preferred[index_arch_Prefer_No_AVX512]
++ |= bit_arch_Prefer_No_AVX512;
++
++ /* Avoid RTM abort triggered by VZEROUPPER inside a
++ transactionally executing RTM region. */
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
++ |= bit_arch_Prefer_No_VZEROUPPER;
++
++ /* Since to compare 2 32-byte strings, 256-bit EVEX strcmp
++ requires 2 loads, 3 VPCMPs and 2 KORDs while AVX2 strcmp
++ requires 1 load, 2 VPCMPEQs, 1 VPMINU and 1 VPMOVMSKB,
++ AVX2 strcmp is faster than EVEX strcmp. */
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
++ cpu_features->preferred[index_arch_Prefer_AVX2_STRCMP]
++ |= bit_arch_Prefer_AVX2_STRCMP;
++ }
+ }
+ /* This spells out "AuthenticAMD" or "HygonGenuine". */
+ else if ((ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+diff --git a/sysdeps/x86/cpu-features.h b/sysdeps/x86/cpu-features.h
+index a0b9b9177c..8995a15f09 100644
+--- a/sysdeps/x86/cpu-features.h
++++ b/sysdeps/x86/cpu-features.h
+@@ -295,7 +295,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define bit_cpu_AVX512_VP2INTERSECT (1u << 8)
+ #define bit_cpu_INDEX_7_EDX_9 (1u << 9)
+ #define bit_cpu_MD_CLEAR (1u << 10)
+-#define bit_cpu_INDEX_7_EDX_11 (1u << 11)
++#define bit_cpu_RTM_ALWAYS_ABORT (1u << 11)
+ #define bit_cpu_INDEX_7_EDX_12 (1u << 12)
+ #define bit_cpu_INDEX_7_EDX_13 (1u << 13)
+ #define bit_cpu_SERIALIZE (1u << 14)
+@@ -508,7 +508,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define index_cpu_AVX512_VP2INTERSECT COMMON_CPUID_INDEX_7
+ #define index_cpu_INDEX_7_EDX_9 COMMON_CPUID_INDEX_7
+ #define index_cpu_MD_CLEAR COMMON_CPUID_INDEX_7
+-#define index_cpu_INDEX_7_EDX_11 COMMON_CPUID_INDEX_7
++#define index_cpu_RTM_ALWAYS_ABORT COMMON_CPUID_INDEX_7
+ #define index_cpu_INDEX_7_EDX_12 COMMON_CPUID_INDEX_7
+ #define index_cpu_INDEX_7_EDX_13 COMMON_CPUID_INDEX_7
+ #define index_cpu_SERIALIZE COMMON_CPUID_INDEX_7
+@@ -721,7 +721,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define reg_AVX512_VP2INTERSECT edx
+ #define reg_INDEX_7_EDX_9 edx
+ #define reg_MD_CLEAR edx
+-#define reg_INDEX_7_EDX_11 edx
++#define reg_RTM_ALWAYS_ABORT edx
+ #define reg_INDEX_7_EDX_12 edx
+ #define reg_INDEX_7_EDX_13 edx
+ #define reg_SERIALIZE edx
+@@ -804,6 +804,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define bit_arch_Prefer_FSRM (1u << 13)
+ #define bit_arch_Prefer_No_AVX512 (1u << 14)
+ #define bit_arch_MathVec_Prefer_No_AVX512 (1u << 15)
++#define bit_arch_Prefer_AVX2_STRCMP (1u << 16)
+
+ #define index_arch_Fast_Rep_String PREFERRED_FEATURE_INDEX_1
+ #define index_arch_Fast_Copy_Backward PREFERRED_FEATURE_INDEX_1
+@@ -821,6 +822,7 @@ extern const struct cpu_features *__get_cpu_features (void)
+ #define index_arch_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
+ #define index_arch_MathVec_Prefer_No_AVX512 PREFERRED_FEATURE_INDEX_1
+ #define index_arch_Prefer_FSRM PREFERRED_FEATURE_INDEX_1
++#define index_arch_Prefer_AVX2_STRCMP PREFERRED_FEATURE_INDEX_1
+
+ /* XCR0 Feature flags. */
+ #define bit_XMM_state (1u << 1)
+diff --git a/sysdeps/x86/cpu-tunables.c b/sysdeps/x86/cpu-tunables.c
+index 588bbf9448..b251a91af3 100644
+--- a/sysdeps/x86/cpu-tunables.c
++++ b/sysdeps/x86/cpu-tunables.c
+@@ -238,6 +238,8 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
+ CHECK_GLIBC_IFUNC_PREFERRED_BOTH (n, cpu_features,
+ Fast_Copy_Backward,
+ disable, 18);
++ CHECK_GLIBC_IFUNC_PREFERRED_NEED_BOTH
++ (n, cpu_features, Prefer_AVX2_STRCMP, AVX2, disable, 18);
+ }
+ break;
+ case 19:
+diff --git a/sysdeps/x86/dl-cet.c b/sysdeps/x86/dl-cet.c
+index 03572f7af6..3cc54a8d53 100644
+--- a/sysdeps/x86/dl-cet.c
++++ b/sysdeps/x86/dl-cet.c
+@@ -47,7 +47,10 @@ dl_cet_check (struct link_map *m, const char *program)
+ /* No legacy object check if both IBT and SHSTK are always on. */
+ if (enable_ibt_type == cet_always_on
+ && enable_shstk_type == cet_always_on)
+- return;
++ {
++ THREAD_SETMEM (THREAD_SELF, header.feature_1, GL(dl_x86_feature_1));
++ return;
++ }
+
+ /* Check if IBT is enabled by kernel. */
+ bool ibt_enabled
+diff --git a/sysdeps/x86/dl-prop.h b/sysdeps/x86/dl-prop.h
+index 89911e19e2..4eb3b85a7b 100644
+--- a/sysdeps/x86/dl-prop.h
++++ b/sysdeps/x86/dl-prop.h
+@@ -145,15 +145,15 @@ _dl_process_cet_property_note (struct link_map *l,
+ }
+
+ static inline void __attribute__ ((unused))
+-_dl_process_pt_note (struct link_map *l, const ElfW(Phdr) *ph)
++_dl_process_pt_note (struct link_map *l, int fd, const ElfW(Phdr) *ph)
+ {
+ const ElfW(Nhdr) *note = (const void *) (ph->p_vaddr + l->l_addr);
+ _dl_process_cet_property_note (l, note, ph->p_memsz, ph->p_align);
+ }
+
+ static inline int __attribute__ ((always_inline))
+-_dl_process_gnu_property (struct link_map *l, uint32_t type, uint32_t datasz,
+- void *data)
++_dl_process_gnu_property (struct link_map *l, int fd, uint32_t type,
++ uint32_t datasz, void *data)
+ {
+ return 0;
+ }
+diff --git a/sysdeps/x86/tst-get-cpu-features.c b/sysdeps/x86/tst-get-cpu-features.c
+index 080c58e70b..527de3b5d9 100644
+--- a/sysdeps/x86/tst-get-cpu-features.c
++++ b/sysdeps/x86/tst-get-cpu-features.c
+@@ -183,6 +183,7 @@ do_test (void)
+ CHECK_CPU_FEATURE (FSRM);
+ CHECK_CPU_FEATURE (AVX512_VP2INTERSECT);
+ CHECK_CPU_FEATURE (MD_CLEAR);
++ CHECK_CPU_FEATURE (RTM_ALWAYS_ABORT);
+ CHECK_CPU_FEATURE (SERIALIZE);
+ CHECK_CPU_FEATURE (HYBRID);
+ CHECK_CPU_FEATURE (TSXLDTRK);
+@@ -336,6 +337,7 @@ do_test (void)
+ CHECK_CPU_FEATURE_USABLE (FSRM);
+ CHECK_CPU_FEATURE_USABLE (AVX512_VP2INTERSECT);
+ CHECK_CPU_FEATURE_USABLE (MD_CLEAR);
++ CHECK_CPU_FEATURE_USABLE (RTM_ALWAYS_ABORT);
+ CHECK_CPU_FEATURE_USABLE (SERIALIZE);
+ CHECK_CPU_FEATURE_USABLE (HYBRID);
+ CHECK_CPU_FEATURE_USABLE (TSXLDTRK);
+diff --git a/sysdeps/x86/tst-memchr-rtm.c b/sysdeps/x86/tst-memchr-rtm.c
+new file mode 100644
+index 0000000000..e47494011e
+--- /dev/null
++++ b/sysdeps/x86/tst-memchr-rtm.c
+@@ -0,0 +1,54 @@
++/* Test case for memchr inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ string1[100] = 'c';
++ string1[STRING_SIZE - 100] = 'c';
++ char *p = memchr (string1, 'c', STRING_SIZE);
++ if (p == &string1[100])
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ char *p = memchr (string1, 'c', STRING_SIZE);
++ if (p == &string1[100])
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("memchr", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-memcmp-rtm.c b/sysdeps/x86/tst-memcmp-rtm.c
+new file mode 100644
+index 0000000000..e4c8a623bb
+--- /dev/null
++++ b/sysdeps/x86/tst-memcmp-rtm.c
+@@ -0,0 +1,52 @@
++/* Test case for memcmp inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++char string2[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ memset (string2, 'a', STRING_SIZE);
++ if (memcmp (string1, string2, STRING_SIZE) == 0)
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ if (memcmp (string1, string2, STRING_SIZE) == 0)
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("memcmp", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-memmove-rtm.c b/sysdeps/x86/tst-memmove-rtm.c
+new file mode 100644
+index 0000000000..4bf97ef1e3
+--- /dev/null
++++ b/sysdeps/x86/tst-memmove-rtm.c
+@@ -0,0 +1,53 @@
++/* Test case for memmove inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++char string2[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ if (memmove (string2, string1, STRING_SIZE) == string2
++ && memcmp (string2, string1, STRING_SIZE) == 0)
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ if (memmove (string2, string1, STRING_SIZE) == string2
++ && memcmp (string2, string1, STRING_SIZE) == 0)
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("memmove", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-memrchr-rtm.c b/sysdeps/x86/tst-memrchr-rtm.c
+new file mode 100644
+index 0000000000..a57a5a8eb9
+--- /dev/null
++++ b/sysdeps/x86/tst-memrchr-rtm.c
+@@ -0,0 +1,54 @@
++/* Test case for memrchr inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ string1[100] = 'c';
++ string1[STRING_SIZE - 100] = 'c';
++ char *p = memrchr (string1, 'c', STRING_SIZE);
++ if (p == &string1[STRING_SIZE - 100])
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ char *p = memrchr (string1, 'c', STRING_SIZE);
++ if (p == &string1[STRING_SIZE - 100])
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("memrchr", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-memset-rtm.c b/sysdeps/x86/tst-memset-rtm.c
+new file mode 100644
+index 0000000000..bf343a4dad
+--- /dev/null
++++ b/sysdeps/x86/tst-memset-rtm.c
+@@ -0,0 +1,45 @@
++/* Test case for memset inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ return EXIT_SUCCESS;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ memset (string1, 'a', STRING_SIZE);
++ return 0;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("memset", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-setjmp-cet.c b/sysdeps/x86/tst-setjmp-cet.c
+new file mode 100644
+index 0000000000..42c795d2a8
+--- /dev/null
++++ b/sysdeps/x86/tst-setjmp-cet.c
+@@ -0,0 +1 @@
++#include <setjmp/tst-setjmp.c>
+diff --git a/sysdeps/x86/tst-strchr-rtm.c b/sysdeps/x86/tst-strchr-rtm.c
+new file mode 100644
+index 0000000000..a82e29c072
+--- /dev/null
++++ b/sysdeps/x86/tst-strchr-rtm.c
+@@ -0,0 +1,54 @@
++/* Test case for strchr inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE - 1);
++ string1[100] = 'c';
++ string1[STRING_SIZE - 100] = 'c';
++ char *p = strchr (string1, 'c');
++ if (p == &string1[100])
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ char *p = strchr (string1, 'c');
++ if (p == &string1[100])
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("strchr", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-strcpy-rtm.c b/sysdeps/x86/tst-strcpy-rtm.c
+new file mode 100644
+index 0000000000..2b2a583fb4
+--- /dev/null
++++ b/sysdeps/x86/tst-strcpy-rtm.c
+@@ -0,0 +1,53 @@
++/* Test case for strcpy inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++char string2[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE - 1);
++ if (strcpy (string2, string1) == string2
++ && strcmp (string2, string1) == 0)
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ if (strcpy (string2, string1) == string2
++ && strcmp (string2, string1) == 0)
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("strcpy", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-string-rtm.h b/sysdeps/x86/tst-string-rtm.h
+new file mode 100644
+index 0000000000..6ed9eca017
+--- /dev/null
++++ b/sysdeps/x86/tst-string-rtm.h
+@@ -0,0 +1,72 @@
++/* Test string function in a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <string.h>
++#include <x86intrin.h>
++#include <cpu-features.h>
++#include <support/check.h>
++#include <support/test-driver.h>
++
++static int
++do_test_1 (const char *name, unsigned int loop, int (*prepare) (void),
++ int (*function) (void))
++{
++ if (!CPU_FEATURE_USABLE (RTM))
++ return EXIT_UNSUPPORTED;
++
++ int status = prepare ();
++ if (status != EXIT_SUCCESS)
++ return status;
++
++ unsigned int i;
++ unsigned int naborts = 0;
++ unsigned int failed = 0;
++ for (i = 0; i < loop; i++)
++ {
++ failed |= function ();
++ if (_xbegin() == _XBEGIN_STARTED)
++ {
++ failed |= function ();
++ _xend();
++ }
++ else
++ {
++ failed |= function ();
++ ++naborts;
++ }
++ }
++
++ if (failed)
++ FAIL_EXIT1 ("%s() failed", name);
++
++ if (naborts)
++ {
++ /* NB: Low single digit (<= 5%) noise-level aborts are normal for
++ TSX. */
++ double rate = 100 * ((double) naborts) / ((double) loop);
++ if (rate > 5)
++ FAIL_EXIT1 ("TSX abort rate: %.2f%% (%d out of %d)",
++ rate, naborts, loop);
++ }
++
++ return EXIT_SUCCESS;
++}
++
++static int do_test (void);
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/x86/tst-strlen-rtm.c b/sysdeps/x86/tst-strlen-rtm.c
+new file mode 100644
+index 0000000000..0dcf14db87
+--- /dev/null
++++ b/sysdeps/x86/tst-strlen-rtm.c
+@@ -0,0 +1,53 @@
++/* Test case for strlen inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE - 1);
++ string1[STRING_SIZE - 100] = '\0';
++ size_t len = strlen (string1);
++ if (len == STRING_SIZE - 100)
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ size_t len = strlen (string1);
++ if (len == STRING_SIZE - 100)
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("strlen", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-strncmp-rtm.c b/sysdeps/x86/tst-strncmp-rtm.c
+new file mode 100644
+index 0000000000..236ad951b5
+--- /dev/null
++++ b/sysdeps/x86/tst-strncmp-rtm.c
+@@ -0,0 +1,52 @@
++/* Test case for strncmp inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++char string2[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE - 1);
++ memset (string2, 'a', STRING_SIZE - 1);
++ if (strncmp (string1, string2, STRING_SIZE) == 0)
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ if (strncmp (string1, string2, STRING_SIZE) == 0)
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("strncmp", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86/tst-strrchr-rtm.c b/sysdeps/x86/tst-strrchr-rtm.c
+new file mode 100644
+index 0000000000..e32bfaf5f5
+--- /dev/null
++++ b/sysdeps/x86/tst-strrchr-rtm.c
+@@ -0,0 +1,53 @@
++/* Test case for strrchr inside a transactionally executing RTM region.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <tst-string-rtm.h>
++
++#define LOOP 3000
++#define STRING_SIZE 1024
++char string1[STRING_SIZE];
++
++__attribute__ ((noinline, noclone))
++static int
++prepare (void)
++{
++ memset (string1, 'a', STRING_SIZE - 1);
++ string1[STRING_SIZE - 100] = 'c';
++ char *p = strrchr (string1, 'c');
++ if (p == &string1[STRING_SIZE - 100])
++ return EXIT_SUCCESS;
++ else
++ return EXIT_FAILURE;
++}
++
++__attribute__ ((noinline, noclone))
++static int
++function (void)
++{
++ char *p = strrchr (string1, 'c');
++ if (p == &string1[STRING_SIZE - 100])
++ return 0;
++ else
++ return 1;
++}
++
++static int
++do_test (void)
++{
++ return do_test_1 ("strrchr", LOOP, prepare, function);
++}
+diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
+index 42b97c5cc7..020044da80 100644
+--- a/sysdeps/x86_64/Makefile
++++ b/sysdeps/x86_64/Makefile
+@@ -20,6 +20,8 @@ endif
+ ifeq ($(subdir),string)
+ sysdep_routines += strcasecmp_l-nonascii strncase_l-nonascii
+ gen-as-const-headers += locale-defines.sym
++tests += \
++ tst-rsi-strlen
+ endif
+
+ ifeq ($(subdir),elf)
+@@ -150,6 +152,11 @@ ifeq ($(subdir),csu)
+ gen-as-const-headers += tlsdesc.sym rtld-offsets.sym
+ endif
+
++ifeq ($(subdir),wcsmbs)
++tests += \
++ tst-rsi-wcslen
++endif
++
+ $(objpfx)x86_64/tst-x86_64mod-1.os: $(objpfx)tst-x86_64mod-1.os
+ $(make-target-directory)
+ rm -f $@
+diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure
+old mode 100644
+new mode 100755
+index 84f82c2406..fc1840e23f
+--- a/sysdeps/x86_64/configure
++++ b/sysdeps/x86_64/configure
+@@ -107,39 +107,6 @@ if test x"$build_mathvec" = xnotset; then
+ build_mathvec=yes
+ fi
+
+-if test "$static_pie" = yes; then
+- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for linker static PIE support" >&5
+-$as_echo_n "checking for linker static PIE support... " >&6; }
+-if ${libc_cv_ld_static_pie+:} false; then :
+- $as_echo_n "(cached) " >&6
+-else
+- cat > conftest.s <<\EOF
+- .text
+- .global _start
+- .weak foo
+-_start:
+- leaq foo(%rip), %rax
+-EOF
+- libc_cv_pie_option="-Wl,-pie"
+- if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&5'
+- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+- (eval $ac_try) 2>&5
+- ac_status=$?
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+- test $ac_status = 0; }; }; then
+- libc_cv_ld_static_pie=yes
+- else
+- libc_cv_ld_static_pie=no
+- fi
+-rm -f conftest*
+-fi
+-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libc_cv_ld_static_pie" >&5
+-$as_echo "$libc_cv_ld_static_pie" >&6; }
+- if test "$libc_cv_ld_static_pie" != yes; then
+- as_fn_error $? "linker support for static PIE needed" "$LINENO" 5
+- fi
+-fi
+-
+ $as_echo "#define PI_STATIC_AND_HIDDEN 1" >>confdefs.h
+
+
+diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac
+index cdaba0c075..611a7d9ba3 100644
+--- a/sysdeps/x86_64/configure.ac
++++ b/sysdeps/x86_64/configure.ac
+@@ -53,31 +53,6 @@ if test x"$build_mathvec" = xnotset; then
+ build_mathvec=yes
+ fi
+
+-dnl Check if linker supports static PIE with the fix for
+-dnl
+-dnl https://sourceware.org/bugzilla/show_bug.cgi?id=21782
+-dnl
+-if test "$static_pie" = yes; then
+- AC_CACHE_CHECK(for linker static PIE support, libc_cv_ld_static_pie, [dnl
+-cat > conftest.s <<\EOF
+- .text
+- .global _start
+- .weak foo
+-_start:
+- leaq foo(%rip), %rax
+-EOF
+- libc_cv_pie_option="-Wl,-pie"
+- if AC_TRY_COMMAND(${CC-cc} $CFLAGS $CPPFLAGS $LDFLAGS -nostartfiles -nostdlib $no_ssp $libc_cv_pie_option -o conftest conftest.s 1>&AS_MESSAGE_LOG_FD); then
+- libc_cv_ld_static_pie=yes
+- else
+- libc_cv_ld_static_pie=no
+- fi
+-rm -f conftest*])
+- if test "$libc_cv_ld_static_pie" != yes; then
+- AC_MSG_ERROR([linker support for static PIE needed])
+- fi
+-fi
+-
+ dnl It is always possible to access static and hidden symbols in an
+ dnl position independent way.
+ AC_DEFINE(PI_STATIC_AND_HIDDEN)
+diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h
+index ca73d8fef9..363a749cb2 100644
+--- a/sysdeps/x86_64/dl-machine.h
++++ b/sysdeps/x86_64/dl-machine.h
+@@ -315,16 +315,22 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc,
+ {
+ # ifndef RTLD_BOOTSTRAP
+ if (sym_map != map
+- && sym_map->l_type != lt_executable
+ && !sym_map->l_relocated)
+ {
+ const char *strtab
+ = (const char *) D_PTR (map, l_info[DT_STRTAB]);
+- _dl_error_printf ("\
++ if (sym_map->l_type == lt_executable)
++ _dl_fatal_printf ("\
++%s: IFUNC symbol '%s' referenced in '%s' is defined in the executable \
++and creates an unsatisfiable circular dependency.\n",
++ RTLD_PROGNAME, strtab + refsym->st_name,
++ map->l_name);
++ else
++ _dl_error_printf ("\
+ %s: Relink `%s' with `%s' for IFUNC symbol `%s'\n",
+- RTLD_PROGNAME, map->l_name,
+- sym_map->l_name,
+- strtab + refsym->st_name);
++ RTLD_PROGNAME, map->l_name,
++ sym_map->l_name,
++ strtab + refsym->st_name);
+ }
+ # endif
+ value = ((ElfW(Addr) (*) (void)) value) ();
+diff --git a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
+index 7659758972..e5fd5ac9cb 100644
+--- a/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
++++ b/sysdeps/x86_64/fpu/multiarch/ifunc-fma4.h
+@@ -32,7 +32,7 @@ IFUNC_SELECTOR (void)
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+ return OPTIMIZE (fma);
+
+- if (CPU_FEATURE_USABLE_P (cpu_features, FMA))
++ if (CPU_FEATURE_USABLE_P (cpu_features, FMA4))
+ return OPTIMIZE (fma4);
+
+ return OPTIMIZE (sse2);
+diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
+index a5c879d2af..070e5ef90b 100644
+--- a/sysdeps/x86_64/memchr.S
++++ b/sysdeps/x86_64/memchr.S
+@@ -21,9 +21,11 @@
+ #ifdef USE_AS_WMEMCHR
+ # define MEMCHR wmemchr
+ # define PCMPEQ pcmpeqd
++# define CHAR_PER_VEC 4
+ #else
+ # define MEMCHR memchr
+ # define PCMPEQ pcmpeqb
++# define CHAR_PER_VEC 16
+ #endif
+
+ /* fast SSE2 version with using pmaxub and 64 byte loop */
+@@ -33,15 +35,14 @@ ENTRY(MEMCHR)
+ movd %esi, %xmm1
+ mov %edi, %ecx
+
++#ifdef __ILP32__
++ /* Clear the upper 32 bits. */
++ movl %edx, %edx
++#endif
+ #ifdef USE_AS_WMEMCHR
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+- shl $2, %RDX_LP
+ #else
+-# ifdef __ILP32__
+- /* Clear the upper 32 bits. */
+- movl %edx, %edx
+-# endif
+ punpcklbw %xmm1, %xmm1
+ test %RDX_LP, %RDX_LP
+ jz L(return_null)
+@@ -60,13 +61,16 @@ ENTRY(MEMCHR)
+ test %eax, %eax
+
+ jnz L(matches_1)
+- sub $16, %rdx
++ sub $CHAR_PER_VEC, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+ and $15, %ecx
+ and $-16, %rdi
++#ifdef USE_AS_WMEMCHR
++ shr $2, %ecx
++#endif
+ add %rcx, %rdx
+- sub $64, %rdx
++ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ jmp L(loop_prolog)
+
+@@ -77,16 +81,21 @@ L(crosscache):
+ movdqa (%rdi), %xmm0
+
+ PCMPEQ %xmm1, %xmm0
+-/* Check if there is a match. */
++ /* Check if there is a match. */
+ pmovmskb %xmm0, %eax
+-/* Remove the leading bytes. */
++ /* Remove the leading bytes. */
+ sar %cl, %eax
+ test %eax, %eax
+ je L(unaligned_no_match)
+-/* Check which byte is a match. */
++ /* Check which byte is a match. */
+ bsf %eax, %eax
+-
++#ifdef USE_AS_WMEMCHR
++ mov %eax, %esi
++ shr $2, %esi
++ sub %rsi, %rdx
++#else
+ sub %rax, %rdx
++#endif
+ jbe L(return_null)
+ add %rdi, %rax
+ add %rcx, %rax
+@@ -94,15 +103,18 @@ L(crosscache):
+
+ .p2align 4
+ L(unaligned_no_match):
+- /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
++ /* "rcx" is less than 16. Calculate "rdx + rcx - 16" by using
+ "rdx - (16 - rcx)" instead of "(rdx + rcx) - 16" to void
+ possible addition overflow. */
+ neg %rcx
+ add $16, %rcx
++#ifdef USE_AS_WMEMCHR
++ shr $2, %ecx
++#endif
+ sub %rcx, %rdx
+ jbe L(return_null)
+ add $16, %rdi
+- sub $64, %rdx
++ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ .p2align 4
+@@ -135,7 +147,7 @@ L(loop_prolog):
+ test $0x3f, %rdi
+ jz L(align64_loop)
+
+- sub $64, %rdx
++ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+
+ movdqa (%rdi), %xmm0
+@@ -167,11 +179,14 @@ L(loop_prolog):
+ mov %rdi, %rcx
+ and $-64, %rdi
+ and $63, %ecx
++#ifdef USE_AS_WMEMCHR
++ shr $2, %ecx
++#endif
+ add %rcx, %rdx
+
+ .p2align 4
+ L(align64_loop):
+- sub $64, %rdx
++ sub $(CHAR_PER_VEC * 4), %rdx
+ jbe L(exit_loop)
+ movdqa (%rdi), %xmm0
+ movdqa 16(%rdi), %xmm2
+@@ -218,7 +233,7 @@ L(align64_loop):
+
+ .p2align 4
+ L(exit_loop):
+- add $32, %edx
++ add $(CHAR_PER_VEC * 2), %edx
+ jle L(exit_loop_32)
+
+ movdqa (%rdi), %xmm0
+@@ -238,7 +253,7 @@ L(exit_loop):
+ pmovmskb %xmm3, %eax
+ test %eax, %eax
+ jnz L(matches32_1)
+- sub $16, %edx
++ sub $CHAR_PER_VEC, %edx
+ jle L(return_null)
+
+ PCMPEQ 48(%rdi), %xmm1
+@@ -250,13 +265,13 @@ L(exit_loop):
+
+ .p2align 4
+ L(exit_loop_32):
+- add $32, %edx
++ add $(CHAR_PER_VEC * 2), %edx
+ movdqa (%rdi), %xmm0
+ PCMPEQ %xmm1, %xmm0
+ pmovmskb %xmm0, %eax
+ test %eax, %eax
+ jnz L(matches_1)
+- sub $16, %edx
++ sub $CHAR_PER_VEC, %edx
+ jbe L(return_null)
+
+ PCMPEQ 16(%rdi), %xmm1
+@@ -293,7 +308,13 @@ L(matches32):
+ .p2align 4
+ L(matches_1):
+ bsf %eax, %eax
++#ifdef USE_AS_WMEMCHR
++ mov %eax, %esi
++ shr $2, %esi
++ sub %rsi, %rdx
++#else
+ sub %rax, %rdx
++#endif
+ jbe L(return_null)
+ add %rdi, %rax
+ ret
+@@ -301,7 +322,13 @@ L(matches_1):
+ .p2align 4
+ L(matches16_1):
+ bsf %eax, %eax
++#ifdef USE_AS_WMEMCHR
++ mov %eax, %esi
++ shr $2, %esi
++ sub %rsi, %rdx
++#else
+ sub %rax, %rdx
++#endif
+ jbe L(return_null)
+ lea 16(%rdi, %rax), %rax
+ ret
+@@ -309,7 +336,13 @@ L(matches16_1):
+ .p2align 4
+ L(matches32_1):
+ bsf %eax, %eax
++#ifdef USE_AS_WMEMCHR
++ mov %eax, %esi
++ shr $2, %esi
++ sub %rsi, %rdx
++#else
+ sub %rax, %rdx
++#endif
+ jbe L(return_null)
+ lea 32(%rdi, %rax), %rax
+ ret
+@@ -317,7 +350,13 @@ L(matches32_1):
+ .p2align 4
+ L(matches48_1):
+ bsf %eax, %eax
++#ifdef USE_AS_WMEMCHR
++ mov %eax, %esi
++ shr $2, %esi
++ sub %rsi, %rdx
++#else
+ sub %rax, %rdx
++#endif
+ jbe L(return_null)
+ lea 48(%rdi, %rax), %rax
+ ret
+diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
+index 395e432c09..da1446d731 100644
+--- a/sysdeps/x86_64/multiarch/Makefile
++++ b/sysdeps/x86_64/multiarch/Makefile
+@@ -43,7 +43,45 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
+ memmove-avx512-unaligned-erms \
+ memset-sse2-unaligned-erms \
+ memset-avx2-unaligned-erms \
+- memset-avx512-unaligned-erms
++ memset-avx512-unaligned-erms \
++ memchr-avx2-rtm \
++ memcmp-avx2-movbe-rtm \
++ memmove-avx-unaligned-erms-rtm \
++ memrchr-avx2-rtm \
++ memset-avx2-unaligned-erms-rtm \
++ rawmemchr-avx2-rtm \
++ strchr-avx2-rtm \
++ strcmp-avx2-rtm \
++ strchrnul-avx2-rtm \
++ stpcpy-avx2-rtm \
++ stpncpy-avx2-rtm \
++ strcat-avx2-rtm \
++ strcpy-avx2-rtm \
++ strlen-avx2-rtm \
++ strncat-avx2-rtm \
++ strncmp-avx2-rtm \
++ strncpy-avx2-rtm \
++ strnlen-avx2-rtm \
++ strrchr-avx2-rtm \
++ memchr-evex \
++ memcmp-evex-movbe \
++ memmove-evex-unaligned-erms \
++ memrchr-evex \
++ memset-evex-unaligned-erms \
++ rawmemchr-evex \
++ stpcpy-evex \
++ stpncpy-evex \
++ strcat-evex \
++ strchr-evex \
++ strchrnul-evex \
++ strcmp-evex \
++ strcpy-evex \
++ strlen-evex \
++ strncat-evex \
++ strncmp-evex \
++ strncpy-evex \
++ strnlen-evex \
++ strrchr-evex
+ CFLAGS-varshift.c += -msse4
+ CFLAGS-strcspn-c.c += -msse4
+ CFLAGS-strpbrk-c.c += -msse4
+@@ -59,8 +97,24 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
+ wcscpy-ssse3 wcscpy-c \
+ wcschr-sse2 wcschr-avx2 \
+ wcsrchr-sse2 wcsrchr-avx2 \
+- wcsnlen-sse4_1 wcsnlen-c \
+- wcslen-sse2 wcslen-avx2 wcsnlen-avx2
++ wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
++ wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
++ wcschr-avx2-rtm \
++ wcscmp-avx2-rtm \
++ wcslen-avx2-rtm \
++ wcsncmp-avx2-rtm \
++ wcsnlen-avx2-rtm \
++ wcsrchr-avx2-rtm \
++ wmemchr-avx2-rtm \
++ wmemcmp-avx2-movbe-rtm \
++ wcschr-evex \
++ wcscmp-evex \
++ wcslen-evex \
++ wcsncmp-evex \
++ wcsnlen-evex \
++ wcsrchr-evex \
++ wmemchr-evex \
++ wmemcmp-evex-movbe
+ endif
+
+ ifeq ($(subdir),debug)
+diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
+index f4e311d470..f450c786f0 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
++++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
+@@ -21,16 +21,28 @@
+
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
+
+ return OPTIMIZE (sse2);
+ }
+diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+index f93ec39d98..920e64241e 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
++++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+@@ -43,6 +43,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __memchr_avx2)
++ IFUNC_IMPL_ADD (array, i, memchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, memchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __memchr_evex)
+ IFUNC_IMPL_ADD (array, i, memchr, 1, __memchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/memcmp.c. */
+@@ -51,6 +60,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (MOVBE)),
+ __memcmp_avx2_movbe)
++ IFUNC_IMPL_ADD (array, i, memcmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (MOVBE)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memcmp_avx2_movbe_rtm)
++ IFUNC_IMPL_ADD (array, i, memcmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (MOVBE)),
++ __memcmp_evex_movbe)
+ IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSE4_1),
+ __memcmp_sse4_1)
+ IFUNC_IMPL_ADD (array, i, memcmp, CPU_FEATURE_USABLE (SSSE3),
+@@ -64,10 +83,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memmove_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX),
+@@ -75,6 +94,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_chk_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, __memmove_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memmove_chk_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, __memmove_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memmove_chk_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, __memmove_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memmove_chk_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, __memmove_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memmove_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memmove_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memmove_chk_ssse3_back)
+@@ -97,14 +130,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX),
+ __memmove_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, memmove,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memmove_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, memmove,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memmove_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, memmove,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memmove_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, memmove,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memmove_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memmove_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, memmove,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memmove,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memmove_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memmove, CPU_FEATURE_USABLE (SSSE3),
+ __memmove_ssse3_back)
+@@ -121,6 +168,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __memrchr_avx2)
++ IFUNC_IMPL_ADD (array, i, memrchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memrchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, memrchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __memrchr_evex)
++
+ IFUNC_IMPL_ADD (array, i, memrchr, 1, __memrchr_sse2))
+
+ #ifdef SHARED
+@@ -139,10 +195,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_chk_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memset_chk_avx2_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, __memset_chk,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memset_chk_avx2_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, __memset_chk,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __memset_chk_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, __memset_chk,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __memset_chk_evex_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, __memset_chk,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memset_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+@@ -164,10 +238,28 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX2),
+ __memset_avx2_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+- CPU_FEATURE_USABLE (AVX512F),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memset_avx2_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, memset,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memset_avx2_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, memset,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __memset_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, memset,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __memset_evex_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, memset,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memset,
+- CPU_FEATURE_USABLE (AVX512F),
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
+ __memset_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memset,
+ CPU_FEATURE_USABLE (AVX512F),
+@@ -179,20 +271,51 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, rawmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __rawmemchr_avx2)
++ IFUNC_IMPL_ADD (array, i, rawmemchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __rawmemchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, rawmemchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __rawmemchr_evex)
+ IFUNC_IMPL_ADD (array, i, rawmemchr, 1, __rawmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strlen.c. */
+ IFUNC_IMPL (i, name, strlen,
+ IFUNC_IMPL_ADD (array, i, strlen,
+- CPU_FEATURE_USABLE (AVX2),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)),
+ __strlen_avx2)
++ IFUNC_IMPL_ADD (array, i, strlen,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strlen_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strlen,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __strlen_evex)
+ IFUNC_IMPL_ADD (array, i, strlen, 1, __strlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strnlen.c. */
+ IFUNC_IMPL (i, name, strnlen,
+ IFUNC_IMPL_ADD (array, i, strnlen,
+- CPU_FEATURE_USABLE (AVX2),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)),
+ __strnlen_avx2)
++ IFUNC_IMPL_ADD (array, i, strnlen,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strnlen_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strnlen,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __strnlen_evex)
+ IFUNC_IMPL_ADD (array, i, strnlen, 1, __strnlen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/stpncpy.c. */
+@@ -201,6 +324,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ __stpncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (AVX2),
+ __stpncpy_avx2)
++ IFUNC_IMPL_ADD (array, i, stpncpy,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __stpncpy_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, stpncpy,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __stpncpy_evex)
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1,
+ __stpncpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, stpncpy, 1, __stpncpy_sse2))
+@@ -211,6 +342,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ __stpcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (AVX2),
+ __stpcpy_avx2)
++ IFUNC_IMPL_ADD (array, i, stpcpy,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __stpcpy_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, stpcpy,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __stpcpy_evex)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, stpcpy, 1, __stpcpy_sse2))
+
+@@ -245,6 +384,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strcat,
+ IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (AVX2),
+ __strcat_avx2)
++ IFUNC_IMPL_ADD (array, i, strcat,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strcat_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strcat,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strcat_evex)
+ IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (SSSE3),
+ __strcat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcat, 1, __strcat_sse2_unaligned)
+@@ -255,6 +402,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __strchr_avx2)
++ IFUNC_IMPL_ADD (array, i, strchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __strchr_evex)
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2_no_bsf)
+ IFUNC_IMPL_ADD (array, i, strchr, 1, __strchr_sse2))
+
+@@ -263,6 +419,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strchrnul,
+ CPU_FEATURE_USABLE (AVX2),
+ __strchrnul_avx2)
++ IFUNC_IMPL_ADD (array, i, strchrnul,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strchrnul_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strchrnul,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __strchrnul_evex)
+ IFUNC_IMPL_ADD (array, i, strchrnul, 1, __strchrnul_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strrchr.c. */
+@@ -270,6 +435,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __strrchr_avx2)
++ IFUNC_IMPL_ADD (array, i, strrchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strrchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strrchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strrchr_evex)
+ IFUNC_IMPL_ADD (array, i, strrchr, 1, __strrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/strcmp.c. */
+@@ -277,6 +450,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strcmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strcmp_avx2)
++ IFUNC_IMPL_ADD (array, i, strcmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strcmp_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strcmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __strcmp_evex)
+ IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSE4_2),
+ __strcmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strcmp, CPU_FEATURE_USABLE (SSSE3),
+@@ -288,6 +470,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strcpy,
+ IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (AVX2),
+ __strcpy_avx2)
++ IFUNC_IMPL_ADD (array, i, strcpy,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strcpy_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strcpy,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strcpy_evex)
+ IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (SSSE3),
+ __strcpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strcpy, 1, __strcpy_sse2_unaligned)
+@@ -331,6 +521,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strncat,
+ IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (AVX2),
+ __strncat_avx2)
++ IFUNC_IMPL_ADD (array, i, strncat,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strncat_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strncat,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strncat_evex)
+ IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (SSSE3),
+ __strncat_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncat, 1,
+@@ -341,6 +539,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL (i, name, strncpy,
+ IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (AVX2),
+ __strncpy_avx2)
++ IFUNC_IMPL_ADD (array, i, strncpy,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strncpy_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strncpy,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strncpy_evex)
+ IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (SSSE3),
+ __strncpy_ssse3)
+ IFUNC_IMPL_ADD (array, i, strncpy, 1,
+@@ -370,6 +576,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, wcschr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcschr_avx2)
++ IFUNC_IMPL_ADD (array, i, wcschr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcschr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcschr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcschr_evex)
+ IFUNC_IMPL_ADD (array, i, wcschr, 1, __wcschr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsrchr.c. */
+@@ -377,6 +592,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, wcsrchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcsrchr_avx2)
++ IFUNC_IMPL_ADD (array, i, wcsrchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcsrchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcsrchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcsrchr_evex)
+ IFUNC_IMPL_ADD (array, i, wcsrchr, 1, __wcsrchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscmp.c. */
+@@ -384,6 +608,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, wcscmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcscmp_avx2)
++ IFUNC_IMPL_ADD (array, i, wcscmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcscmp_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcscmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcscmp_evex)
+ IFUNC_IMPL_ADD (array, i, wcscmp, 1, __wcscmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsncmp.c. */
+@@ -391,6 +624,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, wcsncmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __wcsncmp_avx2)
++ IFUNC_IMPL_ADD (array, i, wcsncmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcsncmp_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcsncmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcsncmp_evex)
+ IFUNC_IMPL_ADD (array, i, wcsncmp, 1, __wcsncmp_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcscpy.c. */
+@@ -402,15 +644,40 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ /* Support sysdeps/x86_64/multiarch/wcslen.c. */
+ IFUNC_IMPL (i, name, wcslen,
+ IFUNC_IMPL_ADD (array, i, wcslen,
+- CPU_FEATURE_USABLE (AVX2),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)),
+ __wcslen_avx2)
++ IFUNC_IMPL_ADD (array, i, wcslen,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcslen_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcslen,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcslen_evex)
++ IFUNC_IMPL_ADD (array, i, wcslen,
++ CPU_FEATURE_USABLE (SSE4_1),
++ __wcslen_sse4_1)
+ IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
+ IFUNC_IMPL (i, name, wcsnlen,
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+- CPU_FEATURE_USABLE (AVX2),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)),
+ __wcsnlen_avx2)
++ IFUNC_IMPL_ADD (array, i, wcsnlen,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (BMI2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wcsnlen_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wcsnlen,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wcsnlen_evex)
+ IFUNC_IMPL_ADD (array, i, wcsnlen,
+ CPU_FEATURE_USABLE (SSE4_1),
+ __wcsnlen_sse4_1)
+@@ -421,6 +688,15 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, wmemchr,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemchr_avx2)
++ IFUNC_IMPL_ADD (array, i, wmemchr,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wmemchr_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, wmemchr,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (BMI2)),
++ __wmemchr_evex)
+ IFUNC_IMPL_ADD (array, i, wmemchr, 1, __wmemchr_sse2))
+
+ /* Support sysdeps/x86_64/multiarch/wmemcmp.c. */
+@@ -429,6 +705,16 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ (CPU_FEATURE_USABLE (AVX2)
+ && CPU_FEATURE_USABLE (MOVBE)),
+ __wmemcmp_avx2_movbe)
++ IFUNC_IMPL_ADD (array, i, wmemcmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (MOVBE)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wmemcmp_avx2_movbe_rtm)
++ IFUNC_IMPL_ADD (array, i, wmemcmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)
++ && CPU_FEATURE_USABLE (MOVBE)),
++ __wmemcmp_evex_movbe)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSE4_1),
+ __wmemcmp_sse4_1)
+ IFUNC_IMPL_ADD (array, i, wmemcmp, CPU_FEATURE_USABLE (SSSE3),
+@@ -443,7 +729,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_avx2_unaligned)
+ IFUNC_IMPL_ADD (array, i, wmemset,
+- CPU_FEATURE_USABLE (AVX512F),
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __wmemset_avx2_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, wmemset,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __wmemset_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, wmemset,
++ CPU_FEATURE_USABLE (AVX512VL),
+ __wmemset_avx512_unaligned))
+
+ #ifdef SHARED
+@@ -453,10 +746,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memcpy_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+@@ -464,6 +757,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_chk_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memcpy_chk_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memcpy_chk_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memcpy_chk_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memcpy_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __memcpy_chk_ssse3_back)
+@@ -486,6 +793,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, memcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __memcpy_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, memcpy,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memcpy_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, memcpy,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __memcpy_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, memcpy,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memcpy_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, memcpy,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __memcpy_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
+ __memcpy_ssse3_back)
+ IFUNC_IMPL_ADD (array, i, memcpy, CPU_FEATURE_USABLE (SSSE3),
+@@ -494,10 +815,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX512F),
+ __memcpy_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __memcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_sse2_unaligned)
+ IFUNC_IMPL_ADD (array, i, memcpy, 1,
+@@ -511,10 +832,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX512F),
+ __mempcpy_chk_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_chk_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+@@ -522,6 +843,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_chk_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __mempcpy_chk_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __mempcpy_chk_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __mempcpy_chk_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __mempcpy_chk_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+ CPU_FEATURE_USABLE (SSSE3),
+ __mempcpy_chk_ssse3_back)
+@@ -542,10 +877,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ CPU_FEATURE_USABLE (AVX512F),
+ __mempcpy_avx512_no_vzeroupper)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_avx512_unaligned)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+- CPU_FEATURE_USABLE (AVX512F),
++ CPU_FEATURE_USABLE (AVX512VL),
+ __mempcpy_avx512_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX),
+@@ -553,6 +888,20 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, mempcpy,
+ CPU_FEATURE_USABLE (AVX),
+ __mempcpy_avx_unaligned_erms)
++ IFUNC_IMPL_ADD (array, i, mempcpy,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __mempcpy_avx_unaligned_rtm)
++ IFUNC_IMPL_ADD (array, i, mempcpy,
++ (CPU_FEATURE_USABLE (AVX)
++ && CPU_FEATURE_USABLE (RTM)),
++ __mempcpy_avx_unaligned_erms_rtm)
++ IFUNC_IMPL_ADD (array, i, mempcpy,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __mempcpy_evex_unaligned)
++ IFUNC_IMPL_ADD (array, i, mempcpy,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __mempcpy_evex_unaligned_erms)
+ IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
+ __mempcpy_ssse3_back)
+ IFUNC_IMPL_ADD (array, i, mempcpy, CPU_FEATURE_USABLE (SSSE3),
+@@ -568,6 +917,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, strncmp,
+ CPU_FEATURE_USABLE (AVX2),
+ __strncmp_avx2)
++ IFUNC_IMPL_ADD (array, i, strncmp,
++ (CPU_FEATURE_USABLE (AVX2)
++ && CPU_FEATURE_USABLE (RTM)),
++ __strncmp_avx2_rtm)
++ IFUNC_IMPL_ADD (array, i, strncmp,
++ (CPU_FEATURE_USABLE (AVX512VL)
++ && CPU_FEATURE_USABLE (AVX512BW)),
++ __strncmp_evex)
+ IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSE4_2),
+ __strncmp_sse42)
+ IFUNC_IMPL_ADD (array, i, strncmp, CPU_FEATURE_USABLE (SSSE3),
+@@ -582,6 +939,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ CPU_FEATURE_USABLE (AVX2),
+ __wmemset_chk_avx2_unaligned)
++ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
++ CPU_FEATURE_USABLE (AVX512VL),
++ __wmemset_chk_evex_unaligned)
+ IFUNC_IMPL_ADD (array, i, __wmemset_chk,
+ CPU_FEATURE_USABLE (AVX512F),
+ __wmemset_chk_avx512_unaligned))
+diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+index 0e21b3a628..4f96c2764a 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
++++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+@@ -23,17 +23,28 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURE_USABLE_P (cpu_features, MOVBE)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2_movbe);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
++ return OPTIMIZE (evex_movbe);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_movbe_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2_movbe);
++ }
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
+ return OPTIMIZE (sse4_1);
+diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
+index 9ada03aa43..db26210e3b 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
++++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
+@@ -29,6 +29,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
+ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
++ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+@@ -48,21 +56,42 @@ IFUNC_SELECTOR (void)
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+- return OPTIMIZE (avx512_no_vzeroupper);
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx512_unaligned_erms);
+
+- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+- return OPTIMIZE (avx512_unaligned_erms);
++ return OPTIMIZE (avx512_unaligned);
++ }
+
+- return OPTIMIZE (avx512_unaligned);
++ return OPTIMIZE (avx512_no_vzeroupper);
+ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+- return OPTIMIZE (avx_unaligned_erms);
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (evex_unaligned_erms);
++
++ return OPTIMIZE (evex_unaligned);
++ }
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx_unaligned_erms_rtm);
++
++ return OPTIMIZE (avx_unaligned_rtm);
++ }
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx_unaligned_erms);
+
+- return OPTIMIZE (avx_unaligned);
++ return OPTIMIZE (avx_unaligned);
++ }
+ }
+
+ if (!CPU_FEATURE_USABLE_P (cpu_features, SSSE3)
+diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
+index f52613d372..57029fc17b 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
++++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
+@@ -27,6 +27,14 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
+ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
++ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned_erms)
+@@ -45,21 +53,44 @@ IFUNC_SELECTOR (void)
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+ {
+- if (CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+- return OPTIMIZE (avx512_no_vzeroupper);
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx512_unaligned_erms);
+
+- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+- return OPTIMIZE (avx512_unaligned_erms);
++ return OPTIMIZE (avx512_unaligned);
++ }
+
+- return OPTIMIZE (avx512_unaligned);
++ return OPTIMIZE (avx512_no_vzeroupper);
+ }
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2))
+ {
+- if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+- return OPTIMIZE (avx2_unaligned_erms);
+- else
+- return OPTIMIZE (avx2_unaligned);
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (evex_unaligned_erms);
++
++ return OPTIMIZE (evex_unaligned);
++ }
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx2_unaligned_erms_rtm);
++
++ return OPTIMIZE (avx2_unaligned_rtm);
++ }
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
++ return OPTIMIZE (avx2_unaligned_erms);
++
++ return OPTIMIZE (avx2_unaligned);
++ }
+ }
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+index 63b0dc0d96..35741f3ec8 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
++++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
+@@ -25,16 +25,27 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
+ attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return OPTIMIZE (sse2_unaligned);
+diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+new file mode 100644
+index 0000000000..39e3347378
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
+@@ -0,0 +1,52 @@
++/* Common definition for ifunc selections for wcslen and wcsnlen
++ All versions must be listed in ifunc-impl-list.c.
++ Copyright (C) 2017-2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <init-arch.h>
++
++extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
++
++static inline void *
++IFUNC_SELECTOR (void)
++{
++ const struct cpu_features* cpu_features = __get_cpu_features ();
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
++ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
++ return OPTIMIZE (sse4_1);
++
++ return OPTIMIZE (sse2);
++}
+diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+index 8cfce562fc..e06e8b4d80 100644
+--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
++++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
+@@ -20,6 +20,9 @@
+
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
++ attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
+
+ static inline void *
+@@ -27,14 +30,21 @@ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+ {
+- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512F)
+- && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
+- return OPTIMIZE (avx512_unaligned);
+- else
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
++ {
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_AVX512))
++ return OPTIMIZE (avx512_unaligned);
++
++ return OPTIMIZE (evex_unaligned);
++ }
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_unaligned_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
+ return OPTIMIZE (avx2_unaligned);
+ }
+
+diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..87b076c7c4
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef MEMCHR
++# define MEMCHR __memchr_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "memchr-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
+index e5a9abd211..0987616a1b 100644
+--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
++++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
+@@ -26,319 +26,407 @@
+
+ # ifdef USE_AS_WMEMCHR
+ # define VPCMPEQ vpcmpeqd
++# define VPBROADCAST vpbroadcastd
++# define CHAR_SIZE 4
+ # else
+ # define VPCMPEQ vpcmpeqb
++# define VPBROADCAST vpbroadcastb
++# define CHAR_SIZE 1
++# endif
++
++# ifdef USE_AS_RAWMEMCHR
++# define ERAW_PTR_REG ecx
++# define RRAW_PTR_REG rcx
++# define ALGN_PTR_REG rdi
++# else
++# define ERAW_PTR_REG edi
++# define RRAW_PTR_REG rdi
++# define ALGN_PTR_REG rcx
+ # endif
+
+ # ifndef VZEROUPPER
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
++# define PAGE_SIZE 4096
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (MEMCHR)
+ # ifndef USE_AS_RAWMEMCHR
+ /* Check for zero length. */
++# ifdef __ILP32__
++ /* Clear upper bits. */
++ and %RDX_LP, %RDX_LP
++# else
+ test %RDX_LP, %RDX_LP
++# endif
+ jz L(null)
+ # endif
+- movl %edi, %ecx
+- /* Broadcast CHAR to YMM0. */
++ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+-# ifdef USE_AS_WMEMCHR
+- shl $2, %RDX_LP
+- vpbroadcastd %xmm0, %ymm0
+-# else
+-# ifdef __ILP32__
+- /* Clear the upper 32 bits. */
+- movl %edx, %edx
+-# endif
+- vpbroadcastb %xmm0, %ymm0
+-# endif
++ VPBROADCAST %xmm0, %ymm0
+ /* Check if we may cross page boundary with one vector load. */
+- andl $(2 * VEC_SIZE - 1), %ecx
+- cmpl $VEC_SIZE, %ecx
+- ja L(cros_page_boundary)
++ movl %edi, %eax
++ andl $(PAGE_SIZE - 1), %eax
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ ja L(cross_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. */
+- VPCMPEQ (%rdi), %ymm0, %ymm1
++ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+-
+ # ifndef USE_AS_RAWMEMCHR
+- jnz L(first_vec_x0_check)
+- /* Adjust length and check the end of data. */
+- subq $VEC_SIZE, %rdx
+- jbe L(zero)
+-# else
+- jnz L(first_vec_x0)
++ /* If length <= CHAR_PER_VEC handle specially. */
++ cmpq $CHAR_PER_VEC, %rdx
++ jbe L(first_vec_x0)
+ # endif
+-
+- /* Align data for aligned loads in the loop. */
+- addq $VEC_SIZE, %rdi
+- andl $(VEC_SIZE - 1), %ecx
+- andq $-VEC_SIZE, %rdi
++ testl %eax, %eax
++ jz L(aligned_more)
++ tzcntl %eax, %eax
++ addq %rdi, %rax
++ VZEROUPPER_RETURN
+
+ # ifndef USE_AS_RAWMEMCHR
+- /* Adjust length. */
+- addq %rcx, %rdx
++ .p2align 5
++L(first_vec_x0):
++ /* Check if first match was before length. */
++ tzcntl %eax, %eax
++# ifdef USE_AS_WMEMCHR
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %edx
++# endif
++ xorl %ecx, %ecx
++ cmpl %eax, %edx
++ leaq (%rdi, %rax), %rax
++ cmovle %rcx, %rax
++ VZEROUPPER_RETURN
+
+- subq $(VEC_SIZE * 4), %rdx
+- jbe L(last_4x_vec_or_less)
++L(null):
++ xorl %eax, %eax
++ ret
+ # endif
+- jmp L(more_4x_vec)
+-
+ .p2align 4
+-L(cros_page_boundary):
+- andl $(VEC_SIZE - 1), %ecx
+- andq $-VEC_SIZE, %rdi
+- VPCMPEQ (%rdi), %ymm0, %ymm1
++L(cross_page_boundary):
++ /* Save pointer before aligning as its original value is
++ necessary for computing return address if byte is found or
++ adjusting length if it is not and this is memchr. */
++ movq %rdi, %rcx
++ /* Align data to VEC_SIZE - 1. ALGN_PTR_REG is rcx for memchr
++ and rdi for rawmemchr. */
++ orq $(VEC_SIZE - 1), %ALGN_PTR_REG
++ VPCMPEQ -(VEC_SIZE - 1)(%ALGN_PTR_REG), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++# ifndef USE_AS_RAWMEMCHR
++ /* Calculate length until end of page (length checked for a
++ match). */
++ leaq 1(%ALGN_PTR_REG), %rsi
++ subq %RRAW_PTR_REG, %rsi
++# ifdef USE_AS_WMEMCHR
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %esi
++# endif
++# endif
+ /* Remove the leading bytes. */
+- sarl %cl, %eax
+- testl %eax, %eax
+- jz L(aligned_more)
+- tzcntl %eax, %eax
++ sarxl %ERAW_PTR_REG, %eax, %eax
+ # ifndef USE_AS_RAWMEMCHR
+ /* Check the end of data. */
+- cmpq %rax, %rdx
+- jbe L(zero)
++ cmpq %rsi, %rdx
++ jbe L(first_vec_x0)
+ # endif
++ testl %eax, %eax
++ jz L(cross_page_continue)
++ tzcntl %eax, %eax
++ addq %RRAW_PTR_REG, %rax
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
++
++ .p2align 4
++L(first_vec_x1):
++ tzcntl %eax, %eax
++ incq %rdi
+ addq %rdi, %rax
+- addq %rcx, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(aligned_more):
+-# ifndef USE_AS_RAWMEMCHR
+- /* Calculate "rdx + rcx - VEC_SIZE" with "rdx - (VEC_SIZE - rcx)"
+- instead of "(rdx + rcx) - VEC_SIZE" to void possible addition
+- overflow. */
+- negq %rcx
+- addq $VEC_SIZE, %rcx
++L(first_vec_x2):
++ tzcntl %eax, %eax
++ addq $(VEC_SIZE + 1), %rdi
++ addq %rdi, %rax
++ VZEROUPPER_RETURN
+
+- /* Check the end of data. */
+- subq %rcx, %rdx
+- jbe L(zero)
+-# endif
++ .p2align 4
++L(first_vec_x3):
++ tzcntl %eax, %eax
++ addq $(VEC_SIZE * 2 + 1), %rdi
++ addq %rdi, %rax
++ VZEROUPPER_RETURN
+
+- addq $VEC_SIZE, %rdi
+
+-# ifndef USE_AS_RAWMEMCHR
+- subq $(VEC_SIZE * 4), %rdx
+- jbe L(last_4x_vec_or_less)
+-# endif
++ .p2align 4
++L(first_vec_x4):
++ tzcntl %eax, %eax
++ addq $(VEC_SIZE * 3 + 1), %rdi
++ addq %rdi, %rax
++ VZEROUPPER_RETURN
+
+-L(more_4x_vec):
++ .p2align 4
++L(aligned_more):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x0)
+
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
++# ifndef USE_AS_RAWMEMCHR
++L(cross_page_continue):
++ /* Align data to VEC_SIZE - 1. */
++ xorl %ecx, %ecx
++ subl %edi, %ecx
++ orq $(VEC_SIZE - 1), %rdi
++ /* esi is for adjusting length to see if near the end. */
++ leal (VEC_SIZE * 4 + 1)(%rdi, %rcx), %esi
++# ifdef USE_AS_WMEMCHR
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %esi
++# endif
++# else
++ orq $(VEC_SIZE - 1), %rdi
++L(cross_page_continue):
++# endif
++ /* Load first VEC regardless. */
++ VPCMPEQ 1(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++# ifndef USE_AS_RAWMEMCHR
++ /* Adjust length. If near end handle specially. */
++ subq %rsi, %rdx
++ jbe L(last_4x_vec_or_less)
++# endif
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+- addq $(VEC_SIZE * 4), %rdi
+-
+-# ifndef USE_AS_RAWMEMCHR
+- subq $(VEC_SIZE * 4), %rdx
+- jbe L(last_4x_vec_or_less)
+-# endif
+-
+- /* Align data to 4 * VEC_SIZE. */
+- movq %rdi, %rcx
+- andl $(4 * VEC_SIZE - 1), %ecx
+- andq $-(4 * VEC_SIZE), %rdi
++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x4)
+
+ # ifndef USE_AS_RAWMEMCHR
+- /* Adjust length. */
++ /* Check if at last VEC_SIZE * 4 length. */
++ subq $(CHAR_PER_VEC * 4), %rdx
++ jbe L(last_4x_vec_or_less_cmpeq)
++ /* Align data to VEC_SIZE * 4 - 1 for the loop and readjust
++ length. */
++ incq %rdi
++ movl %edi, %ecx
++ orq $(VEC_SIZE * 4 - 1), %rdi
++ andl $(VEC_SIZE * 4 - 1), %ecx
++# ifdef USE_AS_WMEMCHR
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++# endif
+ addq %rcx, %rdx
++# else
++ /* Align data to VEC_SIZE * 4 - 1 for loop. */
++ incq %rdi
++ orq $(VEC_SIZE * 4 - 1), %rdi
+ # endif
+
++ /* Compare 4 * VEC at a time forward. */
+ .p2align 4
+ L(loop_4x_vec):
+- /* Compare 4 * VEC at a time forward. */
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm2
+- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm3
+- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm4
+-
++ VPCMPEQ 1(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm2
++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm3
++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm4
+ vpor %ymm1, %ymm2, %ymm5
+ vpor %ymm3, %ymm4, %ymm6
+ vpor %ymm5, %ymm6, %ymm5
+
+- vpmovmskb %ymm5, %eax
+- testl %eax, %eax
+- jnz L(4x_vec_end)
+-
+- addq $(VEC_SIZE * 4), %rdi
+-
++ vpmovmskb %ymm5, %ecx
+ # ifdef USE_AS_RAWMEMCHR
+- jmp L(loop_4x_vec)
++ subq $-(VEC_SIZE * 4), %rdi
++ testl %ecx, %ecx
++ jz L(loop_4x_vec)
+ # else
+- subq $(VEC_SIZE * 4), %rdx
+- ja L(loop_4x_vec)
++ testl %ecx, %ecx
++ jnz L(loop_4x_vec_end)
+
+-L(last_4x_vec_or_less):
+- /* Less than 4 * VEC and aligned to VEC_SIZE. */
+- addl $(VEC_SIZE * 2), %edx
+- jle L(last_2x_vec)
++ subq $-(VEC_SIZE * 4), %rdi
+
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x0)
++ subq $(CHAR_PER_VEC * 4), %rdx
++ ja L(loop_4x_vec)
+
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
++ /* Fall through into less than 4 remaining vectors of length
++ case. */
++ VPCMPEQ (VEC_SIZE * 0 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++ .p2align 4
++L(last_4x_vec_or_less):
++# ifdef USE_AS_WMEMCHR
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %edx
++# endif
++ /* Check if first VEC contained match. */
+ testl %eax, %eax
+- jnz L(first_vec_x1)
++ jnz L(first_vec_x1_check)
+
+- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
++ /* If remaining length > VEC_SIZE * 2. */
++ addl $(VEC_SIZE * 2), %edx
++ jg L(last_4x_vec)
+
+- jnz L(first_vec_x2_check)
+- subl $VEC_SIZE, %edx
+- jle L(zero)
++L(last_2x_vec):
++ /* If remaining length < VEC_SIZE. */
++ addl $VEC_SIZE, %edx
++ jle L(zero_end)
+
+- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
++ /* Check VEC2 and compare any match with remaining length. */
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+-
+- jnz L(first_vec_x3_check)
+- xorl %eax, %eax
+- VZEROUPPER
+- ret
++ tzcntl %eax, %eax
++ cmpl %eax, %edx
++ jbe L(set_zero_end)
++ addq $(VEC_SIZE + 1), %rdi
++ addq %rdi, %rax
++L(zero_end):
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(last_2x_vec):
+- addl $(VEC_SIZE * 2), %edx
+- VPCMPEQ (%rdi), %ymm0, %ymm1
++L(loop_4x_vec_end):
++# endif
++ /* rawmemchr will fall through into this if match was found in
++ loop. */
++
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
++ jnz L(last_vec_x1_return)
+
+- jnz L(first_vec_x0_check)
+- subl $VEC_SIZE, %edx
+- jle L(zero)
+-
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
++ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
+- jnz L(first_vec_x1_check)
+- xorl %eax, %eax
+- VZEROUPPER
+- ret
++ jnz L(last_vec_x2_return)
+
+- .p2align 4
+-L(first_vec_x0_check):
+- tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rdx
+- jbe L(zero)
++ vpmovmskb %ymm3, %eax
++ /* Combine VEC3 matches (eax) with VEC4 matches (ecx). */
++ salq $32, %rcx
++ orq %rcx, %rax
++ tzcntq %rax, %rax
++# ifdef USE_AS_RAWMEMCHR
++ subq $(VEC_SIZE * 2 - 1), %rdi
++# else
++ subq $-(VEC_SIZE * 2 + 1), %rdi
++# endif
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
++# ifndef USE_AS_RAWMEMCHR
+
+ .p2align 4
+ L(first_vec_x1_check):
+ tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rdx
+- jbe L(zero)
+- addq $VEC_SIZE, %rax
++ /* Adjust length. */
++ subl $-(VEC_SIZE * 4), %edx
++ /* Check if match within remaining length. */
++ cmpl %eax, %edx
++ jbe L(set_zero_end)
++ incq %rdi
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
++ .p2align 4
++L(set_zero_end):
++ xorl %eax, %eax
++ VZEROUPPER_RETURN
++# endif
+
+ .p2align 4
+-L(first_vec_x2_check):
++L(last_vec_x1_return):
+ tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rdx
+- jbe L(zero)
+- addq $(VEC_SIZE * 2), %rax
++# ifdef USE_AS_RAWMEMCHR
++ subq $(VEC_SIZE * 4 - 1), %rdi
++# else
++ incq %rdi
++# endif
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(first_vec_x3_check):
++L(last_vec_x2_return):
+ tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rdx
+- jbe L(zero)
+- addq $(VEC_SIZE * 3), %rax
++# ifdef USE_AS_RAWMEMCHR
++ subq $(VEC_SIZE * 3 - 1), %rdi
++# else
++ subq $-(VEC_SIZE + 1), %rdi
++# endif
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
++# ifndef USE_AS_RAWMEMCHR
+ .p2align 4
+-L(zero):
+- VZEROUPPER
+-L(null):
+- xorl %eax, %eax
+- ret
+-# endif
++L(last_4x_vec_or_less_cmpeq):
++ VPCMPEQ (VEC_SIZE * 4 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++# ifdef USE_AS_WMEMCHR
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %edx
++# endif
++ subq $-(VEC_SIZE * 4), %rdi
++ /* Check first VEC regardless. */
++ testl %eax, %eax
++ jnz L(first_vec_x1_check)
+
++ /* If remaining length <= CHAR_PER_VEC * 2. */
++ addl $(VEC_SIZE * 2), %edx
++ jle L(last_2x_vec)
+ .p2align 4
+-L(first_vec_x0):
+- tzcntl %eax, %eax
+- addq %rdi, %rax
+- VZEROUPPER
+- ret
++L(last_4x_vec):
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2_return)
+
+- .p2align 4
+-L(first_vec_x1):
+- tzcntl %eax, %eax
+- addq $VEC_SIZE, %rax
+- addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
+
+- .p2align 4
+-L(first_vec_x2):
++ /* Create mask for possible matches within remaining length. */
++ movq $-1, %rcx
++ bzhiq %rdx, %rcx, %rcx
++
++ /* Test matches in data against length match. */
++ andl %ecx, %eax
++ jnz L(last_vec_x3)
++
++ /* if remaining length <= VEC_SIZE * 3 (Note this is after
++ remaining length was found to be > VEC_SIZE * 2). */
++ subl $VEC_SIZE, %edx
++ jbe L(zero_end2)
++
++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ /* Shift remaining length mask for last VEC. */
++ shrq $32, %rcx
++ andl %ecx, %eax
++ jz L(zero_end2)
+ tzcntl %eax, %eax
+- addq $(VEC_SIZE * 2), %rax
++ addq $(VEC_SIZE * 3 + 1), %rdi
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++L(zero_end2):
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(4x_vec_end):
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x0)
+- vpmovmskb %ymm2, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x1)
+- vpmovmskb %ymm3, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x2)
+- vpmovmskb %ymm4, %eax
+- testl %eax, %eax
+-L(first_vec_x3):
++L(last_vec_x3):
+ tzcntl %eax, %eax
+- addq $(VEC_SIZE * 3), %rax
++ subq $-(VEC_SIZE * 2 + 1), %rdi
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
++# endif
+
+ END (MEMCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/memchr-evex.S b/sysdeps/x86_64/multiarch/memchr-evex.S
+new file mode 100644
+index 0000000000..f3fdad4fda
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memchr-evex.S
+@@ -0,0 +1,478 @@
++/* memchr/wmemchr optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef MEMCHR
++# define MEMCHR __memchr_evex
++# endif
++
++# ifdef USE_AS_WMEMCHR
++# define VPBROADCAST vpbroadcastd
++# define VPMINU vpminud
++# define VPCMP vpcmpd
++# define VPCMPEQ vpcmpeqd
++# define CHAR_SIZE 4
++# else
++# define VPBROADCAST vpbroadcastb
++# define VPMINU vpminub
++# define VPCMP vpcmpb
++# define VPCMPEQ vpcmpeqb
++# define CHAR_SIZE 1
++# endif
++
++# ifdef USE_AS_RAWMEMCHR
++# define RAW_PTR_REG rcx
++# define ALGN_PTR_REG rdi
++# else
++# define RAW_PTR_REG rdi
++# define ALGN_PTR_REG rcx
++# endif
++
++# define XMMZERO xmm23
++# define YMMZERO ymm23
++# define XMMMATCH xmm16
++# define YMMMATCH ymm16
++# define YMM1 ymm17
++# define YMM2 ymm18
++# define YMM3 ymm19
++# define YMM4 ymm20
++# define YMM5 ymm21
++# define YMM6 ymm22
++
++# define VEC_SIZE 32
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
++# define PAGE_SIZE 4096
++
++ .section .text.evex,"ax",@progbits
++ENTRY (MEMCHR)
++# ifndef USE_AS_RAWMEMCHR
++ /* Check for zero length. */
++ test %RDX_LP, %RDX_LP
++ jz L(zero)
++
++# ifdef __ILP32__
++ /* Clear the upper 32 bits. */
++ movl %edx, %edx
++# endif
++# endif
++ /* Broadcast CHAR to YMMMATCH. */
++ VPBROADCAST %esi, %YMMMATCH
++ /* Check if we may cross page boundary with one vector load. */
++ movl %edi, %eax
++ andl $(PAGE_SIZE - 1), %eax
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ ja L(cross_page_boundary)
++
++ /* Check the first VEC_SIZE bytes. */
++ VPCMP $0, (%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++# ifndef USE_AS_RAWMEMCHR
++ /* If length <= CHAR_PER_VEC handle specially. */
++ cmpq $CHAR_PER_VEC, %rdx
++ jbe L(first_vec_x0)
++# endif
++ testl %eax, %eax
++ jz L(aligned_more)
++ tzcntl %eax, %eax
++# ifdef USE_AS_WMEMCHR
++ /* NB: Scale the character index by CHAR_SIZE to get the byte offset. */
++ leaq (%rdi, %rax, CHAR_SIZE), %rax
++# else
++ addq %rdi, %rax
++# endif
++ ret
++
++# ifndef USE_AS_RAWMEMCHR
++L(zero):
++ xorl %eax, %eax
++ ret
++
++ .p2align 5
++L(first_vec_x0):
++ /* Check if first match was before length. */
++ tzcntl %eax, %eax
++ xorl %ecx, %ecx
++ cmpl %eax, %edx
++ leaq (%rdi, %rax, CHAR_SIZE), %rax
++ cmovle %rcx, %rax
++ ret
++# else
++ /* NB: first_vec_x0 is 17 bytes which will leave
++ cross_page_boundary (which is relatively cold) close enough
++ to ideal alignment. So only realign L(cross_page_boundary) if
++ rawmemchr. */
++ .p2align 4
++# endif
++L(cross_page_boundary):
++ /* Save pointer before aligning as its original value is
++ necessary for computing return address if byte is found or
++ adjusting length if it is not and this is memchr. */
++ movq %rdi, %rcx
++ /* Align data to VEC_SIZE. ALGN_PTR_REG is rcx for memchr and rdi
++ for rawmemchr. */
++ andq $-VEC_SIZE, %ALGN_PTR_REG
++ VPCMP $0, (%ALGN_PTR_REG), %YMMMATCH, %k0
++ kmovd %k0, %r8d
++# ifdef USE_AS_WMEMCHR
++ /* NB: Divide shift count by 4 since each bit in K0 represents 4
++ bytes. */
++ sarl $2, %eax
++# endif
++# ifndef USE_AS_RAWMEMCHR
++ movl $(PAGE_SIZE / CHAR_SIZE), %esi
++ subl %eax, %esi
++# endif
++# ifdef USE_AS_WMEMCHR
++ andl $(CHAR_PER_VEC - 1), %eax
++# endif
++ /* Remove the leading bytes. */
++ sarxl %eax, %r8d, %eax
++# ifndef USE_AS_RAWMEMCHR
++ /* Check the end of data. */
++ cmpq %rsi, %rdx
++ jbe L(first_vec_x0)
++# endif
++ testl %eax, %eax
++ jz L(cross_page_continue)
++ tzcntl %eax, %eax
++# ifdef USE_AS_WMEMCHR
++ /* NB: Scale the character index by CHAR_SIZE to get the byte offset. */
++ leaq (%RAW_PTR_REG, %rax, CHAR_SIZE), %rax
++# else
++ addq %RAW_PTR_REG, %rax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x1):
++ tzcntl %eax, %eax
++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++
++ .p2align 4
++L(first_vec_x2):
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++
++ .p2align 4
++L(first_vec_x3):
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++
++ .p2align 4
++L(first_vec_x4):
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++
++ .p2align 5
++L(aligned_more):
++ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
++ since data is only aligned to VEC_SIZE. */
++
++# ifndef USE_AS_RAWMEMCHR
++ /* Align data to VEC_SIZE. */
++L(cross_page_continue):
++ xorl %ecx, %ecx
++ subl %edi, %ecx
++ andq $-VEC_SIZE, %rdi
++ /* esi is for adjusting length to see if near the end. */
++ leal (VEC_SIZE * 5)(%rdi, %rcx), %esi
++# ifdef USE_AS_WMEMCHR
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %esi
++# endif
++# else
++ andq $-VEC_SIZE, %rdi
++L(cross_page_continue):
++# endif
++ /* Load first VEC regardless. */
++ VPCMP $0, (VEC_SIZE)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++# ifndef USE_AS_RAWMEMCHR
++ /* Adjust length. If near end handle specially. */
++ subq %rsi, %rdx
++ jbe L(last_4x_vec_or_less)
++# endif
++ testl %eax, %eax
++ jnz L(first_vec_x1)
++
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x2)
++
++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x3)
++
++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x4)
++
++
++# ifndef USE_AS_RAWMEMCHR
++ /* Check if at last CHAR_PER_VEC * 4 length. */
++ subq $(CHAR_PER_VEC * 4), %rdx
++ jbe L(last_4x_vec_or_less_cmpeq)
++ addq $VEC_SIZE, %rdi
++
++ /* Align data to VEC_SIZE * 4 for the loop and readjust length.
++ */
++# ifdef USE_AS_WMEMCHR
++ movl %edi, %ecx
++ andq $-(4 * VEC_SIZE), %rdi
++ andl $(VEC_SIZE * 4 - 1), %ecx
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++ addq %rcx, %rdx
++# else
++ addq %rdi, %rdx
++ andq $-(4 * VEC_SIZE), %rdi
++ subq %rdi, %rdx
++# endif
++# else
++ addq $VEC_SIZE, %rdi
++ andq $-(4 * VEC_SIZE), %rdi
++# endif
++
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++
++ /* Compare 4 * VEC at a time forward. */
++ .p2align 4
++L(loop_4x_vec):
++ /* It would be possible to save some instructions using 4x VPCMP
++ but bottleneck on port 5 makes it not worth it. */
++ VPCMP $4, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k1
++ /* xor will set bytes matching esi to zero. */
++ vpxorq (VEC_SIZE * 5)(%rdi), %YMMMATCH, %YMM2
++ vpxorq (VEC_SIZE * 6)(%rdi), %YMMMATCH, %YMM3
++ VPCMP $0, (VEC_SIZE * 7)(%rdi), %YMMMATCH, %k3
++ /* Reduce VEC2 / VEC3 with min and VEC1 with zero mask. */
++ VPMINU %YMM2, %YMM3, %YMM3{%k1}{z}
++ VPCMP $0, %YMM3, %YMMZERO, %k2
++# ifdef USE_AS_RAWMEMCHR
++ subq $-(VEC_SIZE * 4), %rdi
++ kortestd %k2, %k3
++ jz L(loop_4x_vec)
++# else
++ kortestd %k2, %k3
++ jnz L(loop_4x_vec_end)
++
++ subq $-(VEC_SIZE * 4), %rdi
++
++ subq $(CHAR_PER_VEC * 4), %rdx
++ ja L(loop_4x_vec)
++
++ /* Fall through into less than 4 remaining vectors of length case.
++ */
++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ addq $(VEC_SIZE * 3), %rdi
++ .p2align 4
++L(last_4x_vec_or_less):
++ /* Check if first VEC contained match. */
++ testl %eax, %eax
++ jnz L(first_vec_x1_check)
++
++ /* If remaining length > CHAR_PER_VEC * 2. */
++ addl $(CHAR_PER_VEC * 2), %edx
++ jg L(last_4x_vec)
++
++L(last_2x_vec):
++ /* If remaining length < CHAR_PER_VEC. */
++ addl $CHAR_PER_VEC, %edx
++ jle L(zero_end)
++
++ /* Check VEC2 and compare any match with remaining length. */
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++ cmpl %eax, %edx
++ jbe L(set_zero_end)
++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++L(zero_end):
++ ret
++
++
++ .p2align 4
++L(first_vec_x1_check):
++ tzcntl %eax, %eax
++ /* Adjust length. */
++ subl $-(CHAR_PER_VEC * 4), %edx
++ /* Check if match within remaining length. */
++ cmpl %eax, %edx
++ jbe L(set_zero_end)
++ /* NB: Scale the character index by CHAR_SIZE to get the byte offset. */
++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++L(set_zero_end):
++ xorl %eax, %eax
++ ret
++
++ .p2align 4
++L(loop_4x_vec_end):
++# endif
++ /* rawmemchr will fall through into this if match was found in
++ loop. */
++
++ /* k1 holds the bitwise NOT of the matches with VEC1. */
++ kmovd %k1, %eax
++# ifdef USE_AS_WMEMCHR
++ subl $((1 << CHAR_PER_VEC) - 1), %eax
++# else
++ incl %eax
++# endif
++ jnz L(last_vec_x1_return)
++
++ VPCMP $0, %YMM2, %YMMZERO, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2_return)
++
++ kmovd %k2, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3_return)
++
++ kmovd %k3, %eax
++ tzcntl %eax, %eax
++# ifdef USE_AS_RAWMEMCHR
++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
++# else
++ leaq (VEC_SIZE * 7)(%rdi, %rax, CHAR_SIZE), %rax
++# endif
++ ret
++
++ .p2align 4
++L(last_vec_x1_return):
++ tzcntl %eax, %eax
++# ifdef USE_AS_RAWMEMCHR
++# ifdef USE_AS_WMEMCHR
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq (%rdi, %rax, CHAR_SIZE), %rax
++# else
++ addq %rdi, %rax
++# endif
++# else
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
++# endif
++ ret
++
++ .p2align 4
++L(last_vec_x2_return):
++ tzcntl %eax, %eax
++# ifdef USE_AS_RAWMEMCHR
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq VEC_SIZE(%rdi, %rax, CHAR_SIZE), %rax
++# else
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq (VEC_SIZE * 5)(%rdi, %rax, CHAR_SIZE), %rax
++# endif
++ ret
++
++ .p2align 4
++L(last_vec_x3_return):
++ tzcntl %eax, %eax
++# ifdef USE_AS_RAWMEMCHR
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++# else
++ /* NB: Multiply bytes by CHAR_SIZE to get the wchar_t count. */
++ leaq (VEC_SIZE * 6)(%rdi, %rax, CHAR_SIZE), %rax
++# endif
++ ret
++
++
++# ifndef USE_AS_RAWMEMCHR
++L(last_4x_vec_or_less_cmpeq):
++ VPCMP $0, (VEC_SIZE * 5)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ subq $-(VEC_SIZE * 4), %rdi
++ /* Check first VEC regardless. */
++ testl %eax, %eax
++ jnz L(first_vec_x1_check)
++
++ /* If remaining length <= CHAR_PER_VEC * 2. */
++ addl $(CHAR_PER_VEC * 2), %edx
++ jle L(last_2x_vec)
++
++ .p2align 4
++L(last_4x_vec):
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++
++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ /* Create mask for possible matches within remaining length. */
++# ifdef USE_AS_WMEMCHR
++ movl $((1 << (CHAR_PER_VEC * 2)) - 1), %ecx
++ bzhil %edx, %ecx, %ecx
++# else
++ movq $-1, %rcx
++ bzhiq %rdx, %rcx, %rcx
++# endif
++ /* Test matches in data against length match. */
++ andl %ecx, %eax
++ jnz L(last_vec_x3)
++
++ /* If remaining length <= CHAR_PER_VEC * 3 (note this is after
++ remaining length was found to be > CHAR_PER_VEC * 2). */
++ subl $CHAR_PER_VEC, %edx
++ jbe L(zero_end2)
++
++
++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMMATCH, %k0
++ kmovd %k0, %eax
++ /* Shift remaining length mask for last VEC. */
++# ifdef USE_AS_WMEMCHR
++ shrl $CHAR_PER_VEC, %ecx
++# else
++ shrq $CHAR_PER_VEC, %rcx
++# endif
++ andl %ecx, %eax
++ jz L(zero_end2)
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 4)(%rdi, %rax, CHAR_SIZE), %rax
++L(zero_end2):
++ ret
++
++L(last_vec_x2):
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 2)(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++
++ .p2align 4
++L(last_vec_x3):
++ tzcntl %eax, %eax
++ leaq (VEC_SIZE * 3)(%rdi, %rax, CHAR_SIZE), %rax
++ ret
++# endif
++
++END (MEMCHR)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
+new file mode 100644
+index 0000000000..cf4eff5d4a
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef MEMCMP
++# define MEMCMP __memcmp_avx2_movbe_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "memcmp-avx2-movbe.S"
+diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+index 67fc575b59..87f9478eaf 100644
+--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
++++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
+@@ -47,6 +47,10 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
+ # define VEC_MASK ((1 << VEC_SIZE) - 1)
+
+@@ -55,7 +59,7 @@
+ memcmp has to use UNSIGNED comparison for elemnts.
+ */
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (MEMCMP)
+ # ifdef USE_AS_WMEMCMP
+ shl $2, %RDX_LP
+@@ -123,8 +127,8 @@ ENTRY (MEMCMP)
+ vptest %ymm0, %ymm5
+ jnc L(4x_vec_end)
+ xorl %eax, %eax
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(last_2x_vec):
+@@ -144,8 +148,7 @@ L(last_vec):
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec):
+@@ -164,8 +167,7 @@ L(wmemcmp_return):
+ movzbl (%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # ifdef USE_AS_WMEMCMP
+ .p2align 4
+@@ -367,8 +369,7 @@ L(last_4x_vec):
+ vpmovmskb %ymm2, %eax
+ subl $VEC_MASK, %eax
+ jnz L(first_vec)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(4x_vec_end):
+@@ -394,8 +395,7 @@ L(4x_vec_end):
+ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x1):
+@@ -410,8 +410,7 @@ L(first_vec_x1):
+ movzbl VEC_SIZE(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x2):
+@@ -426,7 +425,6 @@ L(first_vec_x2):
+ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
+ sub %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ END (MEMCMP)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+new file mode 100644
+index 0000000000..9c093972e1
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memcmp-evex-movbe.S
+@@ -0,0 +1,440 @@
++/* memcmp/wmemcmp optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++/* memcmp/wmemcmp is implemented as:
++ 1. For size from 2 to 7 bytes, load as big endian with movbe and bswap
++ to avoid branches.
++ 2. Use overlapping compare to avoid branch.
++ 3. Use vector compare when size >= 8 bytes; memcmp handles 4 to 7
++ bytes with movbe and wmemcmp handles 4 bytes with a scalar compare.
++ 4. If size is 8 * VEC_SIZE or less, unroll the loop.
++ 5. Compare 4 * VEC_SIZE at a time with the aligned first memory
++ area.
++ 6. Use 2 vector compares when size is 2 * VEC_SIZE or less.
++ 7. Use 4 vector compares when size is 4 * VEC_SIZE or less.
++ 8. Use 8 vector compares when size is 8 * VEC_SIZE or less. */
++
++# include <sysdep.h>
++
++# ifndef MEMCMP
++# define MEMCMP __memcmp_evex_movbe
++# endif
++
++# define VMOVU vmovdqu64
++
++# ifdef USE_AS_WMEMCMP
++# define VPCMPEQ vpcmpeqd
++# else
++# define VPCMPEQ vpcmpeqb
++# endif
++
++# define XMM1 xmm17
++# define XMM2 xmm18
++# define YMM1 ymm17
++# define YMM2 ymm18
++# define YMM3 ymm19
++# define YMM4 ymm20
++# define YMM5 ymm21
++# define YMM6 ymm22
++
++# define VEC_SIZE 32
++# ifdef USE_AS_WMEMCMP
++# define VEC_MASK 0xff
++# define XMM_MASK 0xf
++# else
++# define VEC_MASK 0xffffffff
++# define XMM_MASK 0xffff
++# endif
++
++/* Warning!
++ wmemcmp has to use SIGNED comparison for elements.
++ memcmp has to use UNSIGNED comparison for elements.
++*/
++
++ .section .text.evex,"ax",@progbits
++ENTRY (MEMCMP)
++# ifdef USE_AS_WMEMCMP
++ shl $2, %RDX_LP /* Scale the length by 4: element count to byte count. */
++# elif defined __ILP32__
++ /* Clear the upper 32 bits of the length. */
++ movl %edx, %edx
++# endif
++ cmp $VEC_SIZE, %RDX_LP
++ jb L(less_vec)
++
++ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k1
++ kmovd %k1, %eax
++ subl $VEC_MASK, %eax /* Zero iff every element compared equal. */
++ jnz L(first_vec)
++
++ cmpq $(VEC_SIZE * 2), %rdx
++ jbe L(last_vec)
++
++ /* More than 2 * VEC. */
++ cmpq $(VEC_SIZE * 8), %rdx
++ ja L(more_8x_vec)
++ cmpq $(VEC_SIZE * 4), %rdx
++ jb L(last_4x_vec)
++
++ /* From 4 * VEC to 8 * VEC, inclusively. Compare the first 4 * VEC. */
++ VMOVU (%rsi), %YMM1
++ VPCMPEQ (%rdi), %YMM1, %k1
++
++ VMOVU VEC_SIZE(%rsi), %YMM2
++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
++
++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
++
++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
++
++ kandd %k1, %k2, %k5
++ kandd %k3, %k4, %k6
++ kandd %k5, %k6, %k6 /* k6 = k1 & k2 & k3 & k4. */
++
++ kmovd %k6, %eax
++ cmpl $VEC_MASK, %eax
++ jne L(4x_vec_end)
++
++ leaq -(4 * VEC_SIZE)(%rdi, %rdx), %rdi /* Last 4 * VEC of the first area. */
++ leaq -(4 * VEC_SIZE)(%rsi, %rdx), %rsi /* Last 4 * VEC of the second area. */
++ VMOVU (%rsi), %YMM1
++ VPCMPEQ (%rdi), %YMM1, %k1
++
++ VMOVU VEC_SIZE(%rsi), %YMM2
++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
++ kandd %k1, %k2, %k5
++
++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
++ kandd %k3, %k5, %k5
++
++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
++ kandd %k4, %k5, %k5
++
++ kmovd %k5, %eax
++ cmpl $VEC_MASK, %eax
++ jne L(4x_vec_end)
++ xorl %eax, %eax
++ ret
++
++ .p2align 4
++L(last_2x_vec):
++ /* From VEC to 2 * VEC. No branch when size == VEC_SIZE. */
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++
++L(last_vec):
++ /* Use overlapping loads to avoid branches. */
++ leaq -VEC_SIZE(%rdi, %rdx), %rdi
++ leaq -VEC_SIZE(%rsi, %rdx), %rsi
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++ ret /* EAX is 0 here: no difference found. */
++
++ .p2align 4
++L(first_vec):
++ /* A byte or int32 is different within 16 or 32 bytes. */
++ tzcntl %eax, %ecx /* ECX = index of the first differing element. */
++# ifdef USE_AS_WMEMCMP
++ xorl %eax, %eax
++ movl (%rdi, %rcx, 4), %edx
++ cmpl (%rsi, %rcx, 4), %edx
++L(wmemcmp_return):
++ setl %al /* AL = 1 if the first element is less (signed). */
++ negl %eax
++ orl $1, %eax /* EAX = -1 if less, otherwise 1. */
++# else
++ movzbl (%rdi, %rcx), %eax
++ movzbl (%rsi, %rcx), %edx
++ sub %edx, %eax
++# endif
++ ret
++
++# ifdef USE_AS_WMEMCMP
++ .p2align 4
++L(4):
++ xorl %eax, %eax
++ movl (%rdi), %edx
++ cmpl (%rsi), %edx
++ jne L(wmemcmp_return)
++ ret /* EAX is 0 here: the single elements are equal. */
++# else
++ .p2align 4
++L(between_4_7):
++ /* Load as big endian with overlapping movbe to avoid branches. */
++ movbe (%rdi), %eax
++ movbe (%rsi), %ecx
++ shlq $32, %rax
++ shlq $32, %rcx
++ movbe -4(%rdi, %rdx), %edi
++ movbe -4(%rsi, %rdx), %esi
++ orq %rdi, %rax
++ orq %rsi, %rcx
++ subq %rcx, %rax
++ je L(exit)
++ sbbl %eax, %eax /* EAX = -1 if the subtraction borrowed, else 0. */
++ orl $1, %eax /* EAX = -1 or 1. */
++ ret
++
++ .p2align 4
++L(exit):
++ ret /* RAX is 0 here: the subtraction above gave zero. */
++
++ .p2align 4
++L(between_2_3):
++ /* Load as big endian to avoid branches. */
++ movzwl (%rdi), %eax
++ movzwl (%rsi), %ecx
++ shll $8, %eax
++ shll $8, %ecx
++ bswap %eax
++ bswap %ecx
++ movb -1(%rdi, %rdx), %al
++ movb -1(%rsi, %rdx), %cl
++ /* Subtraction is okay because the upper 8 bits are zero. */
++ subl %ecx, %eax
++ ret
++
++ .p2align 4
++L(1):
++ movzbl (%rdi), %eax
++ movzbl (%rsi), %ecx
++ subl %ecx, %eax
++ ret
++# endif
++
++ .p2align 4
++L(zero):
++ xorl %eax, %eax
++ ret
++
++ .p2align 4
++L(less_vec):
++# ifdef USE_AS_WMEMCMP
++ /* It can only be 0, 4, 8, 12, 16, 20, 24, 28 bytes. */
++ cmpb $4, %dl
++ je L(4)
++ jb L(zero)
++# else
++ cmpb $1, %dl
++ je L(1)
++ jb L(zero)
++ cmpb $4, %dl
++ jb L(between_2_3)
++ cmpb $8, %dl
++ jb L(between_4_7)
++# endif
++ cmpb $16, %dl
++ jae L(between_16_31)
++ /* It is between 8 and 15 bytes. Compare the first 8 bytes. */
++ vmovq (%rdi), %XMM1
++ vmovq (%rsi), %XMM2
++ VPCMPEQ %XMM1, %XMM2, %k2
++ kmovw %k2, %eax
++ subl $XMM_MASK, %eax
++ jnz L(first_vec)
++ /* Use overlapping loads to avoid branches. */
++ leaq -8(%rdi, %rdx), %rdi
++ leaq -8(%rsi, %rdx), %rsi
++ vmovq (%rdi), %XMM1
++ vmovq (%rsi), %XMM2
++ VPCMPEQ %XMM1, %XMM2, %k2
++ kmovw %k2, %eax
++ subl $XMM_MASK, %eax
++ jnz L(first_vec)
++ ret /* EAX is 0 here: no difference found. */
++
++ .p2align 4
++L(between_16_31):
++ /* From 16 to 31 bytes. No branch when size == 16. */
++ VMOVU (%rsi), %XMM2
++ VPCMPEQ (%rdi), %XMM2, %k2
++ kmovw %k2, %eax
++ subl $XMM_MASK, %eax
++ jnz L(first_vec)
++
++ /* Use overlapping loads to avoid branches. */
++ leaq -16(%rdi, %rdx), %rdi
++ leaq -16(%rsi, %rdx), %rsi
++ VMOVU (%rsi), %XMM2
++ VPCMPEQ (%rdi), %XMM2, %k2
++ kmovw %k2, %eax
++ subl $XMM_MASK, %eax
++ jnz L(first_vec)
++ ret /* EAX is 0 here: no difference found. */
++
++ .p2align 4
++L(more_8x_vec):
++ /* More than 8 * VEC. Check the first VEC. */
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++
++ /* Align the first memory area for aligned loads in the loop.
++ Compute how much the first memory area is misaligned. */
++ movq %rdi, %rcx
++ andl $(VEC_SIZE - 1), %ecx
++ /* Get the negative of offset for alignment. */
++ subq $VEC_SIZE, %rcx
++ /* Adjust the second memory area. */
++ subq %rcx, %rsi
++ /* Adjust the first memory area which should be aligned now. */
++ subq %rcx, %rdi
++ /* Adjust length. */
++ addq %rcx, %rdx
++
++L(loop_4x_vec):
++ /* Compare 4 * VEC at a time forward. */
++ VMOVU (%rsi), %YMM1
++ VPCMPEQ (%rdi), %YMM1, %k1
++
++ VMOVU VEC_SIZE(%rsi), %YMM2
++ VPCMPEQ VEC_SIZE(%rdi), %YMM2, %k2
++ kandd %k2, %k1, %k5
++
++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM3
++ VPCMPEQ (VEC_SIZE * 2)(%rdi), %YMM3, %k3
++ kandd %k3, %k5, %k5
++
++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM4
++ VPCMPEQ (VEC_SIZE * 3)(%rdi), %YMM4, %k4
++ kandd %k4, %k5, %k5 /* k5 = k1 & k2 & k3 & k4. */
++
++ kmovd %k5, %eax
++ cmpl $VEC_MASK, %eax
++ jne L(4x_vec_end)
++
++ addq $(VEC_SIZE * 4), %rdi
++ addq $(VEC_SIZE * 4), %rsi
++
++ subq $(VEC_SIZE * 4), %rdx
++ cmpq $(VEC_SIZE * 4), %rdx
++ jae L(loop_4x_vec)
++
++ /* Less than 4 * VEC. */
++ cmpq $VEC_SIZE, %rdx
++ jbe L(last_vec)
++ cmpq $(VEC_SIZE * 2), %rdx
++ jbe L(last_2x_vec)
++
++L(last_4x_vec):
++ /* From 2 * VEC to 4 * VEC. */
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++
++ addq $VEC_SIZE, %rdi
++ addq $VEC_SIZE, %rsi
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++
++ /* Use overlapping loads to avoid branches. */
++ leaq -(3 * VEC_SIZE)(%rdi, %rdx), %rdi
++ leaq -(3 * VEC_SIZE)(%rsi, %rdx), %rsi
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++
++ addq $VEC_SIZE, %rdi
++ addq $VEC_SIZE, %rsi
++ VMOVU (%rsi), %YMM2
++ VPCMPEQ (%rdi), %YMM2, %k2
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++ ret /* EAX is 0 here: no difference found. */
++
++ .p2align 4
++L(4x_vec_end):
++ kmovd %k1, %eax /* Check k1..k4 in order for the first difference. */
++ subl $VEC_MASK, %eax
++ jnz L(first_vec)
++ kmovd %k2, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec_x1)
++ kmovd %k3, %eax
++ subl $VEC_MASK, %eax
++ jnz L(first_vec_x2)
++ kmovd %k4, %eax
++ subl $VEC_MASK, %eax
++ tzcntl %eax, %ecx /* ECX = index of the first differing element. */
++# ifdef USE_AS_WMEMCMP
++ xorl %eax, %eax
++ movl (VEC_SIZE * 3)(%rdi, %rcx, 4), %edx
++ cmpl (VEC_SIZE * 3)(%rsi, %rcx, 4), %edx
++ jmp L(wmemcmp_return)
++# else
++ movzbl (VEC_SIZE * 3)(%rdi, %rcx), %eax
++ movzbl (VEC_SIZE * 3)(%rsi, %rcx), %edx
++ sub %edx, %eax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x1):
++ tzcntl %eax, %ecx /* ECX = index of the first differing element. */
++# ifdef USE_AS_WMEMCMP
++ xorl %eax, %eax
++ movl VEC_SIZE(%rdi, %rcx, 4), %edx
++ cmpl VEC_SIZE(%rsi, %rcx, 4), %edx
++ jmp L(wmemcmp_return)
++# else
++ movzbl VEC_SIZE(%rdi, %rcx), %eax
++ movzbl VEC_SIZE(%rsi, %rcx), %edx
++ sub %edx, %eax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x2):
++ tzcntl %eax, %ecx /* ECX = index of the first differing element. */
++# ifdef USE_AS_WMEMCMP
++ xorl %eax, %eax
++ movl (VEC_SIZE * 2)(%rdi, %rcx, 4), %edx
++ cmpl (VEC_SIZE * 2)(%rsi, %rcx, 4), %edx
++ jmp L(wmemcmp_return)
++# else
++ movzbl (VEC_SIZE * 2)(%rdi, %rcx), %eax
++ movzbl (VEC_SIZE * 2)(%rsi, %rcx), %edx
++ sub %edx, %eax
++# endif
++ ret
++END (MEMCMP)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
+new file mode 100644
+index 0000000000..1ec1962e86
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
+@@ -0,0 +1,17 @@
++#if IS_IN (libc)
++# define VEC_SIZE 32
++# define VEC(i) ymm##i
++# define VMOVNT vmovntdq
++# define VMOVU vmovdqu
++# define VMOVA vmovdqa
++
++# define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++# define VZEROUPPER_RETURN jmp L(return)
++
++# define SECTION(p) p##.avx.rtm
++# define MEMMOVE_SYMBOL(p,s) p##_avx_##s##_rtm
++
++# include "memmove-vec-unaligned-erms.S"
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+index aac1515cf6..848848ab39 100644
+--- a/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memmove-avx512-unaligned-erms.S
+@@ -1,11 +1,32 @@
+ #if IS_IN (libc)
+ # define VEC_SIZE 64
+-# define VEC(i) zmm##i
++# define XMM0 xmm16
++# define XMM1 xmm17
++# define YMM0 ymm16
++# define YMM1 ymm17
++# define VEC0 zmm16
++# define VEC1 zmm17
++# define VEC2 zmm18
++# define VEC3 zmm19
++# define VEC4 zmm20
++# define VEC5 zmm21
++# define VEC6 zmm22
++# define VEC7 zmm23
++# define VEC8 zmm24
++# define VEC9 zmm25
++# define VEC10 zmm26
++# define VEC11 zmm27
++# define VEC12 zmm28
++# define VEC13 zmm29
++# define VEC14 zmm30
++# define VEC15 zmm31
++# define VEC(i) VEC##i
+ # define VMOVNT vmovntdq
+ # define VMOVU vmovdqu64
+ # define VMOVA vmovdqa64
++# define VZEROUPPER
+
+-# define SECTION(p) p##.avx512
++# define SECTION(p) p##.evex512
+ # define MEMMOVE_SYMBOL(p,s) p##_avx512_##s
+
+ # include "memmove-vec-unaligned-erms.S"
+diff --git a/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
+new file mode 100644
+index 0000000000..0cbce8f944
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memmove-evex-unaligned-erms.S
+@@ -0,0 +1,33 @@
++#if IS_IN (libc)
++# define VEC_SIZE 32
++# define XMM0 xmm16
++# define XMM1 xmm17
++# define YMM0 ymm16
++# define YMM1 ymm17
++# define VEC0 ymm16
++# define VEC1 ymm17
++# define VEC2 ymm18
++# define VEC3 ymm19
++# define VEC4 ymm20
++# define VEC5 ymm21
++# define VEC6 ymm22
++# define VEC7 ymm23
++# define VEC8 ymm24
++# define VEC9 ymm25
++# define VEC10 ymm26
++# define VEC11 ymm27
++# define VEC12 ymm28
++# define VEC13 ymm29
++# define VEC14 ymm30
++# define VEC15 ymm31
++# define VEC(i) VEC##i
++# define VMOVNT vmovntdq
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++# define VZEROUPPER
++
++# define SECTION(p) p##.evex
++# define MEMMOVE_SYMBOL(p,s) p##_evex_##s
++
++# include "memmove-vec-unaligned-erms.S"
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+index bd5dc1a3f3..f71c343ecb 100644
+--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
+@@ -48,6 +48,14 @@
+ # define MEMMOVE_CHK_SYMBOL(p,s) MEMMOVE_SYMBOL(p, s)
+ #endif
+
++#ifndef XMM0
++# define XMM0 xmm0
++#endif
++
++#ifndef YMM0
++# define YMM0 ymm0
++#endif
++
+ #ifndef VZEROUPPER
+ # if VEC_SIZE > 16
+ # define VZEROUPPER vzeroupper
+@@ -56,6 +64,13 @@
+ # endif
+ #endif
+
++/* Avoid short distance rep movsb only with non-SSE vector. */
++#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
++# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
++#else
++# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
++#endif
++
+ #ifndef PREFETCH
+ # define PREFETCH(addr) prefetcht0 addr
+ #endif
+@@ -132,11 +147,12 @@ L(last_2x_vec):
+ VMOVU -VEC_SIZE(%rsi,%rdx), %VEC(1)
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+- VZEROUPPER
+ #if !defined USE_MULTIARCH || !IS_IN (libc)
+ L(nop):
+-#endif
+ ret
++#else
++ VZEROUPPER_RETURN
++#endif
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ END (MEMMOVE_SYMBOL (__memmove, unaligned))
+
+@@ -229,8 +245,11 @@ L(last_2x_vec):
+ VMOVU %VEC(0), (%rdi)
+ VMOVU %VEC(1), -VEC_SIZE(%rdi,%rdx)
+ L(return):
+- VZEROUPPER
++#if VEC_SIZE > 16
++ ZERO_UPPER_VEC_REGISTERS_RETURN
++#else
+ ret
++#endif
+
+ L(movsb):
+ cmp __x86_shared_non_temporal_threshold(%rip), %RDX_LP
+@@ -243,7 +262,21 @@ L(movsb):
+ cmpq %r9, %rdi
+ /* Avoid slow backward REP MOVSB. */
+ jb L(more_8x_vec_backward)
++# if AVOID_SHORT_DISTANCE_REP_MOVSB
++ movq %rdi, %rcx
++ subq %rsi, %rcx
++ jmp 2f
++# endif
+ 1:
++# if AVOID_SHORT_DISTANCE_REP_MOVSB
++ movq %rsi, %rcx
++ subq %rdi, %rcx
++2:
++/* Avoid "rep movsb" if RCX, the distance between source and destination,
++ is N*4GB + [1..63] with N >= 0. */
++ cmpl $63, %ecx
++ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
++# endif
+ mov %RDX_LP, %RCX_LP
+ rep movsb
+ L(nop):
+@@ -277,21 +310,20 @@ L(less_vec):
+ #if VEC_SIZE > 32
+ L(between_32_63):
+ /* From 32 to 63. No branch when size == 32. */
+- vmovdqu (%rsi), %ymm0
+- vmovdqu -32(%rsi,%rdx), %ymm1
+- vmovdqu %ymm0, (%rdi)
+- vmovdqu %ymm1, -32(%rdi,%rdx)
+- VZEROUPPER
+- ret
++ VMOVU (%rsi), %YMM0
++ VMOVU -32(%rsi,%rdx), %YMM1
++ VMOVU %YMM0, (%rdi)
++ VMOVU %YMM1, -32(%rdi,%rdx)
++ VZEROUPPER_RETURN
+ #endif
+ #if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+ L(between_16_31):
+- vmovdqu (%rsi), %xmm0
+- vmovdqu -16(%rsi,%rdx), %xmm1
+- vmovdqu %xmm0, (%rdi)
+- vmovdqu %xmm1, -16(%rdi,%rdx)
+- ret
++ VMOVU (%rsi), %XMM0
++ VMOVU -16(%rsi,%rdx), %XMM1
++ VMOVU %XMM0, (%rdi)
++ VMOVU %XMM1, -16(%rdi,%rdx)
++ VZEROUPPER_RETURN
+ #endif
+ L(between_8_15):
+ /* From 8 to 15. No branch when size == 8. */
+@@ -344,8 +376,7 @@ L(more_2x_vec):
+ VMOVU %VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
+ VMOVU %VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
+ VMOVU %VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ L(last_4x_vec):
+ /* Copy from 2 * VEC to 4 * VEC. */
+ VMOVU (%rsi), %VEC(0)
+@@ -356,8 +387,7 @@ L(last_4x_vec):
+ VMOVU %VEC(1), VEC_SIZE(%rdi)
+ VMOVU %VEC(2), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ L(more_8x_vec):
+ cmpq %rsi, %rdi
+@@ -413,8 +443,7 @@ L(loop_4x_vec_forward):
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ L(more_8x_vec_backward):
+ /* Load the first 4 * VEC and last VEC to support overlapping
+@@ -465,8 +494,7 @@ L(loop_4x_vec_backward):
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
+ L(large_forward):
+@@ -501,8 +529,7 @@ L(loop_large_forward):
+ VMOVU %VEC(8), -(VEC_SIZE * 3)(%rcx)
+ /* Store the first VEC. */
+ VMOVU %VEC(4), (%r11)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ L(large_backward):
+ /* Don't use non-temporal store if there is overlap between
+@@ -536,8 +563,7 @@ L(loop_large_backward):
+ VMOVU %VEC(7), (VEC_SIZE * 3)(%rdi)
+ /* Store the last VEC. */
+ VMOVU %VEC(8), (%r11)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ #endif
+ END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
+
+diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..cea2d2a72d
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef MEMRCHR
++# define MEMRCHR __memrchr_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "memrchr-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+index f5437b54de..c8d54c08d6 100644
+--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
++++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
+@@ -20,14 +20,22 @@
+
+ # include <sysdep.h>
+
++# ifndef MEMRCHR
++# define MEMRCHR __memrchr_avx2
++# endif
++
+ # ifndef VZEROUPPER
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
+
+- .section .text.avx,"ax",@progbits
+-ENTRY (__memrchr_avx2)
++ .section SECTION(.text),"ax",@progbits
++ENTRY (MEMRCHR)
+ /* Broadcast CHAR to YMM0. */
+ vmovd %esi, %xmm0
+ vpbroadcastb %xmm0, %ymm0
+@@ -134,8 +142,8 @@ L(loop_4x_vec):
+ vpmovmskb %ymm1, %eax
+ bsrl %eax, %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(last_4x_vec_or_less):
+@@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
+ addq %rax, %rdx
+ jl L(zero)
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_2x_vec):
+@@ -191,31 +198,27 @@ L(last_2x_vec):
+ jl L(zero)
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x0):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x1):
+ bsrl %eax, %eax
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x2):
+ bsrl %eax, %eax
+ addl $(VEC_SIZE * 2), %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x3):
+@@ -232,8 +235,7 @@ L(last_vec_x1_check):
+ jl L(zero)
+ addl $VEC_SIZE, %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_x3_check):
+@@ -243,12 +245,14 @@ L(last_vec_x3_check):
+ jl L(zero)
+ addl $(VEC_SIZE * 3), %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(zero):
+- VZEROUPPER
++ xorl %eax, %eax
++ VZEROUPPER_RETURN
++
++ .p2align 4
+ L(null):
+ xorl %eax, %eax
+ ret
+@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
+
+ bsrl %eax, %eax
+ addq %rdi, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_or_less):
+@@ -315,8 +318,7 @@ L(last_vec_or_less):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(last_vec_2x_aligned):
+@@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
+ bsrl %eax, %eax
+ addq %rdi, %rax
+ addq %r8, %rax
+- VZEROUPPER
+- ret
+-END (__memrchr_avx2)
++ VZEROUPPER_RETURN
++END (MEMRCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/memrchr-evex.S b/sysdeps/x86_64/multiarch/memrchr-evex.S
+new file mode 100644
+index 0000000000..16bf8e02b1
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memrchr-evex.S
+@@ -0,0 +1,337 @@
++/* memrchr optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# define VMOVA vmovdqa64
++
++# define YMMMATCH ymm16
++
++# define VEC_SIZE 32
++
++ .section .text.evex,"ax",@progbits
++ENTRY (__memrchr_evex)
++ /* Broadcast CHAR to YMMMATCH. */
++ vpbroadcastb %esi, %YMMMATCH
++
++ sub $VEC_SIZE, %RDX_LP
++ jbe L(last_vec_or_less) /* Length <= VEC_SIZE. */
++
++ add %RDX_LP, %RDI_LP /* Point at the last VEC_SIZE bytes. */
++
++ /* Check the last VEC_SIZE bytes. */
++ vpcmpb $0, (%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x0)
++
++ subq $(VEC_SIZE * 4), %rdi
++ movl %edi, %ecx
++ andl $(VEC_SIZE - 1), %ecx
++ jz L(aligned_more)
++
++ /* Align data for aligned loads in the loop. */
++ addq $VEC_SIZE, %rdi
++ addq $VEC_SIZE, %rdx
++ andq $-VEC_SIZE, %rdi
++ subq %rcx, %rdx
++
++ .p2align 4
++L(aligned_more):
++ subq $(VEC_SIZE * 4), %rdx
++ jbe L(last_4x_vec_or_less)
++
++ /* Check the last 4 * VEC_SIZE. Only one VEC_SIZE at a time
++ since data is only aligned to VEC_SIZE. */
++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
++ kmovd %k2, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
++ kmovd %k3, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x1)
++
++ vpcmpb $0, (%rdi), %YMMMATCH, %k4
++ kmovd %k4, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x0)
++
++ /* Align data to 4 * VEC_SIZE for loop with fewer branches.
++ There are some overlaps with above if data isn't aligned
++ to 4 * VEC_SIZE. */
++ movl %edi, %ecx
++ andl $(VEC_SIZE * 4 - 1), %ecx
++ jz L(loop_4x_vec)
++
++ addq $(VEC_SIZE * 4), %rdi
++ addq $(VEC_SIZE * 4), %rdx
++ andq $-(VEC_SIZE * 4), %rdi
++ subq %rcx, %rdx
++
++ .p2align 4
++L(loop_4x_vec):
++ /* Compare 4 * VEC at a time, moving backward through the buffer. */
++ subq $(VEC_SIZE * 4), %rdi
++ subq $(VEC_SIZE * 4), %rdx
++ jbe L(last_4x_vec_or_less)
++
++ vpcmpb $0, (%rdi), %YMMMATCH, %k1
++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k2
++ kord %k1, %k2, %k5
++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k3
++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k4
++
++ kord %k3, %k4, %k6
++ kortestd %k5, %k6 /* ZF set iff none of the 4 VECs matched. */
++ jz L(loop_4x_vec)
++
++ /* There is a match. Check the highest VEC first. */
++ kmovd %k4, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3)
++
++ kmovd %k3, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++ kmovd %k2, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x1)
++
++ kmovd %k1, %eax
++ bsrl %eax, %eax /* Index of the highest set bit: the last match. */
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_4x_vec_or_less):
++ addl $(VEC_SIZE * 4), %edx /* Undo the subtraction done before the jump. */
++ cmpl $(VEC_SIZE * 2), %edx
++ jbe L(last_2x_vec)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k2
++ kmovd %k2, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k3
++ kmovd %k3, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x1_check)
++ cmpl $(VEC_SIZE * 3), %edx
++ jbe L(zero)
++
++ vpcmpb $0, (%rdi), %YMMMATCH, %k4
++ kmovd %k4, %eax
++ testl %eax, %eax
++ jz L(zero)
++ bsrl %eax, %eax
++ subq $(VEC_SIZE * 4), %rdx
++ addq %rax, %rdx
++ jl L(zero) /* Negative result: treated as no match. */
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_2x_vec):
++ vpcmpb $0, (VEC_SIZE * 3)(%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3_check)
++ cmpl $VEC_SIZE, %edx
++ jbe L(zero)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jz L(zero)
++ bsrl %eax, %eax
++ subq $(VEC_SIZE * 2), %rdx
++ addq %rax, %rdx
++ jl L(zero) /* Negative result: treated as no match. */
++ addl $(VEC_SIZE * 2), %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x0):
++ bsrl %eax, %eax /* Index of the last match within the VEC at RDI. */
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x1):
++ bsrl %eax, %eax /* Index of the last match within the VEC at RDI + VEC_SIZE. */
++ addl $VEC_SIZE, %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x2):
++ bsrl %eax, %eax /* Index of the last match within the VEC at RDI + 2 * VEC_SIZE. */
++ addl $(VEC_SIZE * 2), %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x3):
++ bsrl %eax, %eax /* Index of the last match within the VEC at RDI + 3 * VEC_SIZE. */
++ addl $(VEC_SIZE * 3), %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x1_check):
++ bsrl %eax, %eax
++ subq $(VEC_SIZE * 3), %rdx
++ addq %rax, %rdx
++ jl L(zero) /* Negative result: treated as no match. */
++ addl $VEC_SIZE, %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_x3_check):
++ bsrl %eax, %eax
++ subq $VEC_SIZE, %rdx
++ addq %rax, %rdx
++ jl L(zero) /* Negative result: treated as no match. */
++ addl $(VEC_SIZE * 3), %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(zero):
++ xorl %eax, %eax /* No match: return 0. */
++ ret
++
++ .p2align 4
++L(last_vec_or_less_aligned):
++ movl %edx, %ecx
++
++ vpcmpb $0, (%rdi), %YMMMATCH, %k1
++
++ movl $1, %edx
++ /* Use a 64-bit shift to support rdx << 32. */
++ salq %cl, %rdx
++ subq $1, %rdx /* RDX = mask with the low CL bits set. */
++
++ kmovd %k1, %eax
++
++ /* Remove the trailing bytes. */
++ andl %edx, %eax
++ testl %eax, %eax
++ jz L(zero)
++
++ bsrl %eax, %eax
++ addq %rdi, %rax
++ ret
++
++ .p2align 4
++L(last_vec_or_less):
++ addl $VEC_SIZE, %edx /* Undo the subtraction done at entry. */
++
++ /* Check for zero length. */
++ testl %edx, %edx
++ jz L(zero)
++
++ movl %edi, %ecx
++ andl $(VEC_SIZE - 1), %ecx
++ jz L(last_vec_or_less_aligned)
++
++ movl %ecx, %esi
++ movl %ecx, %r8d /* R8 = misalignment of RDI within a VEC. */
++ addl %edx, %esi
++ andq $-VEC_SIZE, %rdi
++
++ subl $VEC_SIZE, %esi
++ ja L(last_vec_2x_aligned) /* Data spans two aligned VECs. */
++
++ /* Check the last VEC. */
++ vpcmpb $0, (%rdi), %YMMMATCH, %k1
++ kmovd %k1, %eax
++
++ /* Remove the leading and trailing bytes. */
++ sarl %cl, %eax
++ movl %edx, %ecx
++
++ movl $1, %edx
++ sall %cl, %edx
++ subl $1, %edx
++
++ andl %edx, %eax
++ testl %eax, %eax
++ jz L(zero)
++
++ bsrl %eax, %eax
++ addq %rdi, %rax
++ addq %r8, %rax /* Add back the misalignment shifted out above. */
++ ret
++
++ .p2align 4
++L(last_vec_2x_aligned):
++ movl %esi, %ecx
++
++ /* Check the last VEC. */
++ vpcmpb $0, VEC_SIZE(%rdi), %YMMMATCH, %k1
++
++ movl $1, %edx
++ sall %cl, %edx
++ subl $1, %edx
++
++ kmovd %k1, %eax
++
++ /* Remove the trailing bytes. */
++ andl %edx, %eax
++
++ testl %eax, %eax
++ jnz L(last_vec_x1)
++
++ /* Check the second last VEC. */
++ vpcmpb $0, (%rdi), %YMMMATCH, %k1
++
++ movl %r8d, %ecx
++
++ kmovd %k1, %eax
++
++ /* Remove the leading bytes. Must use unsigned right shift for
++ bsrl below. */
++ shrl %cl, %eax
++ testl %eax, %eax
++ jz L(zero)
++
++ bsrl %eax, %eax
++ addq %rdi, %rax
++ addq %r8, %rax /* Add back the misalignment shifted out above. */
++ ret
++END (__memrchr_evex)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
+new file mode 100644
+index 0000000000..8ac3e479bb
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
+@@ -0,0 +1,10 @@
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return)
++
++#define SECTION(p) p##.avx.rtm
++#define MEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
++#define WMEMSET_SYMBOL(p,s) p##_avx2_##s##_rtm
++
++#include "memset-avx2-unaligned-erms.S"
+diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+index 7ab3d89849..ae0860f36a 100644
+--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
+@@ -14,9 +14,15 @@
+ movq r, %rax; \
+ vpbroadcastd %xmm0, %ymm0
+
+-# define SECTION(p) p##.avx
+-# define MEMSET_SYMBOL(p,s) p##_avx2_##s
+-# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++# ifndef MEMSET_SYMBOL
++# define MEMSET_SYMBOL(p,s) p##_avx2_##s
++# endif
++# ifndef WMEMSET_SYMBOL
++# define WMEMSET_SYMBOL(p,s) p##_avx2_##s
++# endif
+
+ # include "memset-vec-unaligned-erms.S"
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+index 0783979ca5..22e7b187c8 100644
+--- a/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-avx512-unaligned-erms.S
+@@ -1,22 +1,22 @@
+ #if IS_IN (libc)
+ # define VEC_SIZE 64
+-# define VEC(i) zmm##i
++# define XMM0 xmm16
++# define YMM0 ymm16
++# define VEC0 zmm16
++# define VEC(i) VEC##i
+ # define VMOVU vmovdqu64
+ # define VMOVA vmovdqa64
++# define VZEROUPPER
+
+ # define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+- vmovd d, %xmm0; \
+ movq r, %rax; \
+- vpbroadcastb %xmm0, %xmm0; \
+- vpbroadcastq %xmm0, %zmm0
++ vpbroadcastb d, %VEC0
+
+ # define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
+- vmovd d, %xmm0; \
+ movq r, %rax; \
+- vpbroadcastd %xmm0, %xmm0; \
+- vpbroadcastq %xmm0, %zmm0
++ vpbroadcastd d, %VEC0
+
+-# define SECTION(p) p##.avx512
++# define SECTION(p) p##.evex512
+ # define MEMSET_SYMBOL(p,s) p##_avx512_##s
+ # define WMEMSET_SYMBOL(p,s) p##_avx512_##s
+
+diff --git a/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+new file mode 100644
+index 0000000000..ae0a4d6e46
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/memset-evex-unaligned-erms.S
+@@ -0,0 +1,24 @@
++#if IS_IN (libc)
++# define VEC_SIZE 32
++# define XMM0 xmm16
++# define YMM0 ymm16
++# define VEC0 ymm16
++# define VEC(i) VEC##i
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++# define VZEROUPPER
++
++# define MEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
++ movq r, %rax; \
++ vpbroadcastb d, %VEC0
++
++# define WMEMSET_VDUP_TO_VEC0_AND_SET_RETURN(d, r) \
++ movq r, %rax; \
++ vpbroadcastd d, %VEC0
++
++# define SECTION(p) p##.evex
++# define MEMSET_SYMBOL(p,s) p##_evex_##s
++# define WMEMSET_SYMBOL(p,s) p##_evex_##s
++
++# include "memset-vec-unaligned-erms.S"
++#endif
+diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+index 2bfc95de05..de5a8a38f5 100644
+--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+@@ -34,20 +34,25 @@
+ # define WMEMSET_CHK_SYMBOL(p,s) WMEMSET_SYMBOL(p, s)
+ #endif
+
++#ifndef XMM0
++# define XMM0 xmm0
++#endif
++
++#ifndef YMM0
++# define YMM0 ymm0
++#endif
++
+ #ifndef VZEROUPPER
+ # if VEC_SIZE > 16
+ # define VZEROUPPER vzeroupper
++# define VZEROUPPER_SHORT_RETURN vzeroupper; ret
+ # else
+ # define VZEROUPPER
+ # endif
+ #endif
+
+ #ifndef VZEROUPPER_SHORT_RETURN
+-# if VEC_SIZE > 16
+-# define VZEROUPPER_SHORT_RETURN vzeroupper
+-# else
+-# define VZEROUPPER_SHORT_RETURN rep
+-# endif
++# define VZEROUPPER_SHORT_RETURN rep; ret
+ #endif
+
+ #ifndef MOVQ
+@@ -67,7 +72,7 @@
+ ENTRY (__bzero)
+ mov %RDI_LP, %RAX_LP /* Set return value. */
+ mov %RSI_LP, %RDX_LP /* Set n. */
+- pxor %xmm0, %xmm0
++ pxor %XMM0, %XMM0
+ jmp L(entry_from_bzero)
+ END (__bzero)
+ weak_alias (__bzero, bzero)
+@@ -109,8 +114,7 @@ L(entry_from_bzero):
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ #if defined USE_MULTIARCH && IS_IN (libc)
+ END (MEMSET_SYMBOL (__memset, unaligned))
+
+@@ -133,14 +137,12 @@ ENTRY (__memset_erms)
+ ENTRY (MEMSET_SYMBOL (__memset, erms))
+ # endif
+ L(stosb):
+- /* Issue vzeroupper before rep stosb. */
+- VZEROUPPER
+ mov %RDX_LP, %RCX_LP
+ movzbl %sil, %eax
+ mov %RDI_LP, %RDX_LP
+ rep stosb
+ mov %RDX_LP, %RAX_LP
+- ret
++ VZEROUPPER_RETURN
+ # if VEC_SIZE == 16
+ END (__memset_erms)
+ # else
+@@ -167,8 +169,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
+ /* From VEC and to 2 * VEC. No branch when size == VEC_SIZE. */
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ L(stosb_more_2x_vec):
+ cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
+@@ -182,8 +183,11 @@ L(more_2x_vec):
+ VMOVU %VEC(0), -VEC_SIZE(%rdi,%rdx)
+ VMOVU %VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
+ L(return):
+- VZEROUPPER
++#if VEC_SIZE > 16
++ ZERO_UPPER_VEC_REGISTERS_RETURN
++#else
+ ret
++#endif
+
+ L(loop_start):
+ leaq (VEC_SIZE * 4)(%rdi), %rcx
+@@ -209,7 +213,6 @@ L(loop):
+ cmpq %rcx, %rdx
+ jne L(loop)
+ VZEROUPPER_SHORT_RETURN
+- ret
+ L(less_vec):
+ /* Less than 1 VEC. */
+ # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
+@@ -223,7 +226,7 @@ L(less_vec):
+ cmpb $16, %dl
+ jae L(between_16_31)
+ # endif
+- MOVQ %xmm0, %rcx
++ MOVQ %XMM0, %rcx
+ cmpb $8, %dl
+ jae L(between_8_15)
+ cmpb $4, %dl
+@@ -233,40 +236,34 @@ L(less_vec):
+ jb 1f
+ movb %cl, (%rdi)
+ 1:
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ # if VEC_SIZE > 32
+ /* From 32 to 63. No branch when size == 32. */
+ L(between_32_63):
+- vmovdqu %ymm0, -32(%rdi,%rdx)
+- vmovdqu %ymm0, (%rdi)
+- VZEROUPPER
+- ret
++ VMOVU %YMM0, -32(%rdi,%rdx)
++ VMOVU %YMM0, (%rdi)
++ VZEROUPPER_RETURN
+ # endif
+ # if VEC_SIZE > 16
+ /* From 16 to 31. No branch when size == 16. */
+ L(between_16_31):
+- vmovdqu %xmm0, -16(%rdi,%rdx)
+- vmovdqu %xmm0, (%rdi)
+- VZEROUPPER
+- ret
++ VMOVU %XMM0, -16(%rdi,%rdx)
++ VMOVU %XMM0, (%rdi)
++ VZEROUPPER_RETURN
+ # endif
+ /* From 8 to 15. No branch when size == 8. */
+ L(between_8_15):
+ movq %rcx, -8(%rdi,%rdx)
+ movq %rcx, (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ L(between_4_7):
+ /* From 4 to 7. No branch when size == 4. */
+ movl %ecx, -4(%rdi,%rdx)
+ movl %ecx, (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ L(between_2_3):
+ /* From 2 to 3. No branch when size == 2. */
+ movw %cx, -2(%rdi,%rdx)
+ movw %cx, (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ END (MEMSET_SYMBOL (__memset, unaligned_erms))
+diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..acc5f6e2fb
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define MEMCHR __rawmemchr_avx2_rtm
++#define USE_AS_RAWMEMCHR 1
++
++#include "memchr-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/rawmemchr-evex.S b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
+new file mode 100644
+index 0000000000..ec942b77ba
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/rawmemchr-evex.S
+@@ -0,0 +1,4 @@
++#define MEMCHR __rawmemchr_evex
++#define USE_AS_RAWMEMCHR 1
++
++#include "memchr-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
+new file mode 100644
+index 0000000000..2b9c07a59f
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STPCPY
++#define STRCPY __stpcpy_avx2_rtm
++#include "strcpy-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/stpcpy-evex.S b/sysdeps/x86_64/multiarch/stpcpy-evex.S
+new file mode 100644
+index 0000000000..7c6f26cd98
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/stpcpy-evex.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STPCPY
++#define STRCPY __stpcpy_evex
++#include "strcpy-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
+new file mode 100644
+index 0000000000..60a2ccfe53
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define USE_AS_STPCPY
++#define USE_AS_STRNCPY
++#define STRCPY __stpncpy_avx2_rtm
++#include "strcpy-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/stpncpy-evex.S b/sysdeps/x86_64/multiarch/stpncpy-evex.S
+new file mode 100644
+index 0000000000..1570014d1c
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/stpncpy-evex.S
+@@ -0,0 +1,4 @@
++#define USE_AS_STPCPY
++#define USE_AS_STRNCPY
++#define STRCPY __stpncpy_evex
++#include "strcpy-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
+new file mode 100644
+index 0000000000..637fb557c4
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRCAT
++# define STRCAT __strcat_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "strcat-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S
+index a4143bf8f5..1e6d4827ee 100644
+--- a/sysdeps/x86_64/multiarch/strcat-avx2.S
++++ b/sysdeps/x86_64/multiarch/strcat-avx2.S
+@@ -30,7 +30,11 @@
+ /* Number of bytes in a vector register */
+ # define VEC_SIZE 32
+
+- .section .text.avx,"ax",@progbits
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCAT)
+ mov %rdi, %r9
+ # ifdef USE_AS_STRNCAT
+diff --git a/sysdeps/x86_64/multiarch/strcat-evex.S b/sysdeps/x86_64/multiarch/strcat-evex.S
+new file mode 100644
+index 0000000000..97c3d85b6d
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcat-evex.S
+@@ -0,0 +1,283 @@
++/* strcat with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef STRCAT
++# define STRCAT __strcat_evex
++# endif
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
++/* zero register */
++# define XMMZERO xmm16
++# define YMMZERO ymm16
++# define YMM0 ymm17
++# define YMM1 ymm18
++
++# define USE_AS_STRCAT
++
++/* Number of bytes in a vector register */
++# define VEC_SIZE 32
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRCAT)
++ mov %rdi, %r9
++# ifdef USE_AS_STRNCAT
++ mov %rdx, %r8
++# endif
++
++ xor %eax, %eax
++ mov %edi, %ecx
++ and $((VEC_SIZE * 4) - 1), %ecx
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++ cmp $(VEC_SIZE * 3), %ecx
++ ja L(fourth_vector_boundary)
++ vpcmpb $0, (%rdi), %YMMZERO, %k0
++ kmovd %k0, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_first_vector)
++ mov %rdi, %rax
++ and $-VEC_SIZE, %rax
++ jmp L(align_vec_size_start)
++L(fourth_vector_boundary):
++ mov %rdi, %rax
++ and $-VEC_SIZE, %rax
++ vpcmpb $0, (%rax), %YMMZERO, %k0
++ mov $-1, %r10d
++ sub %rax, %rcx
++ shl %cl, %r10d
++ kmovd %k0, %edx
++ and %r10d, %edx
++ jnz L(exit)
++
++L(align_vec_size_start):
++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
++ kmovd %k0, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_second_vector)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_third_vector)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
++ kmovd %k2, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fourth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
++ kmovd %k3, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fifth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
++ add $(VEC_SIZE * 4), %rax
++ kmovd %k4, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_second_vector)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_third_vector)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
++ kmovd %k2, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fourth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
++ kmovd %k3, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fifth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
++ kmovd %k4, %edx
++ add $(VEC_SIZE * 4), %rax
++ test %edx, %edx
++ jnz L(exit_null_on_second_vector)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_third_vector)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
++ kmovd %k2, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fourth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
++ kmovd %k3, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fifth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
++ add $(VEC_SIZE * 4), %rax
++ kmovd %k4, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_second_vector)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_third_vector)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
++ kmovd %k2, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fourth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
++ kmovd %k3, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fifth_vector)
++
++ test $((VEC_SIZE * 4) - 1), %rax
++ jz L(align_four_vec_loop)
++
++ vpcmpb $0, (VEC_SIZE * 5)(%rax), %YMMZERO, %k4
++ add $(VEC_SIZE * 5), %rax
++ kmovd %k4, %edx
++ test %edx, %edx
++ jnz L(exit)
++
++ test $((VEC_SIZE * 4) - 1), %rax
++ jz L(align_four_vec_loop)
++
++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
++ add $VEC_SIZE, %rax
++ kmovd %k0, %edx
++ test %edx, %edx
++ jnz L(exit)
++
++ test $((VEC_SIZE * 4) - 1), %rax
++ jz L(align_four_vec_loop)
++
++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k0
++ add $VEC_SIZE, %rax
++ kmovd %k0, %edx
++ test %edx, %edx
++ jnz L(exit)
++
++ test $((VEC_SIZE * 4) - 1), %rax
++ jz L(align_four_vec_loop)
++
++ vpcmpb $0, VEC_SIZE(%rax), %YMMZERO, %k1
++ add $VEC_SIZE, %rax
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit)
++
++ add $VEC_SIZE, %rax
++
++ .p2align 4
++L(align_four_vec_loop):
++ VMOVA (%rax), %YMM0
++ VMOVA (VEC_SIZE * 2)(%rax), %YMM1
++ vpminub VEC_SIZE(%rax), %YMM0, %YMM0
++ vpminub (VEC_SIZE * 3)(%rax), %YMM1, %YMM1
++ vpminub %YMM0, %YMM1, %YMM0
++ /* If K0 != 0, there is a null byte. */
++ vpcmpb $0, %YMM0, %YMMZERO, %k0
++ add $(VEC_SIZE * 4), %rax
++ ktestd %k0, %k0
++ jz L(align_four_vec_loop)
++
++ vpcmpb $0, -(VEC_SIZE * 4)(%rax), %YMMZERO, %k0
++ sub $(VEC_SIZE * 5), %rax
++ kmovd %k0, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_second_vector)
++
++ vpcmpb $0, (VEC_SIZE * 2)(%rax), %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_third_vector)
++
++ vpcmpb $0, (VEC_SIZE * 3)(%rax), %YMMZERO, %k2
++ kmovd %k2, %edx
++ test %edx, %edx
++ jnz L(exit_null_on_fourth_vector)
++
++ vpcmpb $0, (VEC_SIZE * 4)(%rax), %YMMZERO, %k3
++ kmovd %k3, %edx
++ sub %rdi, %rax
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ add $(VEC_SIZE * 4), %rax
++ jmp L(StartStrcpyPart)
++
++ .p2align 4
++L(exit):
++ sub %rdi, %rax
++L(exit_null_on_first_vector):
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ jmp L(StartStrcpyPart)
++
++ .p2align 4
++L(exit_null_on_second_vector):
++ sub %rdi, %rax
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ add $VEC_SIZE, %rax
++ jmp L(StartStrcpyPart)
++
++ .p2align 4
++L(exit_null_on_third_vector):
++ sub %rdi, %rax
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ add $(VEC_SIZE * 2), %rax
++ jmp L(StartStrcpyPart)
++
++ .p2align 4
++L(exit_null_on_fourth_vector):
++ sub %rdi, %rax
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ add $(VEC_SIZE * 3), %rax
++ jmp L(StartStrcpyPart)
++
++ .p2align 4
++L(exit_null_on_fifth_vector):
++ sub %rdi, %rax
++ bsf %rdx, %rdx
++ add %rdx, %rax
++ add $(VEC_SIZE * 4), %rax
++
++ .p2align 4
++L(StartStrcpyPart):
++ lea (%r9, %rax), %rdi
++ mov %rsi, %rcx
++ mov %r9, %rax /* save result */
++
++# ifdef USE_AS_STRNCAT
++ test %r8, %r8
++ jz L(ExitZero)
++# define USE_AS_STRNCPY
++# endif
++
++# include "strcpy-evex.S"
++#endif
+diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..81f20d1d8e
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRCHR
++# define STRCHR __strchr_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "strchr-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
+index 39fc69da7b..0a5217514a 100644
+--- a/sysdeps/x86_64/multiarch/strchr-avx2.S
++++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
+@@ -38,9 +38,13 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCHR)
+ movl %edi, %ecx
+ /* Broadcast CHAR to YMM0. */
+@@ -93,8 +97,8 @@ L(cros_page_boundary):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(aligned_more):
+@@ -190,8 +194,7 @@ L(first_vec_x0):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x1):
+@@ -205,8 +208,7 @@ L(first_vec_x1):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(first_vec_x2):
+@@ -220,8 +222,7 @@ L(first_vec_x2):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(4x_vec_end):
+@@ -247,8 +248,7 @@ L(first_vec_x3):
+ cmp (%rax), %CHAR_REG
+ cmovne %rdx, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ END (STRCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strchr-evex.S b/sysdeps/x86_64/multiarch/strchr-evex.S
+new file mode 100644
+index 0000000000..ddc86a7058
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strchr-evex.S
+@@ -0,0 +1,335 @@
++/* strchr/strchrnul optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef STRCHR
++# define STRCHR __strchr_evex
++# endif
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
++# ifdef USE_AS_WCSCHR
++# define VPBROADCAST vpbroadcastd
++# define VPCMP vpcmpd
++# define VPMINU vpminud
++# define CHAR_REG esi
++# define SHIFT_REG r8d
++# else
++# define VPBROADCAST vpbroadcastb
++# define VPCMP vpcmpb
++# define VPMINU vpminub
++# define CHAR_REG sil
++# define SHIFT_REG ecx
++# endif
++
++# define XMMZERO xmm16
++
++# define YMMZERO ymm16
++# define YMM0 ymm17
++# define YMM1 ymm18
++# define YMM2 ymm19
++# define YMM3 ymm20
++# define YMM4 ymm21
++# define YMM5 ymm22
++# define YMM6 ymm23
++# define YMM7 ymm24
++# define YMM8 ymm25
++
++# define VEC_SIZE 32
++# define PAGE_SIZE 4096
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRCHR)
++ movl %edi, %ecx
++# ifndef USE_AS_STRCHRNUL
++ xorl %edx, %edx
++# endif
++
++ /* Broadcast CHAR to YMM0. */
++ VPBROADCAST %esi, %YMM0
++
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++
++ /* Check if we cross page boundary with one vector load. */
++ andl $(PAGE_SIZE - 1), %ecx
++ cmpl $(PAGE_SIZE - VEC_SIZE), %ecx
++ ja L(cross_page_boundary)
++
++ /* Check the first VEC_SIZE bytes. Search for both CHAR and the
++ null bytes. */
++ VMOVU (%rdi), %YMM1
++
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ ktestd %k0, %k0
++ jz L(more_vecs)
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++ /* Found CHAR or the null byte. */
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (%rdi, %rax, 4), %rax
++# else
++ addq %rdi, %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++ .p2align 4
++L(more_vecs):
++ /* Align data for aligned loads in the loop. */
++ andq $-VEC_SIZE, %rdi
++L(aligned_more):
++
++ /* Check the next 4 * VEC_SIZE. Only one VEC_SIZE at a time
++ since data is only aligned to VEC_SIZE. */
++ VMOVA VEC_SIZE(%rdi), %YMM1
++ addq $VEC_SIZE, %rdi
++
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x0)
++
++ VMOVA VEC_SIZE(%rdi), %YMM1
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x1)
++
++ VMOVA (VEC_SIZE * 2)(%rdi), %YMM1
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x2)
++
++ VMOVA (VEC_SIZE * 3)(%rdi), %YMM1
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ ktestd %k0, %k0
++ jz L(prep_loop_4x)
++
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++ /* Found CHAR or the null byte. */
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (VEC_SIZE * 3)(%rdi, %rax, 4), %rax
++# else
++ leaq (VEC_SIZE * 3)(%rdi, %rax), %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x0):
++ tzcntl %eax, %eax
++ /* Found CHAR or the null byte. */
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (%rdi, %rax, 4), %rax
++# else
++ addq %rdi, %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x1):
++ tzcntl %eax, %eax
++ /* Found CHAR or the null byte. */
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq VEC_SIZE(%rdi, %rax, 4), %rax
++# else
++ leaq VEC_SIZE(%rdi, %rax), %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x2):
++ tzcntl %eax, %eax
++ /* Found CHAR or the null byte. */
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
++# else
++ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++L(prep_loop_4x):
++ /* Align data to 4 * VEC_SIZE. */
++ andq $-(VEC_SIZE * 4), %rdi
++
++ .p2align 4
++L(loop_4x_vec):
++ /* Compare 4 * VEC at a time forward. */
++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
++ VMOVA (VEC_SIZE * 5)(%rdi), %YMM2
++ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
++ VMOVA (VEC_SIZE * 7)(%rdi), %YMM4
++
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM5
++ vpxorq %YMM2, %YMM0, %YMM6
++ vpxorq %YMM3, %YMM0, %YMM7
++ vpxorq %YMM4, %YMM0, %YMM8
++
++ VPMINU %YMM5, %YMM1, %YMM5
++ VPMINU %YMM6, %YMM2, %YMM6
++ VPMINU %YMM7, %YMM3, %YMM7
++ VPMINU %YMM8, %YMM4, %YMM8
++
++ VPMINU %YMM5, %YMM6, %YMM1
++ VPMINU %YMM7, %YMM8, %YMM2
++
++ VPMINU %YMM1, %YMM2, %YMM1
++
++ /* Each bit in K0 represents a CHAR or a null byte. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++
++ addq $(VEC_SIZE * 4), %rdi
++
++ ktestd %k0, %k0
++ jz L(loop_4x_vec)
++
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM5, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x0)
++
++ /* Each bit in K1 represents a CHAR or a null byte in YMM2. */
++ VPCMP $0, %YMMZERO, %YMM6, %k1
++ kmovd %k1, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x1)
++
++ /* Each bit in K2 represents a CHAR or a null byte in YMM3. */
++ VPCMP $0, %YMMZERO, %YMM7, %k2
++ /* Each bit in K3 represents a CHAR or a null byte in YMM4. */
++ VPCMP $0, %YMMZERO, %YMM8, %k3
++
++# ifdef USE_AS_WCSCHR
++ /* NB: Each bit in K2/K3 represents 4-byte element. */
++ kshiftlw $8, %k3, %k1
++# else
++ kshiftlq $32, %k3, %k1
++# endif
++
++ /* Each bit in K1 represents a CHAR or a null byte. */
++ korq %k1, %k2, %k1
++ kmovq %k1, %rax
++
++ tzcntq %rax, %rax
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (VEC_SIZE * 2)(%rdi, %rax, 4), %rax
++# else
++ leaq (VEC_SIZE * 2)(%rdi, %rax), %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++ /* Cold case for crossing page with first load. */
++ .p2align 4
++L(cross_page_boundary):
++ andq $-VEC_SIZE, %rdi
++ andl $(VEC_SIZE - 1), %ecx
++
++ VMOVA (%rdi), %YMM1
++
++ /* Leaves only CHARS matching esi as 0. */
++ vpxorq %YMM1, %YMM0, %YMM2
++ VPMINU %YMM2, %YMM1, %YMM2
++ /* Each bit in K0 represents a CHAR or a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM2, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++
++# ifdef USE_AS_WCSCHR
++ /* NB: Divide shift count by 4 since each bit in K0 represents 4
++ bytes. */
++ movl %ecx, %SHIFT_REG
++ sarl $2, %SHIFT_REG
++# endif
++
++ /* Remove the leading bits. */
++ sarxl %SHIFT_REG, %eax, %eax
++ testl %eax, %eax
++
++ jz L(aligned_more)
++ tzcntl %eax, %eax
++ addq %rcx, %rdi
++# ifdef USE_AS_WCSCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq (%rdi, %rax, 4), %rax
++# else
++ addq %rdi, %rax
++# endif
++# ifndef USE_AS_STRCHRNUL
++ cmp (%rax), %CHAR_REG
++ cmovne %rdx, %rax
++# endif
++ ret
++
++END (STRCHR)
++# endif
+diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
+index 8df4609bf8..4ed1177c70 100644
+--- a/sysdeps/x86_64/multiarch/strchr.c
++++ b/sysdeps/x86_64/multiarch/strchr.c
+@@ -29,16 +29,28 @@
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Slow_BSF))
+ return OPTIMIZE (sse2_no_bsf);
+diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
+new file mode 100644
+index 0000000000..cdcf818b91
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define STRCHR __strchrnul_avx2_rtm
++#define USE_AS_STRCHRNUL 1
++#include "strchr-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/strchrnul-evex.S b/sysdeps/x86_64/multiarch/strchrnul-evex.S
+new file mode 100644
+index 0000000000..064fe7ca9e
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strchrnul-evex.S
+@@ -0,0 +1,3 @@
++#define STRCHR __strchrnul_evex
++#define USE_AS_STRCHRNUL 1
++#include "strchr-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
+new file mode 100644
+index 0000000000..aecd30d97f
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRCMP
++# define STRCMP __strcmp_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "strcmp-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+index d42b04b54f..759e5b64c2 100644
+--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
++++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
+@@ -55,6 +55,10 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ /* Warning!
+ wcscmp/wcsncmp have to use SIGNED comparison for elements.
+ strcmp/strncmp have to use UNSIGNED comparison for elements.
+@@ -75,7 +79,7 @@
+ the maximum offset is reached before a difference is found, zero is
+ returned. */
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCMP)
+ # ifdef USE_AS_STRNCMP
+ /* Check for simple cases (0 or 1) in offset. */
+@@ -83,6 +87,16 @@ ENTRY (STRCMP)
+ je L(char0)
+ jb L(zero)
+ # ifdef USE_AS_WCSCMP
++# ifndef __ILP32__
++ movq %rdx, %rcx
++ /* Check if length could overflow when multiplied by
++ sizeof(wchar_t). Checking top 8 bits will cover all potential
++ overflow cases as well as redirect cases where it is impossible
++ for length to bound a valid memory region. In these cases just
++ use 'wcscmp'. */
++ shrq $56, %rcx
++ jnz __wcscmp_avx2
++# endif
+ /* Convert units: from wide to byte char. */
+ shl $2, %RDX_LP
+ # endif
+@@ -127,8 +141,8 @@ L(return):
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(return_vec_size):
+@@ -161,8 +175,7 @@ L(return_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(return_2_vec_size):
+@@ -195,8 +208,7 @@ L(return_2_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(return_3_vec_size):
+@@ -229,8 +241,7 @@ L(return_3_vec_size):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(next_3_vectors):
+@@ -356,8 +367,7 @@ L(back_to_loop):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_vec):
+@@ -400,8 +410,7 @@ L(test_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_2_vec):
+@@ -444,8 +453,7 @@ L(test_2_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(test_3_vec):
+@@ -486,8 +494,7 @@ L(test_3_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(loop_cross_page):
+@@ -556,8 +563,7 @@ L(loop_cross_page):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(loop_cross_page_2_vec):
+@@ -631,8 +637,7 @@ L(loop_cross_page_2_vec):
+ subl %edx, %eax
+ # endif
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # ifdef USE_AS_STRNCMP
+ L(string_nbyte_offset_check):
+@@ -674,8 +679,7 @@ L(cross_page_loop):
+ # ifndef USE_AS_WCSCMP
+ L(different):
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # ifdef USE_AS_WCSCMP
+ .p2align 4
+@@ -685,16 +689,14 @@ L(different):
+ setl %al
+ negl %eax
+ orl $1, %eax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ # endif
+
+ # ifdef USE_AS_STRNCMP
+ .p2align 4
+ L(zero):
+ xorl %eax, %eax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(char0):
+@@ -708,8 +710,7 @@ L(char0):
+ movzbl (%rdi), %eax
+ subl %ecx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ # endif
+
+ .p2align 4
+@@ -734,8 +735,7 @@ L(last_vector):
+ movzbl (%rsi, %rdx), %edx
+ subl %edx, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ /* Comparing on page boundary region requires special treatment:
+ It must done one vector at the time, starting with the wider
+@@ -856,7 +856,6 @@ L(cross_page_4bytes):
+ testl %eax, %eax
+ jne L(cross_page_loop)
+ subl %ecx, %eax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+ END (STRCMP)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strcmp-evex.S b/sysdeps/x86_64/multiarch/strcmp-evex.S
+new file mode 100644
+index 0000000000..459eeed09f
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcmp-evex.S
+@@ -0,0 +1,1043 @@
++/* strcmp/wcscmp/strncmp/wcsncmp optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef STRCMP
++# define STRCMP __strcmp_evex
++# endif
++
++# define PAGE_SIZE 4096
++
++/* VEC_SIZE = Number of bytes in a ymm register */
++# define VEC_SIZE 32
++
++/* Shift for dividing by (VEC_SIZE * 4). */
++# define DIVIDE_BY_VEC_4_SHIFT 7
++# if (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
++# error (VEC_SIZE * 4) != (1 << DIVIDE_BY_VEC_4_SHIFT)
++# endif
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
++# ifdef USE_AS_WCSCMP
++/* Compare packed dwords. */
++# define VPCMP vpcmpd
++# define SHIFT_REG32 r8d
++# define SHIFT_REG64 r8
++/* 1 dword char == 4 bytes. */
++# define SIZE_OF_CHAR 4
++# else
++/* Compare packed bytes. */
++# define VPCMP vpcmpb
++# define SHIFT_REG32 ecx
++# define SHIFT_REG64 rcx
++/* 1 byte char == 1 byte. */
++# define SIZE_OF_CHAR 1
++# endif
++
++# define XMMZERO xmm16
++# define XMM0 xmm17
++# define XMM1 xmm18
++
++# define YMMZERO ymm16
++# define YMM0 ymm17
++# define YMM1 ymm18
++# define YMM2 ymm19
++# define YMM3 ymm20
++# define YMM4 ymm21
++# define YMM5 ymm22
++# define YMM6 ymm23
++# define YMM7 ymm24
++
++/* Warning!
++ wcscmp/wcsncmp have to use SIGNED comparison for elements.
++ strcmp/strncmp have to use UNSIGNED comparison for elements.
++*/
++
++/* The main idea of the string comparison (byte or dword) using 256-bit
++ EVEX instructions consists of comparing (VPCMP) two ymm vectors. The
++ latter can be on either packed bytes or dwords depending on
++ USE_AS_WCSCMP. In order to check the null char, algorithm keeps the
++ matched bytes/dwords, requiring 5 EVEX instructions (3 VPCMP and 2
++ KORD). In general, the costs of comparing VEC_SIZE bytes (32-bytes)
++ are 3 VPCMP and 2 KORD instructions, together with VMOVU and ktestd
++ instructions. Main loop (away from page boundary) compares 4
++ vectors at a time, effectively comparing 4 x VEC_SIZE bytes (128
++ bytes) on each loop.
++
++ The routine strncmp/wcsncmp (enabled by defining USE_AS_STRNCMP) logic
++ is the same as strcmp, except that a maximum offset is tracked. If
++ the maximum offset is reached before a difference is found, zero is
++ returned. */
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRCMP)
++# ifdef USE_AS_STRNCMP
++ /* Check for simple cases (0 or 1) in offset. */
++ cmp $1, %RDX_LP
++ je L(char0)
++ jb L(zero)
++# ifdef USE_AS_WCSCMP
++ /* Convert units: from wide to byte char. */
++ shl $2, %RDX_LP
++# endif
++ /* Register %r11 tracks the maximum offset. */
++ mov %RDX_LP, %R11_LP
++# endif
++ movl %edi, %eax
++ xorl %edx, %edx
++ /* Make %XMMZERO (%YMMZERO) all zeros in this function. */
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++ orl %esi, %eax
++ andl $(PAGE_SIZE - 1), %eax
++ cmpl $(PAGE_SIZE - (VEC_SIZE * 4)), %eax
++ jg L(cross_page)
++ /* Start comparing 4 vectors. */
++ VMOVU (%rdi), %YMM0
++ VMOVU (%rsi), %YMM1
++
++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
++ VPCMP $4, %YMM0, %YMM1, %k0
++
++ /* Check for NULL in YMM0. */
++ VPCMP $0, %YMMZERO, %YMM0, %k1
++ /* Check for NULL in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k2
++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
++ kord %k1, %k2, %k1
++
++ /* Each bit in K1 represents:
++ 1. A mismatch in YMM0 and YMM1. Or
++ 2. A NULL in YMM0 or YMM1.
++ */
++ kord %k0, %k1, %k1
++
++ ktestd %k1, %k1
++ je L(next_3_vectors)
++ kmovd %k1, %ecx
++ tzcntl %ecx, %edx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edx
++# endif
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the mismatched index (%rdx) is after the maximum
++ offset (%r11). */
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi, %rdx), %ecx
++ cmpl (%rsi, %rdx), %ecx
++ je L(return)
++L(wcscmp_return):
++ setl %al
++ negl %eax
++ orl $1, %eax
++L(return):
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++ ret
++
++ .p2align 4
++L(return_vec_size):
++ kmovd %k1, %ecx
++ tzcntl %ecx, %edx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edx
++# endif
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the mismatched index (%rdx + VEC_SIZE) is after
++ the maximum offset (%r11). */
++ addq $VEC_SIZE, %rdx
++ cmpq %r11, %rdx
++ jae L(zero)
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi, %rdx), %ecx
++ cmpl (%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl VEC_SIZE(%rdi, %rdx), %ecx
++ cmpl VEC_SIZE(%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl VEC_SIZE(%rdi, %rdx), %eax
++ movzbl VEC_SIZE(%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(return_2_vec_size):
++ kmovd %k1, %ecx
++ tzcntl %ecx, %edx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edx
++# endif
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the mismatched index (%rdx + 2 * VEC_SIZE) is
++ after the maximum offset (%r11). */
++ addq $(VEC_SIZE * 2), %rdx
++ cmpq %r11, %rdx
++ jae L(zero)
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi, %rdx), %ecx
++ cmpl (%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (VEC_SIZE * 2)(%rdi, %rdx), %ecx
++ cmpl (VEC_SIZE * 2)(%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (VEC_SIZE * 2)(%rdi, %rdx), %eax
++ movzbl (VEC_SIZE * 2)(%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(return_3_vec_size):
++ kmovd %k1, %ecx
++ tzcntl %ecx, %edx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edx
++# endif
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the mismatched index (%rdx + 3 * VEC_SIZE) is
++ after the maximum offset (%r11). */
++ addq $(VEC_SIZE * 3), %rdx
++ cmpq %r11, %rdx
++ jae L(zero)
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi, %rdx), %ecx
++ cmpl (%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (VEC_SIZE * 3)(%rdi, %rdx), %ecx
++ cmpl (VEC_SIZE * 3)(%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (VEC_SIZE * 3)(%rdi, %rdx), %eax
++ movzbl (VEC_SIZE * 3)(%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(next_3_vectors):
++ VMOVU VEC_SIZE(%rdi), %YMM0
++ VMOVU VEC_SIZE(%rsi), %YMM1
++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
++ VPCMP $4, %YMM0, %YMM1, %k0
++ VPCMP $0, %YMMZERO, %YMM0, %k1
++ VPCMP $0, %YMMZERO, %YMM1, %k2
++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ ktestd %k1, %k1
++ jne L(return_vec_size)
++
++ VMOVU (VEC_SIZE * 2)(%rdi), %YMM2
++ VMOVU (VEC_SIZE * 3)(%rdi), %YMM3
++ VMOVU (VEC_SIZE * 2)(%rsi), %YMM4
++ VMOVU (VEC_SIZE * 3)(%rsi), %YMM5
++
++ /* Each bit in K0 represents a mismatch in YMM2 and YMM4. */
++ VPCMP $4, %YMM2, %YMM4, %k0
++ VPCMP $0, %YMMZERO, %YMM2, %k1
++ VPCMP $0, %YMMZERO, %YMM4, %k2
++ /* Each bit in K1 represents a NULL in YMM2 or YMM4. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ ktestd %k1, %k1
++ jne L(return_2_vec_size)
++
++ /* Each bit in K0 represents a mismatch in YMM3 and YMM5. */
++ VPCMP $4, %YMM3, %YMM5, %k0
++ VPCMP $0, %YMMZERO, %YMM3, %k1
++ VPCMP $0, %YMMZERO, %YMM5, %k2
++ /* Each bit in K1 represents a NULL in YMM3 or YMM5. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ ktestd %k1, %k1
++ jne L(return_3_vec_size)
++L(main_loop_header):
++ leaq (VEC_SIZE * 4)(%rdi), %rdx
++ movl $PAGE_SIZE, %ecx
++ /* Align load via RAX. */
++ andq $-(VEC_SIZE * 4), %rdx
++ subq %rdi, %rdx
++ leaq (%rdi, %rdx), %rax
++# ifdef USE_AS_STRNCMP
++ /* Starting from this point, the maximum offset, or simply the
++ 'offset', DECREASES by the same amount when base pointers are
++ moved forward. Return 0 when:
++ 1) On match: offset <= the matched vector index.
++ 2) On mismatch, offset is before the mismatched index.
++ */
++ subq %rdx, %r11
++ jbe L(zero)
++# endif
++ addq %rsi, %rdx
++ movq %rdx, %rsi
++ andl $(PAGE_SIZE - 1), %esi
++ /* Number of bytes before page crossing. */
++ subq %rsi, %rcx
++ /* Number of VEC_SIZE * 4 blocks before page crossing. */
++ shrq $DIVIDE_BY_VEC_4_SHIFT, %rcx
++ /* ESI: Number of VEC_SIZE * 4 blocks before page crossing. */
++ movl %ecx, %esi
++ jmp L(loop_start)
++
++ .p2align 4
++L(loop):
++# ifdef USE_AS_STRNCMP
++ /* Base pointers are moved forward by 4 * VEC_SIZE. Decrease
++ the maximum offset (%r11) by the same amount. */
++ subq $(VEC_SIZE * 4), %r11
++ jbe L(zero)
++# endif
++ addq $(VEC_SIZE * 4), %rax
++ addq $(VEC_SIZE * 4), %rdx
++L(loop_start):
++ testl %esi, %esi
++ leal -1(%esi), %esi
++ je L(loop_cross_page)
++L(back_to_loop):
++ /* Main loop, comparing 4 vectors at a time. */
++ VMOVA (%rax), %YMM0
++ VMOVA VEC_SIZE(%rax), %YMM2
++ VMOVA (VEC_SIZE * 2)(%rax), %YMM4
++ VMOVA (VEC_SIZE * 3)(%rax), %YMM6
++ VMOVU (%rdx), %YMM1
++ VMOVU VEC_SIZE(%rdx), %YMM3
++ VMOVU (VEC_SIZE * 2)(%rdx), %YMM5
++ VMOVU (VEC_SIZE * 3)(%rdx), %YMM7
++
++ VPCMP $4, %YMM0, %YMM1, %k0
++ VPCMP $0, %YMMZERO, %YMM0, %k1
++ VPCMP $0, %YMMZERO, %YMM1, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K4 represents a NULL or a mismatch in YMM0 and
++ YMM1. */
++ kord %k0, %k1, %k4
++
++ VPCMP $4, %YMM2, %YMM3, %k0
++ VPCMP $0, %YMMZERO, %YMM2, %k1
++ VPCMP $0, %YMMZERO, %YMM3, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K5 represents a NULL or a mismatch in YMM2 and
++ YMM3. */
++ kord %k0, %k1, %k5
++
++ VPCMP $4, %YMM4, %YMM5, %k0
++ VPCMP $0, %YMMZERO, %YMM4, %k1
++ VPCMP $0, %YMMZERO, %YMM5, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K6 represents a NULL or a mismatch in YMM4 and
++ YMM5. */
++ kord %k0, %k1, %k6
++
++ VPCMP $4, %YMM6, %YMM7, %k0
++ VPCMP $0, %YMMZERO, %YMM6, %k1
++ VPCMP $0, %YMMZERO, %YMM7, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K7 represents a NULL or a mismatch in YMM6 and
++ YMM7. */
++ kord %k0, %k1, %k7
++
++ kord %k4, %k5, %k0
++ kord %k6, %k7, %k1
++
++ /* Test each mask (32 bits) individually because for VEC_SIZE
++ == 32 it is not possible to OR the four masks and keep all
++ bits in a 64-bit integer register, differing from SSE2 strcmp
++ where ORing is possible. */
++ kortestd %k0, %k1
++ je L(loop)
++ ktestd %k4, %k4
++ je L(test_vec)
++ kmovd %k4, %edi
++ tzcntl %edi, %ecx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %ecx
++# endif
++# ifdef USE_AS_STRNCMP
++ cmpq %rcx, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %edi
++ cmpl (%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %edi
++ cmpl (%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(test_vec):
++# ifdef USE_AS_STRNCMP
++ /* The first vector matched. Return 0 if the maximum offset
++ (%r11) <= VEC_SIZE. */
++ cmpq $VEC_SIZE, %r11
++ jbe L(zero)
++# endif
++ ktestd %k5, %k5
++ je L(test_2_vec)
++ kmovd %k5, %ecx
++ tzcntl %ecx, %edi
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edi
++# endif
++# ifdef USE_AS_STRNCMP
++ addq $VEC_SIZE, %rdi
++ cmpq %rdi, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rdi), %ecx
++ cmpl (%rdx, %rdi), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rdi), %eax
++ movzbl (%rdx, %rdi), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl VEC_SIZE(%rsi, %rdi), %ecx
++ cmpl VEC_SIZE(%rdx, %rdi), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl VEC_SIZE(%rax, %rdi), %eax
++ movzbl VEC_SIZE(%rdx, %rdi), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(test_2_vec):
++# ifdef USE_AS_STRNCMP
++ /* The first 2 vectors matched. Return 0 if the maximum offset
++ (%r11) <= 2 * VEC_SIZE. */
++ cmpq $(VEC_SIZE * 2), %r11
++ jbe L(zero)
++# endif
++ ktestd %k6, %k6
++ je L(test_3_vec)
++ kmovd %k6, %ecx
++ tzcntl %ecx, %edi
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edi
++# endif
++# ifdef USE_AS_STRNCMP
++ addq $(VEC_SIZE * 2), %rdi
++ cmpq %rdi, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rdi), %ecx
++ cmpl (%rdx, %rdi), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rdi), %eax
++ movzbl (%rdx, %rdi), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (VEC_SIZE * 2)(%rsi, %rdi), %ecx
++ cmpl (VEC_SIZE * 2)(%rdx, %rdi), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (VEC_SIZE * 2)(%rax, %rdi), %eax
++ movzbl (VEC_SIZE * 2)(%rdx, %rdi), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(test_3_vec):
++# ifdef USE_AS_STRNCMP
++ /* The first 3 vectors matched. Return 0 if the maximum offset
++ (%r11) <= 3 * VEC_SIZE. */
++ cmpq $(VEC_SIZE * 3), %r11
++ jbe L(zero)
++# endif
++ kmovd %k7, %esi
++ tzcntl %esi, %ecx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %ecx
++# endif
++# ifdef USE_AS_STRNCMP
++ addq $(VEC_SIZE * 3), %rcx
++ cmpq %rcx, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %esi
++ cmpl (%rdx, %rcx), %esi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (VEC_SIZE * 3)(%rsi, %rcx), %esi
++ cmpl (VEC_SIZE * 3)(%rdx, %rcx), %esi
++ jne L(wcscmp_return)
++# else
++ movzbl (VEC_SIZE * 3)(%rax, %rcx), %eax
++ movzbl (VEC_SIZE * 3)(%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(loop_cross_page):
++ xorl %r10d, %r10d
++ movq %rdx, %rcx
++ /* Align load via RDX. We load the extra ECX bytes which should
++ be ignored. */
++ andl $((VEC_SIZE * 4) - 1), %ecx
++ /* R10 is -RCX. */
++ subq %rcx, %r10
++
++ /* This works only if VEC_SIZE * 2 == 64. */
++# if (VEC_SIZE * 2) != 64
++# error (VEC_SIZE * 2) != 64
++# endif
++
++ /* Check if the first VEC_SIZE * 2 bytes should be ignored. */
++ cmpl $(VEC_SIZE * 2), %ecx
++ jge L(loop_cross_page_2_vec)
++
++ VMOVU (%rax, %r10), %YMM2
++ VMOVU VEC_SIZE(%rax, %r10), %YMM3
++ VMOVU (%rdx, %r10), %YMM4
++ VMOVU VEC_SIZE(%rdx, %r10), %YMM5
++
++ VPCMP $4, %YMM4, %YMM2, %k0
++ VPCMP $0, %YMMZERO, %YMM2, %k1
++ VPCMP $0, %YMMZERO, %YMM4, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch in YMM2 and
++ YMM4. */
++ kord %k0, %k1, %k1
++
++ VPCMP $4, %YMM5, %YMM3, %k3
++ VPCMP $0, %YMMZERO, %YMM3, %k4
++ VPCMP $0, %YMMZERO, %YMM5, %k5
++ kord %k4, %k5, %k4
++ /* Each bit in K3 represents a NULL or a mismatch in YMM3 and
++ YMM5. */
++ kord %k3, %k4, %k3
++
++# ifdef USE_AS_WCSCMP
++ /* NB: Each bit in K1/K3 represents 4-byte element. */
++ kshiftlw $8, %k3, %k2
++ /* NB: Divide shift count by 4 since each bit in K1 represents 4
++ bytes. */
++ movl %ecx, %SHIFT_REG32
++ sarl $2, %SHIFT_REG32
++# else
++ kshiftlq $32, %k3, %k2
++# endif
++
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ korq %k1, %k2, %k1
++ kmovq %k1, %rdi
++
++ /* Since ECX < VEC_SIZE * 2, simply skip the first ECX bytes. */
++ shrxq %SHIFT_REG64, %rdi, %rdi
++ testq %rdi, %rdi
++ je L(loop_cross_page_2_vec)
++ tzcntq %rdi, %rcx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %ecx
++# endif
++# ifdef USE_AS_STRNCMP
++ cmpq %rcx, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %edi
++ cmpl (%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %edi
++ cmpl (%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++ .p2align 4
++L(loop_cross_page_2_vec):
++ /* The first VEC_SIZE * 2 bytes match or are ignored. */
++ VMOVU (VEC_SIZE * 2)(%rax, %r10), %YMM0
++ VMOVU (VEC_SIZE * 3)(%rax, %r10), %YMM1
++ VMOVU (VEC_SIZE * 2)(%rdx, %r10), %YMM2
++ VMOVU (VEC_SIZE * 3)(%rdx, %r10), %YMM3
++
++ VPCMP $4, %YMM0, %YMM2, %k0
++ VPCMP $0, %YMMZERO, %YMM0, %k1
++ VPCMP $0, %YMMZERO, %YMM2, %k2
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch in YMM0 and
++ YMM2. */
++ kord %k0, %k1, %k1
++
++ VPCMP $4, %YMM1, %YMM3, %k3
++ VPCMP $0, %YMMZERO, %YMM1, %k4
++ VPCMP $0, %YMMZERO, %YMM3, %k5
++ kord %k4, %k5, %k4
++ /* Each bit in K3 represents a NULL or a mismatch in YMM1 and
++ YMM3. */
++ kord %k3, %k4, %k3
++
++# ifdef USE_AS_WCSCMP
++ /* NB: Each bit in K1/K3 represents 4-byte element. */
++ kshiftlw $8, %k3, %k2
++# else
++ kshiftlq $32, %k3, %k2
++# endif
++
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ korq %k1, %k2, %k1
++ kmovq %k1, %rdi
++
++ xorl %r8d, %r8d
++ /* If ECX > VEC_SIZE * 2, skip ECX - (VEC_SIZE * 2) bytes. */
++ subl $(VEC_SIZE * 2), %ecx
++ jle 1f
++ /* R8 has number of bytes skipped. */
++ movl %ecx, %r8d
++# ifdef USE_AS_WCSCMP
++ /* NB: Divide shift count by 4 since each bit in K1 represents 4
++ bytes. */
++ sarl $2, %ecx
++# endif
++ /* Skip ECX bytes. */
++ shrq %cl, %rdi
++1:
++ /* Before jumping back to the loop, set ESI to the number of
++ VEC_SIZE * 4 blocks before page crossing. */
++ movl $(PAGE_SIZE / (VEC_SIZE * 4) - 1), %esi
++
++ testq %rdi, %rdi
++# ifdef USE_AS_STRNCMP
++ /* At this point, if the %rdi value is 0, VEC_SIZE*4+%r10
++ bytes starting from %rax have already been tested. This label
++ checks whether the strncmp maximum offset was reached or not. */
++ je L(string_nbyte_offset_check)
++# else
++ je L(back_to_loop)
++# endif
++ tzcntq %rdi, %rcx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %ecx
++# endif
++ addq %r10, %rcx
++ /* Adjust for number of bytes skipped. */
++ addq %r8, %rcx
++# ifdef USE_AS_STRNCMP
++ addq $(VEC_SIZE * 2), %rcx
++ subq %rcx, %r11
++ jbe L(zero)
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (%rsi, %rcx), %edi
++ cmpl (%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (%rax, %rcx), %eax
++ movzbl (%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# else
++# ifdef USE_AS_WCSCMP
++ movq %rax, %rsi
++ xorl %eax, %eax
++ movl (VEC_SIZE * 2)(%rsi, %rcx), %edi
++ cmpl (VEC_SIZE * 2)(%rdx, %rcx), %edi
++ jne L(wcscmp_return)
++# else
++ movzbl (VEC_SIZE * 2)(%rax, %rcx), %eax
++ movzbl (VEC_SIZE * 2)(%rdx, %rcx), %edx
++ subl %edx, %eax
++# endif
++# endif
++ ret
++
++# ifdef USE_AS_STRNCMP
++L(string_nbyte_offset_check):
++ leaq (VEC_SIZE * 4)(%r10), %r10
++ cmpq %r10, %r11
++ jbe L(zero)
++ jmp L(back_to_loop)
++# endif
++
++ .p2align 4
++L(cross_page_loop):
++ /* Check one byte/dword at a time. */
++# ifdef USE_AS_WCSCMP
++ cmpl %ecx, %eax
++# else
++ subl %ecx, %eax
++# endif
++ jne L(different)
++ addl $SIZE_OF_CHAR, %edx
++ cmpl $(VEC_SIZE * 4), %edx
++ je L(main_loop_header)
++# ifdef USE_AS_STRNCMP
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++# ifdef USE_AS_WCSCMP
++ movl (%rdi, %rdx), %eax
++ movl (%rsi, %rdx), %ecx
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %ecx
++# endif
++ /* Check null char. */
++ testl %eax, %eax
++ jne L(cross_page_loop)
++ /* Since %eax == 0, subtract is OK for both SIGNED and UNSIGNED
++ comparisons. */
++ subl %ecx, %eax
++# ifndef USE_AS_WCSCMP
++L(different):
++# endif
++ ret
++
++# ifdef USE_AS_WCSCMP
++ .p2align 4
++L(different):
++ /* Use movl to avoid modifying EFLAGS. */
++ movl $0, %eax
++ setl %al
++ negl %eax
++ orl $1, %eax
++ ret
++# endif
++
++# ifdef USE_AS_STRNCMP
++ .p2align 4
++L(zero):
++ xorl %eax, %eax
++ ret
++
++ .p2align 4
++L(char0):
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi), %ecx
++ cmpl (%rsi), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rsi), %ecx
++ movzbl (%rdi), %eax
++ subl %ecx, %eax
++# endif
++ ret
++# endif
++
++ .p2align 4
++L(last_vector):
++ addq %rdx, %rdi
++ addq %rdx, %rsi
++# ifdef USE_AS_STRNCMP
++ subq %rdx, %r11
++# endif
++ tzcntl %ecx, %edx
++# ifdef USE_AS_WCSCMP
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ sall $2, %edx
++# endif
++# ifdef USE_AS_STRNCMP
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++# ifdef USE_AS_WCSCMP
++ xorl %eax, %eax
++ movl (%rdi, %rdx), %ecx
++ cmpl (%rsi, %rdx), %ecx
++ jne L(wcscmp_return)
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %edx
++ subl %edx, %eax
++# endif
++ ret
++
++ /* Comparing on page boundary region requires special treatment:
++ It must be done one vector at a time, starting with the wider
++ ymm vector if possible, if not, with xmm. If fetching 16 bytes
++ (xmm) still passes the boundary, byte comparison must be done.
++ */
++ .p2align 4
++L(cross_page):
++ /* Try one ymm vector at a time. */
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ jg L(cross_page_1_vector)
++L(loop_1_vector):
++ VMOVU (%rdi, %rdx), %YMM0
++ VMOVU (%rsi, %rdx), %YMM1
++
++ /* Each bit in K0 represents a mismatch in YMM0 and YMM1. */
++ VPCMP $4, %YMM0, %YMM1, %k0
++ VPCMP $0, %YMMZERO, %YMM0, %k1
++ VPCMP $0, %YMMZERO, %YMM1, %k2
++ /* Each bit in K1 represents a NULL in YMM0 or YMM1. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ kmovd %k1, %ecx
++ testl %ecx, %ecx
++ jne L(last_vector)
++
++ addl $VEC_SIZE, %edx
++
++ addl $VEC_SIZE, %eax
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the current offset (%rdx) >= the maximum offset
++ (%r11). */
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ jle L(loop_1_vector)
++L(cross_page_1_vector):
++ /* Less than 32 bytes to check, try one xmm vector. */
++ cmpl $(PAGE_SIZE - 16), %eax
++ jg L(cross_page_1_xmm)
++ VMOVU (%rdi, %rdx), %XMM0
++ VMOVU (%rsi, %rdx), %XMM1
++
++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
++ VPCMP $4, %XMM0, %XMM1, %k0
++ VPCMP $0, %XMMZERO, %XMM0, %k1
++ VPCMP $0, %XMMZERO, %XMM1, %k2
++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
++ korw %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ korw %k0, %k1, %k1
++ kmovw %k1, %ecx
++ testl %ecx, %ecx
++ jne L(last_vector)
++
++ addl $16, %edx
++# ifndef USE_AS_WCSCMP
++ addl $16, %eax
++# endif
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the current offset (%rdx) >= the maximum offset
++ (%r11). */
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++
++L(cross_page_1_xmm):
++# ifndef USE_AS_WCSCMP
++ /* Less than 16 bytes to check, try 8 byte vector. NB: No need
++ for wcscmp nor wcsncmp since wide char is 4 bytes. */
++ cmpl $(PAGE_SIZE - 8), %eax
++ jg L(cross_page_8bytes)
++ vmovq (%rdi, %rdx), %XMM0
++ vmovq (%rsi, %rdx), %XMM1
++
++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
++ VPCMP $4, %XMM0, %XMM1, %k0
++ VPCMP $0, %XMMZERO, %XMM0, %k1
++ VPCMP $0, %XMMZERO, %XMM1, %k2
++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ kmovd %k1, %ecx
++
++# ifdef USE_AS_WCSCMP
++ /* Only last 2 bits are valid. */
++ andl $0x3, %ecx
++# else
++ /* Only last 8 bits are valid. */
++ andl $0xff, %ecx
++# endif
++
++ testl %ecx, %ecx
++ jne L(last_vector)
++
++ addl $8, %edx
++ addl $8, %eax
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the current offset (%rdx) >= the maximum offset
++ (%r11). */
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++
++L(cross_page_8bytes):
++ /* Less than 8 bytes to check, try 4 byte vector. */
++ cmpl $(PAGE_SIZE - 4), %eax
++ jg L(cross_page_4bytes)
++ vmovd (%rdi, %rdx), %XMM0
++ vmovd (%rsi, %rdx), %XMM1
++
++ /* Each bit in K0 represents a mismatch in XMM0 and XMM1. */
++ VPCMP $4, %XMM0, %XMM1, %k0
++ VPCMP $0, %XMMZERO, %XMM0, %k1
++ VPCMP $0, %XMMZERO, %XMM1, %k2
++ /* Each bit in K1 represents a NULL in XMM0 or XMM1. */
++ kord %k1, %k2, %k1
++ /* Each bit in K1 represents a NULL or a mismatch. */
++ kord %k0, %k1, %k1
++ kmovd %k1, %ecx
++
++# ifdef USE_AS_WCSCMP
++ /* Only the last bit is valid. */
++ andl $0x1, %ecx
++# else
++ /* Only last 4 bits are valid. */
++ andl $0xf, %ecx
++# endif
++
++ testl %ecx, %ecx
++ jne L(last_vector)
++
++ addl $4, %edx
++# ifdef USE_AS_STRNCMP
++ /* Return 0 if the current offset (%rdx) >= the maximum offset
++ (%r11). */
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++
++L(cross_page_4bytes):
++# endif
++ /* Less than 4 bytes to check, try one byte/dword at a time. */
++# ifdef USE_AS_STRNCMP
++ cmpq %r11, %rdx
++ jae L(zero)
++# endif
++# ifdef USE_AS_WCSCMP
++ movl (%rdi, %rdx), %eax
++ movl (%rsi, %rdx), %ecx
++# else
++ movzbl (%rdi, %rdx), %eax
++ movzbl (%rsi, %rdx), %ecx
++# endif
++ testl %eax, %eax
++ jne L(cross_page_loop)
++ subl %ecx, %eax
++ ret
++END (STRCMP)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
+index 16ae72a4c8..df4ba875d9 100644
+--- a/sysdeps/x86_64/multiarch/strcmp.c
++++ b/sysdeps/x86_64/multiarch/strcmp.c
+@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
++ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
+
+ if (CPU_FEATURES_ARCH_P (cpu_features, Fast_Unaligned_Load))
+ return OPTIMIZE (sse2_unaligned);
+diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
+new file mode 100644
+index 0000000000..c2c581ecf7
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRCPY
++# define STRCPY __strcpy_avx2_rtm /* Symbol name used unless the includer already chose one. */
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST /* NOTE(review): defined outside this file; presumably the XTEST-based return sequence -- confirm. */
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) /* Every exit jumps to the one shared return label in strcpy-avx2.S. */
++
++#define SECTION(p) p##.avx.rtm /* Section becomes <p>.avx.rtm instead of the default <p>.avx. */
++
++#include "strcpy-avx2.S" /* Reuse the AVX2 implementation with the overrides above. */
+diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S
+index 3f2f9e8170..1ce17253ab 100644
+--- a/sysdeps/x86_64/multiarch/strcpy-avx2.S
++++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S
+@@ -37,6 +37,10 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ /* zero register */
+ #define xmmZ xmm0
+ #define ymmZ ymm0
+@@ -46,7 +50,7 @@
+
+ # ifndef USE_AS_STRCAT
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRCPY)
+ # ifdef USE_AS_STRNCPY
+ mov %RDX_LP, %R8_LP
+@@ -369,8 +373,8 @@ L(CopyVecSizeExit):
+ lea 1(%rdi), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(CopyTwoVecSize1):
+@@ -553,8 +557,7 @@ L(Exit1):
+ lea 2(%rdi), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit2):
+@@ -569,8 +572,7 @@ L(Exit2):
+ lea 3(%rdi), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit3):
+@@ -584,8 +586,7 @@ L(Exit3):
+ lea 4(%rdi), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit4_7):
+@@ -602,8 +603,7 @@ L(Exit4_7):
+ lea 1(%rdi, %rdx), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit8_15):
+@@ -620,8 +620,7 @@ L(Exit8_15):
+ lea 1(%rdi, %rdx), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit16_31):
+@@ -638,8 +637,7 @@ L(Exit16_31):
+ lea 1(%rdi, %rdx), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Exit32_63):
+@@ -656,8 +654,7 @@ L(Exit32_63):
+ lea 1(%rdi, %rdx), %rdi
+ jnz L(StrncpyFillTailWithZero)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # ifdef USE_AS_STRNCPY
+
+@@ -671,8 +668,7 @@ L(StrncpyExit1):
+ # ifdef USE_AS_STRCAT
+ movb $0, 1(%rdi)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit2):
+@@ -684,8 +680,7 @@ L(StrncpyExit2):
+ # ifdef USE_AS_STRCAT
+ movb $0, 2(%rdi)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit3_4):
+@@ -699,8 +694,7 @@ L(StrncpyExit3_4):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi, %r8)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit5_8):
+@@ -714,8 +708,7 @@ L(StrncpyExit5_8):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi, %r8)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit9_16):
+@@ -729,8 +722,7 @@ L(StrncpyExit9_16):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi, %r8)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit17_32):
+@@ -744,8 +736,7 @@ L(StrncpyExit17_32):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi, %r8)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit33_64):
+@@ -760,8 +751,7 @@ L(StrncpyExit33_64):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi, %r8)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(StrncpyExit65):
+@@ -778,50 +768,43 @@ L(StrncpyExit65):
+ # ifdef USE_AS_STRCAT
+ movb $0, 65(%rdi)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # ifndef USE_AS_STRCAT
+
+ .p2align 4
+ L(Fill1):
+ mov %dl, (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Fill2):
+ mov %dx, (%rdi)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Fill3_4):
+ mov %dx, (%rdi)
+ mov %dx, -2(%rdi, %r8)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Fill5_8):
+ mov %edx, (%rdi)
+ mov %edx, -4(%rdi, %r8)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Fill9_16):
+ mov %rdx, (%rdi)
+ mov %rdx, -8(%rdi, %r8)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(Fill17_32):
+ vmovdqu %xmmZ, (%rdi)
+ vmovdqu %xmmZ, -16(%rdi, %r8)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(CopyVecSizeUnalignedVec2):
+@@ -898,8 +881,7 @@ L(Fill):
+ cmp $1, %r8d
+ ja L(Fill2)
+ je L(Fill1)
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ /* end of ifndef USE_AS_STRCAT */
+ # endif
+@@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3):
+ # ifdef USE_AS_STRCAT
+ movb $0, (VEC_SIZE * 4)(%rdi)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(UnalignedFourVecSizeLeaveCase2):
+@@ -1001,16 +982,14 @@ L(StrncpyExit):
+ # ifdef USE_AS_STRCAT
+ movb $0, (%rdi)
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(ExitZero):
+ # ifndef USE_AS_STRCAT
+ mov %rdi, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ # endif
+
+diff --git a/sysdeps/x86_64/multiarch/strcpy-evex.S b/sysdeps/x86_64/multiarch/strcpy-evex.S
+new file mode 100644
+index 0000000000..a343a1a692
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strcpy-evex.S
+@@ -0,0 +1,1003 @@
++/* strcpy with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# ifndef USE_AS_STRCAT
++# include <sysdep.h>
++
++# ifndef STRCPY
++# define STRCPY __strcpy_evex
++# endif
++
++# endif
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
++/* Number of bytes in a vector register */
++# ifndef VEC_SIZE
++# define VEC_SIZE 32
++# endif
++
++# define XMM2 xmm18
++# define XMM3 xmm19
++
++# define YMM2 ymm18
++# define YMM3 ymm19
++# define YMM4 ymm20
++# define YMM5 ymm21
++# define YMM6 ymm22
++# define YMM7 ymm23
++
++# ifndef USE_AS_STRCAT
++
++/* Zero register: cleared in the prologue and compared against to find NUL bytes. */
++# define XMMZERO xmm16
++# define YMMZERO ymm16
++# define YMM1 ymm17
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRCPY)
++# ifdef USE_AS_STRNCPY
++ mov %RDX_LP, %R8_LP /* r8 = length limit from the third argument register */
++ test %R8_LP, %R8_LP
++ jz L(ExitZero) /* limit is zero: nothing to copy */
++# endif
++ mov %rsi, %rcx /* rcx = source pointer, reduced to an offset below */
++# ifndef USE_AS_STPCPY
++ mov %rdi, %rax /* save result */
++# endif
++
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO /* clear the zero register */
++# endif
++
++ and $((VEC_SIZE * 4) - 1), %ecx /* ecx = source offset inside a (VEC_SIZE * 4)-byte block */
++ cmp $(VEC_SIZE * 2), %ecx
++ jbe L(SourceStringAlignmentLessTwoVecSize) /* offset <= VEC_SIZE * 2: start with two unaligned loads */
++
++ and $-VEC_SIZE, %rsi /* rsi = source rounded down to a VEC_SIZE boundary */
++ and $(VEC_SIZE - 1), %ecx /* ecx = bytes between that boundary and the real start */
++
++ vpcmpb $0, (%rsi), %YMMZERO, %k0 /* k0: one bit per byte of the aligned vector that is zero */
++ kmovd %k0, %edx
++ shr %cl, %rdx /* drop the bits for bytes that precede the string */
++
++# ifdef USE_AS_STRNCPY
++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
++ mov $VEC_SIZE, %r10
++ sub %rcx, %r10 /* r10 = VEC_SIZE - misalignment */
++ cmp %r10, %r8
++# else
++ mov $(VEC_SIZE + 1), %r10
++ sub %rcx, %r10 /* r10 = VEC_SIZE + 1 - misalignment */
++ cmp %r10, %r8
++# endif
++ jbe L(CopyVecSizeTailCase2OrCase3) /* length limit <= r10: it ends within this first vector */
++# endif
++ test %edx, %edx
++ jnz L(CopyVecSizeTail) /* NUL found in the first vector */
++
++ vpcmpb $0, VEC_SIZE(%rsi), %YMMZERO, %k1 /* same test on the next aligned vector */
++ kmovd %k1, %edx
++
++# ifdef USE_AS_STRNCPY
++ add $VEC_SIZE, %r10 /* extend the bound by one more vector */
++ cmp %r10, %r8
++ jbe L(CopyTwoVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++ jnz L(CopyTwoVecSize) /* NUL found in the second vector */
++
++ VMOVU (%rsi, %rcx), %YMM2 /* copy VEC_SIZE bytes */
++ VMOVU %YMM2, (%rdi)
++
++/* Main copy path: aligned loads from the source, unaligned stores to the destination */
++ .p2align 4
++L(UnalignVecSizeBoth):
++ sub %rcx, %rdi
++# ifdef USE_AS_STRNCPY
++ add %rcx, %r8
++ sbb %rcx, %rcx
++ or %rcx, %r8
++# endif
++ mov $VEC_SIZE, %rcx
++ VMOVA (%rsi, %rcx), %YMM2
++ VMOVU %YMM2, (%rdi, %rcx)
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
++ vpcmpb $0, %YMM2, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $(VEC_SIZE * 3), %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec2)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVU %YMM2, (%rdi, %rcx)
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3
++ vpcmpb $0, %YMM3, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec3)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVU %YMM3, (%rdi, %rcx)
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM4
++ vpcmpb $0, %YMM4, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec4)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVU %YMM4, (%rdi, %rcx)
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
++ vpcmpb $0, %YMM2, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec2)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVU %YMM2, (%rdi, %rcx)
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM2
++ vpcmpb $0, %YMM2, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec2)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVA VEC_SIZE(%rsi, %rcx), %YMM3
++ VMOVU %YMM2, (%rdi, %rcx)
++ vpcmpb $0, %YMM3, %YMMZERO, %k0
++ kmovd %k0, %edx
++ add $VEC_SIZE, %rcx
++# ifdef USE_AS_STRNCPY
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++# endif
++ test %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec3)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ VMOVU %YMM3, (%rdi, %rcx)
++ mov %rsi, %rdx
++ lea VEC_SIZE(%rsi, %rcx), %rsi
++ and $-(VEC_SIZE * 4), %rsi
++ sub %rsi, %rdx
++ sub %rdx, %rdi
++# ifdef USE_AS_STRNCPY
++ lea (VEC_SIZE * 8)(%r8, %rdx), %r8
++# endif
++L(UnalignedFourVecSizeLoop): /* Load four aligned source vectors and test all of them with one compare. */
++ VMOVA (%rsi), %YMM4
++ VMOVA VEC_SIZE(%rsi), %YMM5
++ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6
++ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7
++ vpminub %YMM5, %YMM4, %YMM2
++ vpminub %YMM7, %YMM6, %YMM3
++ vpminub %YMM2, %YMM3, %YMM2 /* YMM2 = byte-wise unsigned minimum of all four vectors */
++ /* A zero byte in any of the four vectors gives a zero in the minimum, so K7 != 0 means a null byte. */
++ vpcmpb $0, %YMM2, %YMMZERO, %k7
++ kmovd %k7, %edx
++# ifdef USE_AS_STRNCPY
++ sub $(VEC_SIZE * 4), %r8
++ jbe L(UnalignedLeaveCase2OrCase3) /* remaining length is used up within these four vectors */
++# endif
++ test %edx, %edx
++ jnz L(UnalignedFourVecSizeLeave) /* null byte present: locate it vector by vector */
++
++L(UnalignedFourVecSizeLoop_start): /* No null byte: store the four loaded vectors while loading the next four. */
++ add $(VEC_SIZE * 4), %rdi
++ add $(VEC_SIZE * 4), %rsi
++ VMOVU %YMM4, -(VEC_SIZE * 4)(%rdi) /* stores use negative offsets because rdi was already advanced */
++ VMOVA (%rsi), %YMM4
++ VMOVU %YMM5, -(VEC_SIZE * 3)(%rdi)
++ VMOVA VEC_SIZE(%rsi), %YMM5
++ vpminub %YMM5, %YMM4, %YMM2
++ VMOVU %YMM6, -(VEC_SIZE * 2)(%rdi)
++ VMOVA (VEC_SIZE * 2)(%rsi), %YMM6
++ VMOVU %YMM7, -VEC_SIZE(%rdi)
++ VMOVA (VEC_SIZE * 3)(%rsi), %YMM7
++ vpminub %YMM7, %YMM6, %YMM3
++ vpminub %YMM2, %YMM3, %YMM2 /* YMM2 = byte-wise unsigned minimum of the new four vectors */
++ /* A zero byte in any of the four vectors gives a zero in the minimum, so K7 != 0 means a null byte. */
++ vpcmpb $0, %YMM2, %YMMZERO, %k7
++ kmovd %k7, %edx
++# ifdef USE_AS_STRNCPY
++ sub $(VEC_SIZE * 4), %r8
++ jbe L(UnalignedLeaveCase2OrCase3) /* remaining length is used up within these four vectors */
++# endif
++ test %edx, %edx
++ jz L(UnalignedFourVecSizeLoop_start) /* keep looping while no null byte was seen */
++
++L(UnalignedFourVecSizeLeave):
++ vpcmpb $0, %YMM4, %YMMZERO, %k1
++ kmovd %k1, %edx
++ test %edx, %edx
++ jnz L(CopyVecSizeUnaligned_0)
++
++ vpcmpb $0, %YMM5, %YMMZERO, %k2
++ kmovd %k2, %ecx
++ test %ecx, %ecx
++ jnz L(CopyVecSizeUnaligned_16)
++
++ vpcmpb $0, %YMM6, %YMMZERO, %k3
++ kmovd %k3, %edx
++ test %edx, %edx
++ jnz L(CopyVecSizeUnaligned_32)
++
++ vpcmpb $0, %YMM7, %YMMZERO, %k4
++ kmovd %k4, %ecx
++ bsf %ecx, %edx
++ VMOVU %YMM4, (%rdi)
++ VMOVU %YMM5, VEC_SIZE(%rdi)
++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++# ifdef USE_AS_STPCPY
++ lea (VEC_SIZE * 3)(%rdi, %rdx), %rax
++# endif
++ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi)
++ add $(VEC_SIZE - 1), %r8
++ sub %rdx, %r8
++ lea ((VEC_SIZE * 3) + 1)(%rdi, %rdx), %rdi
++ jmp L(StrncpyFillTailWithZero)
++# else
++ add $(VEC_SIZE * 3), %rsi
++ add $(VEC_SIZE * 3), %rdi
++ jmp L(CopyVecSizeExit)
++# endif
++
++/* Source offset within a (VEC_SIZE * 4)-byte block is at most VEC_SIZE * 2 */
++
++L(SourceStringAlignmentLessTwoVecSize):
++ VMOVU (%rsi), %YMM3
++ VMOVU VEC_SIZE(%rsi), %YMM2
++ vpcmpb $0, %YMM3, %YMMZERO, %k0
++ kmovd %k0, %edx
++
++# ifdef USE_AS_STRNCPY
++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
++ cmp $VEC_SIZE, %r8
++# else
++ cmp $(VEC_SIZE + 1), %r8
++# endif
++ jbe L(CopyVecSizeTail1Case2OrCase3)
++# endif
++ test %edx, %edx
++ jnz L(CopyVecSizeTail1)
++
++ VMOVU %YMM3, (%rdi)
++ vpcmpb $0, %YMM2, %YMMZERO, %k0
++ kmovd %k0, %edx
++
++# ifdef USE_AS_STRNCPY
++# if defined USE_AS_STPCPY || defined USE_AS_STRCAT
++ cmp $(VEC_SIZE * 2), %r8
++# else
++ cmp $((VEC_SIZE * 2) + 1), %r8
++# endif
++ jbe L(CopyTwoVecSize1Case2OrCase3)
++# endif
++ test %edx, %edx
++ jnz L(CopyTwoVecSize1)
++
++ and $-VEC_SIZE, %rsi
++ and $(VEC_SIZE - 1), %ecx
++ jmp L(UnalignVecSizeBoth)
++
++/*------End of main part with loops---------------------*/
++
++/* Case1: a NUL byte ends the copy (no length limit is involved) */
++
++# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
++ .p2align 4
++L(CopyVecSize):
++ add %rcx, %rdi /* advance the destination by the current offset in rcx */
++# endif
++L(CopyVecSizeTail):
++ add %rcx, %rsi /* advance the source by the current offset in rcx */
++L(CopyVecSizeTail1):
++ bsf %edx, %edx /* edx = bit index of the first NUL in the compare mask */
++L(CopyVecSizeExit): /* edx = offset of the terminating NUL from rsi; choose a copy routine by size. */
++ cmp $32, %edx
++ jae L(Exit32_63)
++ cmp $16, %edx
++ jae L(Exit16_31)
++ cmp $8, %edx
++ jae L(Exit8_15)
++ cmp $4, %edx
++ jae L(Exit4_7)
++ cmp $3, %edx
++ je L(Exit3)
++ cmp $1, %edx
++ ja L(Exit2)
++ je L(Exit1)
++ movb $0, (%rdi) /* edx == 0: only the terminator is written */
++# ifdef USE_AS_STPCPY
++ lea (%rdi), %rax /* return the address of the terminator */
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub $1, %r8 /* one byte of the length was used by the terminator */
++ lea 1(%rdi), %rdi
++ jnz L(StrncpyFillTailWithZero) /* flags are from the sub above; lea does not change them */
++# endif
++ ret
++
++ .p2align 4
++L(CopyTwoVecSize1):
++ add $VEC_SIZE, %rsi
++ add $VEC_SIZE, %rdi
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub $VEC_SIZE, %r8
++# endif
++ jmp L(CopyVecSizeTail1)
++
++ .p2align 4
++L(CopyTwoVecSize):
++ bsf %edx, %edx
++ add %rcx, %rsi
++ add $VEC_SIZE, %edx
++ sub %ecx, %edx
++ jmp L(CopyVecSizeExit)
++
++ .p2align 4
++L(CopyVecSizeUnaligned_0):
++ bsf %edx, %edx
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++ VMOVU %YMM4, (%rdi)
++ add $((VEC_SIZE * 4) - 1), %r8
++ sub %rdx, %r8
++ lea 1(%rdi, %rdx), %rdi
++ jmp L(StrncpyFillTailWithZero)
++# else
++ jmp L(CopyVecSizeExit)
++# endif
++
++ .p2align 4
++L(CopyVecSizeUnaligned_16):
++ bsf %ecx, %edx
++ VMOVU %YMM4, (%rdi)
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++# ifdef USE_AS_STPCPY
++ lea VEC_SIZE(%rdi, %rdx), %rax
++# endif
++ VMOVU %YMM5, VEC_SIZE(%rdi)
++ add $((VEC_SIZE * 3) - 1), %r8
++ sub %rdx, %r8
++ lea (VEC_SIZE + 1)(%rdi, %rdx), %rdi
++ jmp L(StrncpyFillTailWithZero)
++# else
++ add $VEC_SIZE, %rsi
++ add $VEC_SIZE, %rdi
++ jmp L(CopyVecSizeExit)
++# endif
++
++ .p2align 4
++L(CopyVecSizeUnaligned_32):
++ bsf %edx, %edx
++ VMOVU %YMM4, (%rdi)
++ VMOVU %YMM5, VEC_SIZE(%rdi)
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++# ifdef USE_AS_STPCPY
++ lea (VEC_SIZE * 2)(%rdi, %rdx), %rax
++# endif
++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
++ add $((VEC_SIZE * 2) - 1), %r8
++ sub %rdx, %r8
++ lea ((VEC_SIZE * 2) + 1)(%rdi, %rdx), %rdi
++ jmp L(StrncpyFillTailWithZero)
++# else
++ add $(VEC_SIZE * 2), %rsi
++ add $(VEC_SIZE * 2), %rdi
++ jmp L(CopyVecSizeExit)
++# endif
++
++# ifdef USE_AS_STRNCPY
++# ifndef USE_AS_STRCAT
++ .p2align 4
++L(CopyVecSizeUnalignedVec6):
++ VMOVU %YMM6, (%rdi, %rcx)
++ jmp L(CopyVecSizeVecExit)
++
++ .p2align 4
++L(CopyVecSizeUnalignedVec5):
++ VMOVU %YMM5, (%rdi, %rcx)
++ jmp L(CopyVecSizeVecExit)
++
++ .p2align 4
++L(CopyVecSizeUnalignedVec4):
++ VMOVU %YMM4, (%rdi, %rcx)
++ jmp L(CopyVecSizeVecExit)
++
++ .p2align 4
++L(CopyVecSizeUnalignedVec3):
++ VMOVU %YMM3, (%rdi, %rcx)
++ jmp L(CopyVecSizeVecExit)
++# endif
++
++/* Case2: the length limit and a NUL byte fall in the same vector */
++
++ .p2align 4
++L(CopyVecSizeCase2):
++ add $VEC_SIZE, %r8
++ add %rcx, %rdi
++ add %rcx, %rsi
++ bsf %edx, %edx
++ cmp %r8d, %edx
++ jb L(CopyVecSizeExit)
++ jmp L(StrncpyExit)
++
++ .p2align 4
++L(CopyTwoVecSizeCase2):
++ add %rcx, %rsi
++ bsf %edx, %edx
++ add $VEC_SIZE, %edx
++ sub %ecx, %edx
++ cmp %r8d, %edx
++ jb L(CopyVecSizeExit)
++ jmp L(StrncpyExit)
++
++L(CopyVecSizeTailCase2):
++ add %rcx, %rsi
++ bsf %edx, %edx
++ cmp %r8d, %edx
++ jb L(CopyVecSizeExit)
++ jmp L(StrncpyExit)
++
++L(CopyVecSizeTail1Case2):
++ bsf %edx, %edx
++ cmp %r8d, %edx
++ jb L(CopyVecSizeExit)
++ jmp L(StrncpyExit)
++
++/* Length limit reached: Case2 if the mask also shows a NUL, otherwise Case3 */
++
++ .p2align 4
++L(CopyVecSizeCase2OrCase3):
++ test %rdx, %rdx
++ jnz L(CopyVecSizeCase2)
++L(CopyVecSizeCase3):
++ add $VEC_SIZE, %r8
++ add %rcx, %rdi
++ add %rcx, %rsi
++ jmp L(StrncpyExit)
++
++ .p2align 4
++L(CopyTwoVecSizeCase2OrCase3):
++ test %rdx, %rdx
++ jnz L(CopyTwoVecSizeCase2)
++ add %rcx, %rsi
++ jmp L(StrncpyExit)
++
++ .p2align 4
++L(CopyVecSizeTailCase2OrCase3):
++ test %rdx, %rdx
++ jnz L(CopyVecSizeTailCase2)
++ add %rcx, %rsi
++ jmp L(StrncpyExit)
++
++ .p2align 4
++L(CopyTwoVecSize1Case2OrCase3):
++ add $VEC_SIZE, %rdi
++ add $VEC_SIZE, %rsi
++ sub $VEC_SIZE, %r8
++L(CopyVecSizeTail1Case2OrCase3):
++ test %rdx, %rdx
++ jnz L(CopyVecSizeTail1Case2)
++ jmp L(StrncpyExit)
++# endif
++
++/*------------End of labels for copying 1-VEC_SIZE bytes and 1-(VEC_SIZE*2) bytes----*/
++
++ .p2align 4
++L(Exit1):
++ movzwl (%rsi), %edx
++ mov %dx, (%rdi)
++# ifdef USE_AS_STPCPY
++ lea 1(%rdi), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub $2, %r8
++ lea 2(%rdi), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit2):
++ movzwl (%rsi), %ecx
++ mov %cx, (%rdi)
++ movb $0, 2(%rdi)
++# ifdef USE_AS_STPCPY
++ lea 2(%rdi), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub $3, %r8
++ lea 3(%rdi), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit3):
++ mov (%rsi), %edx
++ mov %edx, (%rdi)
++# ifdef USE_AS_STPCPY
++ lea 3(%rdi), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub $4, %r8
++ lea 4(%rdi), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit4_7):
++ mov (%rsi), %ecx
++ mov %ecx, (%rdi)
++ mov -3(%rsi, %rdx), %ecx
++ mov %ecx, -3(%rdi, %rdx)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub %rdx, %r8
++ sub $1, %r8
++ lea 1(%rdi, %rdx), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit8_15):
++ mov (%rsi), %rcx
++ mov -7(%rsi, %rdx), %r9
++ mov %rcx, (%rdi)
++ mov %r9, -7(%rdi, %rdx)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub %rdx, %r8
++ sub $1, %r8
++ lea 1(%rdi, %rdx), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit16_31):
++ VMOVU (%rsi), %XMM2
++ VMOVU -15(%rsi, %rdx), %XMM3
++ VMOVU %XMM2, (%rdi)
++ VMOVU %XMM3, -15(%rdi, %rdx)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub %rdx, %r8
++ sub $1, %r8
++ lea 1(%rdi, %rdx), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++ .p2align 4
++L(Exit32_63):
++ VMOVU (%rsi), %YMM2
++ VMOVU -31(%rsi, %rdx), %YMM3
++ VMOVU %YMM2, (%rdi)
++ VMOVU %YMM3, -31(%rdi, %rdx)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
++ sub %rdx, %r8
++ sub $1, %r8
++ lea 1(%rdi, %rdx), %rdi
++ jnz L(StrncpyFillTailWithZero)
++# endif
++ ret
++
++# ifdef USE_AS_STRNCPY
++
++ .p2align 4
++L(StrncpyExit1):
++ movzbl (%rsi), %edx
++ mov %dl, (%rdi)
++# ifdef USE_AS_STPCPY
++ lea 1(%rdi), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, 1(%rdi)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit2):
++ movzwl (%rsi), %edx
++ mov %dx, (%rdi)
++# ifdef USE_AS_STPCPY
++ lea 2(%rdi), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, 2(%rdi)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit3_4):
++ movzwl (%rsi), %ecx
++ movzwl -2(%rsi, %r8), %edx
++ mov %cx, (%rdi)
++ mov %dx, -2(%rdi, %r8)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %r8), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi, %r8)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit5_8):
++ mov (%rsi), %ecx
++ mov -4(%rsi, %r8), %edx
++ mov %ecx, (%rdi)
++ mov %edx, -4(%rdi, %r8)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %r8), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi, %r8)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit9_16):
++ mov (%rsi), %rcx
++ mov -8(%rsi, %r8), %rdx
++ mov %rcx, (%rdi)
++ mov %rdx, -8(%rdi, %r8)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %r8), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi, %r8)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit17_32):
++ VMOVU (%rsi), %XMM2
++ VMOVU -16(%rsi, %r8), %XMM3
++ VMOVU %XMM2, (%rdi)
++ VMOVU %XMM3, -16(%rdi, %r8)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %r8), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi, %r8)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit33_64):
++ /* First VEC_SIZE bytes, then the VEC_SIZE bytes ending at offset %r8 (the two may overlap). */
++ VMOVU (%rsi), %YMM2
++ VMOVU -VEC_SIZE(%rsi, %r8), %YMM3
++ VMOVU %YMM2, (%rdi)
++ VMOVU %YMM3, -VEC_SIZE(%rdi, %r8)
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %r8), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi, %r8)
++# endif
++ ret
++
++ .p2align 4
++L(StrncpyExit65):
++ /* 0/32, 32/32, 64/1 */
++ VMOVU (%rsi), %YMM2
++ VMOVU 32(%rsi), %YMM3
++ mov 64(%rsi), %cl
++ VMOVU %YMM2, (%rdi)
++ VMOVU %YMM3, 32(%rdi)
++ mov %cl, 64(%rdi)
++# ifdef USE_AS_STPCPY
++ lea 65(%rdi), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, 65(%rdi)
++# endif
++ ret
++
++# ifndef USE_AS_STRCAT
++
++ .p2align 4
++L(Fill1):
++ mov %dl, (%rdi)
++ ret
++
++ .p2align 4
++L(Fill2):
++ mov %dx, (%rdi)
++ ret
++
++ .p2align 4
++L(Fill3_4):
++ mov %dx, (%rdi)
++ mov %dx, -2(%rdi, %r8)
++ ret
++
++ .p2align 4
++L(Fill5_8):
++ mov %edx, (%rdi)
++ mov %edx, -4(%rdi, %r8)
++ ret
++
++ .p2align 4
++L(Fill9_16):
++ mov %rdx, (%rdi)
++ mov %rdx, -8(%rdi, %r8)
++ ret
++
++ .p2align 4
++L(Fill17_32):
++ VMOVU %XMMZERO, (%rdi)
++ VMOVU %XMMZERO, -16(%rdi, %r8)
++ ret
++
++ .p2align 4
++L(CopyVecSizeUnalignedVec2):
++ VMOVU %YMM2, (%rdi, %rcx)
++
++ .p2align 4
++L(CopyVecSizeVecExit):
++ bsf %edx, %edx
++ add $(VEC_SIZE - 1), %r8
++ add %rcx, %rdi
++# ifdef USE_AS_STPCPY
++ lea (%rdi, %rdx), %rax
++# endif
++ sub %rdx, %r8
++ lea 1(%rdi, %rdx), %rdi
++
++ .p2align 4
++L(StrncpyFillTailWithZero): /* Clear r8 bytes starting at rdi. */
++ xor %edx, %edx /* rdx = 0, the value written by the scalar Fill* stores */
++ sub $VEC_SIZE, %r8
++ jbe L(StrncpyFillExit) /* at most VEC_SIZE bytes to clear */
++
++ VMOVU %YMMZERO, (%rdi) /* clear one unaligned vector */
++ add $VEC_SIZE, %rdi
++
++ mov %rdi, %rsi
++ and $(VEC_SIZE - 1), %esi /* esi = misalignment of the advanced rdi */
++ sub %rsi, %rdi /* align rdi down to VEC_SIZE */
++ add %rsi, %r8 /* add back the bytes that will be cleared a second time */
++ sub $(VEC_SIZE * 4), %r8
++ jb L(StrncpyFillLessFourVecSize) /* fewer than four vectors left */
++
++L(StrncpyFillLoopVmovdqa): /* Clear four aligned vectors per iteration. */
++ VMOVA %YMMZERO, (%rdi)
++ VMOVA %YMMZERO, VEC_SIZE(%rdi)
++ VMOVA %YMMZERO, (VEC_SIZE * 2)(%rdi)
++ VMOVA %YMMZERO, (VEC_SIZE * 3)(%rdi)
++ add $(VEC_SIZE * 4), %rdi
++ sub $(VEC_SIZE * 4), %r8
++ jae L(StrncpyFillLoopVmovdqa) /* loop while the subtraction did not borrow */
++
++L(StrncpyFillLessFourVecSize):
++ add $(VEC_SIZE * 2), %r8
++ jl L(StrncpyFillLessTwoVecSize) /* fewer than two vectors left */
++ VMOVA %YMMZERO, (%rdi)
++ VMOVA %YMMZERO, VEC_SIZE(%rdi)
++ add $(VEC_SIZE * 2), %rdi
++ sub $VEC_SIZE, %r8
++ jl L(StrncpyFillExit) /* fewer than three vectors were left */
++ VMOVA %YMMZERO, (%rdi)
++ add $VEC_SIZE, %rdi
++ jmp L(Fill)
++
++ .p2align 4
++L(StrncpyFillLessTwoVecSize):
++ add $VEC_SIZE, %r8
++ jl L(StrncpyFillExit) /* less than one full vector left */
++ VMOVA %YMMZERO, (%rdi)
++ add $VEC_SIZE, %rdi
++ jmp L(Fill)
++
++ .p2align 4
++L(StrncpyFillExit):
++ add $VEC_SIZE, %r8 /* undo the last subtraction: r8 = leftover byte count */
++L(Fill): /* Dispatch on the leftover byte count in r8d. */
++ cmp $17, %r8d
++ jae L(Fill17_32)
++ cmp $9, %r8d
++ jae L(Fill9_16)
++ cmp $5, %r8d
++ jae L(Fill5_8)
++ cmp $3, %r8d
++ jae L(Fill3_4)
++ cmp $1, %r8d
++ ja L(Fill2)
++ je L(Fill1)
++ ret /* r8d == 0: nothing left to clear */
++
++/* end of ifndef USE_AS_STRCAT */
++# endif
++
++ .p2align 4
++L(UnalignedLeaveCase2OrCase3):
++ test %rdx, %rdx
++ jnz L(UnalignedFourVecSizeLeaveCase2)
++L(UnalignedFourVecSizeLeaveCase3):
++ lea (VEC_SIZE * 4)(%r8), %rcx
++ and $-VEC_SIZE, %rcx
++ add $(VEC_SIZE * 3), %r8
++ jl L(CopyVecSizeCase3)
++ VMOVU %YMM4, (%rdi)
++ sub $VEC_SIZE, %r8
++ jb L(CopyVecSizeCase3)
++ VMOVU %YMM5, VEC_SIZE(%rdi)
++ sub $VEC_SIZE, %r8
++ jb L(CopyVecSizeCase3)
++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
++ sub $VEC_SIZE, %r8
++ jb L(CopyVecSizeCase3)
++ VMOVU %YMM7, (VEC_SIZE * 3)(%rdi)
++# ifdef USE_AS_STPCPY
++ lea (VEC_SIZE * 4)(%rdi), %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (VEC_SIZE * 4)(%rdi)
++# endif
++ ret
++
++ .p2align 4
++L(UnalignedFourVecSizeLeaveCase2):
++ xor %ecx, %ecx
++ vpcmpb $0, %YMM4, %YMMZERO, %k1
++ kmovd %k1, %edx
++ add $(VEC_SIZE * 3), %r8
++ jle L(CopyVecSizeCase2OrCase3)
++ test %edx, %edx
++# ifndef USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec4)
++# else
++ jnz L(CopyVecSize)
++# endif
++ vpcmpb $0, %YMM5, %YMMZERO, %k2
++ kmovd %k2, %edx
++ VMOVU %YMM4, (%rdi)
++ add $VEC_SIZE, %rcx
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++ test %edx, %edx
++# ifndef USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec5)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ vpcmpb $0, %YMM6, %YMMZERO, %k3
++ kmovd %k3, %edx
++ VMOVU %YMM5, VEC_SIZE(%rdi)
++ add $VEC_SIZE, %rcx
++ sub $VEC_SIZE, %r8
++ jbe L(CopyVecSizeCase2OrCase3)
++ test %edx, %edx
++# ifndef USE_AS_STRCAT
++ jnz L(CopyVecSizeUnalignedVec6)
++# else
++ jnz L(CopyVecSize)
++# endif
++
++ vpcmpb $0, %YMM7, %YMMZERO, %k4
++ kmovd %k4, %edx
++ VMOVU %YMM6, (VEC_SIZE * 2)(%rdi)
++ lea VEC_SIZE(%rdi, %rcx), %rdi
++ lea VEC_SIZE(%rsi, %rcx), %rsi
++ bsf %edx, %edx
++ cmp %r8d, %edx
++ jb L(CopyVecSizeExit)
++L(StrncpyExit):
++ cmp $65, %r8d
++ je L(StrncpyExit65)
++ cmp $33, %r8d
++ jae L(StrncpyExit33_64)
++ cmp $17, %r8d
++ jae L(StrncpyExit17_32)
++ cmp $9, %r8d
++ jae L(StrncpyExit9_16)
++ cmp $5, %r8d
++ jae L(StrncpyExit5_8)
++ cmp $3, %r8d
++ jae L(StrncpyExit3_4)
++ cmp $1, %r8d
++ ja L(StrncpyExit2)
++ je L(StrncpyExit1)
++# ifdef USE_AS_STPCPY
++ mov %rdi, %rax
++# endif
++# ifdef USE_AS_STRCAT
++ movb $0, (%rdi)
++# endif
++ ret
++
++ .p2align 4
++L(ExitZero):
++# ifndef USE_AS_STRCAT
++ mov %rdi, %rax
++# endif
++ ret
++
++# endif
++
++# ifndef USE_AS_STRCAT
++END (STRCPY)
++# else
++END (STRCAT)
++# endif
++#endif
+diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
+new file mode 100644
+index 0000000000..75b4b7612c
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRLEN
++# define STRLEN __strlen_avx2_rtm /* Symbol name used unless the includer already chose one. */
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST /* NOTE(review): defined outside this file; presumably the XTEST-based return sequence -- confirm. */
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper) /* NOTE(review): relies on strlen-avx2.S defining L(return_vzeroupper) -- confirm. */
++
++#define SECTION(p) p##.avx.rtm /* Section becomes <p>.avx.rtm instead of the default <p>.avx. */
++
++#include "strlen-avx2.S" /* Reuse the AVX2 implementation with the overrides above. */
+diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
+index 73421ec1b2..45e08e64d6 100644
+--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
++++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
+@@ -27,370 +27,531 @@
+ # ifdef USE_AS_WCSLEN
+ # define VPCMPEQ vpcmpeqd
+ # define VPMINU vpminud
++# define CHAR_SIZE 4
+ # else
+ # define VPCMPEQ vpcmpeqb
+ # define VPMINU vpminub
++# define CHAR_SIZE 1
+ # endif
+
+ # ifndef VZEROUPPER
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
++# define PAGE_SIZE 4096
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRLEN)
+ # ifdef USE_AS_STRNLEN
+- /* Check for zero length. */
++ /* Check zero length. */
++# ifdef __ILP32__
++ /* Clear upper bits. */
++ and %RSI_LP, %RSI_LP
++# else
+ test %RSI_LP, %RSI_LP
+- jz L(zero)
+-# ifdef USE_AS_WCSLEN
+- shl $2, %RSI_LP
+-# elif defined __ILP32__
+- /* Clear the upper 32 bits. */
+- movl %esi, %esi
+ # endif
++ jz L(zero)
++ /* Store max len in R8_LP before adjusting if using WCSLEN. */
+ mov %RSI_LP, %R8_LP
+ # endif
+- movl %edi, %ecx
++ movl %edi, %eax
+ movq %rdi, %rdx
+ vpxor %xmm0, %xmm0, %xmm0
+-
++ /* Clear high bits from edi. Only keeping bits relevant to page
++ cross check. */
++ andl $(PAGE_SIZE - 1), %eax
+ /* Check if we may cross page boundary with one vector load. */
+- andl $(2 * VEC_SIZE - 1), %ecx
+- cmpl $VEC_SIZE, %ecx
+- ja L(cros_page_boundary)
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ ja L(cross_page_boundary)
+
+ /* Check the first VEC_SIZE bytes. */
+- VPCMPEQ (%rdi), %ymm0, %ymm1
++ VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+-
+ # ifdef USE_AS_STRNLEN
+- jnz L(first_vec_x0_check)
+- /* Adjust length and check the end of data. */
+- subq $VEC_SIZE, %rsi
+- jbe L(max)
+-# else
+- jnz L(first_vec_x0)
++ /* If length < VEC_SIZE handle special. */
++ cmpq $CHAR_PER_VEC, %rsi
++ jbe L(first_vec_x0)
+ # endif
+-
+- /* Align data for aligned loads in the loop. */
+- addq $VEC_SIZE, %rdi
+- andl $(VEC_SIZE - 1), %ecx
+- andq $-VEC_SIZE, %rdi
++ /* If empty continue to aligned_more. Otherwise return bit
++ position of first match. */
++ testl %eax, %eax
++ jz L(aligned_more)
++ tzcntl %eax, %eax
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+
+ # ifdef USE_AS_STRNLEN
+- /* Adjust length. */
+- addq %rcx, %rsi
++L(zero):
++ xorl %eax, %eax
++ ret
+
+- subq $(VEC_SIZE * 4), %rsi
+- jbe L(last_4x_vec_or_less)
++ .p2align 4
++L(first_vec_x0):
++ /* Set bit for max len so that tzcnt will return min of max len
++ and position of first match. */
++# ifdef USE_AS_WCSLEN
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %esi
++# endif
++ btsq %rsi, %rax
++ tzcntl %eax, %eax
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+ # endif
+- jmp L(more_4x_vec)
+
+ .p2align 4
+-L(cros_page_boundary):
+- andl $(VEC_SIZE - 1), %ecx
+- andq $-VEC_SIZE, %rdi
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- /* Remove the leading bytes. */
+- sarl %cl, %eax
+- testl %eax, %eax
+- jz L(aligned_more)
++L(first_vec_x1):
+ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
+ # ifdef USE_AS_STRNLEN
+- /* Check the end of data. */
+- cmpq %rax, %rsi
+- jbe L(max)
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++# ifdef USE_AS_WCSLEN
++ leal -(VEC_SIZE * 4 + 1)(%rax, %rcx, 4), %eax
++# else
++ subl $(VEC_SIZE * 4 + 1), %ecx
++ addl %ecx, %eax
++# endif
++# else
++ subl %edx, %edi
++ incl %edi
++ addl %edi, %eax
+ # endif
+- addq %rdi, %rax
+- addq %rcx, %rax
+- subq %rdx, %rax
+ # ifdef USE_AS_WCSLEN
+- shrq $2, %rax
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(aligned_more):
++L(first_vec_x2):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
+ # ifdef USE_AS_STRNLEN
+- /* "rcx" is less than VEC_SIZE. Calculate "rdx + rcx - VEC_SIZE"
+- with "rdx - (VEC_SIZE - rcx)" instead of "(rdx + rcx) - VEC_SIZE"
+- to void possible addition overflow. */
+- negq %rcx
+- addq $VEC_SIZE, %rcx
+-
+- /* Check the end of data. */
+- subq %rcx, %rsi
+- jbe L(max)
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++# ifdef USE_AS_WCSLEN
++ leal -(VEC_SIZE * 3 + 1)(%rax, %rcx, 4), %eax
++# else
++ subl $(VEC_SIZE * 3 + 1), %ecx
++ addl %ecx, %eax
++# endif
++# else
++ subl %edx, %edi
++ addl $(VEC_SIZE + 1), %edi
++ addl %edi, %eax
+ # endif
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+
+- addq $VEC_SIZE, %rdi
++ .p2align 4
++L(first_vec_x3):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
++# ifdef USE_AS_STRNLEN
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++# ifdef USE_AS_WCSLEN
++ leal -(VEC_SIZE * 2 + 1)(%rax, %rcx, 4), %eax
++# else
++ subl $(VEC_SIZE * 2 + 1), %ecx
++ addl %ecx, %eax
++# endif
++# else
++ subl %edx, %edi
++ addl $(VEC_SIZE * 2 + 1), %edi
++ addl %edi, %eax
++# endif
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+
++ .p2align 4
++L(first_vec_x4):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
+ # ifdef USE_AS_STRNLEN
+- subq $(VEC_SIZE * 4), %rsi
+- jbe L(last_4x_vec_or_less)
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++# ifdef USE_AS_WCSLEN
++ leal -(VEC_SIZE * 1 + 1)(%rax, %rcx, 4), %eax
++# else
++ subl $(VEC_SIZE + 1), %ecx
++ addl %ecx, %eax
++# endif
++# else
++ subl %edx, %edi
++ addl $(VEC_SIZE * 3 + 1), %edi
++ addl %edi, %eax
+ # endif
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+
+-L(more_4x_vec):
++ .p2align 5
++L(aligned_more):
++ /* Align data to VEC_SIZE - 1. This is the same number of
++ instructions as using andq with -VEC_SIZE but saves 4 bytes of
++ code on the x4 check. */
++ orq $(VEC_SIZE - 1), %rdi
++L(cross_page_continue):
+ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
+ since data is only aligned to VEC_SIZE. */
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x0)
+-
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
++# ifdef USE_AS_STRNLEN
++ /* + 1 because rdi is aligned to VEC_SIZE - 1. + CHAR_SIZE
++ because it simplifies the logic in last_4x_vec_or_less. */
++ leaq (VEC_SIZE * 4 + CHAR_SIZE + 1)(%rdi), %rcx
++ subq %rdx, %rcx
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++# endif
++# endif
++ /* Load first VEC regardless. */
++ VPCMPEQ 1(%rdi), %ymm0, %ymm1
++# ifdef USE_AS_STRNLEN
++ /* Adjust length. If near end handle specially. */
++ subq %rcx, %rsi
++ jb L(last_4x_vec_or_less)
++# endif
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x1)
+
+- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x2)
+
+- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+ testl %eax, %eax
+ jnz L(first_vec_x3)
+
+- addq $(VEC_SIZE * 4), %rdi
+-
+-# ifdef USE_AS_STRNLEN
+- subq $(VEC_SIZE * 4), %rsi
+- jbe L(last_4x_vec_or_less)
+-# endif
+-
+- /* Align data to 4 * VEC_SIZE. */
+- movq %rdi, %rcx
+- andl $(4 * VEC_SIZE - 1), %ecx
+- andq $-(4 * VEC_SIZE), %rdi
++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x4)
+
++ /* Align data to VEC_SIZE * 4 - 1. */
+ # ifdef USE_AS_STRNLEN
+- /* Adjust length. */
++ /* Before adjusting length check if at last VEC_SIZE * 4. */
++ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
++ jbe L(last_4x_vec_or_less_load)
++ incq %rdi
++ movl %edi, %ecx
++ orq $(VEC_SIZE * 4 - 1), %rdi
++ andl $(VEC_SIZE * 4 - 1), %ecx
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++# endif
++ /* Readjust length. */
+ addq %rcx, %rsi
++# else
++ incq %rdi
++ orq $(VEC_SIZE * 4 - 1), %rdi
+ # endif
+-
++ /* Compare 4 * VEC at a time forward. */
+ .p2align 4
+ L(loop_4x_vec):
+- /* Compare 4 * VEC at a time forward. */
+- vmovdqa (%rdi), %ymm1
+- vmovdqa VEC_SIZE(%rdi), %ymm2
+- vmovdqa (VEC_SIZE * 2)(%rdi), %ymm3
+- vmovdqa (VEC_SIZE * 3)(%rdi), %ymm4
+- VPMINU %ymm1, %ymm2, %ymm5
+- VPMINU %ymm3, %ymm4, %ymm6
+- VPMINU %ymm5, %ymm6, %ymm5
+-
++# ifdef USE_AS_STRNLEN
++ /* Break if at end of length. */
++ subq $(CHAR_PER_VEC * 4), %rsi
++ jb L(last_4x_vec_or_less_cmpeq)
++# endif
++ /* Save some code size by microfusing VPMINU with the load.
++ Since the matches in ymm2/ymm4 can only be returned if there
++ were no matches in ymm1/ymm3 respectively there is no issue
++ with overlap. */
++ vmovdqa 1(%rdi), %ymm1
++ VPMINU (VEC_SIZE + 1)(%rdi), %ymm1, %ymm2
++ vmovdqa (VEC_SIZE * 2 + 1)(%rdi), %ymm3
++ VPMINU (VEC_SIZE * 3 + 1)(%rdi), %ymm3, %ymm4
++
++ VPMINU %ymm2, %ymm4, %ymm5
+ VPCMPEQ %ymm5, %ymm0, %ymm5
+- vpmovmskb %ymm5, %eax
+- testl %eax, %eax
+- jnz L(4x_vec_end)
++ vpmovmskb %ymm5, %ecx
+
+- addq $(VEC_SIZE * 4), %rdi
++ subq $-(VEC_SIZE * 4), %rdi
++ testl %ecx, %ecx
++ jz L(loop_4x_vec)
+
+-# ifndef USE_AS_STRNLEN
+- jmp L(loop_4x_vec)
+-# else
+- subq $(VEC_SIZE * 4), %rsi
+- ja L(loop_4x_vec)
+
+-L(last_4x_vec_or_less):
+- /* Less than 4 * VEC and aligned to VEC_SIZE. */
+- addl $(VEC_SIZE * 2), %esi
+- jle L(last_2x_vec)
+-
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x0)
+-
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
++ VPCMPEQ %ymm1, %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++ subq %rdx, %rdi
+ testl %eax, %eax
+- jnz L(first_vec_x1)
++ jnz L(last_vec_return_x0)
+
+- VPCMPEQ (VEC_SIZE * 2)(%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
++ VPCMPEQ %ymm2, %ymm0, %ymm2
++ vpmovmskb %ymm2, %eax
+ testl %eax, %eax
++ jnz L(last_vec_return_x1)
+
+- jnz L(first_vec_x2_check)
+- subl $VEC_SIZE, %esi
+- jle L(max)
++ /* Combine last 2 VEC. */
++ VPCMPEQ %ymm3, %ymm0, %ymm3
++ vpmovmskb %ymm3, %eax
++ /* rcx has combined result from all 4 VEC. It will only be used
++ if the first 3 other VEC all did not contain a match. */
++ salq $32, %rcx
++ orq %rcx, %rax
++ tzcntq %rax, %rax
++ subq $(VEC_SIZE * 2 - 1), %rdi
++ addq %rdi, %rax
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrq $2, %rax
++# endif
++ VZEROUPPER_RETURN
+
+- VPCMPEQ (VEC_SIZE * 3)(%rdi), %ymm0, %ymm1
+- vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+
+- jnz L(first_vec_x3_check)
+- movq %r8, %rax
++# ifdef USE_AS_STRNLEN
++ .p2align 4
++L(last_4x_vec_or_less_load):
++ /* Depending on entry adjust rdi / prepare first VEC in ymm1.
++ */
++ subq $-(VEC_SIZE * 4), %rdi
++L(last_4x_vec_or_less_cmpeq):
++ VPCMPEQ 1(%rdi), %ymm0, %ymm1
++L(last_4x_vec_or_less):
+ # ifdef USE_AS_WCSLEN
+- shrq $2, %rax
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %esi
+ # endif
+- VZEROUPPER
+- ret
+-
+- .p2align 4
+-L(last_2x_vec):
+- addl $(VEC_SIZE * 2), %esi
+- VPCMPEQ (%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++ /* If remaining length > VEC_SIZE * 2. This works if esi is off
++ by VEC_SIZE * 4. */
++ testl $(VEC_SIZE * 2), %esi
++ jnz L(last_4x_vec)
++
++ /* length may have been negative or positive by an offset of
++ VEC_SIZE * 4 depending on where this was called from. This fixes
++ that. */
++ andl $(VEC_SIZE * 4 - 1), %esi
+ testl %eax, %eax
++ jnz L(last_vec_x1_check)
+
+- jnz L(first_vec_x0_check)
+ subl $VEC_SIZE, %esi
+- jle L(max)
++ jb L(max)
+
+- VPCMPEQ VEC_SIZE(%rdi), %ymm0, %ymm1
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x1_check)
+- movq %r8, %rax
+-# ifdef USE_AS_WCSLEN
+- shrq $2, %rax
+-# endif
+- VZEROUPPER
+- ret
+-
+- .p2align 4
+-L(first_vec_x0_check):
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+- cmpq %rax, %rsi
+- jbe L(max)
++ cmpl %eax, %esi
++ jb L(max)
++ subq %rdx, %rdi
++ addl $(VEC_SIZE + 1), %eax
+ addq %rdi, %rax
+- subq %rdx, %rax
+ # ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
++# endif
+
+ .p2align 4
+-L(first_vec_x1_check):
++L(last_vec_return_x0):
+ tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rsi
+- jbe L(max)
+- addq $VEC_SIZE, %rax
++ subq $(VEC_SIZE * 4 - 1), %rdi
+ addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+-# endif
+- VZEROUPPER
+- ret
++# endif
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(first_vec_x2_check):
++L(last_vec_return_x1):
+ tzcntl %eax, %eax
+- /* Check the end of data. */
+- cmpq %rax, %rsi
+- jbe L(max)
+- addq $(VEC_SIZE * 2), %rax
++ subq $(VEC_SIZE * 3 - 1), %rdi
+ addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+-# endif
+- VZEROUPPER
+- ret
++# endif
++ VZEROUPPER_RETURN
+
++# ifdef USE_AS_STRNLEN
+ .p2align 4
+-L(first_vec_x3_check):
++L(last_vec_x1_check):
++
+ tzcntl %eax, %eax
+ /* Check the end of data. */
+- cmpq %rax, %rsi
+- jbe L(max)
+- addq $(VEC_SIZE * 3), %rax
++ cmpl %eax, %esi
++ jb L(max)
++ subq %rdx, %rdi
++ incl %eax
+ addq %rdi, %rax
+- subq %rdx, %rax
+ # ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+- .p2align 4
+ L(max):
+ movq %r8, %rax
++ VZEROUPPER_RETURN
++
++ .p2align 4
++L(last_4x_vec):
++ /* Test first 2x VEC normally. */
++ testl %eax, %eax
++ jnz L(last_vec_x1)
++
++ VPCMPEQ (VEC_SIZE + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++ /* Normalize length. */
++ andl $(VEC_SIZE * 4 - 1), %esi
++ VPCMPEQ (VEC_SIZE * 2 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3)
++
++ subl $(VEC_SIZE * 3), %esi
++ jb L(max)
++
++ VPCMPEQ (VEC_SIZE * 3 + 1)(%rdi), %ymm0, %ymm1
++ vpmovmskb %ymm1, %eax
++ tzcntl %eax, %eax
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max)
++ subq %rdx, %rdi
++ addl $(VEC_SIZE * 3 + 1), %eax
++ addq %rdi, %rax
+ # ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+ # endif
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+- .p2align 4
+-L(zero):
+- xorl %eax, %eax
+- ret
+-# endif
+
+ .p2align 4
+-L(first_vec_x0):
++L(last_vec_x1):
++ /* essentially duplicates of first_vec_x1 but use 64 bit
++ instructions. */
+ tzcntl %eax, %eax
++ subq %rdx, %rdi
++ incl %eax
+ addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+-# endif
+- VZEROUPPER
+- ret
++# endif
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(first_vec_x1):
++L(last_vec_x2):
++ /* essentially duplicates of first_vec_x2 but use 64 bit
++ instructions. */
+ tzcntl %eax, %eax
+- addq $VEC_SIZE, %rax
++ subq %rdx, %rdi
++ addl $(VEC_SIZE + 1), %eax
+ addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
+-# endif
+- VZEROUPPER
+- ret
++# endif
++ VZEROUPPER_RETURN
+
+ .p2align 4
+-L(first_vec_x2):
++L(last_vec_x3):
+ tzcntl %eax, %eax
+- addq $(VEC_SIZE * 2), %rax
++ subl $(VEC_SIZE * 2), %esi
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max_end)
++ subq %rdx, %rdi
++ addl $(VEC_SIZE * 2 + 1), %eax
+ addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
+ shrq $2, %rax
++# endif
++ VZEROUPPER_RETURN
++L(max_end):
++ movq %r8, %rax
++ VZEROUPPER_RETURN
+ # endif
+- VZEROUPPER
+- ret
+
++ /* Cold case for crossing page with first load. */
+ .p2align 4
+-L(4x_vec_end):
+- VPCMPEQ %ymm1, %ymm0, %ymm1
++L(cross_page_boundary):
++ /* Align data to VEC_SIZE - 1. */
++ orq $(VEC_SIZE - 1), %rdi
++ VPCMPEQ -(VEC_SIZE - 1)(%rdi), %ymm0, %ymm1
+ vpmovmskb %ymm1, %eax
++ /* Remove the leading bytes. sarxl only uses bits [5:0] of COUNT
++ so no need to manually mod rdx. */
++ sarxl %edx, %eax, %eax
++# ifdef USE_AS_STRNLEN
+ testl %eax, %eax
+- jnz L(first_vec_x0)
+- VPCMPEQ %ymm2, %ymm0, %ymm2
+- vpmovmskb %ymm2, %eax
+- testl %eax, %eax
+- jnz L(first_vec_x1)
+- VPCMPEQ %ymm3, %ymm0, %ymm3
+- vpmovmskb %ymm3, %eax
++ jnz L(cross_page_less_vec)
++ leaq 1(%rdi), %rcx
++ subq %rdx, %rcx
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get wchar_t count. */
++ shrl $2, %ecx
++# endif
++ /* Check length. */
++ cmpq %rsi, %rcx
++ jb L(cross_page_continue)
++ movq %r8, %rax
++# else
+ testl %eax, %eax
+- jnz L(first_vec_x2)
+- VPCMPEQ %ymm4, %ymm0, %ymm4
+- vpmovmskb %ymm4, %eax
+-L(first_vec_x3):
++ jz L(cross_page_continue)
+ tzcntl %eax, %eax
+- addq $(VEC_SIZE * 3), %rax
+- addq %rdi, %rax
+- subq %rdx, %rax
+-# ifdef USE_AS_WCSLEN
+- shrq $2, %rax
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide length by 4 to get wchar_t count. */
++ shrl $2, %eax
++# endif
++# endif
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
++
++# ifdef USE_AS_STRNLEN
++ .p2align 4
++L(cross_page_less_vec):
++ tzcntl %eax, %eax
++# ifdef USE_AS_WCSLEN
++ /* NB: Multiply length by 4 to get byte count. */
++ sall $2, %esi
++# endif
++ cmpq %rax, %rsi
++ cmovb %esi, %eax
++# ifdef USE_AS_WCSLEN
++ shrl $2, %eax
++# endif
++ VZEROUPPER_RETURN
+ # endif
+- VZEROUPPER
+- ret
+
+ END (STRLEN)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strlen-evex.S b/sysdeps/x86_64/multiarch/strlen-evex.S
+new file mode 100644
+index 0000000000..4bf6874b82
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strlen-evex.S
+@@ -0,0 +1,489 @@
++/* strlen/strnlen/wcslen/wcsnlen optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef STRLEN
++# define STRLEN __strlen_evex
++# endif
++
++# define VMOVA vmovdqa64
++
++# ifdef USE_AS_WCSLEN
++# define VPCMP vpcmpd
++# define VPMINU vpminud
++# define SHIFT_REG ecx
++# define CHAR_SIZE 4
++# else
++# define VPCMP vpcmpb
++# define VPMINU vpminub
++# define SHIFT_REG edx
++# define CHAR_SIZE 1
++# endif
++
++# define XMMZERO xmm16
++# define YMMZERO ymm16
++# define YMM1 ymm17
++# define YMM2 ymm18
++# define YMM3 ymm19
++# define YMM4 ymm20
++# define YMM5 ymm21
++# define YMM6 ymm22
++
++# define VEC_SIZE 32
++# define PAGE_SIZE 4096
++# define CHAR_PER_VEC (VEC_SIZE / CHAR_SIZE)
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRLEN)
++# ifdef USE_AS_STRNLEN
++ /* Check zero length. */
++ test %RSI_LP, %RSI_LP
++ jz L(zero)
++# ifdef __ILP32__
++ /* Clear the upper 32 bits. */
++ movl %esi, %esi
++# endif
++ mov %RSI_LP, %R8_LP
++# endif
++ movl %edi, %eax
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++ /* Clear high bits from edi. Only keeping bits relevant to page
++ cross check. */
++ andl $(PAGE_SIZE - 1), %eax
++ /* Check if we may cross page boundary with one vector load. */
++ cmpl $(PAGE_SIZE - VEC_SIZE), %eax
++ ja L(cross_page_boundary)
++
++ /* Check the first VEC_SIZE bytes. Each bit in K0 represents a
++ null byte. */
++ VPCMP $0, (%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++# ifdef USE_AS_STRNLEN
++ /* If length < CHAR_PER_VEC handle special. */
++ cmpq $CHAR_PER_VEC, %rsi
++ jbe L(first_vec_x0)
++# endif
++ testl %eax, %eax
++ jz L(aligned_more)
++ tzcntl %eax, %eax
++ ret
++# ifdef USE_AS_STRNLEN
++L(zero):
++ xorl %eax, %eax
++ ret
++
++ .p2align 4
++L(first_vec_x0):
++ /* Set bit for max len so that tzcnt will return min of max len
++ and position of first match. */
++ btsq %rsi, %rax
++ tzcntl %eax, %eax
++ ret
++# endif
++
++ .p2align 4
++L(first_vec_x1):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
++# ifdef USE_AS_STRNLEN
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++ leal -(CHAR_PER_VEC * 4 + 1)(%rcx, %rax), %eax
++# else
++ subl %edx, %edi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %edi
++# endif
++ leal CHAR_PER_VEC(%rdi, %rax), %eax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x2):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
++# ifdef USE_AS_STRNLEN
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++ leal -(CHAR_PER_VEC * 3 + 1)(%rcx, %rax), %eax
++# else
++ subl %edx, %edi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %edi
++# endif
++ leal (CHAR_PER_VEC * 2)(%rdi, %rax), %eax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x3):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
++# ifdef USE_AS_STRNLEN
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++ leal -(CHAR_PER_VEC * 2 + 1)(%rcx, %rax), %eax
++# else
++ subl %edx, %edi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %edi
++# endif
++ leal (CHAR_PER_VEC * 3)(%rdi, %rax), %eax
++# endif
++ ret
++
++ .p2align 4
++L(first_vec_x4):
++ tzcntl %eax, %eax
++ /* Safe to use 32 bit instructions as these are only called for
++ size = [1, 159]. */
++# ifdef USE_AS_STRNLEN
++ /* Use ecx which was computed earlier to compute correct value.
++ */
++ leal -(CHAR_PER_VEC + 1)(%rcx, %rax), %eax
++# else
++ subl %edx, %edi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %edi
++# endif
++ leal (CHAR_PER_VEC * 4)(%rdi, %rax), %eax
++# endif
++ ret
++
++ .p2align 5
++L(aligned_more):
++ movq %rdi, %rdx
++ /* Align data to VEC_SIZE. */
++ andq $-(VEC_SIZE), %rdi
++L(cross_page_continue):
++ /* Check the first 4 * VEC_SIZE. Only one VEC_SIZE at a time
++ since data is only aligned to VEC_SIZE. */
++# ifdef USE_AS_STRNLEN
++ /* + CHAR_SIZE because it simplifies the logic in
++ last_4x_vec_or_less. */
++ leaq (VEC_SIZE * 5 + CHAR_SIZE)(%rdi), %rcx
++ subq %rdx, %rcx
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++# endif
++# endif
++ /* Load first VEC regardless. */
++ VPCMP $0, VEC_SIZE(%rdi), %YMMZERO, %k0
++# ifdef USE_AS_STRNLEN
++ /* Adjust length. If near end handle specially. */
++ subq %rcx, %rsi
++ jb L(last_4x_vec_or_less)
++# endif
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x1)
++
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ test %eax, %eax
++ jnz L(first_vec_x2)
++
++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x3)
++
++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(first_vec_x4)
++
++ addq $VEC_SIZE, %rdi
++# ifdef USE_AS_STRNLEN
++ /* Check if at last VEC_SIZE * 4 length. */
++ cmpq $(CHAR_PER_VEC * 4 - 1), %rsi
++ jbe L(last_4x_vec_or_less_load)
++ movl %edi, %ecx
++ andl $(VEC_SIZE * 4 - 1), %ecx
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarl $2, %ecx
++# endif
++ /* Readjust length. */
++ addq %rcx, %rsi
++# endif
++ /* Align data to VEC_SIZE * 4. */
++ andq $-(VEC_SIZE * 4), %rdi
++
++ /* Compare 4 * VEC at a time forward. */
++ .p2align 4
++L(loop_4x_vec):
++ /* Load first VEC regardless. */
++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
++# ifdef USE_AS_STRNLEN
++ /* Break if at end of length. */
++ subq $(CHAR_PER_VEC * 4), %rsi
++ jb L(last_4x_vec_or_less_cmpeq)
++# endif
++ /* Save some code size by microfusing VPMINU with the load. Since
++ the matches in ymm2/ymm4 can only be returned if there were no
++ matches in ymm1/ymm3 respectively there is no issue with overlap.
++ */
++ VPMINU (VEC_SIZE * 5)(%rdi), %YMM1, %YMM2
++ VMOVA (VEC_SIZE * 6)(%rdi), %YMM3
++ VPMINU (VEC_SIZE * 7)(%rdi), %YMM3, %YMM4
++
++ VPCMP $0, %YMM2, %YMMZERO, %k0
++ VPCMP $0, %YMM4, %YMMZERO, %k1
++ subq $-(VEC_SIZE * 4), %rdi
++ kortestd %k0, %k1
++ jz L(loop_4x_vec)
++
++ /* Check if end was in first half. */
++ kmovd %k0, %eax
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ shrq $2, %rdi
++# endif
++ testl %eax, %eax
++ jz L(second_vec_return)
++
++ VPCMP $0, %YMM1, %YMMZERO, %k2
++ kmovd %k2, %edx
++ /* Combine VEC1 matches (edx) with VEC2 matches (eax). */
++# ifdef USE_AS_WCSLEN
++ sall $CHAR_PER_VEC, %eax
++ orl %edx, %eax
++ tzcntl %eax, %eax
++# else
++ salq $CHAR_PER_VEC, %rax
++ orq %rdx, %rax
++ tzcntq %rax, %rax
++# endif
++ addq %rdi, %rax
++ ret
++
++
++# ifdef USE_AS_STRNLEN
++
++L(last_4x_vec_or_less_load):
++ /* Depending on entry adjust rdi / prepare first VEC in YMM1. */
++ VMOVA (VEC_SIZE * 4)(%rdi), %YMM1
++L(last_4x_vec_or_less_cmpeq):
++ VPCMP $0, %YMM1, %YMMZERO, %k0
++ addq $(VEC_SIZE * 3), %rdi
++L(last_4x_vec_or_less):
++ kmovd %k0, %eax
++ /* If remaining length > VEC_SIZE * 2. This works if esi is off by
++ VEC_SIZE * 4. */
++ testl $(CHAR_PER_VEC * 2), %esi
++ jnz L(last_4x_vec)
++
++ /* length may have been negative or positive by an offset of
++ CHAR_PER_VEC * 4 depending on where this was called from. This
++ fixes that. */
++ andl $(CHAR_PER_VEC * 4 - 1), %esi
++ testl %eax, %eax
++ jnz L(last_vec_x1_check)
++
++ /* Check the end of data. */
++ subl $CHAR_PER_VEC, %esi
++ jb L(max)
++
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max)
++
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
++ ret
++L(max):
++ movq %r8, %rax
++ ret
++# endif
++
++ /* Placed here in strnlen so that the jcc L(last_4x_vec_or_less)
++ in the 4x VEC loop can use 2 byte encoding. */
++ .p2align 4
++L(second_vec_return):
++ VPCMP $0, %YMM3, %YMMZERO, %k0
++ /* Combine YMM3 matches (k0) with YMM4 matches (k1). */
++# ifdef USE_AS_WCSLEN
++ kunpckbw %k0, %k1, %k0
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++# else
++ kunpckdq %k0, %k1, %k0
++ kmovq %k0, %rax
++ tzcntq %rax, %rax
++# endif
++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
++ ret
++
++
++# ifdef USE_AS_STRNLEN
++L(last_vec_x1_check):
++ tzcntl %eax, %eax
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max)
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
++ ret
++
++ .p2align 4
++L(last_4x_vec):
++ /* Test first 2x VEC normally. */
++ testl %eax, %eax
++ jnz L(last_vec_x1)
++
++ VPCMP $0, (VEC_SIZE * 2)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x2)
++
++ /* Normalize length. */
++ andl $(CHAR_PER_VEC * 4 - 1), %esi
++ VPCMP $0, (VEC_SIZE * 3)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ testl %eax, %eax
++ jnz L(last_vec_x3)
++
++ /* Check the end of data. */
++ subl $(CHAR_PER_VEC * 3), %esi
++ jb L(max)
++
++ VPCMP $0, (VEC_SIZE * 4)(%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ tzcntl %eax, %eax
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max_end)
++
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC * 4)(%rdi, %rax), %rax
++ ret
++
++ .p2align 4
++L(last_vec_x1):
++ tzcntl %eax, %eax
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC)(%rdi, %rax), %rax
++ ret
++
++ .p2align 4
++L(last_vec_x2):
++ tzcntl %eax, %eax
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC * 2)(%rdi, %rax), %rax
++ ret
++
++ .p2align 4
++L(last_vec_x3):
++ tzcntl %eax, %eax
++ subl $(CHAR_PER_VEC * 2), %esi
++ /* Check the end of data. */
++ cmpl %eax, %esi
++ jb L(max_end)
++ subq %rdx, %rdi
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide bytes by 4 to get the wchar_t count. */
++ sarq $2, %rdi
++# endif
++ leaq (CHAR_PER_VEC * 3)(%rdi, %rax), %rax
++ ret
++L(max_end):
++ movq %r8, %rax
++ ret
++# endif
++
++ /* Cold case for crossing page with first load. */
++ .p2align 4
++L(cross_page_boundary):
++ movq %rdi, %rdx
++ /* Align data to VEC_SIZE. */
++ andq $-VEC_SIZE, %rdi
++ VPCMP $0, (%rdi), %YMMZERO, %k0
++ kmovd %k0, %eax
++ /* Remove the leading bytes. */
++# ifdef USE_AS_WCSLEN
++ /* NB: Divide shift count by 4 since each bit in K0 represents 4
++ bytes. */
++ movl %edx, %ecx
++ shrl $2, %ecx
++ andl $(CHAR_PER_VEC - 1), %ecx
++# endif
++ /* SHIFT_REG is ecx for USE_AS_WCSLEN and edx otherwise. */
++ sarxl %SHIFT_REG, %eax, %eax
++ testl %eax, %eax
++# ifndef USE_AS_STRNLEN
++ jz L(cross_page_continue)
++ tzcntl %eax, %eax
++ ret
++# else
++ jnz L(cross_page_less_vec)
++# ifndef USE_AS_WCSLEN
++ movl %edx, %ecx
++ andl $(CHAR_PER_VEC - 1), %ecx
++# endif
++ movl $CHAR_PER_VEC, %eax
++ subl %ecx, %eax
++ /* Check the end of data. */
++ cmpq %rax, %rsi
++ ja L(cross_page_continue)
++ movl %esi, %eax
++ ret
++L(cross_page_less_vec):
++ tzcntl %eax, %eax
++ /* Select min of length and position of first null. */
++ cmpq %rax, %rsi
++ cmovb %esi, %eax
++ ret
++# endif
++
++END (STRLEN)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/strlen-sse2.S b/sysdeps/x86_64/multiarch/strlen-sse2.S
+index 055fbbc690..812af73c13 100644
+--- a/sysdeps/x86_64/multiarch/strlen-sse2.S
++++ b/sysdeps/x86_64/multiarch/strlen-sse2.S
+@@ -20,4 +20,4 @@
+ # define strlen __strlen_sse2
+ #endif
+
+-#include "../strlen.S"
++#include "strlen-vec.S"
+diff --git a/sysdeps/x86_64/multiarch/strlen-vec.S b/sysdeps/x86_64/multiarch/strlen-vec.S
+new file mode 100644
+index 0000000000..439e486a43
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strlen-vec.S
+@@ -0,0 +1,270 @@
++/* SSE2 version of strlen and SSE4.1 version of wcslen.
++ Copyright (C) 2012-2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <sysdep.h>
++
++#ifdef AS_WCSLEN
++# define PMINU pminud
++# define PCMPEQ pcmpeqd
++# define SHIFT_RETURN shrq $2, %rax
++#else
++# define PMINU pminub
++# define PCMPEQ pcmpeqb
++# define SHIFT_RETURN
++#endif
++
++/* Long lived registers in strlen(s), strnlen(s, n) are:
++
++ %xmm3 - zero
++ %rdi - s
++ %r10 (s+n) & (~(64-1))
++ %r11 s+n
++*/
++
++
++.text
++ENTRY(strlen)
++
++/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
++#define FIND_ZERO \
++ PCMPEQ (%rax), %xmm0; \
++ PCMPEQ 16(%rax), %xmm1; \
++ PCMPEQ 32(%rax), %xmm2; \
++ PCMPEQ 48(%rax), %xmm3; \
++ pmovmskb %xmm0, %esi; \
++ pmovmskb %xmm1, %edx; \
++ pmovmskb %xmm2, %r8d; \
++ pmovmskb %xmm3, %ecx; \
++ salq $16, %rdx; \
++ salq $16, %rcx; \
++ orq %rsi, %rdx; \
++ orq %r8, %rcx; \
++ salq $32, %rcx; \
++ orq %rcx, %rdx;
++
++#ifdef AS_STRNLEN
++/* Do not read anything when n==0. */
++ test %RSI_LP, %RSI_LP
++ jne L(n_nonzero)
++ xor %rax, %rax
++ ret
++L(n_nonzero):
++# ifdef AS_WCSLEN
++/* Check for overflow from maxlen * sizeof(wchar_t). If it would
++ overflow the only way this program doesn't have undefined behavior
++ is if there is a null terminator in valid memory so wcslen will
++ suffice. */
++ mov %RSI_LP, %R10_LP
++ sar $62, %R10_LP
++ test %R10_LP, %R10_LP
++ jnz __wcslen_sse4_1
++ sal $2, %RSI_LP
++# endif
++
++
++/* Initialize long lived registers. */
++
++ add %RDI_LP, %RSI_LP
++# ifdef AS_WCSLEN
++/* Check for overflow again from s + maxlen * sizeof(wchar_t). */
++ jbe __wcslen_sse4_1
++# endif
++ mov %RSI_LP, %R10_LP
++ and $-64, %R10_LP
++ mov %RSI_LP, %R11_LP
++#endif
++
++ pxor %xmm0, %xmm0
++ pxor %xmm1, %xmm1
++ pxor %xmm2, %xmm2
++ pxor %xmm3, %xmm3
++ movq %rdi, %rax
++ movq %rdi, %rcx
++ andq $4095, %rcx
++/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */
++ cmpq $4047, %rcx
++/* We cannot unify this branching as it would be ~6 cycles slower. */
++ ja L(cross_page)
++
++#ifdef AS_STRNLEN
++/* Test if end is among first 64 bytes. */
++# define STRNLEN_PROLOG \
++ mov %r11, %rsi; \
++ subq %rax, %rsi; \
++ andq $-64, %rax; \
++ testq $-64, %rsi; \
++ je L(strnlen_ret)
++#else
++# define STRNLEN_PROLOG andq $-64, %rax;
++#endif
++
++/* Ignore bits in mask that come before start of string. */
++#define PROLOG(lab) \
++ movq %rdi, %rcx; \
++ xorq %rax, %rcx; \
++ STRNLEN_PROLOG; \
++ sarq %cl, %rdx; \
++ test %rdx, %rdx; \
++ je L(lab); \
++ bsfq %rdx, %rax; \
++ SHIFT_RETURN; \
++ ret
++
++#ifdef AS_STRNLEN
++ andq $-16, %rax
++ FIND_ZERO
++#else
++ /* Test first 16 bytes unaligned. */
++ movdqu (%rax), %xmm4
++ PCMPEQ %xmm0, %xmm4
++ pmovmskb %xmm4, %edx
++ test %edx, %edx
++ je L(next48_bytes)
++ bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
++ SHIFT_RETURN
++ ret
++
++L(next48_bytes):
++/* Same as FIND_ZERO except we do not check first 16 bytes. */
++ andq $-16, %rax
++ PCMPEQ 16(%rax), %xmm1
++ PCMPEQ 32(%rax), %xmm2
++ PCMPEQ 48(%rax), %xmm3
++ pmovmskb %xmm1, %edx
++ pmovmskb %xmm2, %r8d
++ pmovmskb %xmm3, %ecx
++ salq $16, %rdx
++ salq $16, %rcx
++ orq %r8, %rcx
++ salq $32, %rcx
++ orq %rcx, %rdx
++#endif
++
++ /* When no zero byte is found xmm1-3 are zero so we do not have to
++ zero them. */
++ PROLOG(loop)
++
++ .p2align 4
++L(cross_page):
++ andq $-64, %rax
++ FIND_ZERO
++ PROLOG(loop_init)
++
++#ifdef AS_STRNLEN
++/* We must do this check to correctly handle strnlen (s, -1). */
++L(strnlen_ret):
++ bts %rsi, %rdx
++ sarq %cl, %rdx
++ test %rdx, %rdx
++ je L(loop_init)
++ bsfq %rdx, %rax
++ SHIFT_RETURN
++ ret
++#endif
++ .p2align 4
++L(loop_init):
++ pxor %xmm1, %xmm1
++ pxor %xmm2, %xmm2
++ pxor %xmm3, %xmm3
++#ifdef AS_STRNLEN
++ .p2align 4
++L(loop):
++
++ addq $64, %rax
++ cmpq %rax, %r10
++ je L(exit_end)
++
++ movdqa (%rax), %xmm0
++ PMINU 16(%rax), %xmm0
++ PMINU 32(%rax), %xmm0
++ PMINU 48(%rax), %xmm0
++ PCMPEQ %xmm3, %xmm0
++ pmovmskb %xmm0, %edx
++ testl %edx, %edx
++ jne L(exit)
++ jmp L(loop)
++
++ .p2align 4
++L(exit_end):
++ cmp %rax, %r11
++ je L(first) /* Do not read when end is at page boundary. */
++ pxor %xmm0, %xmm0
++ FIND_ZERO
++
++L(first):
++ bts %r11, %rdx
++ bsfq %rdx, %rdx
++ addq %rdx, %rax
++ subq %rdi, %rax
++ SHIFT_RETURN
++ ret
++
++ .p2align 4
++L(exit):
++ pxor %xmm0, %xmm0
++ FIND_ZERO
++
++ bsfq %rdx, %rdx
++ addq %rdx, %rax
++ subq %rdi, %rax
++ SHIFT_RETURN
++ ret
++
++#else
++
++ /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
++ .p2align 4
++L(loop):
++
++ movdqa 64(%rax), %xmm0
++ PMINU 80(%rax), %xmm0
++ PMINU 96(%rax), %xmm0
++ PMINU 112(%rax), %xmm0
++ PCMPEQ %xmm3, %xmm0
++ pmovmskb %xmm0, %edx
++ testl %edx, %edx
++ jne L(exit64)
++
++ subq $-128, %rax
++
++ movdqa (%rax), %xmm0
++ PMINU 16(%rax), %xmm0
++ PMINU 32(%rax), %xmm0
++ PMINU 48(%rax), %xmm0
++ PCMPEQ %xmm3, %xmm0
++ pmovmskb %xmm0, %edx
++ testl %edx, %edx
++ jne L(exit0)
++ jmp L(loop)
++
++ .p2align 4
++L(exit64):
++ addq $64, %rax
++L(exit0):
++ pxor %xmm0, %xmm0
++ FIND_ZERO
++
++ bsfq %rdx, %rdx
++ addq %rdx, %rax
++ subq %rdi, %rax
++ SHIFT_RETURN
++ ret
++
++#endif
++
++END(strlen)
+diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
+new file mode 100644
+index 0000000000..0dcea18dbb
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STRNCAT
++#define STRCAT __strncat_avx2_rtm
++#include "strcat-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/strncat-evex.S b/sysdeps/x86_64/multiarch/strncat-evex.S
+new file mode 100644
+index 0000000000..8884f02371
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncat-evex.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STRNCAT
++#define STRCAT __strncat_evex
++#include "strcat-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
+new file mode 100644
+index 0000000000..37d1224bb9
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define STRCMP __strncmp_avx2_rtm
++#define USE_AS_STRNCMP 1
++#include "strcmp-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/strncmp-evex.S b/sysdeps/x86_64/multiarch/strncmp-evex.S
+new file mode 100644
+index 0000000000..a1d53e8c9f
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncmp-evex.S
+@@ -0,0 +1,3 @@
++#define STRCMP __strncmp_evex
++#define USE_AS_STRNCMP 1
++#include "strcmp-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
+index 3c94b3ffd9..7accba2b7c 100644
+--- a/sysdeps/x86_64/multiarch/strncmp.c
++++ b/sysdeps/x86_64/multiarch/strncmp.c
+@@ -30,16 +30,29 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
+ extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
++extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
+
+ static inline void *
+ IFUNC_SELECTOR (void)
+ {
+ const struct cpu_features* cpu_features = __get_cpu_features ();
+
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
++ {
++ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
++ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
++ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
++ && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
++ return OPTIMIZE (evex);
++
++ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
++ return OPTIMIZE (avx2_rtm);
++
++ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
++ return OPTIMIZE (avx2);
++ }
+
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
+ && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
+diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
+new file mode 100644
+index 0000000000..79e7083299
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STRNCPY
++#define STRCPY __strncpy_avx2_rtm
++#include "strcpy-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/strncpy-evex.S b/sysdeps/x86_64/multiarch/strncpy-evex.S
+new file mode 100644
+index 0000000000..40e391f0da
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strncpy-evex.S
+@@ -0,0 +1,3 @@
++#define USE_AS_STRNCPY
++#define STRCPY __strncpy_evex
++#include "strcpy-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
+new file mode 100644
+index 0000000000..04f1626a5c
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define STRLEN __strnlen_avx2_rtm
++#define USE_AS_STRNLEN 1
++
++#include "strlen-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/strnlen-evex.S b/sysdeps/x86_64/multiarch/strnlen-evex.S
+new file mode 100644
+index 0000000000..722022f303
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strnlen-evex.S
+@@ -0,0 +1,4 @@
++#define STRLEN __strnlen_evex
++#define USE_AS_STRNLEN 1
++
++#include "strlen-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..5def14ec1c
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
+@@ -0,0 +1,12 @@
++#ifndef STRRCHR
++# define STRRCHR __strrchr_avx2_rtm
++#endif
++
++#define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
++
++#define VZEROUPPER_RETURN jmp L(return_vzeroupper)
++
++#define SECTION(p) p##.avx.rtm
++
++#include "strrchr-avx2.S"
+diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
+index 146bdd51d0..ad91fab991 100644
+--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S
++++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
+@@ -36,9 +36,13 @@
+ # define VZEROUPPER vzeroupper
+ # endif
+
++# ifndef SECTION
++# define SECTION(p) p##.avx
++# endif
++
+ # define VEC_SIZE 32
+
+- .section .text.avx,"ax",@progbits
++ .section SECTION(.text),"ax",@progbits
+ ENTRY (STRRCHR)
+ movd %esi, %xmm4
+ movl %edi, %ecx
+@@ -166,8 +170,8 @@ L(return_value):
+ # endif
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+- VZEROUPPER
+- ret
++L(return_vzeroupper):
++ ZERO_UPPER_VEC_REGISTERS_RETURN
+
+ .p2align 4
+ L(match):
+@@ -198,8 +202,7 @@ L(find_nul):
+ jz L(return_value)
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(char_and_nul):
+@@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec):
+ jz L(return_null)
+ bsrl %eax, %eax
+ leaq -VEC_SIZE(%rdi, %rax), %rax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ .p2align 4
+ L(return_null):
+ xorl %eax, %eax
+- VZEROUPPER
+- ret
++ VZEROUPPER_RETURN
+
+ END (STRRCHR)
+ #endif
+diff --git a/sysdeps/x86_64/multiarch/strrchr-evex.S b/sysdeps/x86_64/multiarch/strrchr-evex.S
+new file mode 100644
+index 0000000000..f920b5a584
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/strrchr-evex.S
+@@ -0,0 +1,265 @@
++/* strrchr/wcsrchr optimized with 256-bit EVEX instructions.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#if IS_IN (libc)
++
++# include <sysdep.h>
++
++# ifndef STRRCHR
++# define STRRCHR __strrchr_evex
++# endif
++
++# define VMOVU vmovdqu64
++# define VMOVA vmovdqa64
++
++# ifdef USE_AS_WCSRCHR
++# define VPBROADCAST vpbroadcastd
++# define VPCMP vpcmpd
++# define SHIFT_REG r8d
++# else
++# define VPBROADCAST vpbroadcastb
++# define VPCMP vpcmpb
++# define SHIFT_REG ecx
++# endif
++
++# define XMMZERO xmm16
++# define YMMZERO ymm16
++# define YMMMATCH ymm17
++# define YMM1 ymm18
++
++# define VEC_SIZE 32
++
++ .section .text.evex,"ax",@progbits
++ENTRY (STRRCHR) /* In: RDI = string, ESI = CHAR. Out: RAX = pointer to the last CHAR in the string, or 0 if there is none. */
++ movl %edi, %ecx
++ /* Broadcast CHAR (passed in ESI) to YMMMATCH. */
++ VPBROADCAST %esi, %YMMMATCH
++
++ vpxorq %XMMZERO, %XMMZERO, %XMMZERO
++
++ /* Check if we may cross page boundary with one vector load. The test is conservative: it is made against a 2 * VEC_SIZE window, not the page size. */
++ andl $(2 * VEC_SIZE - 1), %ecx
++ cmpl $VEC_SIZE, %ecx
++ ja L(cros_page_boundary)
++
++ VMOVU (%rdi), %YMM1
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %ecx
++ kmovd %k1, %eax
++
++ addq $VEC_SIZE, %rdi
++
++ testl %eax, %eax
++ jnz L(first_vec)
++
++ testl %ecx, %ecx
++ jnz L(return_null)
++
++ andq $-VEC_SIZE, %rdi
++ xorl %edx, %edx /* EDX = 0: no CHAR match remembered yet. */
++ jmp L(aligned_loop)
++
++ .p2align 4
++L(first_vec):
++ /* Check if there is a null byte. */
++ testl %ecx, %ecx
++ jnz L(char_and_nul_in_first_vec)
++
++ /* Remember the match (mask in EDX, pointer past its vector in RSI) and keep searching. */
++ movl %eax, %edx
++ movq %rdi, %rsi
++ andq $-VEC_SIZE, %rdi
++ jmp L(aligned_loop)
++
++ .p2align 4
++L(cros_page_boundary):
++ andl $(VEC_SIZE - 1), %ecx /* ECX = misalignment of the string within its vector. */
++ andq $-VEC_SIZE, %rdi
++
++# ifdef USE_AS_WCSRCHR
++ /* NB: Divide shift count by 4 since each bit in K1 represents 4
++ bytes. */
++ movl %ecx, %SHIFT_REG
++ sarl $2, %SHIFT_REG
++# endif
++
++ VMOVA (%rdi), %YMM1
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %edx
++ kmovd %k1, %eax
++
++ shrxl %SHIFT_REG, %edx, %edx /* Drop mask bits that precede the start of the string. */
++ shrxl %SHIFT_REG, %eax, %eax
++ addq $VEC_SIZE, %rdi
++
++ /* Check if there is a CHAR. */
++ testl %eax, %eax
++ jnz L(found_char)
++
++ testl %edx, %edx
++ jnz L(return_null)
++
++ jmp L(aligned_loop) /* EDX is known to be zero here, i.e. no match remembered. */
++
++ .p2align 4
++L(found_char):
++ testl %edx, %edx
++ jnz L(char_and_nul)
++
++ /* Remember the match and keep searching. */
++ movl %eax, %edx
++ leaq (%rdi, %rcx), %rsi /* Add the misalignment back because the remembered mask was shifted by it. */
++
++ .p2align 4
++L(aligned_loop):
++ VMOVA (%rdi), %YMM1
++ addq $VEC_SIZE, %rdi
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %ecx
++ kmovd %k1, %eax
++ orl %eax, %ecx /* Nonzero if this vector holds a CHAR or a null. */
++ jnz L(char_nor_null)
++
++ VMOVA (%rdi), %YMM1
++ add $VEC_SIZE, %rdi
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %ecx
++ kmovd %k1, %eax
++ orl %eax, %ecx
++ jnz L(char_nor_null)
++
++ VMOVA (%rdi), %YMM1
++ addq $VEC_SIZE, %rdi
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %ecx
++ kmovd %k1, %eax
++ orl %eax, %ecx
++ jnz L(char_nor_null)
++
++ VMOVA (%rdi), %YMM1
++ addq $VEC_SIZE, %rdi
++
++ /* Each bit in K0 represents a null byte in YMM1. */
++ VPCMP $0, %YMMZERO, %YMM1, %k0
++ /* Each bit in K1 represents a CHAR in YMM1. */
++ VPCMP $0, %YMMMATCH, %YMM1, %k1
++ kmovd %k0, %ecx
++ kmovd %k1, %eax
++ orl %eax, %ecx
++ jz L(aligned_loop)
++
++ .p2align 4
++L(char_nor_null):
++ /* Found a CHAR or a null byte in the loop. */
++ testl %eax, %eax
++ jnz L(match)
++L(return_value):
++ testl %edx, %edx /* Was a CHAR remembered earlier? */
++ jz L(return_null)
++ movl %edx, %eax
++ movq %rsi, %rdi
++ bsrl %eax, %eax /* Highest set bit = last CHAR in the remembered vector. */
++# ifdef USE_AS_WCSRCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
++# else
++ leaq -VEC_SIZE(%rdi, %rax), %rax
++# endif
++ ret
++
++ .p2align 4
++L(match):
++ /* Found a CHAR. Check if there is a null byte. */
++ kmovd %k0, %ecx /* Reload the null mask; ECX was merged with EAX in the loop. */
++ testl %ecx, %ecx
++ jnz L(find_nul)
++
++ /* Remember the match and keep searching. */
++ movl %eax, %edx
++ movq %rdi, %rsi
++ jmp L(aligned_loop)
++
++ .p2align 4
++L(find_nul):
++ /* Mask out any matching bits after the null byte. */
++ movl %ecx, %r8d
++ subl $1, %r8d
++ xorl %ecx, %r8d /* R8D = all bits up to and including the first null. */
++ andl %r8d, %eax
++ testl %eax, %eax
++ /* If there is no CHAR here, return the remembered one. */
++ jz L(return_value)
++ bsrl %eax, %eax
++# ifdef USE_AS_WCSRCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
++# else
++ leaq -VEC_SIZE(%rdi, %rax), %rax
++# endif
++ ret
++
++ .p2align 4
++L(char_and_nul):
++ /* Found both a CHAR and a null byte. */
++ addq %rcx, %rdi /* Add the misalignment back because the masks were shifted by it. */
++ movl %edx, %ecx /* Move the null mask into ECX, where the code below reads it. */
++L(char_and_nul_in_first_vec):
++ /* Mask out any matching bits after the null byte. */
++ movl %ecx, %r8d
++ subl $1, %r8d
++ xorl %ecx, %r8d
++ andl %r8d, %eax
++ testl %eax, %eax
++ /* Return null pointer if the null byte comes first. */
++ jz L(return_null)
++ bsrl %eax, %eax
++# ifdef USE_AS_WCSRCHR
++ /* NB: Multiply wchar_t count by 4 to get the number of bytes. */
++ leaq -VEC_SIZE(%rdi, %rax, 4), %rax
++# else
++ leaq -VEC_SIZE(%rdi, %rax), %rax
++# endif
++ ret
++
++ .p2align 4
++L(return_null):
++ xorl %eax, %eax
++ ret
++
++END (STRRCHR)
++#endif
+diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
+new file mode 100644
+index 0000000000..d49dbbf0b4
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define STRCHR __wcschr_avx2_rtm
++#define USE_AS_WCSCHR 1
++#include "strchr-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcschr-evex.S b/sysdeps/x86_64/multiarch/wcschr-evex.S
+new file mode 100644
+index 0000000000..7cb8f1e41a
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcschr-evex.S
+@@ -0,0 +1,3 @@
++#define STRCHR __wcschr_evex
++#define USE_AS_WCSCHR 1
++#include "strchr-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
+new file mode 100644
+index 0000000000..d6ca2b8064
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define STRCMP __wcscmp_avx2_rtm
++#define USE_AS_WCSCMP 1
++
++#include "strcmp-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcscmp-evex.S b/sysdeps/x86_64/multiarch/wcscmp-evex.S
+new file mode 100644
+index 0000000000..42e73e51eb
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcscmp-evex.S
+@@ -0,0 +1,4 @@
++#define STRCMP __wcscmp_evex
++#define USE_AS_WCSCMP 1
++
++#include "strcmp-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
+new file mode 100644
+index 0000000000..35658d7365
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define STRLEN __wcslen_avx2_rtm
++#define USE_AS_WCSLEN 1
++
++#include "strlen-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcslen-evex.S b/sysdeps/x86_64/multiarch/wcslen-evex.S
+new file mode 100644
+index 0000000000..bdafa83bd5
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcslen-evex.S
+@@ -0,0 +1,4 @@
++#define STRLEN __wcslen_evex
++#define USE_AS_WCSLEN 1
++
++#include "strlen-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
+new file mode 100644
+index 0000000000..7e62621afc
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
+@@ -0,0 +1,4 @@
++#define AS_WCSLEN
++#define strlen __wcslen_sse4_1
++
++#include "strlen-vec.S"
+diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c
+index bb97438c7f..26b5fdffd6 100644
+--- a/sysdeps/x86_64/multiarch/wcslen.c
++++ b/sysdeps/x86_64/multiarch/wcslen.c
+@@ -24,7 +24,7 @@
+ # undef __wcslen
+
+ # define SYMBOL_NAME wcslen
+-# include "ifunc-avx2.h"
++# include "ifunc-wcslen.h"
+
+ libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
+ weak_alias (__wcslen, wcslen);
+diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
+new file mode 100644
+index 0000000000..4e88c70cc6
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
+@@ -0,0 +1,5 @@
++#define STRCMP __wcsncmp_avx2_rtm
++#define USE_AS_STRNCMP 1
++#define USE_AS_WCSCMP 1
++
++#include "strcmp-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsncmp-evex.S b/sysdeps/x86_64/multiarch/wcsncmp-evex.S
+new file mode 100644
+index 0000000000..8a8e310713
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsncmp-evex.S
+@@ -0,0 +1,5 @@
++#define STRCMP __wcsncmp_evex
++#define USE_AS_STRNCMP 1
++#define USE_AS_WCSCMP 1
++
++#include "strcmp-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
+new file mode 100644
+index 0000000000..7437ebee2d
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
+@@ -0,0 +1,5 @@
++#define STRLEN __wcsnlen_avx2_rtm
++#define USE_AS_WCSLEN 1
++#define USE_AS_STRNLEN 1
++
++#include "strlen-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsnlen-evex.S b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
+new file mode 100644
+index 0000000000..24773bb4e2
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsnlen-evex.S
+@@ -0,0 +1,5 @@
++#define STRLEN __wcsnlen_evex
++#define USE_AS_WCSLEN 1
++#define USE_AS_STRNLEN 1
++
++#include "strlen-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
+index a8cab0cb00..5fa51fe07c 100644
+--- a/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
++++ b/sysdeps/x86_64/multiarch/wcsnlen-sse4_1.S
+@@ -2,4 +2,4 @@
+ #define AS_STRNLEN
+ #define strlen __wcsnlen_sse4_1
+
+-#include "../strlen.S"
++#include "strlen-vec.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
+index 52e7e5d4f3..f15c1b328b 100644
+--- a/sysdeps/x86_64/multiarch/wcsnlen.c
++++ b/sysdeps/x86_64/multiarch/wcsnlen.c
+@@ -24,27 +24,7 @@
+ # undef __wcsnlen
+
+ # define SYMBOL_NAME wcsnlen
+-# include <init-arch.h>
+-
+-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
+-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
+-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+-
+-static inline void *
+-IFUNC_SELECTOR (void)
+-{
+- const struct cpu_features* cpu_features = __get_cpu_features ();
+-
+- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER)
+- && CPU_FEATURE_USABLE_P (cpu_features, AVX2)
+- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
+- return OPTIMIZE (avx2);
+-
+- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
+- return OPTIMIZE (sse4_1);
+-
+- return OPTIMIZE (sse2);
+-}
++# include "ifunc-wcslen.h"
+
+ libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
+ weak_alias (__wcsnlen, wcsnlen);
+diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..9bf760833f
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
+@@ -0,0 +1,3 @@
++#define STRRCHR __wcsrchr_avx2_rtm
++#define USE_AS_WCSRCHR 1
++#include "strrchr-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wcsrchr-evex.S b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+new file mode 100644
+index 0000000000..c64602f7dc
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wcsrchr-evex.S
+@@ -0,0 +1,3 @@
++#define STRRCHR __wcsrchr_evex
++#define USE_AS_WCSRCHR 1
++#include "strrchr-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
+new file mode 100644
+index 0000000000..58ed21db01
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
+@@ -0,0 +1,4 @@
++#define MEMCHR __wmemchr_avx2_rtm
++#define USE_AS_WMEMCHR 1
++
++#include "memchr-avx2-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wmemchr-evex.S b/sysdeps/x86_64/multiarch/wmemchr-evex.S
+new file mode 100644
+index 0000000000..06cd0f9f5a
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wmemchr-evex.S
+@@ -0,0 +1,4 @@
++#define MEMCHR __wmemchr_evex
++#define USE_AS_WMEMCHR 1
++
++#include "memchr-evex.S"
+diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
+new file mode 100644
+index 0000000000..31104d1215
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
+@@ -0,0 +1,4 @@
++#define MEMCMP __wmemcmp_avx2_movbe_rtm
++#define USE_AS_WMEMCMP 1
++
++#include "memcmp-avx2-movbe-rtm.S"
+diff --git a/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
+new file mode 100644
+index 0000000000..4726d74aa1
+--- /dev/null
++++ b/sysdeps/x86_64/multiarch/wmemcmp-evex-movbe.S
+@@ -0,0 +1,4 @@
++#define MEMCMP __wmemcmp_evex_movbe
++#define USE_AS_WMEMCMP 1
++
++#include "memcmp-evex-movbe.S"
+diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
+index 2e226d0d55..8422c15cc8 100644
+--- a/sysdeps/x86_64/strlen.S
++++ b/sysdeps/x86_64/strlen.S
+@@ -1,5 +1,5 @@
+-/* SSE2 version of strlen/wcslen.
+- Copyright (C) 2012-2020 Free Software Foundation, Inc.
++/* SSE2 version of strlen.
++ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+@@ -16,243 +16,6 @@
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+-#include <sysdep.h>
++#include "multiarch/strlen-vec.S"
+
+-#ifdef AS_WCSLEN
+-# define PMINU pminud
+-# define PCMPEQ pcmpeqd
+-# define SHIFT_RETURN shrq $2, %rax
+-#else
+-# define PMINU pminub
+-# define PCMPEQ pcmpeqb
+-# define SHIFT_RETURN
+-#endif
+-
+-/* Long lived register in strlen(s), strnlen(s, n) are:
+-
+- %xmm3 - zero
+- %rdi - s
+- %r10 (s+n) & (~(64-1))
+- %r11 s+n
+-*/
+-
+-
+-.text
+-ENTRY(strlen)
+-
+-/* Test 64 bytes from %rax for zero. Save result as bitmask in %rdx. */
+-#define FIND_ZERO \
+- PCMPEQ (%rax), %xmm0; \
+- PCMPEQ 16(%rax), %xmm1; \
+- PCMPEQ 32(%rax), %xmm2; \
+- PCMPEQ 48(%rax), %xmm3; \
+- pmovmskb %xmm0, %esi; \
+- pmovmskb %xmm1, %edx; \
+- pmovmskb %xmm2, %r8d; \
+- pmovmskb %xmm3, %ecx; \
+- salq $16, %rdx; \
+- salq $16, %rcx; \
+- orq %rsi, %rdx; \
+- orq %r8, %rcx; \
+- salq $32, %rcx; \
+- orq %rcx, %rdx;
+-
+-#ifdef AS_STRNLEN
+-/* Do not read anything when n==0. */
+- test %RSI_LP, %RSI_LP
+- jne L(n_nonzero)
+- xor %rax, %rax
+- ret
+-L(n_nonzero):
+-# ifdef AS_WCSLEN
+- shl $2, %RSI_LP
+-# endif
+-
+-/* Initialize long lived registers. */
+-
+- add %RDI_LP, %RSI_LP
+- mov %RSI_LP, %R10_LP
+- and $-64, %R10_LP
+- mov %RSI_LP, %R11_LP
+-#endif
+-
+- pxor %xmm0, %xmm0
+- pxor %xmm1, %xmm1
+- pxor %xmm2, %xmm2
+- pxor %xmm3, %xmm3
+- movq %rdi, %rax
+- movq %rdi, %rcx
+- andq $4095, %rcx
+-/* Offsets 4032-4047 will be aligned into 4032 thus fit into page. */
+- cmpq $4047, %rcx
+-/* We cannot unify this branching as it would be ~6 cycles slower. */
+- ja L(cross_page)
+-
+-#ifdef AS_STRNLEN
+-/* Test if end is among first 64 bytes. */
+-# define STRNLEN_PROLOG \
+- mov %r11, %rsi; \
+- subq %rax, %rsi; \
+- andq $-64, %rax; \
+- testq $-64, %rsi; \
+- je L(strnlen_ret)
+-#else
+-# define STRNLEN_PROLOG andq $-64, %rax;
+-#endif
+-
+-/* Ignore bits in mask that come before start of string. */
+-#define PROLOG(lab) \
+- movq %rdi, %rcx; \
+- xorq %rax, %rcx; \
+- STRNLEN_PROLOG; \
+- sarq %cl, %rdx; \
+- test %rdx, %rdx; \
+- je L(lab); \
+- bsfq %rdx, %rax; \
+- SHIFT_RETURN; \
+- ret
+-
+-#ifdef AS_STRNLEN
+- andq $-16, %rax
+- FIND_ZERO
+-#else
+- /* Test first 16 bytes unaligned. */
+- movdqu (%rax), %xmm4
+- PCMPEQ %xmm0, %xmm4
+- pmovmskb %xmm4, %edx
+- test %edx, %edx
+- je L(next48_bytes)
+- bsf %edx, %eax /* If eax is zeroed 16bit bsf can be used. */
+- SHIFT_RETURN
+- ret
+-
+-L(next48_bytes):
+-/* Same as FIND_ZERO except we do not check first 16 bytes. */
+- andq $-16, %rax
+- PCMPEQ 16(%rax), %xmm1
+- PCMPEQ 32(%rax), %xmm2
+- PCMPEQ 48(%rax), %xmm3
+- pmovmskb %xmm1, %edx
+- pmovmskb %xmm2, %r8d
+- pmovmskb %xmm3, %ecx
+- salq $16, %rdx
+- salq $16, %rcx
+- orq %r8, %rcx
+- salq $32, %rcx
+- orq %rcx, %rdx
+-#endif
+-
+- /* When no zero byte is found xmm1-3 are zero so we do not have to
+- zero them. */
+- PROLOG(loop)
+-
+- .p2align 4
+-L(cross_page):
+- andq $-64, %rax
+- FIND_ZERO
+- PROLOG(loop_init)
+-
+-#ifdef AS_STRNLEN
+-/* We must do this check to correctly handle strnlen (s, -1). */
+-L(strnlen_ret):
+- bts %rsi, %rdx
+- sarq %cl, %rdx
+- test %rdx, %rdx
+- je L(loop_init)
+- bsfq %rdx, %rax
+- SHIFT_RETURN
+- ret
+-#endif
+- .p2align 4
+-L(loop_init):
+- pxor %xmm1, %xmm1
+- pxor %xmm2, %xmm2
+- pxor %xmm3, %xmm3
+-#ifdef AS_STRNLEN
+- .p2align 4
+-L(loop):
+-
+- addq $64, %rax
+- cmpq %rax, %r10
+- je L(exit_end)
+-
+- movdqa (%rax), %xmm0
+- PMINU 16(%rax), %xmm0
+- PMINU 32(%rax), %xmm0
+- PMINU 48(%rax), %xmm0
+- PCMPEQ %xmm3, %xmm0
+- pmovmskb %xmm0, %edx
+- testl %edx, %edx
+- jne L(exit)
+- jmp L(loop)
+-
+- .p2align 4
+-L(exit_end):
+- cmp %rax, %r11
+- je L(first) /* Do not read when end is at page boundary. */
+- pxor %xmm0, %xmm0
+- FIND_ZERO
+-
+-L(first):
+- bts %r11, %rdx
+- bsfq %rdx, %rdx
+- addq %rdx, %rax
+- subq %rdi, %rax
+- SHIFT_RETURN
+- ret
+-
+- .p2align 4
+-L(exit):
+- pxor %xmm0, %xmm0
+- FIND_ZERO
+-
+- bsfq %rdx, %rdx
+- addq %rdx, %rax
+- subq %rdi, %rax
+- SHIFT_RETURN
+- ret
+-
+-#else
+-
+- /* Main loop. Unrolled twice to improve L2 cache performance on core2. */
+- .p2align 4
+-L(loop):
+-
+- movdqa 64(%rax), %xmm0
+- PMINU 80(%rax), %xmm0
+- PMINU 96(%rax), %xmm0
+- PMINU 112(%rax), %xmm0
+- PCMPEQ %xmm3, %xmm0
+- pmovmskb %xmm0, %edx
+- testl %edx, %edx
+- jne L(exit64)
+-
+- subq $-128, %rax
+-
+- movdqa (%rax), %xmm0
+- PMINU 16(%rax), %xmm0
+- PMINU 32(%rax), %xmm0
+- PMINU 48(%rax), %xmm0
+- PCMPEQ %xmm3, %xmm0
+- pmovmskb %xmm0, %edx
+- testl %edx, %edx
+- jne L(exit0)
+- jmp L(loop)
+-
+- .p2align 4
+-L(exit64):
+- addq $64, %rax
+-L(exit0):
+- pxor %xmm0, %xmm0
+- FIND_ZERO
+-
+- bsfq %rdx, %rdx
+- addq %rdx, %rax
+- subq %rdi, %rax
+- SHIFT_RETURN
+- ret
+-
+-#endif
+-
+-END(strlen)
+ libc_hidden_builtin_def (strlen)
+diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
+index 0b73674f68..c8ad778fee 100644
+--- a/sysdeps/x86_64/sysdep.h
++++ b/sysdeps/x86_64/sysdep.h
+@@ -95,6 +95,28 @@ lose: \
+ #define R14_LP r14
+ #define R15_LP r15
+
++/* Zero upper vector registers and return with xtest. NB: Use VZEROALL
++ to avoid the RTM abort triggered by VZEROUPPER inside a transaction. */
++#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
++ xtest; \
++ jz 1f; \
++ vzeroall; \
++ ret; \
++1: \
++ vzeroupper; \
++ ret
++
++/* Zero upper vector registers and return. */
++#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
++# define ZERO_UPPER_VEC_REGISTERS_RETURN \
++ VZEROUPPER; \
++ ret
++#endif
++
++#ifndef VZEROUPPER_RETURN
++# define VZEROUPPER_RETURN VZEROUPPER; ret
++#endif
++
+ #else /* __ASSEMBLER__ */
+
+ /* Long and pointer size in bytes. */
+diff --git a/sysdeps/x86_64/tst-rsi-strlen.c b/sysdeps/x86_64/tst-rsi-strlen.c
+new file mode 100644
+index 0000000000..a80c4f85c2
+--- /dev/null
++++ b/sysdeps/x86_64/tst-rsi-strlen.c
+@@ -0,0 +1,81 @@
++/* Test strlen with 0 in the RSI register.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#ifdef WIDE
++# define TEST_NAME "wcslen"
++#else
++# define TEST_NAME "strlen"
++#endif /* WIDE */
++
++#define TEST_MAIN
++#include <string/test-string.h>
++
++#ifdef WIDE
++# include <wchar.h>
++# define STRLEN wcslen
++# define CHAR wchar_t
++#else
++# define STRLEN strlen
++# define CHAR char
++#endif /* WIDE */
++
++IMPL (STRLEN, 1)
++
++typedef size_t (*proto_t) (const CHAR *);
++
++typedef struct
++{
++ void (*fn) (void);
++} parameter_t;
++
++size_t
++__attribute__ ((weak, noinline, noclone)) /* NOTE(review): presumably keeps the compiler from inlining this call and dropping ZERO -- confirm. */
++do_strlen (parameter_t *a, int zero, const CHAR *str) /* ZERO is never read here; it only occupies the second argument slot. */
++{
++ return CALL (a, str); /* CALL comes from test-string.h (not shown); only A and STR are handed to it. */
++}
++
++static int
++test_main (void) /* Run STRLEN through each implementation visited by FOR_EACH_IMPL; return EXIT_FAILURE if any result differs from SIZE. */
++{
++ test_init ();
++
++ size_t size = page_size / sizeof (CHAR) - 1; /* Number of CHARs in page_size bytes, minus one slot for the terminator. */
++ CHAR *buf = (CHAR *) buf2; /* buf2 comes from test-string.h (not shown); presumably test_init fills it with non-zero data -- confirm. */
++ buf[size] = 0; /* Terminate at index SIZE, which is the length every implementation must report. */
++
++ parameter_t a;
++
++ int ret = 0;
++ FOR_EACH_IMPL (impl, 0)
++ {
++ a.fn = impl->fn;
++ /* NB: Pass 0 in RSI, via do_strlen's otherwise unused ZERO parameter. */
++ size_t res = do_strlen (&a, 0, buf);
++ if (res != size)
++ {
++ error (0, 0, "Wrong result in function %s: %zu != %zu",
++ impl->name, res, size);
++ ret = 1; /* Keep going so every failing implementation is reported. */
++ }
++ }
++
++ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
++}
++
++#include <support/test-driver.c>
+diff --git a/sysdeps/x86_64/tst-rsi-wcslen.c b/sysdeps/x86_64/tst-rsi-wcslen.c
+new file mode 100644
+index 0000000000..f45a7dfb51
+--- /dev/null
++++ b/sysdeps/x86_64/tst-rsi-wcslen.c
+@@ -0,0 +1,20 @@
++/* Test wcslen with 0 in the RSI register.
++ Copyright (C) 2021 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#define WIDE 1
++#include "tst-rsi-strlen.c"
+diff --git a/sysvipc/test-sysvsem.c b/sysvipc/test-sysvsem.c
+index 01dbff343a..b7284e0b48 100644
+--- a/sysvipc/test-sysvsem.c
++++ b/sysvipc/test-sysvsem.c
+@@ -20,6 +20,7 @@
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <string.h>
++#include <stdbool.h>
+ #include <sys/types.h>
+ #include <sys/ipc.h>
+ #include <sys/sem.h>
+diff --git a/version.h b/version.h
+index 83cd196798..e6ca7a8857 100644
+--- a/version.h
++++ b/version.h
+@@ -1,4 +1,4 @@
+ /* This file just defines the current version number of libc. */
+
+-#define RELEASE "release"
++#define RELEASE "stable"
+ #define VERSION "2.32"
+diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure
+--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:02:32.741186019 +1000
++++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/64/configure 2021-09-18 21:03:05.314302356 +1000
+@@ -4,10 +4,10 @@
+ test -n "$libc_cv_slibdir" ||
+ case "$prefix" in
+ /usr | /usr/)
+- libc_cv_slibdir='/lib64'
+- libc_cv_rtlddir='/lib64'
++ libc_cv_slibdir='/lib'
++ libc_cv_rtlddir='/lib'
+ if test "$libdir" = '${exec_prefix}/lib'; then
+- libdir='${exec_prefix}/lib64';
++ libdir='${exec_prefix}/lib';
+ # Locale data can be shared between 32-bit and 64-bit libraries.
+ libc_cv_complocaledir='${exec_prefix}/lib/locale'
+ fi
+diff -pruN glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h
+--- glibc-2.32.orig/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:02:32.742186053 +1000
++++ glibc-2.32/sysdeps/unix/sysv/linux/x86_64/ldconfig.h 2021-09-18 21:03:05.314302356 +1000
+@@ -18,9 +18,9 @@
+ #include <sysdeps/generic/ldconfig.h>
+
+ #define SYSDEP_KNOWN_INTERPRETER_NAMES \
+- { "/lib/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
++ { "/lib32/ld-linux.so.2", FLAG_ELF_LIBC6 }, \
+ { "/libx32/ld-linux-x32.so.2", FLAG_ELF_LIBC6 }, \
+- { "/lib64/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
++ { "/lib/ld-linux-x86-64.so.2", FLAG_ELF_LIBC6 },
+ #define SYSDEP_KNOWN_LIBRARY_NAMES \
+ { "libc.so.6", FLAG_ELF_LIBC6 }, \
+ { "libm.so.6", FLAG_ELF_LIBC6 },

Generated by cgit