[00/16] Linux v4.16 and v4.17 updates

Submitted by Szabolcs Nagy on June 14, 2018, 11:03 p.m.

Details

Message ID 20180614230337.GR4418@port70.net
State New
Series "Linux v4.16 and v4.17 updates"
Headers show

Patch hide | download patch | download mbox

From 9e18e6e7854d17aa1639d6145584df27f4b4f856 Mon Sep 17 00:00:00 2001
From: Szabolcs Nagy <nsz@port70.net>
Date: Sat, 28 Apr 2018 17:25:41 +0000
Subject: [PATCH 16/16] Add memfd_create, mlock2 and pkey_* apis

This patch adds support for the following linux syscalls and related
interfaces following glibc:

memfd_create (linux v3.17)
mlock2 (linux v4.4)
pkey_alloc (linux v4.9)
pkey_free (linux v4.9)
pkey_mprotect (linux v4.9)
pkey_get (glibc 2.27)
pkey_set (glibc 2.27)

pkey_get / pkey_set are glibc apis, not syscalls; only an always
failing generic implementation is provided in this patch. For the pkey_
apis to be useful, these will need target specific implementations.

MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.

Similar to glibc, mlock2 and pkey_mprotect have fallbacks to mlock and
mprotect respectively in case of special arguments.
---
 arch/powerpc/bits/mman.h   |  4 ++++
 arch/powerpc64/bits/mman.h |  4 ++++
 include/sys/mman.h         | 24 +++++++++++++++++++++---
 src/linux/memfd_create.c   |  8 ++++++++
 src/linux/mlock2.c         | 11 +++++++++++
 src/linux/pkey_alloc.c     | 22 ++++++++++++++++++++++
 src/linux/pkey_get.c       |  9 +++++++++
 src/linux/pkey_mprotect.c  | 15 +++++++++++++++
 src/linux/pkey_set.c       |  9 +++++++++
 9 files changed, 103 insertions(+), 3 deletions(-)
 create mode 100644 src/linux/memfd_create.c
 create mode 100644 src/linux/mlock2.c
 create mode 100644 src/linux/pkey_alloc.c
 create mode 100644 src/linux/pkey_get.c
 create mode 100644 src/linux/pkey_mprotect.c
 create mode 100644 src/linux/pkey_set.c

diff --git a/arch/powerpc/bits/mman.h b/arch/powerpc/bits/mman.h
index b3a675a8..23e18eb1 100644
--- a/arch/powerpc/bits/mman.h
+++ b/arch/powerpc/bits/mman.h
@@ -12,3 +12,7 @@ 
 #define MCL_FUTURE      0x4000
 #undef MCL_ONFAULT
 #define MCL_ONFAULT     0x8000
+
+#ifdef _GNU_SOURCE
+#define PKEY_DISABLE_EXECUTE   0x4
+#endif
diff --git a/arch/powerpc64/bits/mman.h b/arch/powerpc64/bits/mman.h
index b3a675a8..23e18eb1 100644
--- a/arch/powerpc64/bits/mman.h
+++ b/arch/powerpc64/bits/mman.h
@@ -12,3 +12,7 @@ 
 #define MCL_FUTURE      0x4000
 #undef MCL_ONFAULT
 #define MCL_ONFAULT     0x8000
+
+#ifdef _GNU_SOURCE
+#define PKEY_DISABLE_EXECUTE   0x4
+#endif
diff --git a/include/sys/mman.h b/include/sys/mman.h
index 19dd844e..643f882d 100644
--- a/include/sys/mman.h
+++ b/include/sys/mman.h
@@ -94,6 +94,20 @@  extern "C" {
 #define MADV_SOFT_OFFLINE 101
 #endif
 
+#ifdef _GNU_SOURCE
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+
+#define PKEY_DISABLE_ACCESS 0x1
+#define PKEY_DISABLE_WRITE 0x2
+
+#define MLOCK_ONFAULT 0x01
+
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+#define MFD_HUGETLB 0x0004U
+#endif
+
 #include <bits/mman.h>
 
 void *mmap (void *, size_t, int, int, int, off_t);
@@ -110,14 +124,18 @@  int mlockall (int);
 int munlockall (void);
 
 #ifdef _GNU_SOURCE
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
 void *mremap (void *, size_t, size_t, int, ...);
 int remap_file_pages (void *, size_t, int, size_t, int);
+int memfd_create (const char *, unsigned);
+int mlock2 (const void *, size_t, unsigned);
+int pkey_alloc (unsigned, unsigned);
+int pkey_free (int);
+int pkey_mprotect (void *, size_t, int, int);
+int pkey_get (int);
+int pkey_set (int, unsigned);
 #endif
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
-#define MLOCK_ONFAULT   0x01
 int madvise (void *, size_t, int);
 int mincore (void *, size_t, unsigned char *);
 #endif
diff --git a/src/linux/memfd_create.c b/src/linux/memfd_create.c
new file mode 100644
index 00000000..1649fe55
--- /dev/null
+++ b/src/linux/memfd_create.c
@@ -0,0 +1,8 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include "syscall.h"
+
+int memfd_create(const char *name, unsigned flags)
+{
+	return syscall(SYS_memfd_create, name, flags);
+}
diff --git a/src/linux/mlock2.c b/src/linux/mlock2.c
new file mode 100644
index 00000000..e1235c46
--- /dev/null
+++ b/src/linux/mlock2.c
@@ -0,0 +1,11 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <errno.h>
+#include "syscall.h"
+
+int mlock2(const void *addr, size_t len, unsigned flags)
+{
+	if (flags == 0)
+		return mlock(addr, len);
+	return syscall(SYS_mlock2, addr, len, flags);
+}
diff --git a/src/linux/pkey_alloc.c b/src/linux/pkey_alloc.c
new file mode 100644
index 00000000..8027cd12
--- /dev/null
+++ b/src/linux/pkey_alloc.c
@@ -0,0 +1,22 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <errno.h>
+#include "syscall.h"
+
+int pkey_alloc(unsigned flags, unsigned access)
+{
+#ifdef SYS_pkey_alloc
+	return syscall(SYS_pkey_alloc, flags, access);
+#else
+	return __syscall_ret(-ENOSYS);
+#endif
+}
+
+int pkey_free(int pkey)
+{
+#ifdef SYS_pkey_free
+	return syscall(SYS_pkey_free, pkey);
+#else
+	return __syscall_ret(-ENOSYS);
+#endif
+}
diff --git a/src/linux/pkey_get.c b/src/linux/pkey_get.c
new file mode 100644
index 00000000..d583fa9c
--- /dev/null
+++ b/src/linux/pkey_get.c
@@ -0,0 +1,9 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <errno.h>
+#include "syscall.h"
+
+int pkey_get(int pkey)
+{
+	return __syscall_ret(-ENOSYS);
+}
diff --git a/src/linux/pkey_mprotect.c b/src/linux/pkey_mprotect.c
new file mode 100644
index 00000000..0a1f97ad
--- /dev/null
+++ b/src/linux/pkey_mprotect.c
@@ -0,0 +1,15 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <errno.h>
+#include "syscall.h"
+
+int pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+	if (pkey == -1)
+		return mprotect(addr, len, prot);
+#ifdef SYS_pkey_mprotect
+	return syscall(SYS_pkey_mprotect, addr, len, prot, pkey);
+#else
+	return __syscall_ret(-ENOSYS);
+#endif
+}
diff --git a/src/linux/pkey_set.c b/src/linux/pkey_set.c
new file mode 100644
index 00000000..addf8da5
--- /dev/null
+++ b/src/linux/pkey_set.c
@@ -0,0 +1,9 @@ 
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <errno.h>
+#include "syscall.h"
+
+int pkey_set(int pkey, unsigned access)
+{
+	return __syscall_ret(-ENOSYS);
+}
-- 
2.16.3


Comments

Rich Felker June 19, 2018, 5:33 p.m.
On Fri, Jun 15, 2018 at 01:03:38AM +0200, Szabolcs Nagy wrote:
> respin of the v4.16 patches
> http://www.openwall.com/lists/musl/2018/04/28/1
> without the siginfo changes and with v4.17 additions.
> 
> the last patch is adding missing memory mapping related
> syscall wrappers, the pkey* support is incomplete.
> 
> Szabolcs Nagy (16):
>   sys/epoll.h: add EPOLLNVAL from linux v4.16
>   netinet/if_ether.h: add ETH_P_ERSPAN2 from linux v4.16
>   netinet/if_ether.h: add ETH_TLEN from linux v4.16
>   sys/ptrace.h: add PTRACE_SECCOMP_GET_METADATA from linux v4.16
>   aarch64: add HWCAP_ASIMDFHM from linux v4.16
>   powerpc: add pkey syscall numbers from linux v4.16
>   add MAP_FIXED_NOREPLACE from linux v4.17
>   add {MSG,SEM,SHM}_STAT_ANY from linux v4.17
>   add TCP_NLA_* from linux v4.17
>   add ETH_P_PREAUTH ethertype from linux v4.17
>   add speculation control prctls from linux v4.17
>   aarch64: add HWCAP_ flags from linux v4.17
>   mips: add HWCAP_ flags from linux v4.17
>   s390x: add kexec_file_load syscall number from linux v4.17

All of these look fine, committing as-is.

>   add si_codes from linux v4.17
>   Add memfd_create, mlock2 and pkey_* apis

See below:

> From 45493abeb3918e3722ef8d75bf231ef155a2b755 Mon Sep 17 00:00:00 2001
> From: Szabolcs Nagy <nsz@port70.net>
> Date: Sat, 9 Jun 2018 23:22:51 +0000
> Subject: [PATCH 15/16] add si_codes from linux v4.17
> 
> target specific si_codes were made generic in linux commits
> a402ab8cc7b0578c445f348c9010e62ab390bee8 (ia64) and
> 75abf64287cab73546382a4fa7fa2f4e3516efeb (parisc)
> ---
>  include/signal.h | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/include/signal.h b/include/signal.h
> index a4f85cca..69395468 100644
> --- a/include/signal.h
> +++ b/include/signal.h
> @@ -61,6 +61,13 @@ typedef struct sigaltstack stack_t;
>  #define FPE_FLTRES 6
>  #define FPE_FLTINV 7
>  #define FPE_FLTSUB 8
> +#define __FPE_DECOVF 9
> +#define __FPE_DECDIV 10
> +#define __FPE_DECERR 11
> +#define __FPE_INVASC 12
> +#define __FPE_INVDEC 13
> +#define FPE_FLTUNK 14
> +#define FPE_CONDTRAP 15
>  
>  #define ILL_ILLOPC 1
>  #define ILL_ILLOPN 2
> @@ -70,11 +77,17 @@ typedef struct sigaltstack stack_t;
>  #define ILL_PRVREG 6
>  #define ILL_COPROC 7
>  #define ILL_BADSTK 8
> +#define ILL_BADIADDR 9
> +#define __ILL_BREAK 10
> +#define __ILL_BNDMOD 11

Is there a reason some of these are __-prefixed? They don't need to
be (FPE_* and ILL_* are reserved for signal.h) and it seems wrong that
macros intended to be used by the application are in the
reserved-for-implementation-use namespace rather than the public one
for the interface.

> From 9e18e6e7854d17aa1639d6145584df27f4b4f856 Mon Sep 17 00:00:00 2001
> From: Szabolcs Nagy <nsz@port70.net>
> Date: Sat, 28 Apr 2018 17:25:41 +0000
> Subject: [PATCH 16/16] Add memfd_create, mlock2 and pkey_* apis
> 
> This patch adds support for the following linux syscalls and related
> interfaces following glibc:
> 
> memfd_create (linux v3.17)
> mlock2 (linux v4.4)
> pkey_alloc (linux v4.9)
> pkey_free (linux v4.9)
> pkey_mprotect (linux v4.9)
> pkey_get (glibc 2.27)
> pkey_set (glibc 2.27)
> 
> pkey_get / pkey_set are glibc apis, not syscalls, only an always
> failing generic implementation is provided in this patch, for pkey_
> apis to be useful these will need target specific implementations.
> 
> MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.
> 
> Similar to glibc, mlock2 and pkey_mprotect have fallbacks to mlock and
> mprotect respectively in case of special arguments.

I'm holding off on this one just because I don't understand the pkey
stuff, and it looks like it's not actually functional without
additional arch-specific code.

It might also make sense to split this into 2 or 3 patches, since
memfd_create and mlock2 seem like separate, simple functionality
unrelated to the pkey stuff.

Rich
Szabolcs Nagy June 19, 2018, 7:55 p.m.
* Rich Felker <dalias@libc.org> [2018-06-19 13:33:15 -0400]:
> On Fri, Jun 15, 2018 at 01:03:38AM +0200, Szabolcs Nagy wrote:
> > From 45493abeb3918e3722ef8d75bf231ef155a2b755 Mon Sep 17 00:00:00 2001
> > From: Szabolcs Nagy <nsz@port70.net>
> > Date: Sat, 9 Jun 2018 23:22:51 +0000
> > Subject: [PATCH 15/16] add si_codes from linux v4.17
> > 
> > target specific si_codes were made generic in linux commits
> > a402ab8cc7b0578c445f348c9010e62ab390bee8 (ia64) and
> > 75abf64287cab73546382a4fa7fa2f4e3516efeb (parisc)
> > ---
> >  include/signal.h | 13 +++++++++++++
> >  1 file changed, 13 insertions(+)
> > 
> > diff --git a/include/signal.h b/include/signal.h
> > index a4f85cca..69395468 100644
> > --- a/include/signal.h
> > +++ b/include/signal.h
> > @@ -61,6 +61,13 @@ typedef struct sigaltstack stack_t;
> >  #define FPE_FLTRES 6
> >  #define FPE_FLTINV 7
> >  #define FPE_FLTSUB 8
> > +#define __FPE_DECOVF 9
> > +#define __FPE_DECDIV 10
> > +#define __FPE_DECERR 11
> > +#define __FPE_INVASC 12
> > +#define __FPE_INVDEC 13
> > +#define FPE_FLTUNK 14
> > +#define FPE_CONDTRAP 15
> >  
> >  #define ILL_ILLOPC 1
> >  #define ILL_ILLOPN 2
> > @@ -70,11 +77,17 @@ typedef struct sigaltstack stack_t;
> >  #define ILL_PRVREG 6
> >  #define ILL_COPROC 7
> >  #define ILL_BADSTK 8
> > +#define ILL_BADIADDR 9
> > +#define __ILL_BREAK 10
> > +#define __ILL_BNDMOD 11
> 
> Is there a reason some of these are __-prefixed? They don't need to
> be (FPE_* and ILL_* are reserved for signal.h) and it seems wrong that
> macros intended to be used by the application are in the
> reserved-for-implementation-use namespace rather than the public one
> for the interface.
> 

this is how linux uapi exposes them

however it seems glibc has them without __ (and only on ia64)

i thought glibc included linux uapi but apparently it has its
own thing, so probably it's better if we don't do this.

> > From 9e18e6e7854d17aa1639d6145584df27f4b4f856 Mon Sep 17 00:00:00 2001
> > From: Szabolcs Nagy <nsz@port70.net>
> > Date: Sat, 28 Apr 2018 17:25:41 +0000
> > Subject: [PATCH 16/16] Add memfd_create, mlock2 and pkey_* apis
> > 
> > This patch adds support for the following linux syscalls and related
> > interfaces following glibc:
> > 
> > memfd_create (linux v3.17)
> > mlock2 (linux v4.4)
> > pkey_alloc (linux v4.9)
> > pkey_free (linux v4.9)
> > pkey_mprotect (linux v4.9)
> > pkey_get (glibc 2.27)
> > pkey_set (glibc 2.27)
> > 
> > pkey_get / pkey_set are glibc apis, not syscalls, only an always
> > failing generic implementation is provided in this patch, for pkey_
> > apis to be useful these will need target specific implementations.
> > 
> > MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.
> > 
> > Similar to glibc, mlock2 and pkey_mprotect have fallbacks to mlock and
> > mprotect respectively in case of special arguments.
> 
> I'm holding off on this one just because I don't understand the pkey
> stuff, and it looks like it's not actually functional without
> additional arch-specific code.
> 
> It might also make sense to split this into 2 or 3 patches, since
> memfd_create and mlock2 seem like separate, simple functionality
> unrelated to the pkey stuff.
> 

ok, i'll split those out.
Rich Felker June 20, 2018, 1:11 a.m.
On Tue, Jun 19, 2018 at 09:55:17PM +0200, Szabolcs Nagy wrote:
> * Rich Felker <dalias@libc.org> [2018-06-19 13:33:15 -0400]:
> > On Fri, Jun 15, 2018 at 01:03:38AM +0200, Szabolcs Nagy wrote:
> > > From 45493abeb3918e3722ef8d75bf231ef155a2b755 Mon Sep 17 00:00:00 2001
> > > From: Szabolcs Nagy <nsz@port70.net>
> > > Date: Sat, 9 Jun 2018 23:22:51 +0000
> > > Subject: [PATCH 15/16] add si_codes from linux v4.17
> > > 
> > > target specific si_codes were made generic in linux commits
> > > a402ab8cc7b0578c445f348c9010e62ab390bee8 (ia64) and
> > > 75abf64287cab73546382a4fa7fa2f4e3516efeb (parisc)
> > > ---
> > >  include/signal.h | 13 +++++++++++++
> > >  1 file changed, 13 insertions(+)
> > > 
> > > diff --git a/include/signal.h b/include/signal.h
> > > index a4f85cca..69395468 100644
> > > --- a/include/signal.h
> > > +++ b/include/signal.h
> > > @@ -61,6 +61,13 @@ typedef struct sigaltstack stack_t;
> > >  #define FPE_FLTRES 6
> > >  #define FPE_FLTINV 7
> > >  #define FPE_FLTSUB 8
> > > +#define __FPE_DECOVF 9
> > > +#define __FPE_DECDIV 10
> > > +#define __FPE_DECERR 11
> > > +#define __FPE_INVASC 12
> > > +#define __FPE_INVDEC 13
> > > +#define FPE_FLTUNK 14
> > > +#define FPE_CONDTRAP 15
> > >  
> > >  #define ILL_ILLOPC 1
> > >  #define ILL_ILLOPN 2
> > > @@ -70,11 +77,17 @@ typedef struct sigaltstack stack_t;
> > >  #define ILL_PRVREG 6
> > >  #define ILL_COPROC 7
> > >  #define ILL_BADSTK 8
> > > +#define ILL_BADIADDR 9
> > > +#define __ILL_BREAK 10
> > > +#define __ILL_BNDMOD 11
> > 
> > > Is there a reason some of these are __-prefixed? They don't need to
> > be (FPE_* and ILL_* are reserved for signal.h) and it seems wrong that
> > macros intended to be used by the application are in the
> > reserved-for-implementation-use namespace rather than the public one
> > for the interface.
> > 
> 
> this is how linux uapi exposes them
> 
> however it seems glibc has them without __ (and only on ia64)
> 
> i thought glibc included linux uapi but apparently it has its
> own thing, so probably it's better if we don't do this.

If they're ia64 (itanic) only we can just omit them entirely, I think.

> > > From 9e18e6e7854d17aa1639d6145584df27f4b4f856 Mon Sep 17 00:00:00 2001
> > > From: Szabolcs Nagy <nsz@port70.net>
> > > Date: Sat, 28 Apr 2018 17:25:41 +0000
> > > Subject: [PATCH 16/16] Add memfd_create, mlock2 and pkey_* apis
> > > 
> > > This patch adds support for the following linux syscalls and related
> > > interfaces following glibc:
> > > 
> > > memfd_create (linux v3.17)
> > > mlock2 (linux v4.4)
> > > pkey_alloc (linux v4.9)
> > > pkey_free (linux v4.9)
> > > pkey_mprotect (linux v4.9)
> > > pkey_get (glibc 2.27)
> > > pkey_set (glibc 2.27)
> > > 
> > > pkey_get / pkey_set are glibc apis, not syscalls, only an always
> > > failing generic implementation is provided in this patch, for pkey_
> > > apis to be useful these will need target specific implementations.
> > > 
> > > MLOCK_ONFAULT is moved under _GNU_SOURCE following glibc.
> > > 
> > > Similar to glibc, mlock2 and pkey_mprotect have fallbacks to mlock and
> > > mprotect respectively in case of special arguments.
> > 
> > I'm holding off on this one just because I don't understand the pkey
> > stuff, and it looks like it's not actually functional without
> > additional arch-specific code.
> > 
> > It might also make sense to split this into 2 or 3 patches, since
> > memfd_create and mlock2 seem like separate, simple functionality
> > unrelated to the pkey stuff.
> > 
> 
> ok, i'll split those out.

Sounds good. Then I can merge the trivial ones and we can discuss the
pkey stuff and what the requirements for a working implementation are.

Rich