add statx

Submitted by Ben Noordhuis on Jan. 19, 2020, 12:12 p.m.

Details

Message ID 20200119121247.37310-1-info@bnoordhuis.nl
State New
Series "add statx"
Headers show

Commit Message

Ben Noordhuis Jan. 19, 2020, 12:12 p.m.
glibc exposes a wrapper for this system call. it's inconvenient that
musl doesn't so let's add one.

ref: https://github.com/rust-lang/rust/pull/67774
---
 include/fcntl.h    | 15 +++++++++++++++
 include/sys/stat.h | 34 ++++++++++++++++++++++++++++++++++
 src/stat/fstatat.c | 27 +--------------------------
 src/stat/statx.c   |  8 ++++++++
 4 files changed, 58 insertions(+), 26 deletions(-)
 create mode 100644 src/stat/statx.c

Patch hide | download patch | download mbox

diff --git a/include/fcntl.h b/include/fcntl.h
index b664cdc4..21050a65 100644
--- a/include/fcntl.h
+++ b/include/fcntl.h
@@ -106,6 +106,21 @@  int posix_fallocate(int, off_t, off_t);
 #define AT_STATX_DONT_SYNC 0x4000
 #define AT_RECURSIVE 0x8000
 
+#define STATX_TYPE 1U
+#define STATX_MODE 2U
+#define STATX_NLINK 4U
+#define STATX_UID 8U
+#define STATX_GID 0x10U
+#define STATX_ATIME 0x20U
+#define STATX_MTIME 0x40U
+#define STATX_CTIME 0x80U
+#define STATX_INO 0x100U
+#define STATX_SIZE 0x200U
+#define STATX_BLOCKS 0x400U
+#define STATX_BASIC_STATS 0x7ffU
+#define STATX_BTIME 0x800U
+#define STATX_ALL 0xfffU
+
 #define FAPPEND O_APPEND
 #define FFSYNC O_SYNC
 #define FASYNC O_ASYNC
diff --git a/include/sys/stat.h b/include/sys/stat.h
index 10d446c4..5db71590 100644
--- a/include/sys/stat.h
+++ b/include/sys/stat.h
@@ -5,6 +5,7 @@  extern "C" {
 #endif
 
 #include <features.h>
+#include <stdint.h>
 
 #define __NEED_dev_t
 #define __NEED_ino_t
@@ -70,6 +71,38 @@  extern "C" {
 #define UTIME_NOW  0x3fffffff
 #define UTIME_OMIT 0x3ffffffe
 
+#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
+struct statx_timestamp {
+	int64_t tv_sec;
+	uint32_t tv_nsec;
+	int32_t __pad;
+};
+
+struct statx {
+	uint32_t stx_mask;
+	uint32_t stx_blksize;
+	uint64_t stx_attributes;
+	uint32_t stx_nlink;
+	uint32_t stx_uid;
+	uint32_t stx_gid;
+	uint16_t stx_mode;
+	uint16_t __pad0[1];
+	uint64_t stx_ino;
+	uint64_t stx_size;
+	uint64_t stx_blocks;
+	uint64_t stx_attributes_mask;
+	struct statx_timestamp stx_atime;
+	struct statx_timestamp stx_btime;
+	struct statx_timestamp stx_ctime;
+	struct statx_timestamp stx_mtime;
+	uint32_t stx_rdev_major;
+	uint32_t stx_rdev_minor;
+	uint32_t stx_dev_major;
+	uint32_t stx_dev_minor;
+	uint64_t __pad1[14];
+};
+#endif
+
 int stat(const char *__restrict, struct stat *__restrict);
 int fstat(int, struct stat *);
 int lstat(const char *__restrict, struct stat *__restrict);
@@ -93,6 +126,7 @@  int utimensat(int, const char *, const struct timespec [2], int);
 
 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
 int lchmod(const char *, mode_t);
+int statx(int, const char *__restrict, int, unsigned, struct statx *__restrict);
 #define S_IREAD S_IRUSR
 #define S_IWRITE S_IWUSR
 #define S_IEXEC S_IXUSR
diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c
index de165b5c..ab22e4c6 100644
--- a/src/stat/fstatat.c
+++ b/src/stat/fstatat.c
@@ -8,36 +8,11 @@ 
 #include "syscall.h"
 #include "kstat.h"
 
-struct statx {
-	uint32_t stx_mask;
-	uint32_t stx_blksize;
-	uint64_t stx_attributes;
-	uint32_t stx_nlink;
-	uint32_t stx_uid;
-	uint32_t stx_gid;
-	uint16_t stx_mode;
-	uint16_t pad1;
-	uint64_t stx_ino;
-	uint64_t stx_size;
-	uint64_t stx_blocks;
-	uint64_t stx_attributes_mask;
-	struct {
-		int64_t tv_sec;
-		uint32_t tv_nsec;
-		int32_t pad;
-	} stx_atime, stx_btime, stx_ctime, stx_mtime;
-	uint32_t stx_rdev_major;
-	uint32_t stx_rdev_minor;
-	uint32_t stx_dev_major;
-	uint32_t stx_dev_minor;
-	uint64_t spare[14];
-};
-
 static int fstatat_statx(int fd, const char *restrict path, struct stat *restrict st, int flag)
 {
 	struct statx stx;
 
-	int ret = __syscall(SYS_statx, fd, path, flag, 0x7ff, &stx);
+	int ret = __syscall(SYS_statx, fd, path, flag, STATX_BASIC_STATS, &stx);
 	if (ret) return ret;
 
 	*st = (struct stat){
diff --git a/src/stat/statx.c b/src/stat/statx.c
new file mode 100644
index 00000000..36b45d33
--- /dev/null
+++ b/src/stat/statx.c
@@ -0,0 +1,8 @@ 
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include "syscall.h"
+
+int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx)
+{
+	return syscall(SYS_statx, dirfd, path, flags, mask, stx);
+}

Comments

Ben Noordhuis Jan. 24, 2020, 8:38 a.m.
On Sun, Jan 19, 2020 at 1:13 PM Ben Noordhuis <info@bnoordhuis.nl> wrote:
>
> glibc exposes a wrapper for this system call. it's inconvenient that
> musl doesn't so let's add one.
>
> ref: https://github.com/rust-lang/rust/pull/67774
> ---
>  include/fcntl.h    | 15 +++++++++++++++
>  include/sys/stat.h | 34 ++++++++++++++++++++++++++++++++++
>  src/stat/fstatat.c | 27 +--------------------------
>  src/stat/statx.c   |  8 ++++++++
>  4 files changed, 58 insertions(+), 26 deletions(-)
>  create mode 100644 src/stat/statx.c
>
> diff --git a/include/fcntl.h b/include/fcntl.h
> index b664cdc4..21050a65 100644
> --- a/include/fcntl.h
> +++ b/include/fcntl.h
> @@ -106,6 +106,21 @@ int posix_fallocate(int, off_t, off_t);
>  #define AT_STATX_DONT_SYNC 0x4000
>  #define AT_RECURSIVE 0x8000
>
> +#define STATX_TYPE 1U
> +#define STATX_MODE 2U
> +#define STATX_NLINK 4U
> +#define STATX_UID 8U
> +#define STATX_GID 0x10U
> +#define STATX_ATIME 0x20U
> +#define STATX_MTIME 0x40U
> +#define STATX_CTIME 0x80U
> +#define STATX_INO 0x100U
> +#define STATX_SIZE 0x200U
> +#define STATX_BLOCKS 0x400U
> +#define STATX_BASIC_STATS 0x7ffU
> +#define STATX_BTIME 0x800U
> +#define STATX_ALL 0xfffU
> +
>  #define FAPPEND O_APPEND
>  #define FFSYNC O_SYNC
>  #define FASYNC O_ASYNC
> diff --git a/include/sys/stat.h b/include/sys/stat.h
> index 10d446c4..5db71590 100644
> --- a/include/sys/stat.h
> +++ b/include/sys/stat.h
> @@ -5,6 +5,7 @@ extern "C" {
>  #endif
>
>  #include <features.h>
> +#include <stdint.h>
>
>  #define __NEED_dev_t
>  #define __NEED_ino_t
> @@ -70,6 +71,38 @@ extern "C" {
>  #define UTIME_NOW  0x3fffffff
>  #define UTIME_OMIT 0x3ffffffe
>
> +#if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +struct statx_timestamp {
> +       int64_t tv_sec;
> +       uint32_t tv_nsec;
> +       int32_t __pad;
> +};
> +
> +struct statx {
> +       uint32_t stx_mask;
> +       uint32_t stx_blksize;
> +       uint64_t stx_attributes;
> +       uint32_t stx_nlink;
> +       uint32_t stx_uid;
> +       uint32_t stx_gid;
> +       uint16_t stx_mode;
> +       uint16_t __pad0[1];
> +       uint64_t stx_ino;
> +       uint64_t stx_size;
> +       uint64_t stx_blocks;
> +       uint64_t stx_attributes_mask;
> +       struct statx_timestamp stx_atime;
> +       struct statx_timestamp stx_btime;
> +       struct statx_timestamp stx_ctime;
> +       struct statx_timestamp stx_mtime;
> +       uint32_t stx_rdev_major;
> +       uint32_t stx_rdev_minor;
> +       uint32_t stx_dev_major;
> +       uint32_t stx_dev_minor;
> +       uint64_t __pad1[14];
> +};
> +#endif
> +
>  int stat(const char *__restrict, struct stat *__restrict);
>  int fstat(int, struct stat *);
>  int lstat(const char *__restrict, struct stat *__restrict);
> @@ -93,6 +126,7 @@ int utimensat(int, const char *, const struct timespec [2], int);
>
>  #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
>  int lchmod(const char *, mode_t);
> +int statx(int, const char *__restrict, int, unsigned, struct statx *__restrict);
>  #define S_IREAD S_IRUSR
>  #define S_IWRITE S_IWUSR
>  #define S_IEXEC S_IXUSR
> diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c
> index de165b5c..ab22e4c6 100644
> --- a/src/stat/fstatat.c
> +++ b/src/stat/fstatat.c
> @@ -8,36 +8,11 @@
>  #include "syscall.h"
>  #include "kstat.h"
>
> -struct statx {
> -       uint32_t stx_mask;
> -       uint32_t stx_blksize;
> -       uint64_t stx_attributes;
> -       uint32_t stx_nlink;
> -       uint32_t stx_uid;
> -       uint32_t stx_gid;
> -       uint16_t stx_mode;
> -       uint16_t pad1;
> -       uint64_t stx_ino;
> -       uint64_t stx_size;
> -       uint64_t stx_blocks;
> -       uint64_t stx_attributes_mask;
> -       struct {
> -               int64_t tv_sec;
> -               uint32_t tv_nsec;
> -               int32_t pad;
> -       } stx_atime, stx_btime, stx_ctime, stx_mtime;
> -       uint32_t stx_rdev_major;
> -       uint32_t stx_rdev_minor;
> -       uint32_t stx_dev_major;
> -       uint32_t stx_dev_minor;
> -       uint64_t spare[14];
> -};
> -
>  static int fstatat_statx(int fd, const char *restrict path, struct stat *restrict st, int flag)
>  {
>         struct statx stx;
>
> -       int ret = __syscall(SYS_statx, fd, path, flag, 0x7ff, &stx);
> +       int ret = __syscall(SYS_statx, fd, path, flag, STATX_BASIC_STATS, &stx);
>         if (ret) return ret;
>
>         *st = (struct stat){
> diff --git a/src/stat/statx.c b/src/stat/statx.c
> new file mode 100644
> index 00000000..36b45d33
> --- /dev/null
> +++ b/src/stat/statx.c
> @@ -0,0 +1,8 @@
> +#define _GNU_SOURCE
> +#include <sys/stat.h>
> +#include "syscall.h"
> +
> +int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx)
> +{
> +       return syscall(SYS_statx, dirfd, path, flags, mask, stx);
> +}
> --
> 2.23.0
>

Can I get some feedback on this patch, even if it's just "no because"? Thanks.
Rich Felker Jan. 24, 2020, 2 p.m.
On Sun, Jan 19, 2020 at 01:12:47PM +0100, Ben Noordhuis wrote:
> glibc exposes a wrapper for this system call. it's inconvenient that
> musl doesn't so let's add one.
> 
> ref: https://github.com/rust-lang/rust/pull/67774
> ---
>  include/fcntl.h    | 15 +++++++++++++++
>  include/sys/stat.h | 34 ++++++++++++++++++++++++++++++++++
>  src/stat/fstatat.c | 27 +--------------------------
>  src/stat/statx.c   |  8 ++++++++
>  4 files changed, 58 insertions(+), 26 deletions(-)
>  create mode 100644 src/stat/statx.c
> 
> diff --git a/include/fcntl.h b/include/fcntl.h
> index b664cdc4..21050a65 100644
> --- a/include/fcntl.h
> +++ b/include/fcntl.h
> @@ -106,6 +106,21 @@ int posix_fallocate(int, off_t, off_t);
>  #define AT_STATX_DONT_SYNC 0x4000
>  #define AT_RECURSIVE 0x8000
>  
> +#define STATX_TYPE 1U
> +#define STATX_MODE 2U
> +#define STATX_NLINK 4U
> +#define STATX_UID 8U
> +#define STATX_GID 0x10U
> +#define STATX_ATIME 0x20U
> +#define STATX_MTIME 0x40U
> +#define STATX_CTIME 0x80U
> +#define STATX_INO 0x100U
> +#define STATX_SIZE 0x200U
> +#define STATX_BLOCKS 0x400U
> +#define STATX_BASIC_STATS 0x7ffU
> +#define STATX_BTIME 0x800U
> +#define STATX_ALL 0xfffU
> +

This is under BSD||GNU (i.e. DEFAULT||ALL) rather than just under the
latter. Is there a reason for that? Generally the extensions that are
pretty clearly Linux-only, as opposed to something that other
POSIX-based OS's are likely to adopt, are put under GNU/ALL to
discourage their use without intent and to avoid namespace clutter.

Also, I don't think it makes sense for these to be in fcntl.h at all,
and I don't think there's precedent. glibc has them exposed via
sys/stat.h not fcntl.h. And it looks like glibc exposes them (and all
the statx stuff) only under _GNU_SOURCE:

#ifdef __USE_GNU
# include <bits/statx.h>
#endif

So in summary, unless there's strong reason to do differently, move to
sys/stat.h and make _GNU_SOURCE-only.

>  #define FAPPEND O_APPEND
>  #define FFSYNC O_SYNC
>  #define FASYNC O_ASYNC
> diff --git a/include/sys/stat.h b/include/sys/stat.h
> index 10d446c4..5db71590 100644
> --- a/include/sys/stat.h
> +++ b/include/sys/stat.h
> @@ -5,6 +5,7 @@ extern "C" {
>  #endif
>  
>  #include <features.h>
> +#include <stdint.h>

Extra indirect headers can't be pulled in unconditionally beyond
what's specified for sys/stat.h. Instead you could put it under
_GNU_SOURCE (this is another reason not to put it in default), but
what would be better is putting just the __NEED_* for the types you
want under _GNU_SOURCE here, e.g.

#ifdef _GNU_SOURCE
#define __NEED_uint16_t
#define __NEED_uint32_t
#define __NEED_uint64_t
#define __NEED_int32_t
#define __NEED_int64_t
#endif

>  int stat(const char *__restrict, struct stat *__restrict);
>  int fstat(int, struct stat *);
>  int lstat(const char *__restrict, struct stat *__restrict);
> @@ -93,6 +126,7 @@ int utimensat(int, const char *, const struct timespec [2], int);
>  
>  #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
>  int lchmod(const char *, mode_t);
> +int statx(int, const char *__restrict, int, unsigned, struct statx *__restrict);

Also _GNU_SOURCE-only.

>  #define S_IREAD S_IRUSR
>  #define S_IWRITE S_IWUSR
>  #define S_IEXEC S_IXUSR
> diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c
> index de165b5c..ab22e4c6 100644
> --- a/src/stat/fstatat.c
> +++ b/src/stat/fstatat.c
> @@ -8,36 +8,11 @@
>  #include "syscall.h"
>  #include "kstat.h"
>  
> -struct statx {
> -	uint32_t stx_mask;
> -	uint32_t stx_blksize;
> -	uint64_t stx_attributes;
> -	uint32_t stx_nlink;
> -	uint32_t stx_uid;
> -	uint32_t stx_gid;
> -	uint16_t stx_mode;
> -	uint16_t pad1;
> -	uint64_t stx_ino;
> -	uint64_t stx_size;
> -	uint64_t stx_blocks;
> -	uint64_t stx_attributes_mask;
> -	struct {
> -		int64_t tv_sec;
> -		uint32_t tv_nsec;
> -		int32_t pad;
> -	} stx_atime, stx_btime, stx_ctime, stx_mtime;
> -	uint32_t stx_rdev_major;
> -	uint32_t stx_rdev_minor;
> -	uint32_t stx_dev_major;
> -	uint32_t stx_dev_minor;
> -	uint64_t spare[14];
> -};
> -
>  static int fstatat_statx(int fd, const char *restrict path, struct stat *restrict st, int flag)
>  {
>  	struct statx stx;
>  
> -	int ret = __syscall(SYS_statx, fd, path, flag, 0x7ff, &stx);
> +	int ret = __syscall(SYS_statx, fd, path, flag, STATX_BASIC_STATS, &stx);
>  	if (ret) return ret;

This looks ok.

>  	*st = (struct stat){
> diff --git a/src/stat/statx.c b/src/stat/statx.c
> new file mode 100644
> index 00000000..36b45d33
> --- /dev/null
> +++ b/src/stat/statx.c
> @@ -0,0 +1,8 @@
> +#define _GNU_SOURCE
> +#include <sys/stat.h>
> +#include "syscall.h"
> +
> +int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx)
> +{
> +	return syscall(SYS_statx, dirfd, path, flags, mask, stx);
> +}
> -- 
> 2.23.0

Should we do fallback translation from stat if SYS_statx fails with
ENOSYS? My leaning would be yes.

Rich
Rich Felker Jan. 24, 2020, 2:01 p.m.
On Fri, Jan 24, 2020 at 09:38:49AM +0100, Ben Noordhuis wrote:
> 
> Can I get some feedback on this patch, even if it's just "no because"? Thanks.

Sorry aboout that; I'd just had my mind on other things and hadn't
taken the time to make a good review yet.
Florian Weimer Jan. 24, 2020, 3:27 p.m.
* Rich Felker:

> This is under BSD||GNU (i.e. DEFAULT||ALL) rather than just under the
> latter. Is there a reason for that? Generally the extensions that are
> pretty clearly Linux-only, as opposed to something that other
> POSIX-based OS's are likely to adopt, are put under GNU/ALL to
> discourage their use without intent and to avoid namespace clutter.

statx is not Linux-specific in glibc, but also available on Hurd.

We received feedback that our headers are not useful due to the
__u64/uint64_t mismatch.

  <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
  <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>

Thanks,
Florian
Rich Felker Jan. 24, 2020, 3:54 p.m.
On Fri, Jan 24, 2020 at 04:27:47PM +0100, Florian Weimer wrote:
> * Rich Felker:
> 
> > This is under BSD||GNU (i.e. DEFAULT||ALL) rather than just under the
> > latter. Is there a reason for that? Generally the extensions that are
> > pretty clearly Linux-only, as opposed to something that other
> > POSIX-based OS's are likely to adopt, are put under GNU/ALL to
> > discourage their use without intent and to avoid namespace clutter.
> 
> statx is not Linux-specific in glibc, but also available on Hurd.

OK, well GNU/Linux-specific. :-) Some ppl find _GNU_SOURCE odd for
stuff that comes from Linux not GNU, but in this case it seems pretty
appropriate since GNU and Linux are the two systems doing it.

> We received feedback that our headers are not useful due to the
> __u64/uint64_t mismatch.
> 
>   <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
>   <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>

Uhg. That change seems unfortunate since it's incompatible with
theoretical future archs having 128-bit long long -- an idea I'm not
much a fan of, but don't actually want to preclude. Is there a reason
to actually care about compatibility with the kernel types? It's not
like both struct definitions can be included in the same source
anyway; the tags would clash. Using the canonical uintN_t types makes
more sense from an API perspective, I think; kernel assumptions about
types should not leak into an API intended to be clean and shared with
non-Linux implementations.

Rich
Florian Weimer Jan. 24, 2020, 4:12 p.m.
* Rich Felker:

> On Fri, Jan 24, 2020 at 04:27:47PM +0100, Florian Weimer wrote:
>> * Rich Felker:
>> 
>> > This is under BSD||GNU (i.e. DEFAULT||ALL) rather than just under the
>> > latter. Is there a reason for that? Generally the extensions that are
>> > pretty clearly Linux-only, as opposed to something that other
>> > POSIX-based OS's are likely to adopt, are put under GNU/ALL to
>> > discourage their use without intent and to avoid namespace clutter.
>> 
>> statx is not Linux-specific in glibc, but also available on Hurd.
>
> OK, well GNU/Linux-specific. :-) Some ppl find _GNU_SOURCE odd for
> stuff that comes from Linux not GNU, but in this case it seems pretty
> appropriate since GNU and Linux are the two systems doing it.

Sorry for nit-picking, it's common to both GNU and Linux.

gettid is Linux-specific, and so is pthread_getattr_default_np.

pkey_set is something that is GNU/Linux-specific in the sense that is
something that's only part of glibc, and only on Linux.

>> We received feedback that our headers are not useful due to the
>> __u64/uint64_t mismatch.
>> 
>>   <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
>>   <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>
>
> Uhg. That change seems unfortunate since it's incompatible with
> theoretical future archs having 128-bit long long -- an idea I'm not
> much a fan of, but don't actually want to preclude. Is there a reason
> to actually care about compatibility with the kernel types?

Yes, printf format strings. 8-(

> It's not like both struct definitions can be included in the same
> source anyway; the tags would clash. Using the canonical uintN_t types
> makes more sense from an API perspective, I think; kernel assumptions
> about types should not leak into an API intended to be clean and
> shared with non-Linux implementations.

I thought so too, which is why I used them.

But it is fairly compelling to use the kernel types if the header is
available, so that we don't have to patch and rebuild glibc if someone
backports new statx features into the kernel.  That's why I added the
__has_include kludge.

Thanks,
Florian
Rich Felker Jan. 24, 2020, 4:29 p.m.
On Fri, Jan 24, 2020 at 05:12:51PM +0100, Florian Weimer wrote:
> * Rich Felker:
> 
> > On Fri, Jan 24, 2020 at 04:27:47PM +0100, Florian Weimer wrote:
> >> * Rich Felker:
> >> 
> >> > This is under BSD||GNU (i.e. DEFAULT||ALL) rather than just under the
> >> > latter. Is there a reason for that? Generally the extensions that are
> >> > pretty clearly Linux-only, as opposed to something that other
> >> > POSIX-based OS's are likely to adopt, are put under GNU/ALL to
> >> > discourage their use without intent and to avoid namespace clutter.
> >> 
> >> statx is not Linux-specific in glibc, but also available on Hurd.
> >
> > OK, well GNU/Linux-specific. :-) Some ppl find _GNU_SOURCE odd for
> > stuff that comes from Linux not GNU, but in this case it seems pretty
> > appropriate since GNU and Linux are the two systems doing it.
> 
> Sorry for nit-picking, it's common to both GNU and Linux.

Yes, I was just making a bad joke about "GNU/Linux", reusing it to
mean "applies to both GNU and to Linux". Sorry for being unclear.

> >> We received feedback that our headers are not useful due to the
> >> __u64/uint64_t mismatch.
> >> 
> >>   <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
> >>   <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>
> >
> > Uhg. That change seems unfortunate since it's incompatible with
> > theoretical future archs having 128-bit long long -- an idea I'm not
> > much a fan of, but don't actually want to preclude. Is there a reason
> > to actually care about compatibility with the kernel types?
> 
> Yes, printf format strings. 8-(

Why not let ppl get the warnings and fix them by casting to an
appropriate type to print. I prefer [u]intmax_t anyway to avoid the
PRIu64 etc. mess that makes your format strings unreadable and that
interferes with translation (*).

> > It's not like both struct definitions can be included in the same
> > source anyway; the tags would clash. Using the canonical uintN_t types
> > makes more sense from an API perspective, I think; kernel assumptions
> > about types should not leak into an API intended to be clean and
> > shared with non-Linux implementations.
> 
> I thought so too, which is why I used them.
> 
> But it is fairly compelling to use the kernel types if the header is
> available, so that we don't have to patch and rebuild glibc if someone
> backports new statx features into the kernel.  That's why I added the
> __has_include kludge.

I see. This is something of a tradeoff I guess; for musl users I'd
expect folks to have libc headers newer than kernel headers in most
cases since distros are slow to follow new kernels but fast to follow
new libc, and you want compile-time support for future kernel
features even if you don't have the kernel to use them yet. But on
glibc-based dists it may be the other way around.

Rich



(*) GNU gettext has "SYSDEP" format strings that solve this problem
but do so by requiring non-shareable string memory for all the
affected strings. musl gettext does not support this, but is intended
to be used with gettext-tiny's version of msgfmt that works out the
combinatorics at .mo generation time and emits them all in shareable
form. Still I think it's better to get rid of this practice and just
use %jd etc. everywhere with suitable casts.
Ben Noordhuis Jan. 28, 2020, 8:59 a.m.
On Fri, Jan 24, 2020 at 3:01 PM Rich Felker <dalias@libc.org> wrote:
>
> On Fri, Jan 24, 2020 at 09:38:49AM +0100, Ben Noordhuis wrote:
> >
> > Can I get some feedback on this patch, even if it's just "no because"? Thanks.
>
> Sorry aboout that; I'd just had my mind on other things and hadn't
> taken the time to make a good review yet.

Thanks for the feedback and no worries, I'm no saint in that regard either.

Before I post a v2, did I understand the following issues correctly?

1. Switch _GNU_SOURCE || _BSD_SOURCE -> just _GNU_SOURCE? FWIW,
_BSD_SOURCE currently exposes the AT_STATX_* flags in fcntl.h.

2. uint64_t -> unsigned long long guarded by #ifdef __GNUC__
__extension__? Or just leave it as-is?

4. An ENOSYS fallback to fstatat()? glibc's fallback returns EINVAL
for AT_* flags it doesn't understand and ignores all STATX_* flags: it
sets stx_mask to STATX_BASIC_STATS, fills in stx_uid/stx_gid/etc. and
sets stx_btime to zero. Does that sound reasonable?
Florian Weimer Jan. 28, 2020, 10:41 a.m.
* Rich Felker:

>> >> We received feedback that our headers are not useful due to the
>> >> __u64/uint64_t mismatch.
>> >> 
>> >>   <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
>> >>   <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>
>> >
>> > Uhg. That change seems unfortunate since it's incompatible with
>> > theoretical future archs having 128-bit long long -- an idea I'm not
>> > much a fan of, but don't actually want to preclude. Is there a reason
>> > to actually care about compatibility with the kernel types?
>> 
>> Yes, printf format strings. 8-(
>
> Why not let ppl get the warnings and fix them by casting to an
> appropriate type to print. I prefer [u]intmax_t anyway to avoid the
> PRIu64 etc. mess that makes your format strings unreadable and that
> interferes with translation (*).

I'm concerned that such casts hide or even introduce bugs, like casting
tv_sec to int.

Here's an example from the MySQL sources:

  int my_timeval_to_str(const struct timeval *tm, char *to, uint dec)
  {
    int len= sprintf(to, "%d", (int) tm->tv_sec);
    if (dec)
      len+= my_useconds_to_str(to + len, tm->tv_usec, dec);
    return len;
  }

That's why the kernel always uses unsigned long long for __u64, which
seems a reasonable trade-off to me.  It will make porting to 128-bit
architectures difficult.  But I think we should focus on the
architectures we have today.

>> > It's not like both struct definitions can be included in the same
>> > source anyway; the tags would clash. Using the canonical uintN_t types
>> > makes more sense from an API perspective, I think; kernel assumptions
>> > about types should not leak into an API intended to be clean and
>> > shared with non-Linux implementations.
>> 
>> I thought so too, which is why I used them.
>> 
>> But it is fairly compelling to use the kernel types if the header is
>> available, so that we don't have to patch and rebuild glibc if someone
>> backports new statx features into the kernel.  That's why I added the
>> __has_include kludge.
>
> I see. This is something of a tradeoff I guess; for musl users I'd
> expect folks to have libc headers newer than kernel headers in most
> cases since distros are slow to follow new kernels but fast to follow
> new libc, and you want compile-time support for future kernel
> features even if you don't have the kernel to use them yet. But on
> glibc-based dists it may be the other way around.

I think it's actually the same for self-built musl on most glibc-based
distributions because for that, developers will usually pick the latest
musl release and use the distribution's kernel headers.

But for glibc itself on glibc distributions, it's likely to have newer
kernel headers.  Fedora rebase kernels (including the UAPI headers) in a
release, but not glibc.  Debian has backport kernel packages for stable
releases (but no glibc backport).  Some enterprise distributions
aggressively backport features (including new userspace APIs) and even
rebase entire kernel subsystems from time to time.

Thanks,
Florian
Rich Felker Jan. 28, 2020, 1:18 p.m.
On Tue, Jan 28, 2020 at 11:41:33AM +0100, Florian Weimer wrote:
> * Rich Felker:
> 
> >> >> We received feedback that our headers are not useful due to the
> >> >> __u64/uint64_t mismatch.
> >> >> 
> >> >>   <https://sourceware.org/bugzilla/show_bug.cgi?id=25292
> >> >>   <https://sourceware.org/ml/libc-alpha/2019-12/msg00631.html>
> >> >
> >> > Uhg. That change seems unfortunate since it's incompatible with
> >> > theoretical future archs having 128-bit long long -- an idea I'm not
> >> > much a fan of, but don't actually want to preclude. Is there a reason
> >> > to actually care about compatibility with the kernel types?
> >> 
> >> Yes, printf format strings. 8-(
> >
> > Why not let ppl get the warnings and fix them by casting to an
> > appropriate type to print. I prefer [u]intmax_t anyway to avoid the
> > PRIu64 etc. mess that makes your format strings unreadable and that
> > interferes with translation (*).
> 
> I'm concerned that such casts hide or even introduce bugs, like casting
> tv_sec to int.

As you've found, people will do incorrect casts regardless, in places
where it makes a bigger difference than here, where using fixed basic
types isn't an option. I don't think the solution is throwing away
proper semantic use of types.

> Here's an example from the MySQL sources:
> 
>   int my_timeval_to_str(const struct timeval *tm, char *to, uint dec)
>   {
>     int len= sprintf(to, "%d", (int) tm->tv_sec);
>     if (dec)
>       len+= my_useconds_to_str(to + len, tm->tv_usec, dec);
>     return len;
>   }

In the particular case of time, we should probably teach GCC to issue
warnings (-Wy2038?) for casts from an expression whose type was
declared via a typedef named time_t (I know GCC tracks knowledge of
the typedef name used, since it shows it in other warnings) to a type
smaller than 64-bit.

> That's why the kernel always uses unsigned long long for __u64, which
> seems a reasonable trade-off to me.  It will make porting to 128-bit
> architectures difficult.  But I think we should focus on the
> architectures we have today.

I disagree strongly with the last sentence. Designing an *API* in a
way that's not compatible with anything but long long == 64-bit is bad
API design. Arguably you could get into having something like
k_[u]intNN_t or similar, and use these, but I feel like that's just
gratuitous ugliness. The userspace type for 64-bit fixed-size fields
is [u]int64_t and we should be promoting its consistent usage, not
fragmenting things around kernel uapi mistakes. (Perhaps kernel uapi
headers should be fixed so that, when __KERNEL__ is not defined, they
include <stdint.h> and define __u64 etc. in terms of the stdint
types?)

Rich
Rich Felker Jan. 28, 2020, 1:39 p.m.
On Tue, Jan 28, 2020 at 09:59:56AM +0100, Ben Noordhuis wrote:
> On Fri, Jan 24, 2020 at 3:01 PM Rich Felker <dalias@libc.org> wrote:
> >
> > On Fri, Jan 24, 2020 at 09:38:49AM +0100, Ben Noordhuis wrote:
> > >
> > > Can I get some feedback on this patch, even if it's just "no because"? Thanks.
> >
> > Sorry aboout that; I'd just had my mind on other things and hadn't
> > taken the time to make a good review yet.
> 
> Thanks for the feedback and no worries, I'm no saint in that regard either.
> 
> Before I post a v2, did I understand the following issues correctly?
> 
> 1. Switch _GNU_SOURCE || _BSD_SOURCE -> just _GNU_SOURCE?

Yes. 

> FWIW, _BSD_SOURCE currently exposes the AT_STATX_* flags in fcntl.h.

Being that they're "namespaced" under AT_*, I don't think this is a
big deal. I was generally of the opinion that AT_* flags make sense to
always expose since they might be used with standard interfaces as
extensions, rather than with nonstandard interfaces. Even though
AT_STATX_* seem statx-exclusive, maybe they make sense as extension
flags to fstatat too?

In any case I think we can leave any consideration of a change here
for later; it's separate from adding statx().

> 2. uint64_t -> unsigned long long guarded by #ifdef __GNUC__
> __extension__? Or just leave it as-is?

long long does not use __extension__ guard in musl, but I think we
should stick with the semantically correct types, [u]intNN_t.

> 4. An ENOSYS fallback to fstatat()? glibc's fallback returns EINVAL
> for AT_* flags it doesn't understand and ignores all STATX_* flags: it
> sets stx_mask to STATX_BASIC_STATS, fills in stx_uid/stx_gid/etc. and
> sets stx_btime to zero. Does that sound reasonable?

Sounds right.

Note that filling in stx_[r]dev_{major,minor} requires the
sys/sysmacros.h major()/minor()/ macros since statx oddly separated
them out of dev_t.

I don't think setting btime to 0 is strictly necessary, but since it's
in the range of the struct that's unconditionally present, it's okay
to do so, and perhaps guards against info leaks from old stack
contents.

Rich
Florian Weimer Feb. 17, 2020, 9:10 a.m.
* Rich Felker:

>> That's why the kernel always uses unsigned long long for __u64, which
>> seems a reasonable trade-off to me.  It will make porting to 128-bit
>> architectures difficult.  But I think we should focus on the
>> architectures we have today.
>
> I disagree strongly with the last sentence. Designing an *API* in a
> way that's not compatible with anything but long long == 64-bit is bad
> API design.

We don't know what LL128 architectures will look like.  For all we know,
they might have more expressive type descriptors for variadic functions,
so the whole issue of matching integer types with precise format
specifiers becomes moot.

> Arguably you could get into having something like k_[u]intNN_t or
> similar, and use these, but I feel like that's just gratuitous
> ugliness. The userspace type for 64-bit fixed-size fields is
> [u]int64_t and we should be promoting its consistent usage, not
> fragmenting things around kernel uapi mistakes.

I'm not convinced it's a mistake.

> (Perhaps kernel uapi headers should be fixed so that, when __KERNEL__
> is not defined, they include <stdint.h> and define __u64 etc. in terms
> of the stdint types?)

This breaks the existing format specifiers.  The kernel choices have
been deliberately made in such a way that you can use %llu for printing
__u64 values.

Thanks,
Florian
Rich Felker Feb. 17, 2020, 3:29 p.m.
On Mon, Feb 17, 2020 at 10:10:40AM +0100, Florian Weimer wrote:
> * Rich Felker:
> 
> >> That's why the kernel always uses unsigned long long for __u64, which
> >> seems a reasonable trade-off to me.  It will make porting to 128-bit
> >> architectures difficult.  But I think we should focus on the
> >> architectures we have today.
> >
> > I disagree strongly with the last sentence. Designing an *API* in a
> > way that's not compatible with anything but long long == 64-bit is bad
> > API design.
> 
> We don't know what LL128 architectures will look like.  For all we know,
> they might have more expressive type descriptors for variadic functions,
> so the whole issue of matching integer types with precise format
> specifiers becomes moot.

I don't follow. Mechanically the wrong format for long long vs
int64_t *already works*; the problem at hand is that per the
language, it's undefined, and rightly produces warnings. None of that
would go away if LL128 archs used a fancy variadic call mechanism.

Rich