zdtm/static/fd failure on aarch64

Submitted by Dmitry Safonov on June 11, 2018, 8:55 p.m.

Details

Message ID CAJwJo6YK_QU0SGC34M8bkvj3o8QHxxT1sZvXV5dkm=_LkThutw@mail.gmail.com
State New
Series "zdtm/static/fd failure on aarch64"
Headers show

Commit Message

Dmitry Safonov June 11, 2018, 8:55 p.m.
2018-06-11 21:34 GMT+01:00 Adrian Reber <adrian@lisas.de>:
> On Mon, Jun 11, 2018 at 09:19:21PM +0100, Dmitry Safonov wrote:
>> I'm awfully sorry about asking you so many times..
>> Maybe the thing is in sizeof(unsigned int) != 4 on arm64?
>>
>> Returning back to the first version + correcting definition for syscall.
>
> This was also something I was suspecting, that the syscall is not done
> correctly, but your patch does not help. Same result:
>
> 5085  dup3(4, 0, 0)                     = 0
> 5085  close(4)                          = 0
> 5085  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0) = 0xffff97050000
> 5085  clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xffff970b2dd0) = 5086
> 5085  futex(0xffff97050000, FUTEX_WAIT, 0, NULL <unfinished ...>
> 5086  setsid()                          = 5086
> 5086  rt_sigaction(SIGCHLD, {SIG_DFL, [], SA_RESTART}, NULL, 8) = 0
> 5086  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0) = 0xffff96eb0000
> 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> 5086  brk(NULL)                         = 0x2e920000
> 5086  brk(0x2e950000)                   = 0x2e950000
> 5086  brk(NULL)                         = 0x2e950000
> 5086  openat(AT_FDCWD, "/etc/localtime", O_RDONLY|O_CLOEXEC) = 4
> 5086  fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
> 5086  fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
> 5086  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xffff96ea0000
> 5086  read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 8192) = 3519
> 5086  lseek(4, -2252, SEEK_CUR)         = 1267
> 5086  read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0\5\0\0\0\0"..., 8192) = 2252
> 5086  close(4)                          = 0
> 5086  munmap(0xffff96ea0000, 65536)     = 0
> 5086  write(2, "16:31:33.969:  5086: ERR: ../lib"..., 99) = 99
> 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> 5086  newfstatat(AT_FDCWD, "/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}, 0) = 0
> 5086  write(2, "16:31:33.971:  5086: ERR: ../lib"..., 99) = 99
> 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> 5086  newfstatat(AT_FDCWD, "/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}, 0) = 0
> 5086  write(2, "16:31:33.971:  5086: ERR: ../lib"..., 99) = 99

On the top of the last one - let's add some debugging.
It looks like the third parameter of futex() has lower bits set correctly,
but the upper contains a garbage.
It might happen if futex is not aligned on 4 bytes in memory, IIRC..
Let's see what will debug patch result in.

Patch hide | download patch | download mbox

diff --git a/test/zdtm/lib/lock.h b/test/zdtm/lib/lock.h
index 9c0831d461c1..68f922e9025c 100644
--- a/test/zdtm/lib/lock.h
+++ b/test/zdtm/lib/lock.h
@@ -145,7 +145,8 @@  static void inline mutex_lock(mutex_t *m)
 	while ((c = atomic_inc(&m->raw)) != 0) {
 		ret = sys_futex(&m->raw, FUTEX_WAIT, c + 1, NULL, NULL, 0);
 		if (ret < 0)
-			pr_perror("futex");
+			pr_perror("futex *%p = %d, c = %u",
+				&m->raw, atomic_get(&m->raw), c);
 		BUG_ON(ret < 0 && errno != EWOULDBLOCK);
 	}
 }

Comments

Adrian Reber June 12, 2018, 6:48 a.m.
On Mon, Jun 11, 2018 at 09:55:53PM +0100, Dmitry Safonov wrote:
> 2018-06-11 21:34 GMT+01:00 Adrian Reber <adrian@lisas.de>:
> > On Mon, Jun 11, 2018 at 09:19:21PM +0100, Dmitry Safonov wrote:
> >> I'm awfully sorry about asking you so many times..
> >> Maybe the thing is in sizeof(unsigned int) != 4 on arm64?
> >>
> >> Returning back to the first version + correcting definition for syscall.
> >
> > This was also something I was suspecting, that the syscall is not done
> > correctly, but your patch does not help. Same result:
> >
> > 5085  dup3(4, 0, 0)                     = 0
> > 5085  close(4)                          = 0
> > 5085  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, 0, 0) = 0xffff97050000
> > 5085  clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0xffff970b2dd0) = 5086
> > 5085  futex(0xffff97050000, FUTEX_WAIT, 0, NULL <unfinished ...>
> > 5086  setsid()                          = 5086
> > 5086  rt_sigaction(SIGCHLD, {SIG_DFL, [], SA_RESTART}, NULL, 8) = 0
> > 5086  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0) = 0xffff96eb0000
> > 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> > 5086  brk(NULL)                         = 0x2e920000
> > 5086  brk(0x2e950000)                   = 0x2e950000
> > 5086  brk(NULL)                         = 0x2e950000
> > 5086  openat(AT_FDCWD, "/etc/localtime", O_RDONLY|O_CLOEXEC) = 4
> > 5086  fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
> > 5086  fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
> > 5086  mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xffff96ea0000
> > 5086  read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 8192) = 3519
> > 5086  lseek(4, -2252, SEEK_CUR)         = 1267
> > 5086  read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0\5\0\0\0\0"..., 8192) = 2252
> > 5086  close(4)                          = 0
> > 5086  munmap(0xffff96ea0000, 65536)     = 0
> > 5086  write(2, "16:31:33.969:  5086: ERR: ../lib"..., 99) = 99
> > 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> > 5086  newfstatat(AT_FDCWD, "/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}, 0) = 0
> > 5086  write(2, "16:31:33.971:  5086: ERR: ../lib"..., 99) = 99
> > 5086  futex(0xffff96eb0000, FUTEX_WAIT, 2531983361, NULL) = -1 EAGAIN (Resource temporarily unavailable)
> > 5086  newfstatat(AT_FDCWD, "/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}, 0) = 0
> > 5086  write(2, "16:31:33.971:  5086: ERR: ../lib"..., 99) = 99
> 
> On the top of the last one - let's add some debugging.
> It looks like the third parameter of futex() has lower bits set correctly,
> but the upper contains a garbage.
> It might happen if futex is not aligned on 4 bytes in memory, IIRC..
> Let's see what will debug patch result in.

Output is always like this:

02:20:50.173: 11691: ERR: ../lib/lock.h:149: futex *0xffff86100000 = 0, c = 2249195520 (errno = 11 (Resource temporarily unavailable))

11691 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0) = 0xffff86100000
11691 futex(0xffff86100000, FUTEX_WAIT, 2249195521, NULL) = -1 EAGAIN (Resource temporarily unavailable)
11691 brk(NULL)                         = 0x149f0000
11691 brk(0x14a20000)                   = 0x14a20000
11691 brk(NULL)                         = 0x14a20000
11691 openat(AT_FDCWD, "/etc/localtime", O_RDONLY|O_CLOEXEC) = 4
11691 fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
11691 fstat(4, {st_mode=S_IFREG|0644, st_size=3519, ...}) = 0
11691 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xffff860f0000
11691 read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0\0\0\4\0\0\0\0"..., 8192) = 3519
11691 lseek(4, -2252, SEEK_CUR)         = 1267
11691 read(4, "TZif2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0\0\0\5\0\0\0\0"..., 8192) = 2252
11691 close(4)                          = 0
11691 munmap(0xffff860f0000, 65536)     = 0
11691 write(2, "02:20:50.173: 11691: ERR: ../lib"..., 135) = 135
11691 futex(0xffff86100000, FUTEX_WAIT, 2249195521, NULL) = -1 EAGAIN (Resource temporarily unavailable)
11691 newfstatat(AT_FDCWD, "/etc/localtime", {st_mode=S_IFREG|0644, st_size=3519, ...}, 0) = 0
11691 write(2, "02:20:50.175: 11691: ERR: ../lib"..., 135) = 135

How I understand it is that atomic_inc() should return the new value but the returned value
is completely different from what atomic_get() returns, right?

		Adrian