[v4,2/3] tests: fix builds on alpine and centos

Submitted by Adrian Reber on June 28, 2018, 9:03 a.m.

Details

Message ID 1530176637-12620-2-git-send-email-adrian@lisas.de
State New
Series "tests: fix builds on alpine and centos"
Headers show

Commit Message

Adrian Reber June 28, 2018, 9:03 a.m.
From: Adrian Reber <areber@redhat.com>

Install sudo, create test user with ID 1000, install bash,
fix pidfile creation and pidfile chmod.

v2:
 * use sleep to give the criu daemon some time to start up

v3:
 * Andrei is of course right and sleep is not a good solution.
   After adding --status-fd support to criu service, this
   is how we now detect that criu is ready.

v4:
 * This was much more complicated than expected because the tools
   come in different versions on the different travis test targets.
   There seems to be a bug in bash on Ubuntu
    https://lists.gnu.org/archive/html/bug-bash/2017-07/msg00039.html
   which prevents using 'read -n1' on Ubuntu. As a workaround
   the result from CRIU's status FD is now read via python.

   Another problem was discovered on alpine with the loop restore test.
   CRIU says to use setsid even if the process is already using setsid.
   As a workaround, still with setsid, this process is now using
   shell-job true for checkpoint and restore.

Parts of v2 have been committed before. So the changes from this commit
are partially already in another commit.

Signed-off-by: Adrian Reber <areber@redhat.com>
---
 scripts/build/Dockerfile.centos |  4 ++++
 test/others/rpc/Makefile        | 20 ++++++++++++++++----
 test/others/rpc/read.py         | 18 ++++++++++++++++++
 test/others/rpc/restore-loop.py |  5 +++++
 test/others/rpc/run.sh          |  4 +++-
 5 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 test/others/rpc/read.py

Patch hide | download patch | download mbox

diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos
index 0160b75..d8e70ac 100644
--- a/scripts/build/Dockerfile.centos
+++ b/scripts/build/Dockerfile.centos
@@ -40,4 +40,8 @@  WORKDIR /criu
 ENV CCACHE_DIR=/tmp/.ccache CCACHE_NOCOMPRESS=1 $ENV1=yes
 RUN mv .ccache /tmp && make mrproper && ccache -sz  && \
 	date && make -j $(nproc) CC="$CC" && date && ccache -s
+
+# The rpc test cases are running as user #1000, let's add the user
+RUN adduser -u 1000 test
+
 RUN make -C test/zdtm -j $(nproc)
diff --git a/test/others/rpc/Makefile b/test/others/rpc/Makefile
index 2b15873..077a06b 100644
--- a/test/others/rpc/Makefile
+++ b/test/others/rpc/Makefile
@@ -4,13 +4,25 @@  all: test-c rpc_pb2.py criu
 CFLAGS += -g -Werror -Wall -I.
 LDLIBS +=  -lprotobuf-c
 
+PYTHON ?= python
+
 run: all
 	mkdir -p build
 	chmod a+rwx build
-	@# need to start the criu daemon here to access the pidfile
-	sudo -g '#1000' -u '#1000' ./criu service -v4 -W build -o service.log --address criu_service.socket -d --pidfile pidfile
-	# Give the criu daemon some time to start up
-	sleep 0.5
+	rm -f build/status
+	sudo -g '#1000' -u '#1000' mkfifo build/status
+	@# Need to start the criu daemon here to access the pidfile.
+	@# Some of the shells only support FDs < 10
+	ls -la /bin/sh
+	echo $$TEST_SHELL
+	echo $(PYTHON)
+	ls -la /bin/bash
+	ls -la
+	sudo -g '#1000' -u '#1000' -- bash -c "exec 200<>build/status; \
+		./criu service -v4 -W build --address criu_service.socket \
+		-d --pidfile pidfile -o service.log --status-fd 200; \
+		$(PYTHON) read.py build/status"
+	rm -f build/status
 	chmod a+rw build/pidfile
 	sudo -g '#1000' -u '#1000' ./run.sh
 	sudo -g '#1000' -u '#1000' ./version.py
diff --git a/test/others/rpc/read.py b/test/others/rpc/read.py
new file mode 100644
index 0000000..680069b
--- /dev/null
+++ b/test/others/rpc/read.py
@@ -0,0 +1,18 @@ 
+# This script is used to read a single character from CRIU's status FD.
+# That way we know when the CRIU service is ready. CRIU writes a \0 to
+# the status FD.
+# In theory this could be easily done using 'read -n 1' from bash,
+# but the bash version on Ubuntu probably has the following bug:
+# https://lists.gnu.org/archive/html/bug-bash/2017-07/msg00039.html
+
+import os
+import sys
+
+
+f=open(sys.argv[1])
+r = f.read(1)
+
+if r == '\0':
+	sys.exit(0)
+
+sys.exit(-1)
diff --git a/test/others/rpc/restore-loop.py b/test/others/rpc/restore-loop.py
index 21e93b9..1ea9bf3 100755
--- a/test/others/rpc/restore-loop.py
+++ b/test/others/rpc/restore-loop.py
@@ -19,6 +19,11 @@  s.connect(args['socket'])
 req			= rpc.criu_req()
 req.type		= rpc.RESTORE
 req.opts.images_dir_fd	= os.open(args['dir'], os.O_DIRECTORY)
+# As the dumped process is running with setsid this should not
+# be necessary. There seems to be a problem for this testcase
+# in combination with alpine's setsid.
+# The dump is now done with -j and the restore also.
+req.opts.shell_job      = True
 
 # Send request
 s.send(req.SerializeToString())
diff --git a/test/others/rpc/run.sh b/test/others/rpc/run.sh
index aaf48f4..d1facd8 100755
--- a/test/others/rpc/run.sh
+++ b/test/others/rpc/run.sh
@@ -50,7 +50,9 @@  function test_restore_loop {
 	echo "pid ${P}"
 
 	title_print "Dump loop.sh"
-	${CRIU} dump -v4 -o dump-loop.log -D build/imgs_loop -t ${P}
+	# So theoretically '-j' (--shell-job) should not be necessary, but on alpine
+	# this test fails without it.
+	${CRIU} dump -j -v4 -o dump-loop.log -D build/imgs_loop -t ${P}
 
 	title_print "Run restore-loop"
 	./restore-loop.py build/criu_service.socket build/imgs_loop

Comments

Adrian Reber June 28, 2018, 9:08 a.m.
Sorry, too much debug output still included. Need to fix that.

On Thu, Jun 28, 2018 at 09:03:56AM +0000, Adrian Reber wrote:
> From: Adrian Reber <areber@redhat.com>
> 
> Install sudo, create test user with ID 1000, install bash,
> fix pidfile creation and pidfile chmod.
> 
> v2:
>  * use sleep to give the criu daemon some time to start up
> 
> v3:
>  * Andrei is of course right and sleep is not good solution.
>    After adding --status-fd support to criu service, this
>    is how we now detect that criu is ready.
> 
> v4:
>  * This was much more complicated than expected which is related
>    to the different versions of the tools on the different travis
>    test targets. There seems to be a bug in bash on Ubuntu
>     https://lists.gnu.org/archive/html/bug-bash/2017-07/msg00039.html
>    which prevents using 'read -n1' on Ubuntu. As a workaround
>    the result from CRIU's status FD is now read via python.
> 
>    Another problem was discovered on alpine with the loop restore test.
>    CRIU says to use setsid even if the process is already using setsid.
>    As a workaround, still with setsid, this process is now using
>    shell-job true for checkpoint and restore.
> 
> Parts of v2 have been committed before. So the changes from this commit
> are partially already in another commit.
> 
> Signed-off-by: Adrian Reber <areber@redhat.com>
> ---
>  scripts/build/Dockerfile.centos |  4 ++++
>  test/others/rpc/Makefile        | 20 ++++++++++++++++----
>  test/others/rpc/read.py         | 18 ++++++++++++++++++
>  test/others/rpc/restore-loop.py |  5 +++++
>  test/others/rpc/run.sh          |  4 +++-
>  5 files changed, 46 insertions(+), 5 deletions(-)
>  create mode 100644 test/others/rpc/read.py
> 
> diff --git a/scripts/build/Dockerfile.centos b/scripts/build/Dockerfile.centos
> index 0160b75..d8e70ac 100644
> --- a/scripts/build/Dockerfile.centos
> +++ b/scripts/build/Dockerfile.centos
> @@ -40,4 +40,8 @@ WORKDIR /criu
>  ENV CCACHE_DIR=/tmp/.ccache CCACHE_NOCOMPRESS=1 $ENV1=yes
>  RUN mv .ccache /tmp && make mrproper && ccache -sz  && \
>  	date && make -j $(nproc) CC="$CC" && date && ccache -s
> +
> +# The rpc test cases are running as user #1000, let's add the user
> +RUN adduser -u 1000 test
> +
>  RUN make -C test/zdtm -j $(nproc)
> diff --git a/test/others/rpc/Makefile b/test/others/rpc/Makefile
> index 2b15873..077a06b 100644
> --- a/test/others/rpc/Makefile
> +++ b/test/others/rpc/Makefile
> @@ -4,13 +4,25 @@ all: test-c rpc_pb2.py criu
>  CFLAGS += -g -Werror -Wall -I.
>  LDLIBS +=  -lprotobuf-c
>  
> +PYTHON ?= python
> +
>  run: all
>  	mkdir -p build
>  	chmod a+rwx build
> -	@# need to start the criu daemon here to access the pidfile
> -	sudo -g '#1000' -u '#1000' ./criu service -v4 -W build -o service.log --address criu_service.socket -d --pidfile pidfile
> -	# Give the criu daemon some time to start up
> -	sleep 0.5
> +	rm -f build/status
> +	sudo -g '#1000' -u '#1000' mkfifo build/status
> +	@# Need to start the criu daemon here to access the pidfile.
> +	@# Some of the shells only support FDs < 10
> +	ls -la /bin/sh
> +	echo $$TEST_SHELL
> +	echo $(PYTHON)
> +	ls -la /bin/bash
> +	ls -la
> +	sudo -g '#1000' -u '#1000' -- bash -c "exec 200<>build/status; \
> +		./criu service -v4 -W build --address criu_service.socket \
> +		-d --pidfile pidfile -o service.log --status-fd 200; \
> +		$(PYTHON) read.py build/status"
> +	rm -f build/status
>  	chmod a+rw build/pidfile
>  	sudo -g '#1000' -u '#1000' ./run.sh
>  	sudo -g '#1000' -u '#1000' ./version.py
> diff --git a/test/others/rpc/read.py b/test/others/rpc/read.py
> new file mode 100644
> index 0000000..680069b
> --- /dev/null
> +++ b/test/others/rpc/read.py
> @@ -0,0 +1,18 @@
> +# This script is used to read a single character from CRIU's status FD.
> +# That way we know when the CRIU service is ready. CRIU writes a \0 to
> +# the status FD.
> +# In theory this could be easily done using 'read -n 1' from bash, but
> +# but the bash version on Ubuntu has probably the following bug:
> +# https://lists.gnu.org/archive/html/bug-bash/2017-07/msg00039.html
> +
> +import os
> +import sys
> +
> +
> +f=open(sys.argv[1])
> +r = f.read(1)
> +
> +if r == '\0':
> +	sys.exit(0)
> +
> +sys.exit(-1)
> diff --git a/test/others/rpc/restore-loop.py b/test/others/rpc/restore-loop.py
> index 21e93b9..1ea9bf3 100755
> --- a/test/others/rpc/restore-loop.py
> +++ b/test/others/rpc/restore-loop.py
> @@ -19,6 +19,11 @@ s.connect(args['socket'])
>  req			= rpc.criu_req()
>  req.type		= rpc.RESTORE
>  req.opts.images_dir_fd	= os.open(args['dir'], os.O_DIRECTORY)
> +# As the dumped process is running with setsid this should not
> +# be necessary. There seems to be a problem for this testcase
> +# in combination with alpine's setsid.
> +# The dump is now done with -j and the restore also.
> +req.opts.shell_job      = True
>  
>  # Send request
>  s.send(req.SerializeToString())
> diff --git a/test/others/rpc/run.sh b/test/others/rpc/run.sh
> index aaf48f4..d1facd8 100755
> --- a/test/others/rpc/run.sh
> +++ b/test/others/rpc/run.sh
> @@ -50,7 +50,9 @@ function test_restore_loop {
>  	echo "pid ${P}"
>  
>  	title_print "Dump loop.sh"
> -	${CRIU} dump -v4 -o dump-loop.log -D build/imgs_loop -t ${P}
> +	# So theoretically '-j' (--shell-job) should not be necessary, but on alpine
> +	# this test fails without it.
> +	${CRIU} dump -j -v4 -o dump-loop.log -D build/imgs_loop -t ${P}
>  
>  	title_print "Run restore-loop"
>  	./restore-loop.py build/criu_service.socket build/imgs_loop
> -- 
> 1.8.3.1
>