Message ID | 58A595B3.9030901@virtuozzo.com |
---|---|
State | Accepted |
Series | "phaul: Go library for live migration" |
Commit | b42037cd85624e0791486807ddd3e5d71075d4a1 |
Headers | show |
diff --git a/phaul/.gitignore b/phaul/.gitignore new file mode 100644 index 0000000..d8d94a2 --- /dev/null +++ b/phaul/.gitignore @@ -0,0 +1 @@ +src/stats/stats.pb.go diff --git a/phaul/Makefile b/phaul/Makefile new file mode 100644 index 0000000..7e83ba2 --- /dev/null +++ b/phaul/Makefile @@ -0,0 +1,11 @@ +all: test piggie + +test: stats + GOPATH=$(shell pwd):$(shell pwd)/../lib/go/:/usr/share/gocode go build -o test test + +stats: + mkdir -p src/stats/ + protoc --go_out=src/stats/ --proto_path=../images/ ../images/stats.proto + +piggie: piggie.c + gcc piggie.c -o piggie diff --git a/phaul/piggie.c b/phaul/piggie.c new file mode 100644 index 0000000..1dc0801 --- /dev/null +++ b/phaul/piggie.c @@ -0,0 +1,57 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <signal.h> +#include <unistd.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <sched.h> + +#define STKS (4*4096) + +#ifndef CLONE_NEWPID +#define CLONE_NEWPID 0x20000000 +#endif + +static int do_test(void *logf) +{ + int fd, i = 0; + + setsid(); + + close(0); + close(1); + close(2); + + fd = open("/dev/null", O_RDONLY); + if (fd != 0) { + dup2(fd, 0); + close(fd); + } + + fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600); + dup2(fd, 1); + dup2(fd, 2); + if (fd != 1 && fd != 2) + close(fd); + + while (1) { + sleep(1); + printf("%d\n", i++); + fflush(stdout); + } + + return 0; +} + +int main(int argc, char **argv) +{ + int pid; + void *stk; + + stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0); + pid = clone(do_test, stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]); + printf("Child forked, pid %d\n", pid); + + return 0; +} diff --git a/phaul/src/phaul/api.go b/phaul/src/phaul/api.go new file mode 100644 index 0000000..6093a12 --- /dev/null +++ b/phaul/src/phaul/api.go @@ -0,0 +1,52 @@ +package phaul + +import ( + "criu" +) + +/* + * Configuration passed around + * + * Pid is what we migrate + * Memfd is the file descriptor via which criu can + * transfer memory 
pages. + * Wdir is the directory where phaul can put images + * and other stuff + */ +type PhaulConfig struct { + Pid int + Memfd int + Wdir string +} + +/* + * Rpc between PhaulClient and PhaulServer. When client + * calls anything on this one, the corresponding method + * should be called on PhaulServer object. + */ +type PhaulRemote interface { + StartIter() error + StopIter() error +} + +/* + * Interface to local classes. PhaulClient calls them when + * it needs something on the source node. + * + * Methods: + * + * - DumpCopyRestore() is called on client side when the + * pre-iterations are over and it's time to do full dump, + * copy images and restore them on the server side. + * All the time this method is executed victim tree is + * frozen on client. Returning nil kills the tree, error + * unfreezes it and resumes. The criu argument is the + * pointer on created criu.Criu object on which client + * may call Dump(). The requirement on opts passed are: + * set Ps.Fd to comm.Memfd + * set ParentImg to last_client_images_path + * set TrackMem to true + */ +type PhaulLocal interface { + DumpCopyRestore(criu *criu.Criu, c PhaulConfig, last_client_images_path string) error +} diff --git a/phaul/src/phaul/client.go b/phaul/src/phaul/client.go new file mode 100644 index 0000000..06fb821 --- /dev/null +++ b/phaul/src/phaul/client.go @@ -0,0 +1,130 @@ +package phaul + +import ( + "criu" + "fmt" + "github.com/golang/protobuf/proto" + "rpc" + "stats" +) + +const minPagesWritten uint64 = 64 +const maxIters int = 8 +const maxGrowDelta int64 = 32 + +type PhaulClient struct { + local PhaulLocal + remote PhaulRemote + cfg PhaulConfig +} + +/* + * Main entry point. Caller should create the client object by + * passing here local, remote and comm. See comment in corresponding + * interfaces/structs for explanation. 
+ * + * Then call client.Migrate() and enjoy :) + */ +func MakePhaulClient(l PhaulLocal, r PhaulRemote, c PhaulConfig) (*PhaulClient, error) { + return &PhaulClient{local: l, remote: r, cfg: c}, nil +} + +func isLastIter(iter int, stats *stats.DumpStatsEntry, prev_stats *stats.DumpStatsEntry) bool { + if iter >= maxIters { + fmt.Printf("`- max iters reached\n") + return true + } + + pagesWritten := stats.GetPagesWritten() + if pagesWritten < minPagesWritten { + fmt.Printf("`- tiny pre-dump (%d) reached\n", int(pagesWritten)) + return true + } + + pages_delta := int64(pagesWritten) - int64(prev_stats.GetPagesWritten()) + if pages_delta >= maxGrowDelta { + fmt.Printf("`- grow iter (%d) reached\n", int(pages_delta)) + return true + } + + return false +} + +func (pc *PhaulClient) Migrate() error { + criu := criu.MakeCriu() + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(pc.cfg.Memfd)), + } + opts := rpc.CriuOpts{ + Pid: proto.Int32(int32(pc.cfg.Pid)), + LogLevel: proto.Int32(4), + LogFile: proto.String("pre-dump.log"), + Ps: &psi, + } + + err := criu.Prepare() + if err != nil { + return err + } + + defer criu.Cleanup() + + imgs, err := preparePhaulImages(pc.cfg.Wdir) + if err != nil { + return err + } + prev_stats := &stats.DumpStatsEntry{} + iter := 0 + + for { + err = pc.remote.StartIter() + if err != nil { + return err + } + + prev_p := imgs.lastImagesDir() + img_dir, err := imgs.openNextDir() + if err != nil { + return err + } + + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) + if prev_p != "" { + opts.ParentImg = proto.String(prev_p) + } + + err = criu.PreDump(opts, nil) + img_dir.Close() + if err != nil { + return err + } + + err = pc.remote.StopIter() + if err != nil { + return err + } + + st, err := criuGetDumpStats(img_dir) + if err != nil { + return err + } + + if isLastIter(iter, st, prev_stats) { + break + } + + prev_stats = st + } + + err = pc.remote.StartIter() + if err == nil { + prev_p := imgs.lastImagesDir() + err = 
pc.local.DumpCopyRestore(criu, pc.cfg, prev_p) + err2 := pc.remote.StopIter() + if err == nil { + err = err2 + } + } + + return err +} diff --git a/phaul/src/phaul/images.go b/phaul/src/phaul/images.go new file mode 100644 index 0000000..5a433ca --- /dev/null +++ b/phaul/src/phaul/images.go @@ -0,0 +1,41 @@ +package phaul + +import ( + "fmt" + "os" + "path/filepath" +) + +type images struct { + cursor int + dir string +} + +func preparePhaulImages(wdir string) (*images, error) { + return &images{dir: wdir}, nil +} + +func (i *images) getPath(idx int) string { + return fmt.Sprintf(i.dir+"/%d", idx) +} + +func (i *images) openNextDir() (*os.File, error) { + ipath := i.getPath(i.cursor) + err := os.Mkdir(ipath, 0700) + if err != nil { + return nil, err + } + + i.cursor++ + return os.Open(ipath) +} + +func (i *images) lastImagesDir() string { + var ret string + if i.cursor == 0 { + ret = "" + } else { + ret, _ = filepath.Abs(i.getPath(i.cursor - 1)) + } + return ret +} diff --git a/phaul/src/phaul/server.go b/phaul/src/phaul/server.go new file mode 100644 index 0000000..8992ee9 --- /dev/null +++ b/phaul/src/phaul/server.go @@ -0,0 +1,73 @@ +package phaul + +import ( + "criu" + "fmt" + "github.com/golang/protobuf/proto" + "rpc" +) + +type PhaulServer struct { + cfg PhaulConfig + imgs *images + cr *criu.Criu +} + +/* + * Main entry point. Make the server with comm and call PhaulRemote + * methods on it upon client requests. 
+ */ +func MakePhaulServer(c PhaulConfig) (*PhaulServer, error) { + img, err := preparePhaulImages(c.Wdir) + if err != nil { + return nil, err + } + + cr := criu.MakeCriu() + + return &PhaulServer{imgs: img, cfg: c, cr: cr}, nil +} + +/* + * PhaulRemote methods + */ +func (s *PhaulServer) StartIter() error { + fmt.Printf("S: start iter\n") + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(s.cfg.Memfd)), + } + opts := rpc.CriuOpts{ + LogLevel: proto.Int32(4), + LogFile: proto.String("ps.log"), + Ps: &psi, + } + + prev_p := s.imgs.lastImagesDir() + img_dir, err := s.imgs.openNextDir() + if err != nil { + return err + } + defer img_dir.Close() + + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) + if prev_p != "" { + opts.ParentImg = proto.String(prev_p) + } + + return s.cr.StartPageServer(opts) +} + +func (s *PhaulServer) StopIter() error { + return nil +} + +/* + * Server-local methods + */ +func (s *PhaulServer) LastImagesDir() string { + return s.imgs.lastImagesDir() +} + +func (s *PhaulServer) GetCriu() *criu.Criu { + return s.cr +} diff --git a/phaul/src/phaul/stats.go b/phaul/src/phaul/stats.go new file mode 100644 index 0000000..77eb346 --- /dev/null +++ b/phaul/src/phaul/stats.go @@ -0,0 +1,31 @@ +package phaul + +import ( + "github.com/golang/protobuf/proto" + "os" + "stats" +) + +/* FIXME: report stats from CriuResp */ +func criuGetDumpStats(img_dir *os.File) (*stats.DumpStatsEntry, error) { + stf, err := os.Open(img_dir.Name() + "/stats-dump") + if err != nil { + return nil, err + } + defer stf.Close() + + buf := make([]byte, 2*4096) + sz, err := stf.Read(buf) + if err != nil { + return nil, err + } + + st := &stats.StatsEntry{} + // Skip 2 magic values and entry size + err = proto.Unmarshal(buf[12:sz], st) + if err != nil { + return nil, err + } + + return st.GetDump(), nil +} diff --git a/phaul/src/test/main.go b/phaul/src/test/main.go new file mode 100644 index 0000000..0e853df --- /dev/null +++ b/phaul/src/test/main.go @@ -0,0 +1,188 @@ 
+package main + +import ( + "criu" + "fmt" + "github.com/golang/protobuf/proto" + "os" + "phaul" + "rpc" + "strconv" + "strings" + "syscall" +) + +type testLocal struct { + criu.CriuNoNotify + r *testRemote +} + +type testRemote struct { + srv *phaul.PhaulServer +} + +/* Dir where test will put dump images */ +const images_dir = "test_images" + +func prepareImages() error { + err := os.Mkdir(images_dir, 0700) + if err != nil { + return err + } + + /* Work dir for PhaulClient */ + err = os.Mkdir(images_dir+"/local", 0700) + if err != nil { + return err + } + + /* Work dir for PhaulServer */ + err = os.Mkdir(images_dir+"/remote", 0700) + if err != nil { + return err + } + + /* Work dir for DumpCopyRestore */ + err = os.Mkdir(images_dir+"/test", 0700) + if err != nil { + return err + } + + return nil +} + +func mergeImages(dump_dir, last_pre_dump_dir string) error { + idir, err := os.Open(dump_dir) + if err != nil { + return err + } + + defer idir.Close() + + imgs, err := idir.Readdirnames(0) + if err != nil { + return err + } + + for _, fname := range imgs { + if !strings.HasSuffix(fname, ".img") { + continue + } + + fmt.Printf("\t%s -> %s/\n", fname, last_pre_dump_dir) + err = syscall.Link(dump_dir+"/"+fname, last_pre_dump_dir+"/"+fname) + if err != nil { + return err + } + } + + return nil +} + +func (r *testRemote) doRestore() error { + last_srv_images_dir := r.srv.LastImagesDir() + /* + * In images_dir we have images from dump, in the + * last_srv_images_dir -- where server-side images + * (from page server, with pages and pagemaps) are. + * Need to put former into latter and restore from + * them. 
+ */ + err := mergeImages(images_dir+"/test", last_srv_images_dir) + if err != nil { + return err + } + + img_dir, err := os.Open(last_srv_images_dir) + if err != nil { + return err + } + defer img_dir.Close() + + opts := rpc.CriuOpts{ + LogLevel: proto.Int32(4), + LogFile: proto.String("restore.log"), + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), + } + + cr := r.srv.GetCriu() + fmt.Printf("Do restore\n") + return cr.Restore(opts, nil) +} + +func (l *testLocal) PostDump() error { + return l.r.doRestore() +} + +func (l *testLocal) DumpCopyRestore(cr *criu.Criu, cfg phaul.PhaulConfig, last_cln_images_dir string) error { + fmt.Printf("Final stage\n") + + img_dir, err := os.Open(images_dir + "/test") + if err != nil { + return err + } + defer img_dir.Close() + + psi := rpc.CriuPageServerInfo{ + Fd: proto.Int32(int32(cfg.Memfd)), + } + + opts := rpc.CriuOpts{ + Pid: proto.Int32(int32(cfg.Pid)), + LogLevel: proto.Int32(4), + LogFile: proto.String("dump.log"), + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), + TrackMem: proto.Bool(true), + ParentImg: proto.String(last_cln_images_dir), + Ps: &psi, + } + + fmt.Printf("Do dump\n") + return cr.Dump(opts, l) +} + +func main() { + pid, _ := strconv.Atoi(os.Args[1]) + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0) + if err != nil { + fmt.Printf("Can't make socketpair\n") + return + } + + err = prepareImages() + if err != nil { + fmt.Printf("Can't prepare dirs for images\n") + return + } + + fmt.Printf("Make server part (socket %d)\n", fds[1]) + srv, err := phaul.MakePhaulServer(phaul.PhaulConfig{ + Pid: pid, + Memfd: fds[1], + Wdir: images_dir + "/remote"}) + if err != nil { + return + } + + r := &testRemote{srv} + + fmt.Printf("Make client part (socket %d)\n", fds[0]) + cln, err := phaul.MakePhaulClient(&testLocal{r: r}, srv, + phaul.PhaulConfig{ + Pid: pid, + Memfd: fds[0], + Wdir: images_dir + "/local"}) + if err != nil { + return + } + + fmt.Printf("Migrate\n") + err = cln.Migrate() + if err != 
nil { + fmt.Printf("Failed: ") + fmt.Print(err) + return + } + + fmt.Printf("SUCCESS!\n") +}
On 02/16/2017 04:42 PM, Patchwork wrote: > == Series Details == > > Series: phaul: Go library for live migration > URL : https://patchwork.criu.org/series/1254/ > State : failure > > == Logs == > > For more details see: https://travis-ci.org/criupatchwork/criu/builds/202228250 > . https://github.com/xemul/criu/issues/282
Applied. Thanks. Could you add the test to scripts/travis/travis-tests? On Thu, Feb 16, 2017 at 03:06:11PM +0300, Pavel Emelyanov wrote: > The API is as simple as > > srv := MakePhaulServer(config) > cln := MakePhaulClient(local, remote, config) > cln.Migrate() > > * config is the PhaulConfig struct that contains pid to migrate, > memory transfer channel (file descriptor) that phaul can use > to send/receive memory and path to existing directory where > phaul can put intermediate files and images. > > * local is PhaulLocal interface with (for now) the single method > - DumpCopyRestore(): method that phaul calls when it's time > to do engine-specific dump, images copy and restore on > the destination side. > > Few words about the latter -- we've learned, that different > engines have their own way to call CRIU to dump a container, > so phaul, instead of dumping one by its own, lets the caller > do it. To keep-up with pre-dump stuff, the client should > not forget to do three things: > > - set the TrackMem option to true > - set the ParentImg to the passed value > - set the Ps (page server) channel with 'config.Memfd' > > The criu object is passed here as well, so that caller can > call Dump() on it (once we have keep_open support in libcriu > this will help to avoid additional criu execve). > > The method also should handle the PostDump notification and > do images-copy and restore in it. Not sure how to wrap this > into phaul better. > > * remote is PhaulRemote interface whose method should be called > on the dst side on the PhaulServer object using whatever RPC > the caller finds acceptable. > > As a demonstration the src/test/main.go example is attached. To > see how it goes 'make' it, then start the 'piggie $outfile' > proggie and run 'test $pid' command. The piggie will be, well, > live migrated locally :) i.e. will appear as a process with > different pid (it lives in a pid namespace). 
> > Changes since v2: > > * Reworked the API onto local/remote/config scheme > * Added ability to configure directory for images > * Re-used server side Criu object for final restore > > Changes since v1: > > * Supported keep_open-s for pre-dumps > * Added code comments about interface > * Simplified the example code > > Further plans for this are > > - move py p.haul to use this compiled library > - add post-copy (lazy pages) support (with Mike help) > - add image-cache and image-proxy (with Rodrigo help) > - add API/framework for FS migration > > Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> > --- > phaul/.gitignore | 1 + > phaul/Makefile | 11 +++ > phaul/piggie.c | 57 ++++++++++++++ > phaul/src/phaul/api.go | 52 +++++++++++++ > phaul/src/phaul/client.go | 130 ++++++++++++++++++++++++++++++++ > phaul/src/phaul/images.go | 41 ++++++++++ > phaul/src/phaul/server.go | 73 ++++++++++++++++++ > phaul/src/phaul/stats.go | 31 ++++++++ > phaul/src/test/main.go | 188 ++++++++++++++++++++++++++++++++++++++++++++++ > 9 files changed, 584 insertions(+) > create mode 100644 phaul/.gitignore > create mode 100644 phaul/Makefile > create mode 100644 phaul/piggie.c > create mode 100644 phaul/src/phaul/api.go > create mode 100644 phaul/src/phaul/client.go > create mode 100644 phaul/src/phaul/images.go > create mode 100644 phaul/src/phaul/server.go > create mode 100644 phaul/src/phaul/stats.go > create mode 100644 phaul/src/test/main.go > > diff --git a/phaul/.gitignore b/phaul/.gitignore > new file mode 100644 > index 0000000..d8d94a2 > --- /dev/null > +++ b/phaul/.gitignore > @@ -0,0 +1 @@ > +src/stats/stats.pb.go > diff --git a/phaul/Makefile b/phaul/Makefile > new file mode 100644 > index 0000000..7e83ba2 > --- /dev/null > +++ b/phaul/Makefile > @@ -0,0 +1,11 @@ > +all: test piggie > + > +test: stats > + GOPATH=$(shell pwd):$(shell pwd)/../lib/go/:/usr/share/gocode go build -o test test > + > +stats: > + mkdir -p src/stats/ > + protoc --go_out=src/stats/
--proto_path=../images/ ../images/stats.proto > + > +piggie: piggie.c > + gcc piggie.c -o piggie > diff --git a/phaul/piggie.c b/phaul/piggie.c > new file mode 100644 > index 0000000..1dc0801 > --- /dev/null > +++ b/phaul/piggie.c > @@ -0,0 +1,57 @@ > +#define _GNU_SOURCE > +#include <stdio.h> > +#include <signal.h> > +#include <unistd.h> > +#include <sys/mman.h> > +#include <fcntl.h> > +#include <sched.h> > + > +#define STKS (4*4096) > + > +#ifndef CLONE_NEWPID > +#define CLONE_NEWPID 0x20000000 > +#endif > + > +static int do_test(void *logf) > +{ > + int fd, i = 0; > + > + setsid(); > + > + close(0); > + close(1); > + close(2); > + > + fd = open("/dev/null", O_RDONLY); > + if (fd != 0) { > + dup2(fd, 0); > + close(fd); > + } > + > + fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600); > + dup2(fd, 1); > + dup2(fd, 2); > + if (fd != 1 && fd != 2) > + close(fd); > + > + while (1) { > + sleep(1); > + printf("%d\n", i++); > + fflush(stdout); > + } > + > + return 0; > +} > + > +int main(int argc, char **argv) > +{ > + int pid; > + void *stk; > + > + stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE, > + MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0); > + pid = clone(do_test, stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]); > + printf("Child forked, pid %d\n", pid); > + > + return 0; > +} > diff --git a/phaul/src/phaul/api.go b/phaul/src/phaul/api.go > new file mode 100644 > index 0000000..6093a12 > --- /dev/null > +++ b/phaul/src/phaul/api.go > @@ -0,0 +1,52 @@ > +package phaul > + > +import ( > + "criu" > +) > + > +/* > + * Configuration passed around > + * > + * Pid is what we migrate > + * Memfd is the file descriptor via which criu can > + * transfer memory pages. > + * Wdir is the directory where phaul can put images > + * and other stuff > + */ > +type PhaulConfig struct { > + Pid int > + Memfd int > + Wdir string > +} > + > +/* > + * Rpc between PhaulClient and PhaulServer. 
When client > + * calls anything on this one, the corresponding method > + * should be called on PhaulServer object. > + */ > +type PhaulRemote interface { > + StartIter() error > + StopIter() error > +} > + > +/* > + * Interface to local classes. PhaulClient calls them when > + * it needs something on the source node. > + * > + * Methods: > + * > + * - DumpCopyRestore() is called on client side when the > + * pre-iterations are over and it's time to do full dump, > + * copy images and restore them on the server side. > + * All the time this method is executed victim tree is > + * frozen on client. Returning nil kills the tree, error > + * unfreezes it and resumes. The criu argument is the > + * pointer on created criu.Criu object on which client > + * may call Dump(). The requirement on opts passed are: > + * set Ps.Fd to comm.Memfd > + * set ParentImg to last_client_images_path > + * set TrackMem to true > + */ > +type PhaulLocal interface { > + DumpCopyRestore(criu *criu.Criu, c PhaulConfig, last_client_images_path string) error > +} > diff --git a/phaul/src/phaul/client.go b/phaul/src/phaul/client.go > new file mode 100644 > index 0000000..06fb821 > --- /dev/null > +++ b/phaul/src/phaul/client.go > @@ -0,0 +1,130 @@ > +package phaul > + > +import ( > + "criu" > + "fmt" > + "github.com/golang/protobuf/proto" > + "rpc" > + "stats" > +) > + > +const minPagesWritten uint64 = 64 > +const maxIters int = 8 > +const maxGrowDelta int64 = 32 > + > +type PhaulClient struct { > + local PhaulLocal > + remote PhaulRemote > + cfg PhaulConfig > +} > + > +/* > + * Main entry point. Caller should create the client object by > + * passing here local, remote and comm. See comment in corresponding > + * interfaces/structs for explanation. 
> + * > + * Then call client.Migrate() and enjoy :) > + */ > +func MakePhaulClient(l PhaulLocal, r PhaulRemote, c PhaulConfig) (*PhaulClient, error) { > + return &PhaulClient{local: l, remote: r, cfg: c}, nil > +} > + > +func isLastIter(iter int, stats *stats.DumpStatsEntry, prev_stats *stats.DumpStatsEntry) bool { > + if iter >= maxIters { > + fmt.Printf("`- max iters reached\n") > + return true > + } > + > + pagesWritten := stats.GetPagesWritten() > + if pagesWritten < minPagesWritten { > + fmt.Printf("`- tiny pre-dump (%d) reached\n", int(pagesWritten)) > + return true > + } > + > + pages_delta := int64(pagesWritten) - int64(prev_stats.GetPagesWritten()) > + if pages_delta >= maxGrowDelta { > + fmt.Printf("`- grow iter (%d) reached\n", int(pages_delta)) > + return true > + } > + > + return false > +} > + > +func (pc *PhaulClient) Migrate() error { > + criu := criu.MakeCriu() > + psi := rpc.CriuPageServerInfo{ > + Fd: proto.Int32(int32(pc.cfg.Memfd)), > + } > + opts := rpc.CriuOpts{ > + Pid: proto.Int32(int32(pc.cfg.Pid)), > + LogLevel: proto.Int32(4), > + LogFile: proto.String("pre-dump.log"), > + Ps: &psi, > + } > + > + err := criu.Prepare() > + if err != nil { > + return err > + } > + > + defer criu.Cleanup() > + > + imgs, err := preparePhaulImages(pc.cfg.Wdir) > + if err != nil { > + return err > + } > + prev_stats := &stats.DumpStatsEntry{} > + iter := 0 > + > + for { > + err = pc.remote.StartIter() > + if err != nil { > + return err > + } > + > + prev_p := imgs.lastImagesDir() > + img_dir, err := imgs.openNextDir() > + if err != nil { > + return err > + } > + > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) > + if prev_p != "" { > + opts.ParentImg = proto.String(prev_p) > + } > + > + err = criu.PreDump(opts, nil) > + img_dir.Close() > + if err != nil { > + return err > + } > + > + err = pc.remote.StopIter() > + if err != nil { > + return err > + } > + > + st, err := criuGetDumpStats(img_dir) > + if err != nil { > + return err > + } > + > + if 
isLastIter(iter, st, prev_stats) { > + break > + } > + > + prev_stats = st > + } > + > + err = pc.remote.StartIter() > + if err == nil { > + prev_p := imgs.lastImagesDir() > + err = pc.local.DumpCopyRestore(criu, pc.cfg, prev_p) > + err2 := pc.remote.StopIter() > + if err == nil { > + err = err2 > + } > + } > + > + return err > +} > diff --git a/phaul/src/phaul/images.go b/phaul/src/phaul/images.go > new file mode 100644 > index 0000000..5a433ca > --- /dev/null > +++ b/phaul/src/phaul/images.go > @@ -0,0 +1,41 @@ > +package phaul > + > +import ( > + "fmt" > + "os" > + "path/filepath" > +) > + > +type images struct { > + cursor int > + dir string > +} > + > +func preparePhaulImages(wdir string) (*images, error) { > + return &images{dir: wdir}, nil > +} > + > +func (i *images) getPath(idx int) string { > + return fmt.Sprintf(i.dir+"/%d", idx) > +} > + > +func (i *images) openNextDir() (*os.File, error) { > + ipath := i.getPath(i.cursor) > + err := os.Mkdir(ipath, 0700) > + if err != nil { > + return nil, err > + } > + > + i.cursor++ > + return os.Open(ipath) > +} > + > +func (i *images) lastImagesDir() string { > + var ret string > + if i.cursor == 0 { > + ret = "" > + } else { > + ret, _ = filepath.Abs(i.getPath(i.cursor - 1)) > + } > + return ret > +} > diff --git a/phaul/src/phaul/server.go b/phaul/src/phaul/server.go > new file mode 100644 > index 0000000..8992ee9 > --- /dev/null > +++ b/phaul/src/phaul/server.go > @@ -0,0 +1,73 @@ > +package phaul > + > +import ( > + "criu" > + "fmt" > + "github.com/golang/protobuf/proto" > + "rpc" > +) > + > +type PhaulServer struct { > + cfg PhaulConfig > + imgs *images > + cr *criu.Criu > +} > + > +/* > + * Main entry point. Make the server with comm and call PhaulRemote > + * methods on it upon client requests. 
> + */ > +func MakePhaulServer(c PhaulConfig) (*PhaulServer, error) { > + img, err := preparePhaulImages(c.Wdir) > + if err != nil { > + return nil, err > + } > + > + cr := criu.MakeCriu() > + > + return &PhaulServer{imgs: img, cfg: c, cr: cr}, nil > +} > + > +/* > + * PhaulRemote methods > + */ > +func (s *PhaulServer) StartIter() error { > + fmt.Printf("S: start iter\n") > + psi := rpc.CriuPageServerInfo{ > + Fd: proto.Int32(int32(s.cfg.Memfd)), > + } > + opts := rpc.CriuOpts{ > + LogLevel: proto.Int32(4), > + LogFile: proto.String("ps.log"), > + Ps: &psi, > + } > + > + prev_p := s.imgs.lastImagesDir() > + img_dir, err := s.imgs.openNextDir() > + if err != nil { > + return err > + } > + defer img_dir.Close() > + > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) > + if prev_p != "" { > + opts.ParentImg = proto.String(prev_p) > + } > + > + return s.cr.StartPageServer(opts) > +} > + > +func (s *PhaulServer) StopIter() error { > + return nil > +} > + > +/* > + * Server-local methods > + */ > +func (s *PhaulServer) LastImagesDir() string { > + return s.imgs.lastImagesDir() > +} > + > +func (s *PhaulServer) GetCriu() *criu.Criu { > + return s.cr > +} > diff --git a/phaul/src/phaul/stats.go b/phaul/src/phaul/stats.go > new file mode 100644 > index 0000000..77eb346 > --- /dev/null > +++ b/phaul/src/phaul/stats.go > @@ -0,0 +1,31 @@ > +package phaul > + > +import ( > + "github.com/golang/protobuf/proto" > + "os" > + "stats" > +) > + > +/* FIXME: report stats from CriuResp */ > +func criuGetDumpStats(img_dir *os.File) (*stats.DumpStatsEntry, error) { > + stf, err := os.Open(img_dir.Name() + "/stats-dump") > + if err != nil { > + return nil, err > + } > + defer stf.Close() > + > + buf := make([]byte, 2*4096) > + sz, err := stf.Read(buf) > + if err != nil { > + return nil, err > + } > + > + st := &stats.StatsEntry{} > + // Skip 2 magic values and entry size > + err = proto.Unmarshal(buf[12:sz], st) > + if err != nil { > + return nil, err > + } > + > + return 
st.GetDump(), nil > +} > diff --git a/phaul/src/test/main.go b/phaul/src/test/main.go > new file mode 100644 > index 0000000..0e853df > --- /dev/null > +++ b/phaul/src/test/main.go > @@ -0,0 +1,188 @@ > +package main > + > +import ( > + "criu" > + "fmt" > + "github.com/golang/protobuf/proto" > + "os" > + "phaul" > + "rpc" > + "strconv" > + "strings" > + "syscall" > +) > + > +type testLocal struct { > + criu.CriuNoNotify > + r *testRemote > +} > + > +type testRemote struct { > + srv *phaul.PhaulServer > +} > + > +/* Dir where test will put dump images */ > +const images_dir = "test_images" > + > +func prepareImages() error { > + err := os.Mkdir(images_dir, 0700) > + if err != nil { > + return err > + } > + > + /* Work dir for PhaulClient */ > + err = os.Mkdir(images_dir+"/local", 0700) > + if err != nil { > + return err > + } > + > + /* Work dir for PhaulServer */ > + err = os.Mkdir(images_dir+"/remote", 0700) > + if err != nil { > + return err > + } > + > + /* Work dir for DumpCopyRestore */ > + err = os.Mkdir(images_dir+"/test", 0700) > + if err != nil { > + return err > + } > + > + return nil > +} > + > +func mergeImages(dump_dir, last_pre_dump_dir string) error { > + idir, err := os.Open(dump_dir) > + if err != nil { > + return err > + } > + > + defer idir.Close() > + > + imgs, err := idir.Readdirnames(0) > + if err != nil { > + return err > + } > + > + for _, fname := range imgs { > + if !strings.HasSuffix(fname, ".img") { > + continue > + } > + > + fmt.Printf("\t%s -> %s/\n", fname, last_pre_dump_dir) > + err = syscall.Link(dump_dir+"/"+fname, last_pre_dump_dir+"/"+fname) > + if err != nil { > + return err > + } > + } > + > + return nil > +} > + > +func (r *testRemote) doRestore() error { > + last_srv_images_dir := r.srv.LastImagesDir() > + /* > + * In images_dir we have images from dump, in the > + * last_srv_images_dir -- where server-side images > + * (from page server, with pages and pagemaps) are. 
> + * Need to put former into latter and restore from > + * them. > + */ > + err := mergeImages(images_dir+"/test", last_srv_images_dir) > + if err != nil { > + return err > + } > + > + img_dir, err := os.Open(last_srv_images_dir) > + if err != nil { > + return err > + } > + defer img_dir.Close() > + > + opts := rpc.CriuOpts{ > + LogLevel: proto.Int32(4), > + LogFile: proto.String("restore.log"), > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), > + } > + > + cr := r.srv.GetCriu() > + fmt.Printf("Do restore\n") > + return cr.Restore(opts, nil) > +} > + > +func (l *testLocal) PostDump() error { > + return l.r.doRestore() > +} > + > +func (l *testLocal) DumpCopyRestore(cr *criu.Criu, cfg phaul.PhaulConfig, last_cln_images_dir string) error { > + fmt.Printf("Final stage\n") > + > + img_dir, err := os.Open(images_dir + "/test") > + if err != nil { > + return err > + } > + defer img_dir.Close() > + > + psi := rpc.CriuPageServerInfo{ > + Fd: proto.Int32(int32(cfg.Memfd)), > + } > + > + opts := rpc.CriuOpts{ > + Pid: proto.Int32(int32(cfg.Pid)), > + LogLevel: proto.Int32(4), > + LogFile: proto.String("dump.log"), > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), > + TrackMem: proto.Bool(true), > + ParentImg: proto.String(last_cln_images_dir), > + Ps: &psi, > + } > + > + fmt.Printf("Do dump\n") > + return cr.Dump(opts, l) > +} > + > +func main() { > + pid, _ := strconv.Atoi(os.Args[1]) > + fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0) > + if err != nil { > + fmt.Printf("Can't make socketpair\n") > + return > + } > + > + err = prepareImages() > + if err != nil { > + fmt.Printf("Can't prepare dirs for images\n") > + return > + } > + > + fmt.Printf("Make server part (socket %d)\n", fds[1]) > + srv, err := phaul.MakePhaulServer(phaul.PhaulConfig{ > + Pid: pid, > + Memfd: fds[1], > + Wdir: images_dir + "/remote"}) > + if err != nil { > + return > + } > + > + r := &testRemote{srv} > + > + fmt.Printf("Make client part (socket %d)\n", fds[0]) > + cln, 
err := phaul.MakePhaulClient(&testLocal{r: r}, srv, > + phaul.PhaulConfig{ > + Pid: pid, > + Memfd: fds[0], > + Wdir: images_dir + "/local"}) > + if err != nil { > + return > + } > + > + fmt.Printf("Migrate\n") > + err = cln.Migrate() > + if err != nil { > + fmt.Printf("Failed: ") > + fmt.Print(err) > + return > + } > + > + fmt.Printf("SUCCESS!\n") > +} > -- > 2.5.0 > _______________________________________________ > CRIU mailing list > CRIU@openvz.org > https://lists.openvz.org/mailman/listinfo/criu
On Mon, Mar 13, 2017 at 11:34:31AM -0700, Andrei Vagin wrote: > Applied. Thanks. Could you add the test to scripts/travis/travis-tests? ping https://github.com/xemul/criu/issues/367 > > On Thu, Feb 16, 2017 at 03:06:11PM +0300, Pavel Emelyanov wrote: > > The API is as simple as > > > > srv := MakePhaulServer(config) > > cln := MakePhaulClient(local, remote, config) > > cln.Migrate() > > > > * config is the PhaulConfig struct that contains pid to migrate, > > memory transfer channel (file descriptor) that phaul can use > > to send/receive memory and path to existing directory where > > phaul can put intermediate files and images. > > > > * local is PhaulLocal interface with (for now) the single method > > - DumpCopyRestore(): method that phaul calls when it's time > > to do engine-specific dump, images copy and restore on > > the destination side. > > > > Few words about the latter -- we've learned, that different > > engines have their own way to call CRIU to dump a container, > > so phaul, instead of dumping one by its own, lets the caller > > do it. To keep-up with pre-dump stuff, the client should > > not forget to do three things: > > > > - set the TrackMem option to true > > - set the ParentImg to the passed value > > - set the Ps (page server) channel with 'config.Memfd' > > > > The criu object is passed here as well, so that caller can > > call Dump() on it (once we have keep_open support in libcriu > > this will help to avoid additional criu execve). > > > > The method also should handle the PostDump notification and > > do images-copy and restore in it. Not sure how to wrap this > > into phaul better. > > > > * remote is PhaulRemote interface whose method should be called > > on the dst side on the PhaulServer object using whatever RPC > > the caller finds acceptable. > > > > As a demonstration the src/test/main.go example is attached. To > > see how it goes 'make' it, then start the 'piggie $outfile' > > proggie and run 'test $pid' command. 
The piggie will be, well, > > live migrated locally :) i.e. will appear as a process with > > different pid (it lives in a pid namespace). > > > > Changes since v2: > > > > * Reworked the API onto local/remote/config scheme > > * Added ability to configure diretory for images > > * Re-used server side Criu object for final restore > > > > Changes since v1: > > > > * Supported keep_open-s for pre-dumps > > * Added code comments about interface > > * Simplified the example code > > > > Further plans for this are > > > > - move py p.haul to use this compiled library > > - add post-copy (lazy pages) support (with Mike help) > > - add image-cache and image-proxy (with Ridrigo help) > > - add API/framwork for FS migration > > > > Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> > > --- > > phaul/.gitignore | 1 + > > phaul/Makefile | 11 +++ > > phaul/piggie.c | 57 ++++++++++++++ > > phaul/src/phaul/api.go | 52 +++++++++++++ > > phaul/src/phaul/client.go | 130 ++++++++++++++++++++++++++++++++ > > phaul/src/phaul/images.go | 41 ++++++++++ > > phaul/src/phaul/server.go | 73 ++++++++++++++++++ > > phaul/src/phaul/stats.go | 31 ++++++++ > > phaul/src/test/main.go | 188 ++++++++++++++++++++++++++++++++++++++++++++++ > > 9 files changed, 584 insertions(+) > > create mode 100644 phaul/.gitignore > > create mode 100644 phaul/Makefile > > create mode 100644 phaul/piggie.c > > create mode 100644 phaul/src/phaul/api.go > > create mode 100644 phaul/src/phaul/client.go > > create mode 100644 phaul/src/phaul/images.go > > create mode 100644 phaul/src/phaul/server.go > > create mode 100644 phaul/src/phaul/stats.go > > create mode 100644 phaul/src/test/main.go > > > > diff --git a/phaul/.gitignore b/phaul/.gitignore > > new file mode 100644 > > index 0000000..d8d94a2 > > --- /dev/null > > +++ b/phaul/.gitignore > > @@ -0,0 +1 @@ > > +src/stats/stats.pb.go > > diff --git a/phaul/Makefile b/phaul/Makefile > > new file mode 100644 > > index 0000000..7e83ba2 > > --- /dev/null > > +++ 
b/phaul/Makefile > > @@ -0,0 +1,11 @@ > > +all: test piggie > > + > > +test: stats > > + GOPATH=$(shell pwd):$(shell pwd)/../lib/go/:/usr/share/gocode go build -o test test > > + > > +stats: > > + mkdir -p src/stats/ > > + protoc --go_out=src/stats/ --proto_path=../images/ ../images/stats.proto > > + > > +piggie: piggie.c > > + gcc piggie.c -o piggie > > diff --git a/phaul/piggie.c b/phaul/piggie.c > > new file mode 100644 > > index 0000000..1dc0801 > > --- /dev/null > > +++ b/phaul/piggie.c > > @@ -0,0 +1,57 @@ > > +#define _GNU_SOURCE > > +#include <stdio.h> > > +#include <signal.h> > > +#include <unistd.h> > > +#include <sys/mman.h> > > +#include <fcntl.h> > > +#include <sched.h> > > + > > +#define STKS (4*4096) > > + > > +#ifndef CLONE_NEWPID > > +#define CLONE_NEWPID 0x20000000 > > +#endif > > + > > +static int do_test(void *logf) > > +{ > > + int fd, i = 0; > > + > > + setsid(); > > + > > + close(0); > > + close(1); > > + close(2); > > + > > + fd = open("/dev/null", O_RDONLY); > > + if (fd != 0) { > > + dup2(fd, 0); > > + close(fd); > > + } > > + > > + fd = open(logf, O_WRONLY | O_TRUNC | O_CREAT, 0600); > > + dup2(fd, 1); > > + dup2(fd, 2); > > + if (fd != 1 && fd != 2) > > + close(fd); > > + > > + while (1) { > > + sleep(1); > > + printf("%d\n", i++); > > + fflush(stdout); > > + } > > + > > + return 0; > > +} > > + > > +int main(int argc, char **argv) > > +{ > > + int pid; > > + void *stk; > > + > > + stk = mmap(NULL, STKS, PROT_READ | PROT_WRITE, > > + MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0); > > + pid = clone(do_test, stk + STKS, SIGCHLD | CLONE_NEWPID, argv[1]); > > + printf("Child forked, pid %d\n", pid); > > + > > + return 0; > > +} > > diff --git a/phaul/src/phaul/api.go b/phaul/src/phaul/api.go > > new file mode 100644 > > index 0000000..6093a12 > > --- /dev/null > > +++ b/phaul/src/phaul/api.go > > @@ -0,0 +1,52 @@ > > +package phaul > > + > > +import ( > > + "criu" > > +) > > + > > +/* > > + * Configuration passed around > > + * > > + * Pid 
is what we migrate > > + * Memfd is the file descriptor via which criu can > > + * transfer memory pages. > > + * Wdir is the directory where phaul can put images > > + * and other stuff > > + */ > > +type PhaulConfig struct { > > + Pid int > > + Memfd int > > + Wdir string > > +} > > + > > +/* > > + * Rpc between PhaulClient and PhaulServer. When client > > + * calls anything on this one, the corresponding method > > + * should be called on PhaulServer object. > > + */ > > +type PhaulRemote interface { > > + StartIter() error > > + StopIter() error > > +} > > + > > +/* > > + * Interface to local classes. PhaulClient calls them when > > + * it needs something on the source node. > > + * > > + * Methods: > > + * > > + * - DumpCopyRestore() is called on client side when the > > + * pre-iterations are over and it's time to do full dump, > > + * copy images and restore them on the server side. > > + * All the time this method is executed victim tree is > > + * frozen on client. Returning nil kills the tree, error > > + * unfreezes it and resumes. The criu argument is the > > + * pointer on created criu.Criu object on which client > > + * may call Dump(). 
The requirement on opts passed are: > > + * set Ps.Fd to comm.Memfd > > + * set ParentImg to last_client_images_path > > + * set TrackMem to true > > + */ > > +type PhaulLocal interface { > > + DumpCopyRestore(criu *criu.Criu, c PhaulConfig, last_client_images_path string) error > > +} > > diff --git a/phaul/src/phaul/client.go b/phaul/src/phaul/client.go > > new file mode 100644 > > index 0000000..06fb821 > > --- /dev/null > > +++ b/phaul/src/phaul/client.go > > @@ -0,0 +1,130 @@ > > +package phaul > > + > > +import ( > > + "criu" > > + "fmt" > > + "github.com/golang/protobuf/proto" > > + "rpc" > > + "stats" > > +) > > + > > +const minPagesWritten uint64 = 64 > > +const maxIters int = 8 > > +const maxGrowDelta int64 = 32 > > + > > +type PhaulClient struct { > > + local PhaulLocal > > + remote PhaulRemote > > + cfg PhaulConfig > > +} > > + > > +/* > > + * Main entry point. Caller should create the client object by > > + * passing here local, remote and comm. See comment in corresponding > > + * interfaces/structs for explanation. 
> > + * > > + * Then call client.Migrate() and enjoy :) > > + */ > > +func MakePhaulClient(l PhaulLocal, r PhaulRemote, c PhaulConfig) (*PhaulClient, error) { > > + return &PhaulClient{local: l, remote: r, cfg: c}, nil > > +} > > + > > +func isLastIter(iter int, stats *stats.DumpStatsEntry, prev_stats *stats.DumpStatsEntry) bool { > > + if iter >= maxIters { > > + fmt.Printf("`- max iters reached\n") > > + return true > > + } > > + > > + pagesWritten := stats.GetPagesWritten() > > + if pagesWritten < minPagesWritten { > > + fmt.Printf("`- tiny pre-dump (%d) reached\n", int(pagesWritten)) > > + return true > > + } > > + > > + pages_delta := int64(pagesWritten) - int64(prev_stats.GetPagesWritten()) > > + if pages_delta >= maxGrowDelta { > > + fmt.Printf("`- grow iter (%d) reached\n", int(pages_delta)) > > + return true > > + } > > + > > + return false > > +} > > + > > +func (pc *PhaulClient) Migrate() error { > > + criu := criu.MakeCriu() > > + psi := rpc.CriuPageServerInfo{ > > + Fd: proto.Int32(int32(pc.cfg.Memfd)), > > + } > > + opts := rpc.CriuOpts{ > > + Pid: proto.Int32(int32(pc.cfg.Pid)), > > + LogLevel: proto.Int32(4), > > + LogFile: proto.String("pre-dump.log"), > > + Ps: &psi, > > + } > > + > > + err := criu.Prepare() > > + if err != nil { > > + return err > > + } > > + > > + defer criu.Cleanup() > > + > > + imgs, err := preparePhaulImages(pc.cfg.Wdir) > > + if err != nil { > > + return err > > + } > > + prev_stats := &stats.DumpStatsEntry{} > > + iter := 0 > > + > > + for { > > + err = pc.remote.StartIter() > > + if err != nil { > > + return err > > + } > > + > > + prev_p := imgs.lastImagesDir() > > + img_dir, err := imgs.openNextDir() > > + if err != nil { > > + return err > > + } > > + > > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) > > + if prev_p != "" { > > + opts.ParentImg = proto.String(prev_p) > > + } > > + > > + err = criu.PreDump(opts, nil) > > + img_dir.Close() > > + if err != nil { > > + return err > > + } > > + > > + err = 
pc.remote.StopIter() > > + if err != nil { > > + return err > > + } > > + > > + st, err := criuGetDumpStats(img_dir) > > + if err != nil { > > + return err > > + } > > + > > + if isLastIter(iter, st, prev_stats) { > > + break > > + } > > + > > + prev_stats = st > > + } > > + > > + err = pc.remote.StartIter() > > + if err == nil { > > + prev_p := imgs.lastImagesDir() > > + err = pc.local.DumpCopyRestore(criu, pc.cfg, prev_p) > > + err2 := pc.remote.StopIter() > > + if err == nil { > > + err = err2 > > + } > > + } > > + > > + return err > > +} > > diff --git a/phaul/src/phaul/images.go b/phaul/src/phaul/images.go > > new file mode 100644 > > index 0000000..5a433ca > > --- /dev/null > > +++ b/phaul/src/phaul/images.go > > @@ -0,0 +1,41 @@ > > +package phaul > > + > > +import ( > > + "fmt" > > + "os" > > + "path/filepath" > > +) > > + > > +type images struct { > > + cursor int > > + dir string > > +} > > + > > +func preparePhaulImages(wdir string) (*images, error) { > > + return &images{dir: wdir}, nil > > +} > > + > > +func (i *images) getPath(idx int) string { > > + return fmt.Sprintf(i.dir+"/%d", idx) > > +} > > + > > +func (i *images) openNextDir() (*os.File, error) { > > + ipath := i.getPath(i.cursor) > > + err := os.Mkdir(ipath, 0700) > > + if err != nil { > > + return nil, err > > + } > > + > > + i.cursor++ > > + return os.Open(ipath) > > +} > > + > > +func (i *images) lastImagesDir() string { > > + var ret string > > + if i.cursor == 0 { > > + ret = "" > > + } else { > > + ret, _ = filepath.Abs(i.getPath(i.cursor - 1)) > > + } > > + return ret > > +} > > diff --git a/phaul/src/phaul/server.go b/phaul/src/phaul/server.go > > new file mode 100644 > > index 0000000..8992ee9 > > --- /dev/null > > +++ b/phaul/src/phaul/server.go > > @@ -0,0 +1,73 @@ > > +package phaul > > + > > +import ( > > + "criu" > > + "fmt" > > + "github.com/golang/protobuf/proto" > > + "rpc" > > +) > > + > > +type PhaulServer struct { > > + cfg PhaulConfig > > + imgs *images > > + cr 
*criu.Criu > > +} > > + > > +/* > > + * Main entry point. Make the server with comm and call PhaulRemote > > + * methods on it upon client requests. > > + */ > > +func MakePhaulServer(c PhaulConfig) (*PhaulServer, error) { > > + img, err := preparePhaulImages(c.Wdir) > > + if err != nil { > > + return nil, err > > + } > > + > > + cr := criu.MakeCriu() > > + > > + return &PhaulServer{imgs: img, cfg: c, cr: cr}, nil > > +} > > + > > +/* > > + * PhaulRemote methods > > + */ > > +func (s *PhaulServer) StartIter() error { > > + fmt.Printf("S: start iter\n") > > + psi := rpc.CriuPageServerInfo{ > > + Fd: proto.Int32(int32(s.cfg.Memfd)), > > + } > > + opts := rpc.CriuOpts{ > > + LogLevel: proto.Int32(4), > > + LogFile: proto.String("ps.log"), > > + Ps: &psi, > > + } > > + > > + prev_p := s.imgs.lastImagesDir() > > + img_dir, err := s.imgs.openNextDir() > > + if err != nil { > > + return err > > + } > > + defer img_dir.Close() > > + > > + opts.ImagesDirFd = proto.Int32(int32(img_dir.Fd())) > > + if prev_p != "" { > > + opts.ParentImg = proto.String(prev_p) > > + } > > + > > + return s.cr.StartPageServer(opts) > > +} > > + > > +func (s *PhaulServer) StopIter() error { > > + return nil > > +} > > + > > +/* > > + * Server-local methods > > + */ > > +func (s *PhaulServer) LastImagesDir() string { > > + return s.imgs.lastImagesDir() > > +} > > + > > +func (s *PhaulServer) GetCriu() *criu.Criu { > > + return s.cr > > +} > > diff --git a/phaul/src/phaul/stats.go b/phaul/src/phaul/stats.go > > new file mode 100644 > > index 0000000..77eb346 > > --- /dev/null > > +++ b/phaul/src/phaul/stats.go > > @@ -0,0 +1,31 @@ > > +package phaul > > + > > +import ( > > + "github.com/golang/protobuf/proto" > > + "os" > > + "stats" > > +) > > + > > +/* FIXME: report stats from CriuResp */ > > +func criuGetDumpStats(img_dir *os.File) (*stats.DumpStatsEntry, error) { > > + stf, err := os.Open(img_dir.Name() + "/stats-dump") > > + if err != nil { > > + return nil, err > > + } > > + defer stf.Close() 
> > + > > + buf := make([]byte, 2*4096) > > + sz, err := stf.Read(buf) > > + if err != nil { > > + return nil, err > > + } > > + > > + st := &stats.StatsEntry{} > > + // Skip 2 magic values and entry size > > + err = proto.Unmarshal(buf[12:sz], st) > > + if err != nil { > > + return nil, err > > + } > > + > > + return st.GetDump(), nil > > +} > > diff --git a/phaul/src/test/main.go b/phaul/src/test/main.go > > new file mode 100644 > > index 0000000..0e853df > > --- /dev/null > > +++ b/phaul/src/test/main.go > > @@ -0,0 +1,188 @@ > > +package main > > + > > +import ( > > + "criu" > > + "fmt" > > + "github.com/golang/protobuf/proto" > > + "os" > > + "phaul" > > + "rpc" > > + "strconv" > > + "strings" > > + "syscall" > > +) > > + > > +type testLocal struct { > > + criu.CriuNoNotify > > + r *testRemote > > +} > > + > > +type testRemote struct { > > + srv *phaul.PhaulServer > > +} > > + > > +/* Dir where test will put dump images */ > > +const images_dir = "test_images" > > + > > +func prepareImages() error { > > + err := os.Mkdir(images_dir, 0700) > > + if err != nil { > > + return err > > + } > > + > > + /* Work dir for PhaulClient */ > > + err = os.Mkdir(images_dir+"/local", 0700) > > + if err != nil { > > + return err > > + } > > + > > + /* Work dir for PhaulServer */ > > + err = os.Mkdir(images_dir+"/remote", 0700) > > + if err != nil { > > + return err > > + } > > + > > + /* Work dir for DumpCopyRestore */ > > + err = os.Mkdir(images_dir+"/test", 0700) > > + if err != nil { > > + return err > > + } > > + > > + return nil > > +} > > + > > +func mergeImages(dump_dir, last_pre_dump_dir string) error { > > + idir, err := os.Open(dump_dir) > > + if err != nil { > > + return err > > + } > > + > > + defer idir.Close() > > + > > + imgs, err := idir.Readdirnames(0) > > + if err != nil { > > + return err > > + } > > + > > + for _, fname := range imgs { > > + if !strings.HasSuffix(fname, ".img") { > > + continue > > + } > > + > > + fmt.Printf("\t%s -> %s/\n", fname, 
last_pre_dump_dir) > > + err = syscall.Link(dump_dir+"/"+fname, last_pre_dump_dir+"/"+fname) > > + if err != nil { > > + return err > > + } > > + } > > + > > + return nil > > +} > > + > > +func (r *testRemote) doRestore() error { > > + last_srv_images_dir := r.srv.LastImagesDir() > > + /* > > + * In images_dir we have images from dump, in the > > + * last_srv_images_dir -- where server-side images > > + * (from page server, with pages and pagemaps) are. > > + * Need to put former into latter and restore from > > + * them. > > + */ > > + err := mergeImages(images_dir+"/test", last_srv_images_dir) > > + if err != nil { > > + return err > > + } > > + > > + img_dir, err := os.Open(last_srv_images_dir) > > + if err != nil { > > + return err > > + } > > + defer img_dir.Close() > > + > > + opts := rpc.CriuOpts{ > > + LogLevel: proto.Int32(4), > > + LogFile: proto.String("restore.log"), > > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), > > + } > > + > > + cr := r.srv.GetCriu() > > + fmt.Printf("Do restore\n") > > + return cr.Restore(opts, nil) > > +} > > + > > +func (l *testLocal) PostDump() error { > > + return l.r.doRestore() > > +} > > + > > +func (l *testLocal) DumpCopyRestore(cr *criu.Criu, cfg phaul.PhaulConfig, last_cln_images_dir string) error { > > + fmt.Printf("Final stage\n") > > + > > + img_dir, err := os.Open(images_dir + "/test") > > + if err != nil { > > + return err > > + } > > + defer img_dir.Close() > > + > > + psi := rpc.CriuPageServerInfo{ > > + Fd: proto.Int32(int32(cfg.Memfd)), > > + } > > + > > + opts := rpc.CriuOpts{ > > + Pid: proto.Int32(int32(cfg.Pid)), > > + LogLevel: proto.Int32(4), > > + LogFile: proto.String("dump.log"), > > + ImagesDirFd: proto.Int32(int32(img_dir.Fd())), > > + TrackMem: proto.Bool(true), > > + ParentImg: proto.String(last_cln_images_dir), > > + Ps: &psi, > > + } > > + > > + fmt.Printf("Do dump\n") > > + return cr.Dump(opts, l) > > +} > > + > > +func main() { > > + pid, _ := strconv.Atoi(os.Args[1]) > > + fds, err := 
syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM, 0) > > + if err != nil { > > + fmt.Printf("Can't make socketpair\n") > > + return > > + } > > + > > + err = prepareImages() > > + if err != nil { > > + fmt.Printf("Can't prepare dirs for images\n") > > + return > > + } > > + > > + fmt.Printf("Make server part (socket %d)\n", fds[1]) > > + srv, err := phaul.MakePhaulServer(phaul.PhaulConfig{ > > + Pid: pid, > > + Memfd: fds[1], > > + Wdir: images_dir + "/remote"}) > > + if err != nil { > > + return > > + } > > + > > + r := &testRemote{srv} > > + > > + fmt.Printf("Make client part (socket %d)\n", fds[0]) > > + cln, err := phaul.MakePhaulClient(&testLocal{r: r}, srv, > > + phaul.PhaulConfig{ > > + Pid: pid, > > + Memfd: fds[0], > > + Wdir: images_dir + "/local"}) > > + if err != nil { > > + return > > + } > > + > > + fmt.Printf("Migrate\n") > > + err = cln.Migrate() > > + if err != nil { > > + fmt.Printf("Failed: ") > > + fmt.Print(err) > > + return > > + } > > + > > + fmt.Printf("SUCCESS!\n") > > +} > > -- > > 2.5.0 > > _______________________________________________ > > CRIU mailing list > > CRIU@openvz.org > > https://lists.openvz.org/mailman/listinfo/criu > _______________________________________________ > CRIU mailing list > CRIU@openvz.org > https://lists.openvz.org/mailman/listinfo/criu
The API is as simple as srv := MakePhaulServer(config) cln := MakePhaulClient(local, remote, config) cln.Migrate() * config is the PhaulConfig struct that contains the pid to migrate, the memory transfer channel (file descriptor) that phaul can use to send/receive memory, and the path to an existing directory where phaul can put intermediate files and images. * local is the PhaulLocal interface with (for now) a single method - DumpCopyRestore(): method that phaul calls when it's time to do the engine-specific dump, images copy and restore on the destination side. A few words about the latter -- we've learned that different engines have their own way to call CRIU to dump a container, so phaul, instead of dumping one on its own, lets the caller do it. To keep up with the pre-dump stuff, the client should not forget to do three things: - set the TrackMem option to true - set the ParentImg to the passed value - set the Ps (page server) channel with 'config.Memfd' The criu object is passed here as well, so that the caller can call Dump() on it (once we have keep_open support in libcriu this will help to avoid an additional criu execve). The method should also handle the PostDump notification and do the images-copy and restore in it. Not sure how to wrap this into phaul better. * remote is the PhaulRemote interface whose methods should be called on the dst side on the PhaulServer object using whatever RPC the caller finds acceptable. As a demonstration, the src/test/main.go example is attached. To see how it goes, 'make' it, then start the 'piggie $outfile' proggie and run the 'test $pid' command. The piggie will be, well, live migrated locally :) i.e. it will appear as a process with a different pid (it lives in a pid namespace).
Changes since v2: * Reworked the API onto local/remote/config scheme * Added ability to configure directory for images * Re-used server side Criu object for final restore Changes since v1: * Supported keep_open-s for pre-dumps * Added code comments about interface * Simplified the example code Further plans for this are - move py p.haul to use this compiled library - add post-copy (lazy pages) support (with Mike's help) - add image-cache and image-proxy (with Rodrigo's help) - add API/framework for FS migration Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com> --- phaul/.gitignore | 1 + phaul/Makefile | 11 +++ phaul/piggie.c | 57 ++++++++++++++ phaul/src/phaul/api.go | 52 +++++++++++++ phaul/src/phaul/client.go | 130 ++++++++++++++++++++++++++++++++ phaul/src/phaul/images.go | 41 ++++++++++ phaul/src/phaul/server.go | 73 ++++++++++++++++++ phaul/src/phaul/stats.go | 31 ++++++++ phaul/src/test/main.go | 188 ++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 584 insertions(+) create mode 100644 phaul/.gitignore create mode 100644 phaul/Makefile create mode 100644 phaul/piggie.c create mode 100644 phaul/src/phaul/api.go create mode 100644 phaul/src/phaul/client.go create mode 100644 phaul/src/phaul/images.go create mode 100644 phaul/src/phaul/server.go create mode 100644 phaul/src/phaul/stats.go create mode 100644 phaul/src/test/main.go