/* GNU Guix --- Functional package management for GNU Copyright (C) 2018, 2019, 2020 Ludovic Courtès This file is part of GNU Guix. GNU Guix is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. GNU Guix is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU Guix. If not, see . */ /* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate mount namespace where the store is mounted in its right place. We would happily do that in Scheme using 'call-with-container'. However, this very program needs to be relocatable, so it needs to be statically linked, which complicates things (Guile's modules can hardly be "linked" into a single executable.) */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Whether we're building the ld.so/libfakechroot wrapper. */ #define HAVE_EXEC_WITH_LOADER \ (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE) \ && (defined FAKECHROOT_LIBRARY) /* The original store, "/gnu/store" by default. */ static const char original_store[] = "@STORE_DIRECTORY@"; /* Like 'malloc', but abort if 'malloc' returns NULL. */ static void * xmalloc (size_t size) { void *result = malloc (size); assert (result != NULL); return result; } /* Concatenate DIRECTORY, a slash, and FILE. Return the result, which the caller must eventually free. */ static char * concat (const char *directory, const char *file) { char *result = xmalloc (strlen (directory) + 2 + strlen (file)); strcpy (result, directory); strcat (result, "/"); strcat (result, file); return result; } static void mkdir_p (const char *directory) { if (strcmp (directory, "/") != 0) { char *parent = dirname (strdupa (directory)); mkdir_p (parent); int err = mkdir (directory, 0700); if (err < 0 && errno != EEXIST) assert_perror (errno); } } static void rm_rf (const char *directory) { DIR *stream = opendir (directory); for (struct dirent *entry = readdir (stream); entry != NULL; entry = readdir (stream)) { if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0) continue; char *full = concat (directory, entry->d_name); int err = unlink (full); if (err < 0) { if (errno == EISDIR) /* Recurse (we expect a shallow directory structure so there's little risk of stack overflow.) */ rm_rf (full); else assert_perror (errno); } free (full); } closedir (stream); int err = rmdir (directory); if (err < 0 && errno != ENOENT) assert_perror (errno); } /* Make TARGET a bind-mount of SOURCE. Take into account ENTRY's type, which corresponds to SOURCE. */ static int bind_mount (const char *source, const struct dirent *entry, const char *target) { if (entry->d_type == DT_DIR) { int err = mkdir (target, 0700); if (err != 0) return err; } else close (open (target, O_WRONLY | O_CREAT)); return mount (source, target, "none", MS_BIND | MS_REC | MS_RDONLY, NULL); } #if HAVE_EXEC_WITH_LOADER /* Make TARGET a symlink to SOURCE. */ static int make_symlink (const char *source, const struct dirent *entry, const char *target) { return symlink (source, target); } #endif /* Mirror with FIRMLINK all the top-level entries in SOURCE to TARGET. */ static void mirror_directory (const char *source, const char *target, int (* firmlink) (const char *, const struct dirent *, const char *)) { DIR *stream = opendir (source); for (struct dirent *entry = readdir (stream); entry != NULL; entry = readdir (stream)) { /* XXX: Some file systems may not report a useful 'd_type'. Ignore them for now. */ assert (entry->d_type != DT_UNKNOWN); if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0) continue; char *abs_source = concat (source, entry->d_name); char *new_entry = concat (target, entry->d_name); if (entry->d_type == DT_LNK) { char target[PATH_MAX]; ssize_t result = readlink (abs_source, target, sizeof target - 1); if (result > 0) { target[result] = '\0'; int err = symlink (target, new_entry); if (err < 0) assert_perror (errno); } } else { /* Create the mount point. */ int err = firmlink (abs_source, entry, new_entry); /* It used to be that only directories could be bind-mounted. Thus, keep going if we fail to bind-mount a non-directory entry. That's OK because regular files in the root file system are usually uninteresting. */ if (err != 0 && entry->d_type != DT_DIR) assert_perror (errno); free (new_entry); free (abs_source); } } closedir (stream); } /* Write the user/group ID map for PID to FILE, mapping ID to itself. See user_namespaces(7). */ static void write_id_map (pid_t pid, const char *file, int id) { char id_map_file[100]; snprintf (id_map_file, sizeof id_map_file, "/proc/%d/%s", pid, file); char id_map[100]; /* Map root and the current user. */ int len = snprintf (id_map, sizeof id_map, "%d %d 1\n", id, id); int fd = open (id_map_file, O_WRONLY); if (fd < 0) assert_perror (errno); int n = write (fd, id_map, len); if (n < 0) assert_perror (errno); close (fd); } /* Disallow setgroups(2) for PID. */ static void disallow_setgroups (pid_t pid) { char file[100]; snprintf (file, sizeof file, "/proc/%d/setgroups", pid); int fd = open (file, O_WRONLY); if (fd < 0) assert_perror (errno); int err = write (fd, "deny", 5); if (err < 0) assert_perror (errno); close (fd); } /* Run the wrapper program in a separate mount user namespace. Return only upon failure. */ static void exec_in_user_namespace (const char *store, int argc, char *argv[]) { /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is bind-mounted in the right place. */ int err; char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); char *new_store = concat (new_root, original_store); char *cwd = get_current_dir_name (); /* Create a child with separate namespaces and set up bind-mounts from there. That way, bind-mounts automatically disappear when the child exits, which simplifies cleanup for the parent. Note: clone is more convenient than fork + unshare since the parent can directly write the child uid_map/gid_map files. */ pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER, NULL, NULL, NULL); switch (child) { case 0: /* Note: Due to we cannot make NEW_ROOT a tmpfs (which would have saved the need for 'rm_rf'.) */ mirror_directory ("/", new_root, bind_mount); mkdir_p (new_store); err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY, NULL); if (err < 0) assert_perror (errno); chdir (new_root); err = chroot (new_root); if (err < 0) assert_perror (errno); /* Change back to where we were before chroot'ing. */ chdir (cwd); int err = execv ("@WRAPPED_PROGRAM@", argv); if (err < 0) assert_perror (errno); break; case -1: /* Failure: user namespaces not supported. */ fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]); rm_rf (new_root); break; default: { /* Map the current user/group ID in the child's namespace (the default is to get the "overflow UID", i.e., the UID of "nobody"). We must first disallow 'setgroups' for that process. */ disallow_setgroups (child); write_id_map (child, "uid_map", getuid ()); write_id_map (child, "gid_map", getgid ()); int status; waitpid (child, &status, 0); chdir ("/"); /* avoid EBUSY */ rm_rf (new_root); free (new_root); if (WIFEXITED (status)) exit (WEXITSTATUS (status)); else /* Abnormal termination cannot really be reproduced, so exit with 255. */ exit (255); } } } #ifdef PROOT_PROGRAM /* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and "bind-mounting" STORE in the right place. */ static void exec_with_proot (const char *store, int argc, char *argv[]) { int proot_specific_argc = 4; int proot_argc = argc + proot_specific_argc; char *proot_argv[proot_argc + 1], *proot; char bind_spec[strlen (store) + 1 + sizeof original_store]; strcpy (bind_spec, store); strcat (bind_spec, ":"); strcat (bind_spec, original_store); proot = concat (store, PROOT_PROGRAM); proot_argv[0] = proot; proot_argv[1] = "-b"; proot_argv[2] = bind_spec; proot_argv[3] = "@WRAPPED_PROGRAM@"; for (int i = 0; i < argc; i++) proot_argv[i + proot_specific_argc] = argv[i + 1]; proot_argv[proot_argc] = NULL; /* Seccomp support seems to invariably lead to segfaults; disable it by default. */ setenv ("PROOT_NO_SECCOMP", "1", 0); int err = execv (proot, proot_argv); if (err < 0) assert_perror (errno); } #endif #if HAVE_EXEC_WITH_LOADER /* Execute the wrapped program by invoking the loader (ld.so) directly, passing it the audit module and preloading libfakechroot.so. */ static void exec_with_loader (const char *store, int argc, char *argv[]) { char *loader = concat (store, PROGRAM_INTERPRETER + sizeof original_store); size_t loader_specific_argc = 6; size_t loader_argc = argc + loader_specific_argc; char *loader_argv[loader_argc + 1]; loader_argv[0] = argv[0]; loader_argv[1] = "--audit"; loader_argv[2] = concat (store, LOADER_AUDIT_MODULE + sizeof original_store); loader_argv[3] = "--preload"; loader_argv[4] = concat (store, FAKECHROOT_LIBRARY + sizeof original_store); loader_argv[5] = concat (store, "@WRAPPED_PROGRAM@" + sizeof original_store); for (size_t i = 0; i < argc; i++) loader_argv[i + loader_specific_argc] = argv[i + 1]; loader_argv[loader_argc] = NULL; /* Set up the root directory. */ int err; char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); mirror_directory ("/", new_root, make_symlink); char *new_store = concat (new_root, original_store); char *new_store_parent = dirname (strdup (new_store)); mkdir_p (new_store_parent); symlink (store, new_store); #ifdef GCONV_DIRECTORY /* Tell libc where to find its gconv modules. This is necessary because gconv uses non-interposable 'open' calls. */ char *gconv_path = concat (store, GCONV_DIRECTORY + sizeof original_store); setenv ("GCONV_PATH", gconv_path, 1); free (gconv_path); #endif setenv ("FAKECHROOT_BASE", new_root, 1); pid_t child = fork (); switch (child) { case 0: err = execv (loader, loader_argv); if (err < 0) assert_perror (errno); exit (EXIT_FAILURE); break; case -1: assert_perror (errno); exit (EXIT_FAILURE); break; default: { int status; waitpid (child, &status, 0); chdir ("/"); /* avoid EBUSY */ rm_rf (new_root); free (new_root); close (2); /* flushing stderr should be silent */ if (WIFEXITED (status)) exit (WEXITSTATUS (status)); else /* Abnormal termination cannot really be reproduced, so exit with 255. */ exit (255); } } } #endif /* Execution engines. */ struct engine { const char *name; void (* exec) (const char *, int, char **); }; static void buffer_stderr (void) { static char stderr_buffer[4096]; setvbuf (stderr, stderr_buffer, _IOFBF, sizeof stderr_buffer); } /* The default engine: choose a robust method. */ static void exec_default (const char *store, int argc, char *argv[]) { /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace' fails but 'exec_with_proot' works. */ buffer_stderr (); exec_in_user_namespace (store, argc, argv); #ifdef PROOT_PROGRAM exec_with_proot (store, argc, argv); #endif } /* The "performance" engine: choose performance over robustness. */ static void exec_performance (const char *store, int argc, char *argv[]) { buffer_stderr (); exec_in_user_namespace (store, argc, argv); #if HAVE_EXEC_WITH_LOADER exec_with_loader (store, argc, argv); #endif } /* List of supported engines. */ static const struct engine engines[] = { { "default", exec_default }, { "performance", exec_performance }, { "userns", exec_in_user_namespace }, #ifdef PROOT_PROGRAM { "proot", exec_with_proot }, #endif #if HAVE_EXEC_WITH_LOADER { "fakechroot", exec_with_loader }, #endif { NULL, NULL } }; /* Return the "execution engine" to use. */ static const struct engine * execution_engine (void) { const char *str = getenv ("GUIX_EXECUTION_ENGINE"); if (str == NULL) str = "default"; try: for (const struct engine *engine = engines; engine->name != NULL; engine++) { if (strcmp (engine->name, str) == 0) return engine; } fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n", str); str = "default"; goto try; } int main (int argc, char *argv[]) { ssize_t size; char self[PATH_MAX]; size = readlink ("/proc/self/exe", self, sizeof self - 1); assert (size > 0); /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we want to extract "/home/ludo/.local/gnu/store". */ size_t index = strlen (self) - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store); char *store = strdup (self); store[index] = '\0'; struct stat statbuf; /* If STORE is already at the "right" place, we can execute @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's needed when running one of these wrappers from within an unshare'd namespace, because 'unshare' fails with EPERM in that context. */ if (strcmp (store, original_store) != 0 && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0) { const struct engine *engine = execution_engine (); engine->exec (store, argc, argv); /* If we reach this point, that's because ENGINE failed to do the job. */ fprintf (stderr, "\ This may be because \"user namespaces\" are not supported on this system.\n\ Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\ unless you move it to the '@STORE_DIRECTORY@' directory.\n\ \n\ Please refer to the 'guix pack' documentation for more information.\n"); return EXIT_FAILURE; } /* The executable is available under @STORE_DIRECTORY@, so we can now execute it. */ int err = execv ("@WRAPPED_PROGRAM@", argv); if (err < 0) assert_perror (errno); return EXIT_FAILURE; }