我正在研究 Linux 中的 POSIX 能力(capabilities)和命名空间(namespaces)。受几篇令人印象深刻的文章启发,我编写了下面这段代码,以便更好地理解从不同的命名空间观察时,进程的能力是如何呈现的。部分代码摘自这些文章中的示例,并非我的原创……
#define _GNU_SOURCE
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sched.h>
#include <sys/capability.h>
#include "caputilities.h"
/* Print msg followed by the description of the current errno (via perror)
   and terminate the process with EXIT_FAILURE. Wrapped in do/while(0) so
   it behaves as a single statement. */
#define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
} while (0)
/* Length bound used for the /proc path buffer built in childFunc. */
#define MAXLEN 255
/* Convert a comma-delimited mapping string into the newline-delimited
   form, in place. Every ',' in map becomes '\n'; a NULL map is ignored. */
static void get_mapstr(char *map){
    if (!map)
        return;
    /* Hop from one comma to the next instead of indexing every byte. */
    for (char *comma = strchr(map, ','); comma != NULL; comma = strchr(comma + 1, ','))
        *comma = '\n';
}
/*
 * Write the string map to the existing file map_file with one write(2) call.
 *
 * map      - NUL-terminated text to write; must not be NULL.
 * map_file - path of the file to open (opened O_RDWR, never created).
 *
 * Does not return on failure: any open, write or close error is reported
 * on stderr and the process exits with EXIT_FAILURE. A short write is
 * treated as a failure too and is reported separately, because errno is
 * not meaningful in that case. The write is deliberately not retried.
 */
static void save_map(const char *map, const char *map_file){
    int fd = open(map_file, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    size_t map_len = strlen(map);
    ssize_t written = write(fd, map, map_len);
    if (written == -1) {
        /* Report before close() so errno still belongs to write(). */
        fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
        close(fd);
        exit(EXIT_FAILURE);
    }
    if ((size_t)written != map_len) {
        fprintf(stderr, "write %s: short write (%zd of %zu bytes)\n",
                map_file, written, map_len);
        close(fd);
        exit(EXIT_FAILURE);
    }

    if (close(fd) == -1) {
        fprintf(stderr, "close %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }
}
/* Start function for cloned child */
static int childFunc(void *arg){
pid_t pid = getpid();
fprintf(stderr, "cloned child pid %ld\n", (long)pid);
fprintf(stderr, "child process capabilities %s\n", cap_to_text(cap_get_proc(), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
if (arg!=NULL){ //user ns enabled
char *uidmap = ((char **)arg)[0];
char *gidmap = ((char **)arg)[1];
if (uidmap!=NULL) fprintf(stderr, "setting uid map %s\n", uidmap);
if (gidmap!=NULL) fprintf(stderr, "setting gid map %s\n", gidmap);
char map_path[MAXLEN + 1];
if (uidmap != NULL){
snprintf(map_path, MAXLEN, "/proc/%ld/uid_map", (long)pid);
save_map(uidmap, map_path);
}
if (gidmap != NULL){
snprintf(map_path, MAXLEN, "/proc/%ld/gid_map", (long)pid);
save_map(gidmap, map_path);
}
fprintf(stderr, "child process capabilities %s\n", cap_to_text(cap_get_proc(), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
}
sleep(200);
exit(0);
}
/* Print the command-line synopsis for program pname to stderr and
   terminate the process with EXIT_FAILURE. Never returns. */
static void usage(char *pname){
    /* Fixed help text that follows the synopsis line, one entry per line. */
    static const char *const help_lines[] = {
        " -U use user namespace\n",
        " -M uid mapping\n",
        " -G gid mapping\n",
        " mapstring is a comma separated list of mapping of the form:\n",
        " ID_inside-ns ID-outside-ns length [,ID_inside-ns ID-outside-ns length, ...]\n",
    };
    const size_t line_count = sizeof help_lines / sizeof help_lines[0];

    fprintf(stderr, "Usage: %s -U -M mapstring -G mapstring\n", pname);
    for (size_t k = 0; k < line_count; k++)
        fputs(help_lines[k], stderr);
    exit(EXIT_FAILURE);
}
/* Size in bytes of the stack buffer given to the cloned child (1 MiB). */
#define STACK_SIZE (1024 * 1024)
/* main passes child_stack + STACK_SIZE, i.e. the end of this buffer,
   as the stack argument of clone(). */
static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Receive a UID and/or GID mapping as arguments
Every mapping consists of a list of tuple (separated by new line) of the form:
ID_inside-ns ID-outside-ns length
Requiring the user to supply a string that contains newlines is
of course inconvenient for command-line use. Thus, we permit the
use of commas to delimit records in this string, and replace them
with newlines before writing the string to the file. */
int main(int argc, char *argv[]){
int flags = 0;
char *gid_map = NULL, *uid_map = NULL;
int opt;
while ((opt = getopt(argc, argv, "UM:G:")) != -1) {
switch (opt){
case 'U': flags |= CLONE_NEWUSER;
case 'M': uid_map = optarg; break;
case 'G': gid_map = optarg; break;
default: usage(argv[0]);
}
}
if ((uid_map != NULL || gid_map != NULL) && !(flags & CLONE_NEWUSER)){
fprintf(stderr,"what about give me the user namespace option? what's in your mind today?\n");
usage(argv[0]);
}
char* args[2];
get_mapstr(uid_map); args[0] = uid_map;
get_mapstr(gid_map); args[1] = gid_map;
pid_t child_pid = clone(childFunc, child_stack + STACK_SIZE, flags | SIGCHLD, (flags & CLONE_NEWUSER) ? &args : NULL);
if (child_pid == -1) errExit("clone");
sleep(1);
fprintf(stderr, "child process pid capabilities from parent: %s\n", cap_to_text(cap_get_pid(child_pid), NULL));
fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
exit(0);
}
我验证了:位于新用户命名空间中的子进程,只能把父进程在外部命名空间中的有效用户 ID 映射到新命名空间中的任意 UID(包括 root);如果尝试在子进程中映射其他外部用户,则会报错。这一点符合预期。
$ ./testcap3 -U -M"1000 39 1"
cloned child pid 7659
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 1000 39 1
write /proc/7659/uid_map: Operation not permitted
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000
$ ./testcap3 -U -M"0 1000 1"
cloned child pid 7665
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 0 1000 1
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 0, egid 65534
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000
我不明白的是:为什么从父进程打印子进程的能力时,显示的是全部能力都已启用?我原本以为在外部命名空间中看不到任何特权,是我理解错了吗?显然,二进制文件 testcap3 本身没有特权(文件既没有设置 setuid/setgid 位,也没有设置文件能力,而且有效用户不是管理员)。能力是如何存储的?相关的数据结构与命名空间之间是什么关系?