// To map the uid/gid, we need to edit /proc/PID/uid_map
// (or /proc/PID/gid_map) from the parent.
// The file format is:
//   ID-inside-ns ID-outside-ns length
// If there is no mapping,
//   the uid will be taken from /proc/sys/kernel/overflowuid
//   the gid will be taken from /proc/sys/kernel/overflowgid
set_uid_map(container_pid, 0, uid, 1);
set_gid_map(container_pid, 0, gid, 1);
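For illustration, the same mapping can also be written by hand from the parent side. A minimal shell sketch, assuming the child's PID is in ${container_pid} and ${uid}/${gid} hold the IDs outside the namespace:

# Map uid 0 inside the namespace to ${uid} outside, for a range of 1
echo "0 ${uid} 1" > /proc/${container_pid}/uid_map
# On kernels >= 3.19, "deny" must be written to setgroups before gid_map
# can be written by an unprivileged parent
echo deny > /proc/${container_pid}/setgroups
echo "0 ${gid} 1" > /proc/${container_pid}/gid_map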
function setup() {
    echo "1/10: Create a network namespace named '${namespace}'"
    ip netns add ${namespace}

    echo "2/10: Create a pair of 'veth' devices, one end named '${ifname_outside_ns}', the other named '${ifname_inside_ns}'"
    ip link add ${ifname_outside_ns} type veth peer name ${ifname_inside_ns}

    echo "3/10: Assign the IP address '${ip_outside_ns}' to '${ifname_outside_ns}'"
    ifconfig ${ifname_outside_ns} ${ip_outside_ns} netmask ${ip_netmask} up

    echo "4/10: Move '${ifname_inside_ns}' into the network namespace '${namespace}'"
    ip link set ${ifname_inside_ns} netns ${namespace}

    echo "5/10: Inside '${namespace}', set the IP address of '${ifname_inside_ns}' to '${ip_inside_ns}'; it must be on the same subnet as '${ifname_outside_ns}'"
    ip netns exec ${namespace} ifconfig ${ifname_inside_ns} ${ip_inside_ns} netmask ${ip_netmask} up
echo"2/13: 配置网桥 '${bridge_name}' 的IP '${ip_outside_ns}'" ifconfig ${bridge_name}${ip_outside_ns} netmask ${ip_netmask} up
echo"3/13: 创建名为 '${namespace}' 的网络命名空间" ip netns add ${namespace}
echo"4/13: 创建一对 'veth' 类型的网卡设备,一个网卡为 '${ifname_outside_ns}',另一个网卡为 '${ifname_inside_ns}'" ip link add ${ifname_outside_ns}type veth peer name ${ifname_inside_ns}
echo"5/13: 开启网卡 '${ifname_outside_ns}'" ip linkset${ifname_outside_ns} up
# docker run starts a container
#   -d: detach from the container and print the container id
#   alpine:3.10.2: the image
#   true: the command the container runs
CID=$(docker run -d alpine:3.10.2 true)
echo $CID
#-------------------------↓↓↓↓↓↓-------------------------
f9d2df08221a67653fe6af9f99dbb2367a6736aecbba8c5403bf3dbb68310f2a
#-------------------------↑↑↑↑↑↑-------------------------
# Export the container's filesystem into the images/alpine/ directory
docker export $CID | tar -C images/alpine/ -xf -
ls images/alpine/
#-------------------------↓↓↓↓↓↓-------------------------
bin  dev  etc  home  lib  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
#-------------------------↑↑↑↑↑↑-------------------------
# Using images/alpine/ as the source, create the snapshot containers/tupperware
btrfs subvol snapshot images/alpine/ containers/tupperware
#-------------------------↓↓↓↓↓↓-------------------------
Create a snapshot of 'images/alpine/' in 'containers/tupperware'
#-------------------------↑↑↑↑↑↑-------------------------
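The marker file checked in the next two listings is created inside the snapshot only; that step is not shown in the excerpt, but was presumably something like:

# Create a file in the snapshot; copy-on-write means the source subvolume is untouched
touch containers/tupperware/NICK_WAS_HERE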
# ls in the snapshot path: the file NICK_WAS_HERE is there
ls containers/tupperware/
#-------------------------↓↓↓↓↓↓-------------------------
bin  dev  etc  home  lib  media  mnt  NICK_WAS_HERE  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
#-------------------------↑↑↑↑↑↑-------------------------
# ls in the source path: NICK_WAS_HERE is not there
ls images/alpine/
#-------------------------↓↓↓↓↓↓-------------------------
bin  dev  etc  home  lib  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
#-------------------------↑↑↑↑↑↑-------------------------
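The next listing shows NICK_WAS_HERE at the root of the filesystem, so at this point the shell has presumably entered the snapshot, for example via chroot (an assumed step, not shown in the excerpt):

# Make the snapshot the apparent root for this shell (assumed step)
chroot containers/tupperware /bin/sh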
ls
#-------------------------↓↓↓↓↓↓-------------------------
NICK_WAS_HERE  dev  home  media  opt   root  sbin  sys  usr
bin            etc  lib   mnt    proc  run   srv   tmp  var
#-------------------------↑↑↑↑↑↑-------------------------
# Next, use the mount and pivot_root commands to make /btrfs/containers/tupperware
# the root of the filesystem.
# Create the directory oldroot; the current root filesystem will later be mounted onto it
mkdir /btrfs/containers/tupperware/oldroot
# mount --bind mounts a directory onto another directory
# (pivot_root requires the new root to be a mount point, hence this bind mount)
mount --bind /btrfs/containers/tupperware /btrfs
# cd into /btrfs and check that the container is now mounted there
cd /btrfs/
ls
#-------------------------↓↓↓↓↓↓-------------------------
bin  dev  etc  home  lib  media  mnt  NICK_WAS_HERE  oldroot  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
#-------------------------↑↑↑↑↑↑-------------------------
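The switch itself is not shown in the excerpt; with the bind mount in place, it would typically be performed from /btrfs like this (an assumed step):

# Swap roots: the current directory becomes /, the old root lands on /oldroot
pivot_root . oldroot
cd /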
# Look at the current root directory: it is in fact the container
ls
#-------------------------↓↓↓↓↓↓-------------------------
NICK_WAS_HERE  dev  home  media  oldroot  proc  run   srv  tmp  var
bin            etc  lib   mnt    opt      root  sbin  sys  usr
#-------------------------↑↑↑↑↑↑-------------------------
# Look at the original root: the previous root filesystem is now mounted at oldroot
ls oldroot/
#-------------------------↓↓↓↓↓↓-------------------------
bin  boot  btrfs  dev  disk.img  etc  home  lib  lib64  media  mnt  opt  proc  root  run  sbin  srv  sys  tmp  usr  var
#-------------------------↑↑↑↑↑↑-------------------------
# proc must be mounted first, because the mount command depends on proc
mount -t proc nodev /proc
# List the current mount points: there are a great many of them,
# and none of them should be visible to the container
mount | head
#-------------------------↓↓↓↓↓↓-------------------------
/dev/mapper/centos-root on /oldroot type xfs (rw,seclabel,relatime,attr2,inode64,noquota)
devtmpfs on /oldroot/dev type devtmpfs (rw,seclabel,nosuid,size=929308k,nr_inodes=232327,mode=755)
tmpfs on /oldroot/dev/shm type tmpfs (rw,seclabel,nosuid,nodev)
devpts on /oldroot/dev/pts type devpts (rw,seclabel,nosuid,noexec,relatime,gid=5,mode=620,ptmxmode=000)
hugetlbfs on /oldroot/dev/hugepages type hugetlbfs (rw,seclabel,relatime)
mqueue on /oldroot/dev/mqueue type mqueue (rw,seclabel,relatime)
proc on /oldroot/proc type proc (rw,nosuid,nodev,noexec,relatime)
systemd-1 on /oldroot/proc/sys/fs/binfmt_misc type autofs (rw,relatime,fd=34,pgrp=0,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=8897)
sysfs on /oldroot/sys type sysfs (rw,seclabel,nosuid,nodev,noexec,relatime)
#-------------------------↑↑↑↑↑↑-------------------------
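The cleanup referenced in the next comment is omitted from the excerpt; it presumably looked like this:

# Detach every mount point that can be detached; this also unmounts /proc,
# which is why proc has to be remounted immediately afterwards (assumed step)
umount -a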
# Mount proc again, because mount depends on proc
mount -t proc nodev /proc
# /oldroot was not unmounted by the umount -a in the previous step,
# so it can still be seen here
mount
#-------------------------↓↓↓↓↓↓-------------------------
/dev/mapper/centos-root on /oldroot type xfs (rw,seclabel,relatime,attr2,inode64,noquota)
/dev/loop0 on /oldroot/btrfs type btrfs (ro,seclabel,relatime,space_cache,subvolid=5,subvol=/)
/dev/loop0 on / type btrfs (ro,seclabel,relatime,space_cache,subvolid=258,subvol=/containers/tupperware)
proc on /proc type proc (rw,relatime)
#-------------------------↑↑↑↑↑↑-------------------------
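Between this listing and the next, /oldroot is detached. Since the mount is still busy, a lazy unmount is the usual approach (an assumed step, not shown in the excerpt):

# Lazily detach the old root so that only the container's mounts remain
umount -l /oldroot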
# At this point, the mount-point cleanup is complete
mount
#-------------------------↓↓↓↓↓↓-------------------------
rootfs on / type rootfs (rw)
/dev/loop0 on / type btrfs (ro,seclabel,relatime,space_cache,subvolid=258,subvol=/containers/tupperware)
proc on /proc type proc (rw,relatime)
#-------------------------↑↑↑↑↑↑-------------------------
lo        Link encap:Local Loopback
          inet addr:127.0.0.1  Mask:255.0.0.0
          inet6 addr: ::1/128 Scope:Host
          UP LOOPBACK RUNNING  MTU:65536  Metric:1
          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:0 (0.0 B)  TX bytes:0 (0.0 B)
#-------------------------↑↑↑↑↑↑-------------------------
ping -c 3 8.8.8.8
#-------------------------↓↓↓↓↓↓-------------------------
PING 8.8.8.8 (8.8.8.8): 56 data bytes
64 bytes from 8.8.8.8: seq=0 ttl=49 time=30.481 ms
64 bytes from 8.8.8.8: seq=1 ttl=49 time=31.170 ms
64 bytes from 8.8.8.8: seq=2 ttl=49 time=30.111 ms

--- 8.8.8.8 ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max = 30.111/30.587/31.170 ms
#-------------------------↑↑↑↑↑↑-------------------------
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
    /* ... unrelated fields omitted */
};
struct css_set {
    /* Reference count */
    atomic_t refcount;

    /*
     * List running through all cgroup groups in the same hash
     * slot. Protected by css_set_lock
     */
    struct hlist_node hlist;

    /*
     * List running through all tasks using this cgroup
     * group. Protected by css_set_lock
     */
    struct list_head tasks;

    /*
     * List of cg_cgroup_link objects on link chains from
     * cgroups referenced from this css_set. Protected by
     * css_set_lock
     */
    struct list_head cg_links;

    /*
     * Set of subsystem states, one for each subsystem. This array
     * is immutable after creation apart from the init_css_set
     * during subsystem registration (at boot time) and modular subsystem
     * loading/unloading.
     */
    struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

    /* For RCU-protected deletion */
    struct rcu_head rcu_head;
};
struct cgroup_subsys_state {
    /*
     * The cgroup that this subsystem is attached to. Useful
     * for subsystems that want to know about the cgroup
     * hierarchy structure
     */
    struct cgroup *cgroup;

    /*
     * State maintained by the cgroup system to allow subsystems
     * to be "busy". Should be accessed via css_get(),
     * css_tryget() and css_put().
     */
    atomic_t refcnt;

    unsigned long flags;
    /* ID for this css, if possible */
    struct css_id __rcu *id;

    /* Used to put @cgroup->dentry on the last css_put() */
    struct work_struct dput_work;
};
struct cgroup {
    unsigned long flags;    /* "unsigned long" so bitops work */

    /*
     * count users of this cgroup. >0 means busy, but doesn't
     * necessarily indicate the number of tasks in the cgroup
     */
    atomic_t count;

    int id;    /* ida allocated in-hierarchy ID */

    /*
     * We link our 'sibling' struct into our parent's 'children'.
     * Our children link their 'sibling' into our 'children'.
     */
    struct list_head sibling;    /* my parent's children */
    struct list_head children;   /* my children */
    struct list_head files;      /* my files */

    /*
     * This is a copy of dentry->d_name, and it's needed because
     * we can't use dentry->d_name in cgroup_path().
     *
     * You must acquire rcu_read_lock() to access cgrp->name, and
     * the only place that can change it is rename(), which is
     * protected by parent dir's i_mutex.
     *
     * Normally you should use cgroup_name() wrapper rather than
     * access it directly.
     */
    struct cgroup_name __rcu *name;

    /* Private pointers for each registered subsystem */
    struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];

    struct cgroupfs_root *root;

    /*
     * List of cg_cgroup_links pointing at css_sets with
     * tasks in this cgroup. Protected by css_set_lock
     */
    struct list_head css_sets;

    struct list_head allcg_node;    /* cgroupfs_root->allcg_list */
    struct list_head cft_q_node;    /* used during cftype add/rm */

    /*
     * Linked list running through all cgroups that can
     * potentially be reaped by the release agent. Protected by
     * release_list_lock
     */
    struct list_head release_list;

    /*
     * list of pidlists, up to two for each namespace (one for procs, one
     * for tasks); created on demand.
     */
    struct list_head pidlists;
    struct mutex pidlist_mutex;

    /* For RCU-protected deletion */
    struct rcu_head rcu_head;
    struct work_struct free_work;

    /* List of events which userspace want to receive */
    struct list_head event_list;
    spinlock_t event_list_lock;