[klibc] ramfs to tmpfs

Luciano Miguel Ferreira Rocha strange at nsk.no-ip.org
Thu Sep 28 11:27:38 PDT 2006


Hello,

I was using a bunch of cpios in initramfs as a working system, and
wondering why the unused files weren't being paged out to swap.

So I reread ramfs-rootfs-initramfs.txt and now I know.

So I wrote the attached utility. It creates a tmpfs, moves all files
from the initramfs onto it, moves the tmpfs over / and executes the
real init.

It works, even with hardlinks, but it isn't the correct approach. Has
anyone implemented a small init, in klibc, that decompresses a bunch of
cpios to a tmpfs?

Regards,
Luciano Rocha
-- 
lfr
0/0
-------------- next part --------------
/* ----------------------------------------------------------------------- *
 *
 *   Copyright 2004-2006 H. Peter Anvin - All Rights Reserved
 *   Copyright 2006 Luciano Rocha - All Rights Reserved
 *
 *   Permission is hereby granted, free of charge, to any person
 *   obtaining a copy of this software and associated documentation
 *   files (the "Software"), to deal in the Software without
 *   restriction, including without limitation the rights to use,
 *   copy, modify, merge, publish, distribute, sublicense, and/or
 *   sell copies of the Software, and to permit persons to whom
 *   the Software is furnished to do so, subject to the following
 *   conditions:
 *
 *   The above copyright notice and this permission notice shall
 *   be included in all copies or substantial portions of the Software.
 *
 *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 *   OTHER DEALINGS IN THE SOFTWARE.
 *
 * ----------------------------------------------------------------------- */

/*
 * rtfs.c -- ramfs to tmpfs
 *
 * Usage: install as /init
 *
 * This program should be installed as /init on an initramfs;
 * it does the following:
 *
 * - Mounts a tmpfs on an empty /newroot*
 * - Moves all files on the ramfs at / to the tmpfs
 * - Remounts /newroot onto the root filesystem;
 * - Chroots;
 * - Opens /dev/console;
 * - Spawns the init program (with arguments.)
 */

#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <unistd.h>
#include <utime.h>

/* Make it possible to compile on glibc by including constants that the
   always-behind shipped glibc headers may not include.  Classic example
   on why the lack of ABI headers screw us up. */
#ifndef TMPFS_MAGIC
# define TMPFS_MAGIC	0x01021994
#endif
#ifndef RAMFS_MAGIC
# define RAMFS_MAGIC	0x858458f6
#endif
#ifndef MS_MOVE
# define MS_MOVE	8192
#endif

#define NEWROOT "/newroot"
#define CMDLINE "/proc/cmdline"
#define MEMINFO "/proc/meminfo"

static const char *program;	// argv[0]; used as the prefix of every error message
static char newroot[512];	// path of the directory the tmpfs is mounted on (chosen in main)
static char **initargs;		// argv of this process; element 0 is replaced before execv of init
static struct stat root_st;	// lstat of /, filled by checkenv; entries on another st_dev are skipped
static char meminfo[1024];	// contents of /proc/meminfo, after getprocinfo; edited in place by getmemsize
static char cmdline[2048];	// contents of /proc/cmdline, after getprocinfo; edited in place by exec_init

static const char *getprocinfo(void);

/* error functions:
 * error(fmt, ...) print "program: msg" and exits
 * serror(fmt, ...) print "program: msg: error\n" and exits
 */
#define error(...) my_error(0, __VA_ARGS__)
#define serror(...) my_error(1, __VA_ARGS__)

/* Common back end of the error() and serror() macros.
 * Prints "program: " followed by the formatted message on stderr; when p
 * is non-zero it also appends ": <strerror(errno)>\n". Always exits with
 * status 1 and never returns. */
static void __attribute__ ((noreturn,format(printf, 2, 3)))
	my_error(int p, const char *fmt, ...)
{
	/* capture errno first: the stdio calls below may overwrite it */
	const int saved_errno = errno;
	va_list args;

	fprintf(stderr, "%s: ", program);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	if (p != 0)
		fprintf(stderr, ": %s\n", strerror(saved_errno));

	exit(1);
}

/* Point stdin, stdout and stderr at /dev/console.
 * If the console cannot be opened, the current descriptors are left
 * untouched. */
static void openconsole(void)
{
	int target;
	int console = open("/dev/console", O_RDWR);

	if (console < 0)
		return;

	/* duplicate onto descriptors 0, 1 and 2 in that order */
	for (target = 0; target <= 2; target++)
		dup2(console, target);

	/* only close the original if it is not one of the standard three */
	if (console > 2)
		close(console);
}

/* Execute the real init. Never returns.
 *
 * If the kernel command line (global cmdline) carries an init= parameter,
 * the last such parameter wins and its value is executed; failure to
 * execute it is fatal. Otherwise the usual locations are tried in order.
 * Dies if no init could be executed.
 *
 * Only "init=" at the start of the command line or preceded by whitespace
 * counts as the parameter: a plain substring search would also match
 * e.g. "rdinit=", and would then re-execute whatever rdinit= names.
 *
 * cmdline is modified in place (the init path is NUL-terminated) and
 * initargs[0] is replaced by the path being executed. */
static void __attribute__ ((noreturn)) exec_init(void)
{
	static const char key[] = "init=";
	int i;
	char *scan;
	char *found = NULL;
	char *definit[] = {
		"/sbin/init",
		"/bin/init",
		"/etc/init",
		"/bin/sh",
		NULL
	};

	openconsole();

	/* find the last init= that starts a word */
	for (scan = strstr(cmdline, key); scan; scan = strstr(scan + 1, key)) {
		if (scan == cmdline || (unsigned char)scan[-1] <= ' ')
			found = scan;
	}

	if (found) {
		char *end;

		found += sizeof key - 1;

		/* the value ends at the first space/control character or NUL;
		 * compare as unsigned so bytes >= 0x80 stay part of the path */
		for (end = found; (unsigned char)*end > ' '; ++end)
			;
		*end = '\0';

		initargs[0] = found;
		execv(initargs[0], initargs);
		serror("executing init '%s' (from cmdline) failed", found);
	}

	/* execv only returns on failure, so simply try the next candidate */
	for (i = 0; definit[i]; i++) {
		initargs[0] = definit[i];
		execv(initargs[0], initargs);
	}
	error("init not found\n");
}

/* check running environment, to avoid destroying already running system */
static void checkenv(void)
{
	struct statfs sfs;

	/* must be run as init */
	if (getpid() != 1)
		error("must be run as init\n");

	openconsole();

	/* check / */
	if (statfs("/", &sfs))
		serror("statfs /");

	/* no work needed if on tmpfs */
	if (sfs.f_type == TMPFS_MAGIC) {
		getprocinfo();
		exec_init();
	}

	/* Make sure we're on a ramfs */
	if (sfs.f_type != RAMFS_MAGIC)
		error("rootfs not a ramfs\n");

	if (lstat("/", &root_st))
		serror("stat /");
}

/* Tell whether directory p is usable as an empty mount point.
 * Returns non-zero if p does not exist (ENOENT) or is a directory with
 * exactly two entries, assumed to be '.' and '..'. Returns 0 if it holds
 * anything else or cannot be opened for any other reason. */
static int isdirempty(const char *p)
{
	int entries = 0;
	DIR *dir = opendir(p);

	if (dir == NULL)
		return (errno == ENOENT);

	/* only the number of entries matters, not their names */
	while (readdir(dir) != NULL)
		++entries;
	closedir(dir);

	return (entries == 2);
}

/* Read from fd until EOF, a read error, or until size-1 bytes have been
 * stored, then NUL-terminate buf. Returns 0 on success, -1 on a read
 * error (errno is left as set by read). */
static int read_string(int fd, char *buf, size_t size)
{
	size_t used = 0;
	ssize_t got;

	while ((got = read(fd, buf + used, size - used - 1)) > 0)
		used += got;
	buf[used] = '\0';
	return got < 0 ? -1 : 0;
}

/* Get data from /proc/cmdline and /proc/meminfo into the global cmdline
 * and meminfo buffers, both NUL-terminated.
 *
 * If /proc/cmdline does not exist, /proc is created and mounted first.
 * /proc is unmounted again before a successful return.
 *
 * Returns NULL on success, or a constant string naming the step that
 * failed; errno then still describes the failure, so the caller can pass
 * the string to serror(). Failure to open or read meminfo is not an
 * error: meminfo is optional. */
static const char *getprocinfo(void)
{
	int fd;

	if ((fd = open(CMDLINE, O_RDONLY)) < 0 && errno == ENOENT) {
		mkdir("/proc", 0555);
		if (mount("none", "/proc", "proc", 0, NULL) < 0)
			return "mounting /proc";
		fd = open(CMDLINE, O_RDONLY);
	}
	if (fd < 0)
		return "opening " CMDLINE;

	if (read_string(fd, cmdline, sizeof cmdline) < 0) {
		/* do not leak the descriptor, and keep read's errno for the
		 * caller's error message */
		int err = errno;

		close(fd);
		errno = err;
		return "reading " CMDLINE;
	}
	close(fd);

	/* meminfo is optional */
	if ((fd = open(MEMINFO, O_RDONLY)) >= 0) {
		/* read_string terminates meminfo itself; cmdline must not be
		 * touched here or a long command line would be truncated */
		read_string(fd, meminfo, sizeof meminfo);
		close(fd);
	}
	umount("/proc");
	return NULL;
}

/* Singly linked list remembering, for each multiply-linked inode already
 * copied, the path of its copy on the new root. */
struct le {
	struct le *next;	/* next entry, NULL at the end of the list */
	ino_t inode;		/* inode number on the old root */
	char path[];		/* NUL-terminated path of the copy */
};

/* head of the hard link list; entries are never freed */
static struct le *hl;

/* Look up inode in the hard link list.
 *
 * If the inode is already on the list, return the path recorded for it.
 * Otherwise record a copy of s as the path for this inode and return
 * NULL, telling the caller that it has to create the file itself.
 *
 * If memory for a new entry cannot be allocated, nothing is recorded and
 * NULL is still returned: the caller then copies the file, and a later
 * link to the same inode becomes an independent copy instead of a hard
 * link. */
static char *hlfind(ino_t inode, const char *s)
{
	struct le *l;
	size_t len;

	for (l = hl; l; l = l->next) {
		if (l->inode == inode)
			return l->path;
	}

	/* new target: push it on the front of the list */
	len = strlen(s) + 1;
	l = malloc(sizeof *l + len);
	if (!l)
		return NULL;

	l->next = hl;
	l->inode = inode;
	memcpy(l->path, s, len);
	hl = l;
	return NULL;
}

static void move_dir(const char *s);

/* Write all len bytes of buf to fd, continuing after partial writes and
 * retrying on EINTR. Returns 0 on success, -1 on failure with errno set
 * (EIO if write reports that nothing was written). */
static int write_all(int fd, const char *buf, size_t len)
{
	while (len > 0) {
		ssize_t done = write(fd, buf, len);

		if (done < 0) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (done == 0) {
			errno = EIO;
			return -1;
		}
		buf += done;
		len -= done;
	}
	return 0;
}

/* Move one directory entry from the old root to the new root.
 *
 * p is the absolute path of the directory on the old root and n the name
 * of the entry in it. The current directory must be the new root: the
 * destination is addressed as "." p "/" n and the source as p "/" n.
 *
 * Directories are recreated and recursed into, regular files are copied
 * (or hard linked to an earlier copy of the same inode), symbolic links
 * and special files are recreated. Owner, access and modification times
 * and permissions are preserved where possible. The source entry is
 * removed once it has been moved.
 *
 * The new root itself and anything on a device other than root_st.st_dev
 * is left alone. Any failure is fatal. */
static void move_ent(const char *p, const char *n)
{
	int pl = strlen(p), nl = strlen(n);
	char s[pl+nl+3];
	struct stat st;
	struct utimbuf tm;

	/* s   = "." p "/" n   destination, relative to the new root
	 * s+1 =     p "/" n   source on the old root */
	s[0] = '.';
	memcpy(s+1, p, pl);
	s[pl+1] = '/';
	memcpy(s+pl+2, n, nl+1);

	if (!strcmp(s+1, newroot))
		return;

	if (lstat(s+1, &st))
		serror("stating %s", s+1);

	/* do not cross into other filesystems (this also skips the tmpfs) */
	if (st.st_dev != root_st.st_dev)
		return;

	if (S_ISDIR(st.st_mode)) {
		/* created owner-only; the real mode is applied further down,
		 * after the contents have been moved */
		if (mkdir(s, 0700))
			serror("creating %s", s);
		move_dir(s+1);
		if (rmdir(s+1))
			serror("removing %s", s+1);
	} else if (S_ISREG(st.st_mode)) {
		int fdi, fdo;
		char *prev;
		void *map;

		/* another name of an inode that was already copied: link to
		 * that copy; owner, times and mode are those of the inode */
		if (st.st_nlink > 1 && (prev = hlfind(st.st_ino, s))) {
			if (link(prev, s))
				serror("linking %s", s);
			if (unlink(s+1))
				serror("unlink of %s", s+1);
			return;
		}
		if ((fdi = open(s+1, O_RDONLY)) < 0)
			serror("opening %s", s+1);
		if ((fdo = open(s, O_CREAT|O_WRONLY|O_TRUNC, 0600)) < 0)
			serror("creating %s", s);

		map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fdi, 0);
		if (map == MAP_FAILED) {
			/* mapping failed (as it does for empty files):
			 * fall back to a plain read/write loop */
			char b[2048];
			ssize_t i;

			while ((i = read(fdi, b, sizeof b)) > 0) {
				if (write_all(fdo, b, i))
					serror("writing %s", s);
			}
			if (i < 0)
				serror("reading %s", s+1);
		} else {
			if (write_all(fdo, map, st.st_size))
				serror("writing %s", s);
			munmap(map, st.st_size);
		}
		close(fdi);
		close(fdo);
	} else if (S_ISLNK(st.st_mode)) {
		char target[2048];
		int i;

		if ((i = readlink(s+1, target, sizeof target-1)) < 0)
			serror("reading link %s", s+1);
		target[i] = '\0';
		if (symlink(target, s))
			serror("creating link %s", s);
	} else if (mknod(s, st.st_mode, st.st_rdev))
		serror("mknod of %s", s);

	if (lchown(s, st.st_uid, st.st_gid))
		serror("chown of %s", s);
	/* utime and chmod follow symbolic links, so skip them for links */
	if (!S_ISLNK(st.st_mode)) {
		tm.actime = st.st_atime;
		tm.modtime = st.st_mtime;
		if (utime(s, &tm))
			serror("settime of %s", s);
		if (chmod(s, st.st_mode & ~S_IFMT))
			serror("chmod of %s", s);
	}
	/* directories were already removed with rmdir above */
	if (!S_ISDIR(st.st_mode) && unlink(s+1))
		serror("unlink of %s", s+1);
}

/* Move the contents of directory s: call move_ent for every entry except
 * '.' and '..'. Failure to open the directory is fatal. */
static void move_dir(const char *s)
{
	struct dirent *entry;
	DIR *dir = opendir(s);

	if (dir == NULL)
		serror("listing %s", s);

	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
		const char *name = entry->d_name;

		/* never descend into the directory itself or its parent */
		if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
			continue;

		move_ent(s, name);
	}

	closedir(dir);
}

/* Build a tmpfs mount option from the data read from MEMINFO.
 *
 * Assumes the first number in meminfo is the MemTotal value in kB.
 * Returns a string of the form "size=<total>k", or NULL if no number is
 * found or there is no room for the option text around it.
 *
 * The string is built in place inside the global meminfo buffer: the
 * five characters before the number are overwritten with "size=" and the
 * text right after it with "k". */
const char *getmemsize(void)
{
	size_t i, j;

	/* Assuming first line of meminfo is MemTotal, find first digit.
	 * ctype functions need an unsigned char value. */
	for (i = 0; meminfo[i] && !isdigit((unsigned char)meminfo[i]); i++)
		;

	/* space needed for 'size=', return NULL otherwise */
	if (i < 5 || !meminfo[i])
		return NULL;

	/* find the end of the number */
	for (j = i + 1; isdigit((unsigned char)meminfo[j]); j++)
		;

	/* 'k' and the terminator must still fit in the buffer */
	if (j + 1 >= sizeof meminfo)
		return NULL;

	/* add 'k' after last digit and terminate string */
	meminfo[j] = 'k';
	meminfo[j+1] = '\0';

	/* now add 'size=' */
	i -= 5;
	memcpy(meminfo+i, "size=", 5);

	/* return from 'size=' */
	return meminfo+i;
}

/* Entry point; meant to run as /init (PID 1) on an initramfs.
 *
 * Checks the environment, reads the kernel command line and memory size,
 * mounts a tmpfs on an unused /newroot (or /newroot0, /newroot1, ...),
 * moves everything from the ramfs onto it, moves the tmpfs over /,
 * chroots into it and executes the real init with this process's
 * arguments. Only returns (with status 1) if exec_init were to return,
 * which it does not. */
int main(int argc, char *argv[])
{
	int nrc = 0;
	const char *e;

	program = argv[0];
	initargs = argv;

	/* modes passed to mkdir/open/mknod must be applied unmodified */
	umask(0);

	checkenv();

	/* e names the failed step; never use it as the format string */
	if ((e = getprocinfo()))
		serror("%s", e);

	/* create a tmpfs: pick the first candidate directory that is either
	 * missing or empty */
	strcpy(newroot, NEWROOT);
	while (!isdirempty(newroot)) {
		snprintf(newroot, sizeof newroot, "%s%d", NEWROOT, nrc++);
	}
	if (mkdir(newroot, 0755) && errno != EEXIST)
		serror("creating new root %s", newroot);
	/* getmemsize() may return NULL: tmpfs is then mounted without
	 * options */
	if (mount("none", newroot, "tmpfs", 0, getmemsize()) < 0)
		serror("mounting tmpfs");
	/* move_ent addresses its destinations relative to the new root */
	if (chdir(newroot))
		serror("entering new root");

	move_dir("/");

	if (mount(".", "/", NULL, MS_MOVE, NULL))
		serror("overmounting root");
	if (chroot(".") || chdir("/"))
		serror("chroot");

	/* remove copy of this program */
	unlink("/init");
	exec_init();
	return 1;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
Url : http://www.zytor.com/pipermail/klibc/attachments/20060928/a299f453/attachment.bin 


More information about the klibc mailing list