diff -urNP sys.orig/alpha/alpha/pmap.c sys/alpha/alpha/pmap.c --- sys.orig/alpha/alpha/pmap.c Wed Apr 12 07:22:50 2006 +++ sys/alpha/alpha/pmap.c Sun May 7 18:06:35 2006 @@ -2625,7 +2625,7 @@ int index; sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; diff -urNP sys.orig/alpha/osf1/osf1_misc.c sys/alpha/osf1/osf1_misc.c --- sys.orig/alpha/osf1/osf1_misc.c Tue Feb 7 00:06:52 2006 +++ sys/alpha/osf1/osf1_misc.c Sat May 13 21:55:24 2006 @@ -894,11 +894,15 @@ uid_t uid; struct uidinfo *uip; struct ucred *newcred, *oldcred; + struct prison *jail; p = td->td_proc; uid = uap->uid; newcred = crget(); - uip = uifind(uid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + uip = uifind(jail, uid); PROC_LOCK(p); oldcred = p->p_ucred; diff -urNP sys.orig/compat/linux/linux_mib.c sys/compat/linux/linux_mib.c --- sys.orig/compat/linux/linux_mib.c Sun Mar 19 13:10:33 2006 +++ sys/compat/linux/linux_mib.c Sun May 7 18:06:35 2006 @@ -137,7 +137,7 @@ return (NULL); pr = td->td_ucred->cr_prison; mtx_lock(&pr->pr_mtx); - if (pr->pr_linux == NULL) { + if (jailed(td->td_ucred)) { /* * If we don't have a linux prison structure yet, allocate * one. 
We have to handle the race where another thread @@ -162,7 +162,7 @@ register struct linux_prison *lpr; pr = td->td_ucred->cr_prison; - if (pr != NULL) { + if (jailed(td->td_ucred)) { mtx_lock(&pr->pr_mtx); if (pr->pr_linux != NULL) { lpr = (struct linux_prison *)pr->pr_linux; @@ -207,7 +207,7 @@ struct linux_prison *lpr; pr = td->td_ucred->cr_prison; - if (pr != NULL) { + if (jailed(td->td_ucred)) { mtx_lock(&pr->pr_mtx); if (pr->pr_linux != NULL) { lpr = (struct linux_prison *)pr->pr_linux; @@ -254,7 +254,7 @@ int version; pr = td->td_ucred->cr_prison; - if (pr != NULL) { + if (jailed(td->td_ucred)) { mtx_lock(&pr->pr_mtx); if (pr->pr_linux != NULL) { lpr = (struct linux_prison *)pr->pr_linux; diff -urNP sys.orig/conf/files sys/conf/files --- sys.orig/conf/files Wed Apr 19 22:38:35 2006 +++ sys/conf/files Sun May 7 18:06:35 2006 @@ -1271,6 +1271,10 @@ kern/kern_idle.c standard kern/kern_intr.c standard kern/kern_jail.c standard +kern/kern_jail2_common.c standard +kern/kern_jail2_network.c standard +kern/kern_jail2_resource.c standard +kern/kern_jail2_disk.c standard kern/kern_kse.c standard kern/kern_kthread.c standard kern/kern_ktr.c optional ktr diff -urNP sys.orig/ddb/db_command.c sys/ddb/db_command.c --- sys.orig/ddb/db_command.c Fri Apr 21 23:39:51 2006 +++ sys/ddb/db_command.c Sun May 7 18:06:35 2006 @@ -564,7 +564,7 @@ * since we're in DDB. 
*/ /* sx_slock(&allproc_lock); */ - LIST_FOREACH(p, &allproc, p_list) + FOREACH_PROC_IN_SYSTEM(p) if (p->p_pid == pid) break; /* sx_sunlock(&allproc_lock); */ diff -urNP sys.orig/i386/i386/pmap.c sys/i386/i386/pmap.c --- sys.orig/i386/i386/pmap.c Wed Apr 12 07:22:51 2006 +++ sys/i386/i386/pmap.c Sun May 7 18:06:35 2006 @@ -3041,7 +3041,7 @@ int index; sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_pid != pid) continue; diff -urNP sys.orig/i386/ibcs2/ibcs2_sysvec.c sys/i386/ibcs2/ibcs2_sysvec.c --- sys.orig/i386/ibcs2/ibcs2_sysvec.c Tue Jan 3 22:39:38 2006 +++ sys/i386/ibcs2/ibcs2_sysvec.c Sun May 7 18:06:35 2006 @@ -110,7 +110,7 @@ case MOD_UNLOAD: /* if this was an ELF module we'd use elf_brand_inuse()... */ sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == &ibcs2_svr3_sysvec) { rval = EBUSY; break; diff -urNP sys.orig/kern/imgact_elf.c sys/kern/imgact_elf.c --- sys.orig/kern/imgact_elf.c Sat Jan 21 22:11:49 2006 +++ sys/kern/imgact_elf.c Sun May 7 18:06:35 2006 @@ -145,7 +145,7 @@ int rval = FALSE; sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_sysent == entry->sysvec) { rval = TRUE; break; diff -urNP sys.orig/kern/init_main.c sys/kern/init_main.c --- sys.orig/kern/init_main.c Tue Feb 7 23:22:01 2006 +++ sys/kern/init_main.c Sat May 13 22:18:41 2006 @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -391,9 +392,8 @@ /* Create credentials. */ p->p_ucred = crget(); p->p_ucred->cr_ngroups = 1; /* group 0 */ - p->p_ucred->cr_uidinfo = uifind(0); - p->p_ucred->cr_ruidinfo = uifind(0); - p->p_ucred->cr_prison = NULL; /* Don't jail it. */ + p->p_ucred->cr_uidinfo = uifind(&jail_0, 0); + p->p_ucred->cr_ruidinfo = uifind(&jail_0, 0); #ifdef AUDIT audit_proc_alloc(p); audit_proc_kproc0(p); @@ -457,7 +457,7 @@ * time from the filesystem. Pretend that proc0 started now. 
*/ sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { microuptime(&p->p_stats->p_start); p->p_rux.rux_runtime = 0; } diff -urNP sys.orig/kern/kern_descrip.c sys/kern/kern_descrip.c --- sys.orig/kern/kern_descrip.c Sat Apr 1 14:09:54 2006 +++ sys/kern/kern_descrip.c Sun May 7 18:06:35 2006 @@ -2451,7 +2451,7 @@ bzero(&xf, sizeof(xf)); xf.xf_size = sizeof(xf); sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (p->p_state == PRS_NEW) continue; PROC_LOCK(p); diff -urNP sys.orig/kern/kern_exec.c sys/kern/kern_exec.c --- sys.orig/kern/kern_exec.c Tue Apr 4 00:16:08 2006 +++ sys/kern/kern_exec.c Sat May 13 22:06:19 2006 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -305,6 +306,7 @@ #ifdef HWPMC_HOOKS struct pmckern_procexec pe; #endif + struct prison *jail; vfslocked = 0; imgp = &image_params; @@ -478,7 +480,10 @@ * Malloc things before we need locks. */ newcred = crget(); - euip = uifind(attr.va_uid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + euip = uifind(jail, attr.va_uid); i = imgp->args->begin_envv - imgp->args->begin_argv; /* Cache arguments if they fit inside our allowance */ if (ps_arg_cache_limit >= i + sizeof(struct pargs)) { @@ -540,6 +545,7 @@ * transitions on the file system. 
*/ oldcred = p->p_ucred; + credential_changing = 0; credential_changing |= (attr.va_mode & VSUID) && oldcred->cr_uid != attr.va_uid; diff -urNP sys.orig/kern/kern_exit.c sys/kern/kern_exit.c --- sys.orig/kern/kern_exit.c Mon Apr 10 17:07:28 2006 +++ sys/kern/kern_exit.c Sun May 7 18:06:35 2006 @@ -71,6 +71,7 @@ #ifdef KTRACE #include #endif +#include #include @@ -437,6 +438,7 @@ */ sx_xlock(&allproc_lock); LIST_REMOVE(p, p_list); + jail_task_del(&p->p_ucred->cr_prison->pr_tasks, p); LIST_INSERT_HEAD(&zombproc, p, p_list); LIST_REMOVE(p, p_hash); sx_xunlock(&allproc_lock); diff -urNP sys.orig/kern/kern_fork.c sys/kern/kern_fork.c --- sys.orig/kern/kern_fork.c Thu Mar 16 01:24:14 2006 +++ sys/kern/kern_fork.c Sun May 7 18:06:35 2006 @@ -65,6 +65,7 @@ #include #include #include +#include #include @@ -207,6 +208,7 @@ struct thread *td2; struct ksegrp *kg2; struct sigacts *newsigacts; + struct prison *prison; int error; /* Can't copy and clear. */ @@ -309,6 +311,15 @@ */ sx_xlock(&allproc_lock); uid = td->td_ucred->cr_ruid; + /* first check per jail limit */ + prison = td->td_ucred->cr_prison; + if (!JAIL_CAN_FORK(prison)) { + printf("Over jail proclimit - %d %d:%d\n", JAIL_STARTED(prison), + JAIL_NPROCS(prison), JAIL_MAXPROC(prison)); + error = EAGAIN; + goto fail; + } + if ((nprocs >= maxproc - 10 && suser_cred(td->td_ucred, SUSER_RUID) != 0) || nprocs >= maxproc) { @@ -420,6 +431,7 @@ p2->p_pid = trypid; AUDIT_ARG(pid, p2->p_pid); LIST_INSERT_HEAD(&allproc, p2, p_list); + jail_task_add(&p1->p_ucred->cr_prison->pr_tasks, p2); LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash); sx_xunlock(&allproc_lock); diff -urNP sys.orig/kern/kern_jail.c sys/kern/kern_jail.c --- sys.orig/kern/kern_jail.c Wed Sep 28 03:30:56 2005 +++ sys/kern/kern_jail.c Sun May 14 22:19:04 2006 @@ -13,7 +13,6 @@ #include "opt_mac.h" #include -#include #include #include #include @@ -23,11 +22,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include 
#include #include @@ -75,22 +76,38 @@ struct prisonlist allprison; struct mtx allprison_mtx; int lastprid = 0; -int prisoncount = 0; +int prisoncount = 1; /* root jail */ + +struct prison jail_0 = { + .pr_id = 0, + .pr_flags = J_START_FL | J_SYSVIPC_ALLOW_FL, + .pr_linux = NULL, + .pr_disk = JAIL_DISKS_INIT, + .pr_tasks = JAIL_TASKS_INIT +}; static void init_prison(void *); static void prison_complete(void *context, int pending); -static struct prison *prison_find(int); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); +#define jprint(a...) +/* printf(a) */ + static void init_prison(void *data __unused) { + jprint("Init prison - jail0 - %p\n", &jail_0); + refcount_init(&jail_0.pr_refcnt,2); /* 1 hold + 1 for process 0 */ mtx_init(&allprison_mtx, "allprison", NULL, MTX_DEF); LIST_INIT(&allprison); + /* init jail0 */ + mtx_init(&jail_0.pr_mtx, "jail mutex", NULL, MTX_DEF); + LIST_INSERT_HEAD(&allprison, &jail_0, pr_list); } -SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); +/* me at init locks time*/ +SYSINIT(prison, SI_SUB_LOCK, SI_ORDER_FIRST, init_prison, NULL); /* * MPSAFE @@ -99,41 +116,39 @@ * struct jail *jail; * }; */ -int -jail(struct thread *td, struct jail_args *uap) +static int +jail_1(struct thread *td, struct jail_args *uap) { struct nameidata nd; struct prison *pr, *tpr; - struct jail j; + struct jail_1 j; struct jail_attach_args jaa; int vfslocked, error, tryprid; error = copyin(uap->jail, &j, sizeof(j)); if (error) return (error); - if (j.version != 0) - return (EINVAL); MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); - pr->pr_ref = 1; - error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); + refcount_init(&pr->pr_refcnt,1); + error = copyinstr(j.path, &pr->pr_disk.pr_path, sizeof(pr->pr_disk.pr_path), 0); if (error) goto e_killmtx; NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, - pr->pr_path, td); + pr->pr_disk.pr_path, td); error 
= namei(&nd); if (error) goto e_killmtx; vfslocked = NDHASGIANT(&nd); - pr->pr_root = nd.ni_vp; + JAIL_VROOT(pr) = nd.ni_vp; VOP_UNLOCK(nd.ni_vp, 0, td); NDFREE(&nd, NDF_ONLY_PNBUF); VFS_UNLOCK_GIANT(vfslocked); error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); if (error) goto e_dropvnref; - pr->pr_ip = j.ip_number; + pr->pr_network.pr_ip = htonl(j.ip_number); pr->pr_linux = NULL; pr->pr_securelevel = securelevel; @@ -154,17 +169,17 @@ goto next; } } + pr->pr_id = jaa.jid = lastprid = tryprid; LIST_INSERT_HEAD(&allprison, pr, pr_list); prisoncount++; mtx_unlock(&allprison_mtx); - + pr->pr_flags |= J_START_FL; error = jail_attach(td, &jaa); if (error) goto e_dropprref; - mtx_lock(&pr->pr_mtx); - pr->pr_ref--; - mtx_unlock(&pr->pr_mtx); + + prison_free(pr); td->td_retval[0] = jaa.jid; return (0); e_dropprref: @@ -173,8 +188,8 @@ prisoncount--; mtx_unlock(&allprison_mtx); e_dropvnref: - vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); - vrele(pr->pr_root); + vfslocked = VFS_LOCK_GIANT(JAIL_VROOT(pr)->v_mount); + vrele(JAIL_VROOT(pr)); VFS_UNLOCK_GIANT(vfslocked); e_killmtx: mtx_destroy(&pr->pr_mtx); @@ -182,6 +197,105 @@ return (error); } +static int +jail_2(struct thread *td, struct jail_args *uap) +{ + struct jail_2 j; + int error; + + error = copyin(uap->jail, &j, sizeof(j)); + if (error) + return (error); + + switch( JAIL2_CLASS(j.cmd) ) { + case J_COMMON: + error = jail2_common(td, &j); + break; + case J_NETWORK: + error = jail2_network(td, &j); + break; + case J_RESOURCE: + error = jail2_resource(td, &j); + break; + default: + printf("wrong jail2 cmd class %d\n", JAIL2_CLASS(j.cmd) ); + error = EOPNOTSUPP; + break; + } + + td->td_retval[0] = error; + return (error); +} + +int +jail(struct thread *td, struct jail_args *uap) +{ + uint32_t ver; + int error; + + error = copyin(uap->jail, &ver, sizeof(ver)); + if (error) + return (error); + + switch( ver ) { + case 0: + case 1: + return jail_1(td, uap); + case 2: + return jail_2(td, uap); + 
default:
+		return (EOPNOTSUPP);
+	}
+}
+/* Move the calling process into prison pr: chdir/chroot to JAIL_VROOT(pr), then install a credential whose cr_prison is pr. */
+int
+jail_migrate(struct thread *td, struct prison *pr)
+{
+	int error = 0;
+	struct proc *p;
+	struct ucred *newcred, *oldcred;
+	int vfslocked;
+
+	if( JAIL_VROOT(pr) == NULL )	/* no root directory configured for this prison yet */
+		return (ENOENT);
+
+	vfslocked = VFS_LOCK_GIANT(JAIL_VROOT(pr)->v_mount);
+	vn_lock(JAIL_VROOT(pr), LK_EXCLUSIVE | LK_RETRY, td);
+	if ((error = change_dir(JAIL_VROOT(pr), td)) != 0)
+		goto e_unlock;
+
+#ifdef MAC
+	if ((error = mac_check_vnode_chroot(td->td_ucred, JAIL_VROOT(pr))))
+		goto e_unlock;
+#endif
+	VOP_UNLOCK(JAIL_VROOT(pr), 0, td);
+	if((error = change_root(JAIL_VROOT(pr), td)) != 0)
+		goto e_unlock1;	/* vnode already unlocked: release Giant only */
+	VFS_UNLOCK_GIANT(vfslocked);
+
+	if((error = kern_chdir(td, "/", UIO_SYSSPACE)) != 0)
+		return (error);	/* NOTE(review): the root was already changed above and is not rolled back */
+
+	p = td->td_proc;
+	newcred = crget();
+	PROC_LOCK(p);
+	oldcred = p->p_ucred;
+	setsugid(p);
+	crcopy(newcred, oldcred);
+	newcred->cr_prison = pr;
+	p->p_ucred = newcred;
+	prison_hold(pr);	/* reference for newcred->cr_prison */
+	PROC_UNLOCK(p);
+	crfree(oldcred);	/* NOTE(review): pr_tasks is not updated here, yet kern_exit.c calls jail_task_del() on the new prison - confirm accounting */
+
+	return (0);
+
+e_unlock:
+	VOP_UNLOCK(JAIL_VROOT(pr), 0, td);
+e_unlock1:
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
 /*
  * MPSAFE
  *
@@ -192,10 +306,8 @@
 int
 jail_attach(struct thread *td, struct jail_attach_args *uap)
 {
-	struct proc *p;
-	struct ucred *newcred, *oldcred;
 	struct prison *pr;
-	int vfslocked, error;
+	int error;
 
 	/*
 	 * XXX: Note that there is a slight race here if two threads
@@ -209,52 +321,55 @@
 	if (error)
 		return (error);
 
-	p = td->td_proc;
 	mtx_lock(&allprison_mtx);
 	pr = prison_find(uap->jid);
+	mtx_unlock(&allprison_mtx);	/* prison_find() asserts allprison_mtx is owned; its hold keeps pr alive after unlock */
 	if (pr == NULL) {
-		mtx_unlock(&allprison_mtx);
-		return (EINVAL);
+		return (ESRCH);
 	}
-	pr->pr_ref++;
-	mtx_unlock(&pr->pr_mtx);
-	mtx_unlock(&allprison_mtx);
-
-	vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount);
-	vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY, td);
-	if ((error = change_dir(pr->pr_root, td)) != 0)
-		goto e_unlock;
-#ifdef MAC
-	if ((error = mac_check_vnode_chroot(td->td_ucred, pr->pr_root)))
-		goto e_unlock;
-#endif
-	VOP_UNLOCK(pr->pr_root, 0, td);
-
change_root(pr->pr_root, td);
-	VFS_UNLOCK_GIANT(vfslocked);
+
+	error = jail_migrate(td, pr);
+	prison_free(pr);	/* drop the hold taken by prison_find() */
 
-	newcred = crget();
-	PROC_LOCK(p);
-	oldcred = p->p_ucred;
-	setsugid(p);
-	crcopy(newcred, oldcred);
-	newcred->cr_prison = pr;
-	p->p_ucred = newcred;
-	PROC_UNLOCK(p);
-	crfree(oldcred);
-	return (0);
-e_unlock:
-	VOP_UNLOCK(pr->pr_root, 0, td);
-	VFS_UNLOCK_GIANT(vfslocked);
-	mtx_lock(&pr->pr_mtx);
-	pr->pr_ref--;
-	mtx_unlock(&pr->pr_mtx);
 	return (error);
 }
 
+/* Create and register a prison with id ctx_id; returns it holding one reference, or NULL when the id is already registered. */
+struct prison *
+prison_alloc(uint32_t ctx_id)
+{
+	struct prison *pr;
+	struct prison *old_pr;
+
+	MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
+	pr->pr_id = ctx_id;
+	refcount_init(&pr->pr_refcnt,1);
+	/* Finish initialisation BEFORE pr becomes visible on allprison: list walkers lock pr_mtx. */
+	mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF);
+	pr->pr_linux = NULL;
+	pr->pr_securelevel = securelevel;
+	pr_uihashinit(pr);
+
+	mtx_lock(&allprison_mtx);
+	if((old_pr=prison_find(ctx_id)) != NULL)
+		goto found;	/* id already in use */
+	LIST_INSERT_HEAD(&allprison, pr, pr_list);
+	prisoncount++;
+	mtx_unlock(&allprison_mtx);
+	return (pr);
+found:
+	mtx_unlock(&allprison_mtx);
+	prison_free(old_pr);	/* drop the hold taken by prison_find(); must happen outside allprison_mtx */
+	pr_uihashdestroy(pr);	/* undo the never-published initialisation */
+	mtx_destroy(&pr->pr_mtx);
+	FREE(pr, M_PRISON);
+	return (NULL);
+}
+
 /*
  * Returns a locked prison instance, or NULL on failure.
*/ -static struct prison * +struct prison * prison_find(int prid) { struct prison *pr; @@ -262,7 +375,7 @@ mtx_assert(&allprison_mtx, MA_OWNED); LIST_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { - mtx_lock(&pr->pr_mtx); + prison_hold(pr); return (pr); } } @@ -270,23 +383,15 @@ } void -prison_free(struct prison *pr) +_prison_free(struct prison *pr) { - mtx_lock(&allprison_mtx); - mtx_lock(&pr->pr_mtx); - pr->pr_ref--; - if (pr->pr_ref == 0) { - LIST_REMOVE(pr, pr_list); - mtx_unlock(&pr->pr_mtx); - prisoncount--; - mtx_unlock(&allprison_mtx); - - TASK_INIT(&pr->pr_task, 0, prison_complete, pr); - taskqueue_enqueue(taskqueue_thread, &pr->pr_task); - return; - } - mtx_unlock(&pr->pr_mtx); + jprint("destroy prison %d\n",pr->pr_id); + mtx_lock(&allprison_mtx); + LIST_REMOVE(pr, pr_list); + prisoncount--; + TASK_INIT(&pr->pr_task, 0, prison_complete, pr); + taskqueue_enqueue(taskqueue_thread, &pr->pr_task); mtx_unlock(&allprison_mtx); } @@ -294,13 +399,10 @@ prison_complete(void *context, int pending) { struct prison *pr; - int vfslocked; pr = (struct prison *)context; - - vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); - vrele(pr->pr_root); - VFS_UNLOCK_GIANT(vfslocked); + jail_destroyvroot(pr); + pr_uihashdestroy(pr); mtx_destroy(&pr->pr_mtx); if (pr->pr_linux != NULL) @@ -308,70 +410,53 @@ FREE(pr, M_PRISON); } -void -prison_hold(struct prison *pr) -{ - - mtx_lock(&pr->pr_mtx); - pr->pr_ref++; - mtx_unlock(&pr->pr_mtx); -} - -u_int32_t -prison_getip(struct ucred *cred) -{ - - return (cred->cr_prison->pr_ip); -} - int -prison_ip(struct ucred *cred, int flag, u_int32_t *ip) +prison_ip(struct ucred *cred, uint32_t *ip) { - u_int32_t tmp; + uint32_t tmp; - if (!jailed(cred)) - return (0); - if (flag) - tmp = *ip; - else - tmp = ntohl(*ip); - if (tmp == INADDR_ANY) { - if (flag) - *ip = cred->cr_prison->pr_ip; - else - *ip = htonl(cred->cr_prison->pr_ip); + if (!jailed(cred)) + return (0); + + tmp = *ip; + jprint("prison_ip %x - %x - %x - %x - ", + tmp, 
htonl(INADDR_ANY), htonl(INADDR_LOOPBACK), + cred->cr_prison->pr_network.pr_ip); + if (tmp == htonl(INADDR_ANY)) { + jprint("any\n"); + *ip = cred->cr_prison->pr_network.pr_ip; return (0); } - if (tmp == INADDR_LOOPBACK) { - if (flag) - *ip = cred->cr_prison->pr_ip; - else - *ip = htonl(cred->cr_prison->pr_ip); + if (tmp == htonl(INADDR_LOOPBACK)) { + jprint("loopback\n"); + *ip = cred->cr_prison->pr_network.pr_ip; return (0); } - if (cred->cr_prison->pr_ip != tmp) + if (cred->cr_prison->pr_network.pr_ip != tmp) { + jprint("not prison\n"); return (1); + } + jprint("prison\n"); return (0); } void -prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) +prison_remote_ip(struct ucred *cred, uint32_t *ip) { - u_int32_t tmp; + uint32_t tmp; if (!jailed(cred)) return; - if (flag) - tmp = *ip; - else - tmp = ntohl(*ip); - if (tmp == INADDR_LOOPBACK) { - if (flag) - *ip = cred->cr_prison->pr_ip; - else - *ip = htonl(cred->cr_prison->pr_ip); + tmp = *ip; + jprint("prison_remote_ip %x - %x - ", + tmp, htonl(INADDR_LOOPBACK)); + + if (tmp == htonl(INADDR_LOOPBACK)) { + *ip = cred->cr_prison->pr_network.pr_ip; + jprint("loopback\n"); return; } + jprint("other\n"); return; } @@ -386,10 +471,12 @@ ok = 1; else if (sai->sin_family != AF_INET) ok = 0; - else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) + else if (cred->cr_prison->pr_network.pr_ip != sai->sin_addr.s_addr) ok = 1; else ok = 0; + jprint("prison_if %x %x - %x\n", + cred->cr_prison->pr_network.pr_ip, sai->sin_addr.s_addr, ok); return (ok); } @@ -411,16 +498,6 @@ } /* - * Return 1 if the passed credential is in a jail, otherwise 0. - */ -int -jailed(struct ucred *cred) -{ - - return (cred->cr_prison != NULL); -} - -/* * Return the correct hostname for the passed credential. 
*/ void @@ -452,7 +529,7 @@ if (!jailed(cred) || jail_enforce_statfs == 0) return (0); pr = cred->cr_prison; - if (pr->pr_root->v_mount == mp) + if (JAIL_VROOT(pr)->v_mount == mp) return (0); if (jail_enforce_statfs == 2) return (ENOENT); @@ -462,11 +539,11 @@ * This is ugly check, but this is the only situation when jail's * directory ends with '/'. */ - if (strcmp(pr->pr_path, "/") == 0) + if (strcmp(pr->pr_disk.pr_path, "/") == 0) return (0); - len = strlen(pr->pr_path); + len = strlen(pr->pr_disk.pr_path); sp = &mp->mnt_stat; - if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) + if (strncmp(pr->pr_disk.pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have situation where jail's root directory @@ -493,7 +570,7 @@ sizeof(sp->f_mntonname)); return; } - if (pr->pr_root->v_mount == mp) { + if (JAIL_VROOT(pr)->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path left there. @@ -506,9 +583,9 @@ * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. 
*/ - if (strcmp(pr->pr_path, "/") == 0) + if (strcmp(pr->pr_disk.pr_path, "/") == 0) return; - len = strlen(pr->pr_path); + len = strlen(pr->pr_disk.pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from @@ -542,7 +619,7 @@ sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); mtx_lock(&allprison_mtx); - if (count != prisoncount) { + if (count > prisoncount) { mtx_unlock(&allprison_mtx); free(sxp, M_TEMP); goto retry; @@ -552,9 +629,9 @@ mtx_lock(&pr->pr_mtx); xp->pr_version = XPRISON_VERSION; xp->pr_id = pr->pr_id; - strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); + strlcpy(xp->pr_path, pr->pr_disk.pr_path, sizeof(xp->pr_path)); strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); - xp->pr_ip = pr->pr_ip; + xp->pr_ip = ntohl(pr->pr_network.pr_ip); mtx_unlock(&pr->pr_mtx); xp++; } @@ -564,6 +641,7 @@ free(sxp, M_TEMP); if (error) return (error); + return (0); } diff -urNP sys.orig/kern/kern_jail2_common.c sys/kern/kern_jail2_common.c --- sys.orig/kern/kern_jail2_common.c Thu Jan 1 03:00:00 1970 +++ sys/kern/kern_jail2_common.c Sun May 14 22:19:12 2006 @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2004 Alex Lyashkov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#define jprint(a...)
+/* printf(a) */
+/* Handle the J_COMMON class of jail(2) version-2 commands for prison j->ctx_id; returns 0 or an errno value. */
+int jail2_common(struct thread *td, struct jail_2 *j)
+{
+	struct prison *pr = NULL;
+	int ret = 0;
+
+	jprint("jail2_command %d ctx: %d \n", JAIL2_CMD(j->cmd), j->ctx_id);
+	/* NOTE(review): only J_ENTER checks suser(); confirm the other commands are meant to be unprivileged. */
+	if (JAIL2_CMD(j->cmd) != J_CREATE) {
+		mtx_lock(&allprison_mtx);
+		pr = prison_find(j->ctx_id);	/* returns pr with an extra hold, dropped at the end */
+		mtx_unlock(&allprison_mtx);
+		if (pr == NULL) {
+			printf("Prison not found for command %x\n", j->cmd);
+			return (ESRCH);
+		}
+	}
+
+	switch(JAIL2_CMD(j->cmd)) {
+	case J_CREATE:
+	{
+		/* prison_alloc() returns NULL when ctx_id is already registered */
+		pr = prison_alloc(j->ctx_id);
+		jprint("prison_alloc return %p\n",pr);
+		if (pr == NULL) {
+			ret = ENOMEM;	/* NOTE(review): EEXIST would describe this case better; kept for ABI */
+			break;
+		}
+		prison_hold(pr);	/* long-lived hold, released by J_DESTROY */
+		break;
+	}
+	case J_DESTROY:
+	{
+		/* drop the hold taken at J_CREATE; the lookup hold is dropped below */
+		prison_free(pr);
+		break;
+	}
+	case J_ENTER:
+	{
+		ret = suser(td);
+		if (ret) {
+			ret = EPERM;
+			break;
+		}
+
+		ret = jail_migrate(td, pr);
+		break;
+	}
+	case J_START:
+	{
+		JAIL_SET_STARTED(pr);
+		break;
+	}
+	case J_STOP:
+	{
+		JAIL_CLEAR_STARTED(pr);
+		break;
+	}
+	case J_SETFLAGS:
+	{
+		uint32_t flags;
+
+		ret = copyin(j->data, &flags, sizeof(flags));
+		if (ret)
+			break;	/* not return: the hold from prison_find() must still be dropped */
+
+		JAIL_SET_FLAGS(pr, flags);
+		break;
+	}
+	case J_STATS:
+	{
+		struct jail2_stats st;
+		bzero(&st, sizeof(st));	/* do not copy uninitialised kernel stack bytes out to user space */
+		st.version = JAIL2_STATS_VER;
+		st.id = pr->pr_id;
+		st.v1.flags = JAIL_GET_FLAGS(pr);
+		st.v1.taskcount = JAIL_NPROCS(pr);
+
+		ret = copyout(&st,j->data,sizeof(st));
+
+		break;
+	}
+	default:
+		ret = EOPNOTSUPP;
+		break;
+	}
+
+	/* drop the hold from prison_find(), or the initial reference from prison_alloc() */
+	if (pr!=NULL)
+		prison_free(pr);
+	jprint("jail2_command return %d\n", ret);
+	return (ret);
+}
diff -urNP sys.orig/kern/kern_jail2_disk.c sys/kern/kern_jail2_disk.c
--- sys.orig/kern/kern_jail2_disk.c	Thu Jan 1 03:00:00 1970
+++ sys/kern/kern_jail2_disk.c	Sun May 14 22:12:37 2006
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2004 Alex Lyashkov
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+/* Look up the user-supplied path `root` and record its vnode and path string as the root of prison pr; returns 0 or an errno value. */
+int jail_setvroot(struct prison *pr, struct thread *td, char *root)
+{
+	int error, vfslocked;
+	struct nameidata nd;
+	char path[MAXPATHLEN];	/* NOTE(review): MAXPATHLEN bytes on the kernel stack; consider a heap buffer */
+
+	error = copyinstr(root, &path, sizeof(path), 0);
+	if (error)
+		goto e_exit;
+
+	/* LOCKLEAF: the vnode comes back locked and is unlocked below once recorded. */
+	NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE,
+	    path, td);
+	error = namei(&nd);
+	if (error)
+		goto e_exit;
+
+	vfslocked = NDHASGIANT(&nd);
+	JAIL_VROOT(pr) = nd.ni_vp;	/* the reference from namei() is kept until jail_destroyvroot() calls vrele() */
+	strlcpy(pr->pr_disk.pr_path, path, sizeof(pr->pr_disk.pr_path));	/* bound by the destination, as jail_1() does */
+	VOP_UNLOCK(nd.ni_vp, 0, td);
+	NDFREE(&nd, NDF_ONLY_PNBUF);	/* same release order as jail_1(): NDFREE, then Giant */
+	VFS_UNLOCK_GIANT(vfslocked);
+e_exit:
+	return (error);
+}
+/* Forget the root of prison pr: clear the stored path and release the vnode reference, if one is set. */
+void jail_destroyvroot(struct prison *pr)
+{
+	int vfslocked;
+
+	pr->pr_disk.pr_path[0]=0;
+	if (JAIL_VROOT(pr) != NULL) {
+		vfslocked = VFS_LOCK_GIANT(JAIL_VROOT(pr)->v_mount);
+		vrele(JAIL_VROOT(pr));
+		VFS_UNLOCK_GIANT(vfslocked);
+		JAIL_VROOT(pr) = NULL;	/* makes a second call a no-op */
+	}
+}
diff -urNP sys.orig/kern/kern_jail2_network.c sys/kern/kern_jail2_network.c
--- sys.orig/kern/kern_jail2_network.c	Thu Jan 1 03:00:00 1970
+++ sys/kern/kern_jail2_network.c	Tue May 9 10:31:44 2006
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2004 Alex Lyashkov
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define jprint(a...) 
+/* printf(a) */
+/* Set the prison's IPv4 address from a user-space struct jail2_ipv4addr; the address is stored in network byte order. */
+static int
+jail_addradd_ipv4(struct prison *_jail, void *_udata)
+{
+	struct jail2_ipv4addr ja;
+	int error;
+
+	error = copyin(_udata, &ja, sizeof(ja));
+	if (error)
+		return (error);
+
+	mtx_lock(&_jail->pr_mtx);
+	_jail->pr_network.pr_ip = htonl(ja.addr);	/* ja.addr arrives in host order */
+	mtx_unlock(&_jail->pr_mtx);
+
+	return (0);
+}
+/* Clear the prison's IPv4 address if it equals the user-supplied one; returns ESRCH when it does not match. */
+static int
+jail_addrdel_ipv4(struct prison *_jail, void *_udata)
+{
+	struct jail2_ipv4addr ja;
+	int error;
+
+	error = copyin(_udata, &ja, sizeof(ja));
+	if (error)
+		return (error);
+
+	mtx_lock(&_jail->pr_mtx);
+	error = _jail->pr_network.pr_ip == htonl(ja.addr) ? 0 : ESRCH;	/* pr_ip is kept in network order, see jail_addradd_ipv4() */
+	if( error == 0 )
+		_jail->pr_network.pr_ip = 0;
+	mtx_unlock(&_jail->pr_mtx);
+
+	return (error);	/* report the mismatch instead of always returning 0 */
+}
+
+/* Handle the J_NETWORK class of jail(2) version-2 commands for prison j->ctx_id; returns 0 or an errno value. */
+int jail2_network(struct thread *td, struct jail_2 *j)
+{
+	struct prison *pr;
+	int ret = 0;
+
+	mtx_lock(&allprison_mtx);
+	pr = prison_find(j->ctx_id);	/* returns pr with an extra hold, dropped at the end */
+	mtx_unlock(&allprison_mtx);
+
+	jprint("jail2_network %d ctx: %d pr: %p\n", JAIL2_CMD(j->cmd), j->ctx_id, pr );
+	if(pr == NULL) {
+		printf("Prison not found for command %x\n", j->cmd);
+		return (ESRCH);
+	}
+
+	switch(JAIL2_CMD(j->cmd)) {
+	case J_VNETDEV_CREATE:
+	case J_VNETDEV_REMOVE:
+	case J_VNETDEV_ATTACH:
+	case J_VNETDEV_DETACH:
+	case J_VNETDEV_SET_TXSPEED:
+	case J_VNETDEV_SET_FLAGS:
+	case J_VNETDEV_UNSET_FLAGS:
+		ret = EOPNOTSUPP;	/* virtual network devices are not implemented */
+		break;
+	/* IPV4 control */
+	case J_ADDMASKIPV4:
+		ret = jail_addradd_ipv4(pr,j->data);
+		break;
+	case J_RMIPV4:
+		ret = jail_addrdel_ipv4(pr,j->data);
+		break;
+	default:
+		ret = EOPNOTSUPP;
+		break;
+	}
+
+	/* drop the hold taken by prison_find() */
+	prison_free(pr);
+	jprint("jail2_network return %d\n", ret);
+	return (ret);
+}
diff -urNP sys.orig/kern/kern_jail2_resource.c sys/kern/kern_jail2_resource.c
--- sys.orig/kern/kern_jail2_resource.c	Thu Jan 1 03:00:00 1970
+++ sys/kern/kern_jail2_resource.c	Sun May 14 18:35:16 2006
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2004 Alex Lyashkov
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#define jprint(a...) 
+/* printf(a) */
+/* Copy an int process limit from user space into the jail; 0 clears the limit flag. */
+static int
+jail_setproclimit(struct prison *_pr, void *_data)
+{
+	int error;
+	int limit;
+
+	error = copyin(_data, &limit, sizeof(limit));
+	if (error)
+		return (error);
+	/* jt_limit is written under the exclusive allproc_lock. */
+	sx_xlock(&allproc_lock);
+	_pr->pr_tasks.jt_limit = limit;
+	sx_xunlock(&allproc_lock);
+	/* The J_PROCLIMIT_FL flag is updated atomically, after the lock is dropped. */
+	if (limit != 0)
+		JAIL_SET_PROCLIM(_pr);
+	else
+		JAIL_CLEAR_PROCLIM(_pr);
+
+	return (0);
+}
+/* Copy an int from user space and store it atomically (release) into *res. */
+static int
+jail_setresource_int(uint32_t *res, void *_data)
+{
+	int error;
+	int limit;
+
+	error = copyin(_data, &limit, sizeof(limit));
+	if (error)
+		return (error);
+
+	atomic_store_rel_int(res, limit);
+
+	return (0);
+}
+
+/* Look up the prison named by _j->ctx_id and run the resource command in _j->cmd. */
+int jail2_resource(struct thread *_td, struct jail_2 *_j)
+{
+	struct prison *pr;
+	int ret = 0;
+
+	mtx_lock(&allprison_mtx);
+	pr = prison_find(_j->ctx_id);
+	mtx_unlock(&allprison_mtx);
+
+	jprint("jail2_resouce %d ctx: %d pr: %p\n", JAIL2_CMD(_j->cmd), _j->ctx_id, pr );
+	if (pr == NULL) {
+		printf("Prison not found for command %x\n", _j->cmd);
+		return (ESRCH);
+	}
+
+	switch(JAIL2_CMD(_j->cmd)) {
+	case J_SETVROOT:
+		if (JAIL_VROOT(pr) != NULL) {	/* a vroot is already set */
+			if (JAIL_NPROCS(pr) != 0) {	/* refuse while the jail has processes */
+				ret = EPERM;
+				break;
+			}
+			jail_destroyvroot(pr);
+		}
+		ret = jail_setvroot(pr, _td, _j->data);
+		break;
+	case J_SETPROCLIMIT:
+		ret = jail_setproclimit(pr, _j->data);
+		break;
+	case J_SETUSEDSPACE:
+		ret = jail_setresource_int(&pr->pr_disk.pr_du, _j->data);
+		break;
+	case J_SETDISKQUOTE:
+		ret = jail_setresource_int(&pr->pr_disk.pr_l_du, _j->data);
+		break;
+	case J_INODEUSAGE:
+		ret = jail_setresource_int(&pr->pr_disk.pr_inodes, _j->data);
+		break;
+	case J_INODELIMIT:
+		ret = jail_setresource_int(&pr->pr_disk.pr_l_inodes, _j->data);
+		break;
+	case J_SETMEMLIMIT:	/* the remaining limits all yield EOPNOTSUPP */
+	case J_SETFILELIMIT:
+	case J_SETRSSLIMIT:
+	case J_SETTCPSOCKETSLIMIT:
+	case J_SETUDPSOCKETSLIMIT:
+	case J_CPUHARDLIMIT:
+	case J_CPUSOFTLIMIT:
+	default:
+		ret = EOPNOTSUPP;
+		break;
+	}
+
+	/* remove refs from find/create */
+	prison_free (pr);
+	jprint("jail2_resouce return %d\n", ret);
+	return (ret);
+
+} diff -urNP sys.orig/kern/kern_ktrace.c sys/kern/kern_ktrace.c --- sys.orig/kern/kern_ktrace.c Wed Mar 29 00:30:22 2006 +++ sys/kern/kern_ktrace.c Sun May 7 18:06:35 2006 @@ -638,7 +638,7 @@ */ if (ops == KTROP_CLEARFILE) { sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_tracevp == vp) { if (ktrcanset(td, p)) { @@ -976,7 +976,7 @@ */ cred = NULL; sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_tracevp == vp) { mtx_lock(&ktrace_mtx); diff -urNP sys.orig/kern/kern_mib.c sys/kern/kern_mib.c --- sys.orig/kern/kern_mib.c Sun Aug 21 21:03:31 2005 +++ sys/kern/kern_mib.c Sun May 7 18:06:35 2006 @@ -198,7 +198,7 @@ int error; pr = req->td->td_ucred->cr_prison; - if (pr != NULL) { + if (jailed(req->td->td_ucred)) { if (!jail_set_hostname_allowed && req->newptr) return (EPERM); /* @@ -269,7 +269,7 @@ * Permit update only if the new securelevel exceeds the * global level, and local level if any. 
*/ - if (pr != NULL) { + if (jailed(req->td->td_ucred)) { mtx_lock(&pr->pr_mtx); if (!regression_securelevel_nonmonotonic && (level < imax(securelevel, pr->pr_securelevel))) { diff -urNP sys.orig/kern/kern_prot.c sys/kern/kern_prot.c --- sys.orig/kern/kern_prot.c Mon Feb 6 02:32:33 2006 +++ sys/kern/kern_prot.c Sat May 13 22:27:40 2006 @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -58,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -496,11 +496,15 @@ uid_t uid; struct uidinfo *uip; int error; + struct prison *jail; uid = uap->uid; AUDIT_ARG(uid, uid); newcred = crget(); - uip = uifind(uid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + uip = uifind(jail, uid); PROC_LOCK(p); oldcred = p->p_ucred; @@ -610,11 +614,15 @@ uid_t euid; struct uidinfo *euip; int error; + struct prison *jail; euid = uap->euid; AUDIT_ARG(euid, euid); newcred = crget(); - euip = uifind(euid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + euip = uifind(jail, euid); PROC_LOCK(p); oldcred = p->p_ucred; @@ -895,14 +903,18 @@ uid_t euid, ruid; struct uidinfo *euip, *ruip; int error; + struct prison *jail; euid = uap->euid; ruid = uap->ruid; AUDIT_ARG(euid, euid); AUDIT_ARG(ruid, ruid); newcred = crget(); - euip = uifind(euid); - ruip = uifind(ruid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + euip = uifind(jail, euid); + ruip = uifind(jail, ruid); PROC_LOCK(p); oldcred = p->p_ucred; @@ -1036,6 +1048,7 @@ uid_t euid, ruid, suid; struct uidinfo *euip, *ruip; int error; + struct prison *jail; euid = uap->euid; ruid = uap->ruid; @@ -1044,8 +1057,11 @@ AUDIT_ARG(ruid, ruid); AUDIT_ARG(suid, suid); newcred = crget(); - euip = uifind(euid); - ruip = uifind(ruid); + PROC_LOCK(p); + jail = p->p_ucred->cr_prison; + PROC_UNLOCK(p); + euip = uifind(jail, euid); + ruip = uifind(jail, ruid); PROC_LOCK(p); oldcred = p->p_ucred; @@ -1377,7 +1393,7 @@ active_securelevel = 
securelevel; KASSERT(cr != NULL, ("securelevel_gt: null cr")); - if (cr->cr_prison != NULL) + if (jailed(cr)) active_securelevel = imax(cr->cr_prison->pr_securelevel, active_securelevel); return (active_securelevel > level ? EPERM : 0); @@ -1390,7 +1406,7 @@ active_securelevel = securelevel; KASSERT(cr != NULL, ("securelevel_ge: null cr")); - if (cr->cr_prison != NULL) + if (jailed(cr)) active_securelevel = imax(cr->cr_prison->pr_securelevel, active_securelevel); return (active_securelevel >= level ? EPERM : 0); @@ -1860,6 +1876,8 @@ MALLOC(cr, struct ucred *, sizeof(*cr), M_CRED, M_WAITOK | M_ZERO); refcount_init(&cr->cr_ref, 1); + cr->cr_prison = &jail_0; + prison_hold(cr->cr_prison); #ifdef MAC mac_init_cred(cr); #endif @@ -1902,8 +1920,7 @@ /* * Free a prison, if any. */ - if (jailed(cr)) - prison_free(cr->cr_prison); + prison_free(cr->cr_prison); #ifdef MAC mac_destroy_cred(cr); #endif @@ -1931,13 +1948,14 @@ { KASSERT(crshared(dest) == 0, ("crcopy of shared ucred")); + if(dest->cr_prison) + prison_free(dest->cr_prison); bcopy(&src->cr_startcopy, &dest->cr_startcopy, (unsigned)((caddr_t)&src->cr_endcopy - (caddr_t)&src->cr_startcopy)); uihold(dest->cr_uidinfo); uihold(dest->cr_ruidinfo); - if (jailed(dest)) - prison_hold(dest->cr_prison); + prison_hold(dest->cr_prison); #ifdef MAC mac_copy_cred(src, dest); #endif diff -urNP sys.orig/kern/kern_resource.c sys/kern/kern_resource.c --- sys.orig/kern/kern_resource.c Sat Mar 11 12:48:19 2006 +++ sys/kern/kern_resource.c Sun May 14 01:10:54 2006 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -64,15 +65,13 @@ static MALLOC_DEFINE(M_PLIMIT, "plimit", "plimit structures"); static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures"); -#define UIHASH(uid) (&uihashtbl[(uid) & uihash]) +#define UIHASH(jail, uid) (&(jail)->uihashtbl[(uid) & (jail)->uihash]) static struct mtx uihashtbl_mtx; -static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; -static u_long uihash; /* size of hash 
table - 1 */
 
 static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
-static struct uidinfo *uilookup(uid_t uid);
+static struct uidinfo *uilookup(struct prison *jail, uid_t uid);
 
 /*
  * Resource controls and accounting.
@@ -141,7 +140,7 @@
 		if (uap->who == 0)
 			uap->who = td->td_ucred->cr_uid;
 		sx_slock(&allproc_lock);
-		LIST_FOREACH(p, &allproc, p_list) {
+		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
 			if (!p_cansee(td, p) &&
 			    p->p_ucred->cr_uid == uap->who) {
@@ -1001,23 +1000,49 @@
 uihashinit()
 {
 
-	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
+	jail_0.uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &jail_0.uihash);
 	mtx_init(&uihashtbl_mtx, "uidinfo hash", NULL, MTX_DEF);
 }
 
+void
+pr_uihashinit(struct prison *jail)
+{
+	jail->uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &jail->uihash);
+}
+
+/*
+ * Free every uidinfo still hashed in the jail, then destroy the hash table.
+ * Call only while destroying a prison, when no active references remain.
+ * uihash is the hash mask (table size - 1), so bucket uihash is walked too.
+ */
+void pr_uihashdestroy(struct prison *jail)
+{
+	u_long i;
+	struct uihashhead *uipp;
+	struct uidinfo *uip;
+
+	for (i = 0; i <= jail->uihash; i++) {
+		uipp = &jail->uihashtbl[i];
+		while((uip = LIST_FIRST(uipp)) != NULL) {
+			LIST_REMOVE(uip, ui_hash);
+			FREE(uip, M_UIDINFO);
+		}
+	}
+	hashdestroy(jail->uihashtbl, M_UIDINFO, jail->uihash);
+}
+
 /*
  * Look up a uidinfo struct for the parameter uid.
  * uihashtbl_mtx must be locked.
  */
 static struct uidinfo *
-uilookup(uid)
-	uid_t uid;
+uilookup(struct prison *jail, uid_t uid)
 {
 	struct uihashhead *uipp;
 	struct uidinfo *uip;
 
 	mtx_assert(&uihashtbl_mtx, MA_OWNED);
-	uipp = UIHASH(uid);
+	uipp = UIHASH(jail, uid);
 	LIST_FOREACH(uip, uipp, ui_hash)
 		if (uip->ui_uid == uid)
 			break;
@@ -1031,13 +1056,12 @@
  * uifree() should be called on a struct uidinfo when released.
*/ struct uidinfo * -uifind(uid) - uid_t uid; +uifind(struct prison *jail, uid_t uid) { struct uidinfo *old_uip, *uip; mtx_lock(&uihashtbl_mtx); - uip = uilookup(uid); + uip = uilookup(jail, uid); if (uip == NULL) { mtx_unlock(&uihashtbl_mtx); uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO); @@ -1047,14 +1071,14 @@ * were in malloc and not holding the lock, so we have to * make sure we don't insert a duplicate uidinfo. */ - if ((old_uip = uilookup(uid)) != NULL) { + if ((old_uip = uilookup(jail,uid)) != NULL) { /* Someone else beat us to it. */ free(uip, M_UIDINFO); uip = old_uip; } else { uip->ui_mtxp = mtx_pool_alloc(mtxpool_sleep); uip->ui_uid = uid; - LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash); + LIST_INSERT_HEAD(UIHASH(jail, uid), uip, ui_hash); } } uihold(uip); diff -urNP sys.orig/kern/kern_sig.c sys/kern/kern_sig.c --- sys.orig/kern/kern_sig.c Fri Apr 21 22:26:21 2006 +++ sys/kern/kern_sig.c Sun May 7 18:06:35 2006 @@ -1671,7 +1671,7 @@ * broadcast */ sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_pid <= 1 || p->p_flag & P_SYSTEM || p == td->td_proc || p->p_state == PRS_NEW) { diff -urNP sys.orig/kern/sysv_ipc.c sys/kern/sysv_ipc.c --- sys.orig/kern/sysv_ipc.c Fri Jan 7 01:35:39 2005 +++ sys/kern/sysv_ipc.c Sun May 14 12:26:37 2006 @@ -41,6 +41,7 @@ #include #include #include +#include void (*shmfork_hook)(struct proc *, struct proc *) = NULL; void (*shmexit_hook)(struct vmspace *) = NULL; @@ -83,6 +84,9 @@ struct ucred *cred = td->td_ucred; int error; + if (cred->cr_prison->pr_id != perm->prisonid) + return (EACCES); + if (cred->cr_uid != perm->cuid && cred->cr_uid != perm->uid) { /* * For a non-create/owner, we require privilege to diff -urNP sys.orig/kern/sysv_msg.c sys/kern/sysv_msg.c --- sys.orig/kern/sysv_msg.c Sat Feb 12 03:22:39 2005 +++ sys/kern/sysv_msg.c Sun May 14 11:36:48 2006 @@ -69,6 +69,7 @@ #include #include #include +#include static MALLOC_DEFINE(M_MSG, "msg", 
"SVID compatible message queues");
 
@@ -342,7 +343,7 @@
 {
 	int error;
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
 	if (uap->which < 0 ||
 	    uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
@@ -418,8 +419,10 @@
 {
 	int rval, error, msqix;
 	register struct msqid_kernel *msqkptr;
+	struct prison *jail;
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	jail = td->td_ucred->cr_prison;
+	if (!JAIL_SYSVIPC_ALLOW(jail))
 		return (ENOSYS);
 
 	msqix = IPCID_TO_IX(msqid);
@@ -579,10 +582,12 @@
 	int msgflg = uap->msgflg;
 	struct ucred *cred = td->td_ucred;
 	register struct msqid_kernel *msqkptr = NULL;
+	struct prison *jail;
 
 	DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	jail = td->td_ucred->cr_prison;
+	if (!JAIL_SYSVIPC_ALLOW(jail))
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -644,7 +649,8 @@
 		msqkptr->u.msg_perm.cgid = cred->cr_gid;
 		msqkptr->u.msg_perm.gid = cred->cr_gid;
 		msqkptr->u.msg_perm.mode = (msgflg & 0777);
-		/* Make sure that the returned msqid is unique */
+		msqkptr->u.msg_perm.prisonid = jail->pr_id;
+		/* Make sure that the returned msqid is unique */
 		msqkptr->u.msg_perm.seq = (msqkptr->u.msg_perm.seq + 1) & 0x7fff;
 		msqkptr->u.msg_first = NULL;
 		msqkptr->u.msg_last = NULL;
@@ -698,10 +704,13 @@
 	register struct msqid_kernel *msqkptr;
 	register struct msg *msghdr;
 	short next;
+	struct prison *jail;
 
 	DPRINTF(("call to msgsnd(%d, 0x%x, %d, %d)\n", msqid, user_msgp, msgsz,
 	    msgflg));
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+
+	jail = td->td_ucred->cr_prison;
+	if (!JAIL_SYSVIPC_ALLOW(jail))	/* deny unless SysV IPC is enabled for this jail */
 		return (ENOSYS);
 
 	mtx_lock(&msq_mtx);
@@ -1041,7 +1050,7 @@
 	DPRINTF(("call to msgrcv(%d, 0x%x, %d, %ld, %d)\n", msqid, user_msgp,
 	    msgsz, msgtyp, msgflg));
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
 
 	msqid = IPCID_TO_IX(msqid);
diff -urNP sys.orig/kern/sysv_sem.c sys/kern/sysv_sem.c
--- sys.orig/kern/sysv_sem.c Tue Jun 7 08:03:27 2005 +++ sys/kern/sysv_sem.c Sun May 14 12:27:21 2006 @@ -57,6 +57,7 @@ #include #include #include +#include #include static MALLOC_DEFINE(M_SEM, "sem", "SVID compatible semaphores"); @@ -339,8 +340,10 @@ } */ *uap; { int error; + struct prison *jail; - if (!jail_sysvipc_allowed && jailed(td->td_ucred)) + jail = td->td_ucred->cr_prison; + if (!JAIL_SYSVIPC_ALLOW(jail)) return (ENOSYS); if (uap->which < 0 || uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) @@ -567,10 +570,11 @@ struct semid_kernel *semakptr; struct mtx *sema_mtxp; u_short usval, count; + struct prison *jail = cred->cr_prison; DPRINTF(("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg)); - if (!jail_sysvipc_allowed && jailed(td->td_ucred)) + if (!JAIL_SYSVIPC_ALLOW(jail)) return (ENOSYS); array = NULL; @@ -851,9 +855,10 @@ int nsems = uap->nsems; int semflg = uap->semflg; struct ucred *cred = td->td_ucred; + struct prison *jail = cred->cr_prison; DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg)); - if (!jail_sysvipc_allowed && jailed(td->td_ucred)) + if (!JAIL_SYSVIPC_ALLOW(jail)) return (ENOSYS); mtx_lock(&Giant); @@ -922,6 +927,7 @@ sema[semid].u.sem_perm.cgid = cred->cr_gid; sema[semid].u.sem_perm.gid = cred->cr_gid; sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC; + sema[semid].u.sem_perm.prisonid = jail->pr_id; sema[semid].u.sem_perm.seq = (sema[semid].u.sem_perm.seq + 1) & 0x7fff; sema[semid].u.sem_nsems = nsems; @@ -981,7 +987,8 @@ DPRINTF(("call to semop(%d, 0x%x, %u)\n", semid, sops, nsops)); - if (!jail_sysvipc_allowed && jailed(td->td_ucred)) + + if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison)) return (ENOSYS); semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ diff -urNP sys.orig/kern/sysv_shm.c sys/kern/sysv_shm.c --- sys.orig/kern/sysv_shm.c Thu Mar 30 10:42:32 2006 +++ sys/kern/sysv_shm.c Sun May 14 11:46:57 2006 @@ -311,9 +311,10 @@ #endif int i; int error = 0; - - if (!jail_sysvipc_allowed && 
jailed(td->td_ucred))
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
+
 	mtx_lock(&Giant);
 	shmmap_s = p->p_vmspace->vm_shm;
 	if (shmmap_s == NULL) {
@@ -372,7 +373,7 @@
 	int rv;
 	int error = 0;
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
 	mtx_lock(&Giant);
 	shmmap_s = p->p_vmspace->vm_shm;
@@ -500,7 +501,8 @@
 	struct shmid_kernel *shmseg;
 	struct oshmid_ds outbuf;
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
 	mtx_lock(&Giant);
 	shmseg = shm_find_segment_by_shmid(uap->shmid);
@@ -569,7 +571,7 @@
 	int error = 0;
 	struct shmid_kernel *shmseg;
 
-	if (!jail_sysvipc_allowed && jailed(td->td_ucred))
+	if (!JAIL_SYSVIPC_ALLOW(td->td_ucred->cr_prison))
 		return (ENOSYS);
 
 	mtx_lock(&Giant);
@@ -811,6 +813,7 @@
 	shmseg->u.shm_perm.cgid = shmseg->u.shm_perm.gid = cred->cr_gid;
 	shmseg->u.shm_perm.mode = (shmseg->u.shm_perm.mode & SHMSEG_WANTED) |
 	    (mode & ACCESSPERMS) | SHMSEG_ALLOCATED;
+	shmseg->u.shm_perm.prisonid = cred->cr_prison->pr_id;	/* creator's jail, checked by ipcperm() */
 	shmseg->u.shm_segsz = uap->size;
 	shmseg->u.shm_cpid = td->td_proc->p_pid;
 	shmseg->u.shm_lpid = shmseg->u.shm_nattch = 0;
diff -urNP sys.orig/netinet/in_pcb.c sys/netinet/in_pcb.c
--- sys.orig/netinet/in_pcb.c	Tue Apr 4 15:26:07 2006
+++ sys/netinet/in_pcb.c	Sun May 7 18:06:35 2006
@@ -294,7 +294,7 @@
 		return (EAFNOSUPPORT);
 #endif
 	if (sin->sin_addr.s_addr != INADDR_ANY)
-		if (prison_ip(cred, 0, &sin->sin_addr.s_addr))
+		if (prison_ip(cred, &sin->sin_addr.s_addr))
 			return(EINVAL);
 	if (sin->sin_port != *lportp) {
 		/* Don't allow the port to change. */
@@ -352,7 +352,7 @@
 				     t->inp_socket->so_cred->cr_uid))
 				return (EADDRINUSE);
 		}
-		if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr))
+		if (prison && prison_ip(cred, &sin->sin_addr.s_addr))
 			return (EADDRNOTAVAIL);
 		t = in_pcblookup_local(pcbinfo, sin->sin_addr,
 		    lport, prison ?
0 : wild); @@ -388,7 +388,7 @@ int count; if (laddr.s_addr != INADDR_ANY) - if (prison_ip(cred, 0, &laddr.s_addr)) + if (prison_ip(cred, &laddr.s_addr)) return (EINVAL); if (inp->inp_flags & INP_HIGHPORT) { @@ -471,7 +471,7 @@ wild)); } } - if (prison_ip(cred, 0, &laddr.s_addr)) + if (prison_ip(cred, &laddr.s_addr)) return (EINVAL); *laddrp = laddr.s_addr; *lportp = lport; diff -urNP sys.orig/netinet/tcp_usrreq.c sys/netinet/tcp_usrreq.c --- sys.orig/netinet/tcp_usrreq.c Mon Apr 3 15:43:56 2006 +++ sys/netinet/tcp_usrreq.c Sun May 7 18:06:35 2006 @@ -455,7 +455,7 @@ && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); if (jailed(td->td_ucred)) - prison_remote_ip(td->td_ucred, 0, &sinp->sin_addr.s_addr); + prison_remote_ip(td->td_ucred, &sinp->sin_addr.s_addr); TCPDEBUG0; INP_INFO_WLOCK(&tcbinfo); diff -urNP sys.orig/netinet/udp_usrreq.c sys/netinet/udp_usrreq.c --- sys.orig/netinet/udp_usrreq.c Fri Apr 21 12:25:40 2006 +++ sys/netinet/udp_usrreq.c Sun May 7 18:06:35 2006 @@ -812,7 +812,7 @@ if (addr) { sin = (struct sockaddr_in *)addr; if (jailed(td->td_ucred)) - prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); + prison_remote_ip(td->td_ucred, &sin->sin_addr.s_addr); if (inp->inp_faddr.s_addr != INADDR_ANY) { error = EISCONN; goto release; @@ -1017,7 +1017,7 @@ } sin = (struct sockaddr_in *)nam; if (jailed(td->td_ucred)) - prison_remote_ip(td->td_ucred, 0, &sin->sin_addr.s_addr); + prison_remote_ip(td->td_ucred, &sin->sin_addr.s_addr); error = in_pcbconnect(inp, nam, td->td_ucred); if (error == 0) soisconnected(so); diff -urNP sys.orig/sys/ipc.h sys/sys/ipc.h --- sys.orig/sys/ipc.h Fri Jan 7 04:29:23 2005 +++ sys/sys/ipc.h Sun May 14 12:23:28 2006 @@ -79,6 +79,7 @@ unsigned short gid; /* group id */ unsigned short mode; /* r/w permission */ unsigned short seq; /* sequence # (to generate unique ipcid) */ + unsigned int prisonid; /* id prison created IPC object */ key_t key; /* user specified msg/sem/shm key */ }; diff -urNP 
sys.orig/sys/jail.h sys/sys/jail.h --- sys.orig/sys/jail.h Thu Jun 9 21:49:19 2005 +++ sys/sys/jail.h Sun May 14 12:00:58 2006 @@ -13,11 +13,11 @@ #ifndef _SYS_JAIL_H_ #define _SYS_JAIL_H_ -struct jail { +struct jail_1 { u_int32_t version; char *path; char *hostname; - u_int32_t ip_number; + u_int32_t ip_number; /* network order */ }; struct xprison { @@ -25,21 +25,29 @@ int pr_id; char pr_path[MAXPATHLEN]; char pr_host[MAXHOSTNAMELEN]; - u_int32_t pr_ip; + u_int32_t pr_ip; /* network order */ }; #define XPRISON_VERSION 1 #ifndef _KERNEL -int jail(struct jail *); +int jail(void *); int jail_attach(int); #else /* _KERNEL */ #include -#include -#include +#include +#include #include +#include +#include + +#include +#include +#include +#include +#include #define JAIL_MAX 999999 @@ -59,20 +67,32 @@ * (c) set only during creation before the structure is shared, no mutex * required to read * (d) set only during destruction of jail, no mutex needed + * (n) not need locking or atomic operation + * (i) have internal locker */ #if defined(_KERNEL) || defined(_WANT_PRISON) struct prison { - LIST_ENTRY(prison) pr_list; /* (a) all prisons */ - int pr_id; /* (c) prison id */ - int pr_ref; /* (p) refcount */ - char pr_path[MAXPATHLEN]; /* (c) chroot path */ - struct vnode *pr_root; /* (c) vnode to rdir */ - char pr_host[MAXHOSTNAMELEN]; /* (p) jail hostname */ - u_int32_t pr_ip; /* (c) ip addr host */ - void *pr_linux; /* (p) linux abi */ - int pr_securelevel; /* (p) securelevel */ - struct task pr_task; /* (d) destroy task */ - struct mtx pr_mtx; + LIST_ENTRY(prison) pr_list; /* (a) all prisons */ + int pr_id; /* (c) prison id */ + int pr_refcnt; /* (p) refcount */ + + uint32_t pr_flags; /* (n) flags for jails restructions + * see J_*_FL in jail_flags.h + */ + char pr_host[MAXHOSTNAMELEN]; /* (p) jail hostname */ + void *pr_linux; /* (p) linux abi */ + int pr_securelevel; /* (p) securelevel */ + struct task pr_task; /* (d) destroy task */ + struct mtx pr_mtx; + struct jail_tasks 
pr_tasks; /* (i) tasks in jail */ + struct jail_disks pr_disk; /* (i) disk usage */ + struct jail_network pr_network; /* (i) for jail */ + struct jail_limits pr_limits; /* (n) various limits for jail */ + /* (c) per jail uid info + * also protected with uihash mutex + */ + LIST_HEAD(uihashhead, uidinfo) *uihashtbl; + u_long uihash; /* size of hash table - 1 */ }; #endif /* _KERNEL || _WANT_PRISON */ @@ -91,26 +111,71 @@ LIST_HEAD(prisonlist, prison); extern struct prisonlist allprison; +extern struct mtx allprison_mtx; +extern struct prison jail_0; /* * Kernel support functions for jail(). */ -struct ucred; struct mount; struct sockaddr; struct statfs; -int jailed(struct ucred *cred); +struct thread; + void getcredhostname(struct ucred *cred, char *, size_t); int prison_check(struct ucred *cred1, struct ucred *cred2); int prison_canseemount(struct ucred *cred, struct mount *mp); void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp); -void prison_free(struct prison *pr); -u_int32_t prison_getip(struct ucred *cred); -void prison_hold(struct prison *pr); + +/* jails control */ +struct prison *prison_alloc(uint32_t ctx_id); +struct prison *prison_find(int prid); +struct prison *prison_assign(int prid); +void _prison_free(struct prison *pr); + +/* task control */ +int jail_migrate(struct thread *td, struct prison *pr); + +/* network */ int prison_if(struct ucred *cred, struct sockaddr *sa); -int prison_ip(struct ucred *cred, int flag, u_int32_t *ip); -void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip); +int prison_ip(struct ucred *cred, uint32_t *ip); +void prison_remote_ip(struct ucred *cred, uint32_t *ip); + +/* INLINES */ +/* + * Return 1 if the passed credential is in a jail, otherwise 0. 
+ */ +static inline int +jailed(struct ucred *cred) +{ + return (cred->cr_prison != &jail_0); +} +static inline int +pr_jailed(struct prison *pr) +{ + return (pr != &jail_0); +} + + +static inline void +prison_hold(struct prison *pr) +{ + refcount_acquire(&pr->pr_refcnt); +} + +static __inline +void prison_free(struct prison *pr) +{ + if (refcount_release(&pr->pr_refcnt)) + _prison_free(pr); +} + +static inline uint32_t +prison_getip(struct ucred *cred) +{ + return (cred->cr_prison->pr_network.pr_ip); +} #endif /* _KERNEL */ #endif /* !_SYS_JAIL_H_ */ diff -urNP sys.orig/sys/jail2.h sys/sys/jail2.h --- sys.orig/sys/jail2.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2.h Sun May 14 18:39:07 2006 @@ -0,0 +1,105 @@ +#ifndef _SYS_JAIL2_H_ +#define _SYS_JAIL2_H_ + +struct jail_2 { + uint32_t version; /* vsersion always 2*/ + uint32_t ctx_id; /* prision id */ + uint32_t cmd; /* jail2 command */ + void *data; /* pointer to command data */ +}; + +struct jail2_stats_v1 { + uint32_t flags; + uint32_t taskcount; +}; + +#define JAIL2_STATS_VER 1 +struct jail2_stats { + uint32_t version; + uint32_t id; + struct jail2_stats_v1 v1; +}; + + +/* jail2 commands category */ +#define J_COMMON 0x1 +#define J_NETWORK 0x2 +#define J_RESOURCE 0x3 + +/* common staff */ +/* create context */ +#define J_CREATE 1 +/* destroy context */ +#define J_DESTROY 2 +/* allow execute programs in context */ +#define J_START 3 +/* destroy all programs and disallow execute programs in context */ +#define J_STOP 4 +/* migrate process to context */ +#define J_ENTER 5 +/* set flags for context */ +#define J_SETFLAGS 6 +/* get detaled statistic about context */ +#define J_STATS 7 + +/* contexts limits */ +#define J_SETMEMLIMIT 1 +/* int limit */ +#define J_SETPROCLIMIT 2 +/* int limit */ +#define J_SETDISKQUOTE 3 +/* int limit */ +#define J_SETVROOT 4 +/* set as vroot dev */ +#define J_SETUSEDSPACE 5 +/* int space in k */ +#define J_SETFILELIMIT 6 +/* int limit */ +#define J_SETRSSLIMIT 7 +/* int RSS limit */ 
+#define J_SETTCPSOCKETSLIMIT 8 +/* int TCP established sockets limit */ +#define J_SETUDPSOCKETSLIMIT 9 +/* int UDP established sockets limit */ +#define J_CPUHARDLIMIT 10 +/* int cpu time limit - max 1024 */ +#define J_CPUSOFTLIMIT 11 +/* int cpu time limit - max 1024 */ +#define J_INODEUSAGE 12 +/* int inode count used in context */ +#define J_INODELIMIT 13 +/* int inodes count limit for context */ + +/* network stuf */ +#define J_VNETDEV_CREATE 1 +#define J_VNETDEV_REMOVE 2 + +/* only not have task */ +#define J_ADDMASKIPV4 5 +#define J_RMIPV4 6 + +/* attach vnetdev to host netdev */ +#define J_VNETDEV_ATTACH 7 +/* detach vnetdev from host netdev */ +#define J_VNETDEV_DETACH 8 + +/* set vnetdev shaper speed */ +#define J_VNETDEV_SET_TXSPEED 10 +/* set specified option flags */ +#define J_VNETDEV_SET_FLAGS 11 +/* unset specified option flags */ +#define J_VNETDEV_UNSET_FLAGS 12 + +#define JAIL2_COMMAND(class, cmd) (((class)<<24) | (cmd)) +#define JAIL2_CLASS(fcmd) (((fcmd)>>24) & 0xFF) +#define JAIL2_CMD(fcmd) ((fcmd) & 0xFFF) + +#ifdef _KERNEL + +struct thread; +int jail2_common(struct thread *td, struct jail_2 *j); +int jail2_network(struct thread *td, struct jail_2 *j); +int jail2_resource(struct thread *td, struct jail_2 *j); + +#endif /* !_KERNEL */ +#endif /* !_SYS_JAIL2_H_ */ diff -urNP sys.orig/sys/jail2_disks.h sys/sys/jail2_disks.h --- sys.orig/sys/jail2_disks.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2_disks.h Sun May 14 12:06:46 2006 @@ -0,0 +1,60 @@ +#ifndef _SYS_JAIL_DISKS_H_ +#define _SYS_JAIL_DISKS_H_ + +/* + * Lock key: + * (a) allprison_mutex + * (p) locked by pr_mutex + * (c) set only during creation before the structure is shared, no mutex + * required to read + * (d) set only during destruction of jail, no mutex needed + * (n) not need locking or atomic operation + * (i) have internal locker + */ +struct jail_disks { + uint32_t pr_du; /* (n) diskusage in jail */ + uint32_t pr_inodes; /* (n) inode usage in jail */ + char pr_path[MAXPATHLEN]; 
/* (c) chroot path */
+	struct vnode	*pr_root;	/* (c) vnode to rdir */
+/* limits */
+	uint32_t	pr_l_du;	/* (n) limit diskusage in jail */
+	uint32_t	pr_l_inodes;	/* (n) limit inode usage in jail */
+};
+#define JAIL_DISKS_INIT {.pr_du = 0, \
+	.pr_inodes = 0, \
+	.pr_path = {0}, \
+	.pr_root = NULL, \
+	.pr_l_du = 0, \
+	.pr_l_inodes = 0 \
+	}
+
+#define JAIL_VROOT(prison) ((prison)->pr_disk.pr_root)
+
+#define JAIL_DISK_SET_SIZE(prison,size) (atomic_store_rel_int(&(prison)->pr_disk.pr_l_du, size))
+#define JAIL_DISK_GET_SIZE(prison) (atomic_load_acq_int(&(prison)->pr_disk.pr_l_du))
+
+#define JAIL_DISK_ALLOC(prison,size) (atomic_add_int(&(prison)->pr_disk.pr_du, size))
+#define JAIL_DISK_SET_USAGE(prison,size) (atomic_store_rel_int(&(prison)->pr_disk.pr_du, size))
+#define JAIL_DISK_GET_USAGE(prison) (atomic_load_acq_int(&(prison)->pr_disk.pr_du))
+/* 1 if `size' more may be used; usage >= limit must not wrap the unsigned difference. */
+#define JAIL_DISK_CAN_ALLOC(prison,size) \
+	(JAIL_DISK_GET_SIZE(prison) == 0 || /* a zero limit means no quota */ \
+	(JAIL_DISK_GET_USAGE(prison) < JAIL_DISK_GET_SIZE(prison) && \
+	(JAIL_DISK_GET_SIZE(prison)-JAIL_DISK_GET_USAGE(prison))>(size)))
+
+#define JAIL_INODE_SET_COUNT(prison,size) (atomic_store_rel_int(&(prison)->pr_disk.pr_l_inodes, size))
+#define JAIL_INODE_GET_COUNT(prison) (atomic_load_acq_int(&(prison)->pr_disk.pr_l_inodes))
+
+#define JAIL_INODE_ALLOC(prison) (atomic_inc_int(&(prison)->pr_disk.pr_inodes))
+#define JAIL_INODE_SET_USAGE(prison,size) (atomic_store_rel_int(&(prison)->pr_disk.pr_inodes, size))
+#define JAIL_INODE_GET_USAGE(prison) (atomic_load_acq_int(&(prison)->pr_disk.pr_inodes))
+
+#define JAIL_INODE_CAN_ALLOC(prison) \
+	(JAIL_INODE_GET_COUNT(prison) != 0 ?
\ + ((JAIL_INODE_GET_COUNT(prison)-JAIL_INODE_GET_USAGE(prison))>1): \ + (1)) + +extern int jail_setvroot(struct prison *pr, struct thread *td, char *root); +extern void jail_destroyvroot(struct prison *pr); + +#endif /* !_SYS_JAIL_DISKS_H_ */ diff -urNP sys.orig/sys/jail2_flags.h sys/sys/jail2_flags.h --- sys.orig/sys/jail2_flags.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2_flags.h Sun May 14 18:45:02 2006 @@ -0,0 +1,31 @@ +#ifndef _SYS_JAIL_FLAGS_H_ +#define _SYS_JAIL_FLAGS_H_ + +/* + * jail->pr_flags + * change jail flags must be protect with pr_mutex + */ +#define J_START_FL 0x0001 +#define J_PROCLIMIT_FL 0x0002 +#define J_SYSVIPC_ALLOW_FL 0x0004 + +#define JAIL_SET_FLAGS(prison,data) (atomic_store_rel_int(&(prison)->pr_flags, (data))) +#define JAIL_GET_FLAGS(prison) (atomic_load_acq_int(&(prison)->pr_flags)) + +/* set macros */ +#define JAIL_SET_STARTED(prison) (atomic_set_int(&(prison)->pr_flags, J_START_FL)) +#define JAIL_SET_PROCLIM(prison) (atomic_set_int(&(prison)->pr_flags, J_PROCLIMIT_FL)) +#define JAIL_SET_SYSVIPCALLOW(prison) (atomic_set_int(&(prison)->pr_flags, J_SYSVIPC_ALLOW_FL)) + +/* clear macros */ +#define JAIL_CLEAR_STARTED(prison) (atomic_clear_int(&(prison)->pr_flags, J_START_FL)) +#define JAIL_CLEAR_PROCLIM(prison) (atomic_clear_int(&(prison)->pr_flags, J_PROCLIMIT_FL)) +#define JAIL_CLEAR_SYSVIPCALLOW(prison) (atomic_clear_int(&(prison)->pr_flags, J_SYSVIPC_ALLOW_FL)) + +/* check macros */ +#define JAIL_STARTED(prison) ((atomic_load_acq_int(&(prison)->pr_flags) & J_START_FL) != 0) +#define JAIL_HAVE_PROCLIMIT(prison) ((atomic_load_acq_int(&(prison)->pr_flags) & J_PROCLIMIT_FL) != 0) +#define JAIL_SYSVIPC_ALLOW(prison) ((atomic_load_acq_int(&(prison)->pr_flags) & J_SYSVIPC_ALLOW_FL) != 0) + + +#endif /* !_SYS_JAIL_FLAGS_H_ */ diff -urNP sys.orig/sys/jail2_limits.h sys/sys/jail2_limits.h --- sys.orig/sys/jail2_limits.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2_limits.h Sun May 7 18:06:35 2006 @@ -0,0 +1,11 @@ +#ifndef _SYS_JAIL_LIMITS_H_ 
+#define _SYS_JAIL_LIMITS_H_ + +/* all operations _MUST_ be atomic */ +struct jail_limits { + uint32_t pr_du; /* diskusage in jail */ + uint32_t pr_mem; /* total memory usage in jail */ + uint32_t pr_rss; /* resident usage in jail */ +}; + +#endif /* !_SYS_JAIL_LIMITS_H_ */ diff -urNP sys.orig/sys/jail2_network.h sys/sys/jail2_network.h --- sys.orig/sys/jail2_network.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2_network.h Sun May 7 18:06:35 2006 @@ -0,0 +1,15 @@ +#ifndef _SYS_JAIL_NETWORK_H_ +#define _SYS_JAIL_NETWORK_H_ + +struct jail2_ipv4addr { + uint32_t addr; + uint32_t mask; +}; + +#ifdef _KERNEL +struct jail_network { + uint32_t pr_ip; /* (c) ip addr host */ +}; + +#endif /* !_KERNEL */ +#endif /* !_SYS_JAIL_NETWORK_H_ */ diff -urNP sys.orig/sys/jail2_task.h sys/sys/jail2_task.h --- sys.orig/sys/jail2_task.h Thu Jan 1 03:00:00 1970 +++ sys/sys/jail2_task.h Sun May 7 18:06:35 2006 @@ -0,0 +1,53 @@ +#ifndef _SYS_JAIL_TASKS_H_ +#define _SYS_JAIL_TASKS_H_ + +#include +#include +#include + +#include + +/* + * (p) under allproclock or atomic + * (a) atomic + */ +struct jail_tasks { + uint32_t jt_count; /* (p) total process count */ + uint32_t jt_limit; /* (p) limits process in jail */ +/* */ + uint32_t jt_hcpu_limit; /* (a) hard cpu usage limit */ + uint32_t jt_qcpu_limit; /* (a) quarantee cpu usage limit */ + uint32_t jt_cpu_used; /* (a) total cpu usage in context */ + uint32_t jt_prio; /* (a) priority beetwen contexts */ + uint64_t jt_next_timer; /* (a) time end of limit */ +}; + +#define JAIL_TASKS_INIT {.jt_count = 0, \ + .jt_limit = 0, \ + .jt_hcpu_limit = 0, \ + .jt_qcpu_limit = 0, \ + .jt_cpu_used = 0, \ + .jt_prio = 0, \ + .jt_next_timer = 0 \ + } + +#define JAIL_NPROCS(prison) ((prison)->pr_tasks.jt_count) +#define JAIL_MAXPROC(prison) ((prison)->pr_tasks.jt_limit) + +#define JAIL_CAN_FORK(prison) (JAIL_STARTED(prison) && \ + (!JAIL_HAVE_PROCLIMIT(prison) || \ + (JAIL_HAVE_PROCLIMIT(prison) && (JAIL_NPROCS(prison) <= JAIL_MAXPROC(prison))))) + +static 
inline void +jail_task_add(struct jail_tasks *jt, struct proc *p) +{ + jt->jt_count++; /* count one more process in jt; plain (non-atomic) increment, p is not used */ +} +/* Drop one process from jt's count; p is not used. NOTE(review): no guard against jt_count wrapping below zero - confirm callers pair add/del. */ +static inline void +jail_task_del(struct jail_tasks *jt, struct proc *p) +{ + jt->jt_count--; +} + +#endif /* !_SYS_JAIL_TASKS_H_ */ diff -urNP sys.orig/sys/resourcevar.h sys/sys/resourcevar.h --- sys.orig/sys/resourcevar.h Sat Dec 17 00:08:32 2005 +++ sys/sys/resourcevar.h Sat May 13 22:40:54 2006 @@ -103,6 +103,7 @@ struct proc; struct rusage_ext; struct thread; +struct prison; void addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks); void addupc_task(struct thread *td, uintfptr_t pc, u_int ticks); @@ -125,9 +126,11 @@ struct rusage_ext *rux2); int suswintr(void *base, int word); struct uidinfo - *uifind(uid_t uid); + *uifind(struct prison *jail, uid_t uid); void uifree(struct uidinfo *uip); void uihashinit(void); +void pr_uihashinit(struct prison *jail); +void pr_uihashdestroy(struct prison *jail); void uihold(struct uidinfo *uip); #endif /* _KERNEL */ diff -urNP sys.orig/ufs/ufs/dinode.h sys/ufs/ufs/dinode.h --- sys.orig/ufs/ufs/dinode.h Fri Jan 7 04:29:26 2005 +++ sys/ufs/ufs/dinode.h Sun May 7 18:06:35 2006 @@ -145,7 +145,9 @@ ufs2_daddr_t di_extb[NXADDR];/* 96: External attributes block. */ ufs2_daddr_t di_db[NDADDR]; /* 112: Direct disk blocks. */ ufs2_daddr_t di_ib[NIADDR]; /* 208: Indirect disk blocks. */ - int64_t di_spare[3]; /* 232: Reserved; currently unused */ + uint32_t di_prid; /* 232: Prison(jail) id */ + uint32_t di_spare32; /* 236: reserved 32 bit */ + int64_t di_spare[2]; /* 240: Reserved; currently unused */ }; /* @@ -183,7 +185,8 @@ int32_t di_gen; /* 108: Generation number. */ u_int32_t di_uid; /* 112: File owner. */ u_int32_t di_gid; /* 116: File group. 
*/ - int32_t di_spare[2]; /* 120: Reserved; currently unused */ + uint32_t di_prid; /* 120: Prison(jail) id */ + int32_t di_spare[1]; /* 124: Reserved; currently unused */ }; #define di_ogid di_u.oldids[1] #define di_ouid di_u.oldids[0] diff -urNP sys.orig/vm/vm_object.c sys/vm/vm_object.c --- sys.orig/vm/vm_object.c Fri Mar 3 00:13:27 2006 +++ sys/vm/vm_object.c Sun May 7 18:06:35 2006 @@ -1978,7 +1978,7 @@ struct proc *p; /* sx_slock(&allproc_lock); */ - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */) continue; if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0)) { diff -urNP sys.orig/vm/vm_pageout.c sys/vm/vm_pageout.c --- sys.orig/vm/vm_pageout.c Fri Feb 17 23:02:39 2006 +++ sys/vm/vm_pageout.c Sun May 7 18:06:35 2006 @@ -1568,7 +1568,7 @@ * process is swapped out -- deactivate pages */ sx_slock(&allproc_lock); - LIST_FOREACH(p, &allproc, p_list) { + FOREACH_PROC_IN_SYSTEM(p) { vm_pindex_t limit, size; /*