Commit 6d93a197 authored by Linus Torvalds

Merge tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux

Pull time namespace updates from Christian Brauner:
 "When time namespaces were introduced we missed to virtualize the
  'btime' field in /proc/stat. This confuses tasks which are in another
  time namespace with a virtualized boottime which is common in some
  container workloads. This contains Michael's series to fix 'btime'
  which Thomas asked me to take through my tree.

  To fix 'btime' virtualization we simply subtract the offset of the
  time namespace's boottime from btime before printing the stats. Note
  that since start_boottime of processes are seconds since boottime and
  the boottime stamp is now shifted according to the time namespace's
  offset, the offset of the time namespace also needs to be applied
  before the process stats are given to userspace. This prevents
  processes shown by tools such as 'ps' from appearing as time travelers
  in the corresponding time namespace.

  Selftests are included to verify that btime virtualization in
  /proc/stat works as expected"

* tag 'time-namespace-v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  namespace: make timens_on_fork() return nothing
  selftests/timens: added selftest for /proc/stat btime
  fs/proc: apply the time namespace offset to /proc/stat btime
  timens: additional helper functions for boottime offset handling
parents 0ca2ce81 5c62634f
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include <linux/types.h> #include <linux/types.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/tty.h> #include <linux/tty.h>
...@@ -533,8 +534,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, ...@@ -533,8 +534,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
priority = task_prio(task); priority = task_prio(task);
nice = task_nice(task); nice = task_nice(task);
/* convert nsec -> ticks */ /* apply timens offset for boottime and convert nsec -> ticks */
start_time = nsec_to_clock_t(task->start_boottime); start_time =
nsec_to_clock_t(timens_add_boottime_ns(task->start_boottime));
seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns)); seq_put_decimal_ull(m, "", pid_nr_ns(pid, ns));
seq_puts(m, " ("); seq_puts(m, " (");
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/seq_file.h> #include <linux/seq_file.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/time_namespace.h>
#include <linux/irqnr.h> #include <linux/irqnr.h>
#include <linux/sched/cputime.h> #include <linux/sched/cputime.h>
#include <linux/tick.h> #include <linux/tick.h>
...@@ -118,6 +119,8 @@ static int show_stat(struct seq_file *p, void *v) ...@@ -118,6 +119,8 @@ static int show_stat(struct seq_file *p, void *v)
irq = softirq = steal = 0; irq = softirq = steal = 0;
guest = guest_nice = 0; guest = guest_nice = 0;
getboottime64(&boottime); getboottime64(&boottime);
/* shift boot timestamp according to the timens offset */
timens_sub_boottime(&boottime);
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
struct kernel_cpustat kcpustat; struct kernel_cpustat kcpustat;
......
...@@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags, ...@@ -45,7 +45,7 @@ struct time_namespace *copy_time_ns(unsigned long flags,
struct user_namespace *user_ns, struct user_namespace *user_ns,
struct time_namespace *old_ns); struct time_namespace *old_ns);
void free_time_ns(struct kref *kref); void free_time_ns(struct kref *kref);
int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk); void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct vdso_data *arch_get_vdso_data(void *vvar_page); struct vdso_data *arch_get_vdso_data(void *vvar_page);
static inline void put_time_ns(struct time_namespace *ns) static inline void put_time_ns(struct time_namespace *ns)
...@@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts) ...@@ -77,6 +77,20 @@ static inline void timens_add_boottime(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->boottime); *ts = timespec64_add(*ts, ns_offsets->boottime);
} }
/*
 * Shift a nanosecond value by the boottime offset of the calling task's
 * time namespace.
 *
 * @nsec: value in nanoseconds to be shifted.
 *
 * Returns @nsec plus the namespace's boottime offset, with the offset
 * converted from a timespec64 to nanoseconds first.
 */
static inline u64 timens_add_boottime_ns(u64 nsec)
{
	struct timens_offsets *offs = &current->nsproxy->time_ns->offsets;

	/* offsets->boottime is a timespec64; fold it in as nanoseconds */
	nsec += timespec64_to_ns(&offs->boottime);
	return nsec;
}
static inline void timens_sub_boottime(struct timespec64 *ts)
{
struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;
*ts = timespec64_sub(*ts, ns_offsets->boottime);
}
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim, ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *offsets); struct timens_offsets *offsets);
...@@ -122,14 +136,22 @@ struct time_namespace *copy_time_ns(unsigned long flags, ...@@ -122,14 +136,22 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return old_ns; return old_ns;
} }
static inline int timens_on_fork(struct nsproxy *nsproxy, static inline void timens_on_fork(struct nsproxy *nsproxy,
struct task_struct *tsk) struct task_struct *tsk)
{ {
return 0; return;
} }
static inline void timens_add_monotonic(struct timespec64 *ts) { } static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { } static inline void timens_add_boottime(struct timespec64 *ts) { }
/*
 * Pass-through variant: returns @nsec unchanged, applying no offset.
 * NOTE(review): presumably the definition used when time namespace support
 * is compiled out -- confirm against the enclosing preprocessor guard.
 */
static inline u64 timens_add_boottime_ns(u64 nsec)
{
return nsec;
}
/* No-op variant: leaves *ts untouched (presumably the build without time namespaces -- confirm). */
static inline void timens_sub_boottime(struct timespec64 *ts) { }
static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim) static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
{ {
return tim; return tim;
......
...@@ -153,7 +153,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) ...@@ -153,7 +153,6 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy; struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns; struct nsproxy *new_ns;
int ret;
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWNET |
...@@ -180,11 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk) ...@@ -180,11 +179,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
if (IS_ERR(new_ns)) if (IS_ERR(new_ns))
return PTR_ERR(new_ns); return PTR_ERR(new_ns);
ret = timens_on_fork(new_ns, tsk); timens_on_fork(new_ns, tsk);
if (ret) {
free_nsproxy(new_ns);
return ret;
}
tsk->nsproxy = new_ns; tsk->nsproxy = new_ns;
return 0; return 0;
......
...@@ -308,22 +308,20 @@ static int timens_install(struct nsset *nsset, struct ns_common *new) ...@@ -308,22 +308,20 @@ static int timens_install(struct nsset *nsset, struct ns_common *new)
return 0; return 0;
} }
int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk) void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk)
{ {
struct ns_common *nsc = &nsproxy->time_ns_for_children->ns; struct ns_common *nsc = &nsproxy->time_ns_for_children->ns;
struct time_namespace *ns = to_time_ns(nsc); struct time_namespace *ns = to_time_ns(nsc);
/* create_new_namespaces() already incremented the ref counter */ /* create_new_namespaces() already incremented the ref counter */
if (nsproxy->time_ns == nsproxy->time_ns_for_children) if (nsproxy->time_ns == nsproxy->time_ns_for_children)
return 0; return;
get_time_ns(ns); get_time_ns(ns);
put_time_ns(nsproxy->time_ns); put_time_ns(nsproxy->time_ns);
nsproxy->time_ns = ns; nsproxy->time_ns = ns;
timens_commit(tsk, ns); timens_commit(tsk, ns);
return 0;
} }
static struct user_namespace *timens_owner(struct ns_common *ns) static struct user_namespace *timens_owner(struct ns_common *ns)
......
...@@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime) ...@@ -93,6 +93,33 @@ static int read_proc_uptime(struct timespec *uptime)
return 0; return 0;
} }
/*
 * Read the "btime" value from /proc/stat.
 *
 * @boottime_sec: out parameter; receives the number parsed from the first
 *                line matching "btime %llu". Left untouched on failure.
 *
 * Returns 0 on success. Returns -1 if /proc/stat cannot be opened or holds
 * no parsable "btime" line, and a negative errno value (or -1 if errno is
 * unset) if reading the file fails.
 */
static int read_proc_stat_btime(unsigned long long *boottime_sec)
{
	FILE *proc;
	char line_buf[2048];
	int err;

	proc = fopen("/proc/stat", "r");
	if (proc == NULL) {
		pr_perror("Unable to open /proc/stat");
		return -1;
	}

	while (fgets(line_buf, sizeof(line_buf), proc)) {
		if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
			continue;
		fclose(proc);
		return 0;
	}

	/*
	 * fgets() returns NULL both at end-of-file and on a read error, and
	 * end-of-file does not set errno. Ask the stream which one happened
	 * instead of trusting a possibly stale errno.
	 */
	if (ferror(proc)) {
		/* Save errno before later calls get a chance to overwrite it. */
		err = errno;
		pr_perror("fgets");
		fclose(proc);
		return err ? -err : -1;
	}

	pr_err("failed to parse /proc/stat");
	fclose(proc);
	return -1;
}
static int check_uptime(void) static int check_uptime(void)
{ {
struct timespec uptime_new, uptime_old; struct timespec uptime_new, uptime_old;
...@@ -123,18 +150,47 @@ static int check_uptime(void) ...@@ -123,18 +150,47 @@ static int check_uptime(void)
return 0; return 0;
} }
/*
 * Compare the /proc/stat btime seen after switching to parent_ns with the
 * one seen after switching to child_ns.
 *
 * The test passes when the second reading equals the first reading minus
 * TEN_DAYS_IN_SEC (presumably the boottime offset configured for the child
 * namespace -- confirm against init_namespaces()).
 *
 * Returns 0 on success and a non-zero value on any failure.
 */
static int check_stat_btime(void)
{
	unsigned long long parent_btime, child_btime, want_btime;

	/* First reading: taken after switching to parent_ns. */
	if (switch_ns(parent_ns))
		return pr_err("switch_ns(%d)", parent_ns);
	if (read_proc_stat_btime(&parent_btime))
		return 1;

	/* Second reading: taken after switching to child_ns. */
	if (switch_ns(child_ns))
		return pr_err("switch_ns(%d)", child_ns);
	if (read_proc_stat_btime(&child_btime))
		return 1;

	want_btime = parent_btime - TEN_DAYS_IN_SEC;
	if (child_btime == want_btime) {
		ksft_test_result_pass("Passed for /proc/stat btime\n");
		return 0;
	}

	pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
		parent_btime, child_btime, want_btime);
	return 1;
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
int ret = 0; int ret = 0;
nscheck(); nscheck();
ksft_set_plan(1); ksft_set_plan(2);
if (init_namespaces()) if (init_namespaces())
return 1; return 1;
ret |= check_uptime(); ret |= check_uptime();
ret |= check_stat_btime();
if (ret) if (ret)
ksft_exit_fail(); ksft_exit_fail();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment