[RFC PATCH 4/7] x86/sci: hook up isolated system call entry and exit
Mike Rapoport
rppt at linux.ibm.com
Thu Apr 25 21:45:51 UTC 2019
When a system call is required to run in an isolated context, the CR3 will
be switched to the SCI page table, and a per-cpu variable will contain an
offset from the original CR3. This offset is used to switch back to the
full kernel context when a trap occurs during an isolated system call.
Signed-off-by: Mike Rapoport <rppt at linux.ibm.com>
---
arch/x86/entry/common.c | 61 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/kernel/process_64.c | 5 ++++
kernel/exit.c | 3 +++
3 files changed, 69 insertions(+)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7bc105f..8f2a6fd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -25,12 +25,14 @@
#include <linux/uprobes.h>
#include <linux/livepatch.h>
#include <linux/syscalls.h>
+#include <linux/sci.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
#include <linux/uaccess.h>
#include <asm/cpufeature.h>
+#include <asm/tlbflush.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -269,6 +271,50 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
}
#ifdef CONFIG_X86_64
+
+#ifdef CONFIG_SYSCALL_ISOLATION
+static inline bool sci_required(unsigned long nr) /* Tells the syscall entry path whether syscall nr must run in the isolated (SCI) context. */
+{
+ return false; /* as written here, no syscall is ever selected for isolation; nr is ignored */
+}
+
+static inline unsigned long sci_syscall_enter(unsigned long nr) /* Switch CR3 to current's SCI page table; returns the CR3 value that was active before the switch. nr is not used in the body. */
+{
+ unsigned long sci_cr3, kernel_cr3;
+ unsigned long asid; /* low (sub-page) bits of the entry CR3 -- presumably the PCID/ASID field; confirm against build_cr3() */
+
+ kernel_cr3 = __read_cr3(); /* remember the CR3 in use on entry */
+ asid = kernel_cr3 & ~PAGE_MASK;
+
+ sci_cr3 = build_cr3(current->sci->pgd, 0) & PAGE_MASK; /* keep only the page-aligned part; the low bits are filled in on the next line */
+ sci_cr3 |= (asid | (1 << X86_CR3_SCI_PCID_BIT)); /* carry over the entry low bits and set the SCI PCID bit */
+
+ current->in_isolated_syscall = 1;
+ current->sci->cr3_offset = kernel_cr3 - sci_cr3; /* offset from the SCI CR3 back to the entry CR3, used to return to the full kernel context when a trap occurs during the isolated syscall */
+
+ this_cpu_write(cpu_sci.sci_syscall, 1); /* mirror the per-task state into this CPU's cpu_sci */
+ this_cpu_write(cpu_sci.sci_cr3_offset, current->sci->cr3_offset);
+
+ write_cr3(sci_cr3); /* from here on the SCI page table is the active one */
+
+ return kernel_cr3; /* do_syscall_64() hands this value to sci_syscall_exit() */
+}
+
+static inline void sci_syscall_exit(unsigned long cr3) /* Undo sci_syscall_enter(): cr3 is the value it returned, or 0 when the syscall was not isolated. */
+{
+ if (cr3) { /* 0 means sci_syscall_enter() was not called for this syscall, so there is nothing to undo */
+ write_cr3(cr3); /* restore the CR3 saved on entry */
+ current->in_isolated_syscall = 0;
+ this_cpu_write(cpu_sci.sci_syscall, 0);
+ sci_clear_data(); /* defined outside this patch; presumably drops per-syscall SCI state -- confirm */
+ }
+}
+#else
+static inline bool sci_required(unsigned long nr) { return false; } /* !CONFIG_SYSCALL_ISOLATION stub: never request isolation */
+static inline unsigned long sci_syscall_enter(unsigned long nr) { return 0; } /* stub: returns 0, the "not isolated" value */
+static inline void sci_syscall_exit(unsigned long cr3) {} /* stub: nothing to restore */
+#endif
+
__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
struct thread_info *ti;
@@ -286,10 +332,25 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
*/
nr &= __SYSCALL_MASK;
if (likely(nr < NR_syscalls)) {
+ unsigned long sci_cr3 = 0;
+
nr = array_index_nospec(nr, NR_syscalls);
+
+ if (sci_required(nr)) {
+ int err = sci_init(current);
+
+ if (err) {
+ regs->ax = err;
+ goto err_return_from_syscall;
+ }
+ sci_cr3 = sci_syscall_enter(nr);
+ }
+
regs->ax = sys_call_table[nr](regs);
+ sci_syscall_exit(sci_cr3);
}
+err_return_from_syscall:
syscall_return_slowpath(regs);
}
#endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a62f4a..b8aa624 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
#include <asm/resctrl_sched.h>
#include <asm/unistd.h>
#include <asm/fsgsbase.h>
+#include <asm/sci.h>
+
#ifdef CONFIG_IA32_EMULATION
/* Not included via unistd.h */
#include <asm/unistd_32_ia32.h>
@@ -581,6 +583,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
switch_to_extra(prev_p, next_p);
+ /* update syscall isolation per-cpu data */
+ sci_switch_to(next_p);
+
#ifdef CONFIG_XEN_PV
/*
* On Xen PV, IOPL bits in pt_regs->flags have no effect, and
diff --git a/kernel/exit.c b/kernel/exit.c
index 2639a30..8e81353 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -62,6 +62,7 @@
#include <linux/random.h>
#include <linux/rcuwait.h>
#include <linux/compat.h>
+#include <linux/sci.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -859,6 +860,8 @@ void __noreturn do_exit(long code)
tsk->exit_code = code;
taskstats_exit(tsk, group_dead);
+ sci_exit(tsk);
+
exit_mm();
if (group_dead)
--
2.7.4
More information about the Linux-security-module-archive
mailing list