[RFC PATCH 4/7] x86/sci: hook up isolated system call entry and exit

Mike Rapoport rppt at linux.ibm.com
Thu Apr 25 21:45:51 UTC 2019


When a system call is required to run in an isolated context, the CR3 will
be switched to the SCI page table and a per-cpu variable will contain an
offset from the original CR3. This offset is used to switch back to the
full kernel context when a trap occurs during an isolated system call.

Signed-off-by: Mike Rapoport <rppt at linux.ibm.com>
---
 arch/x86/entry/common.c      | 61 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/process_64.c |  5 ++++
 kernel/exit.c                |  3 +++
 3 files changed, 69 insertions(+)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7bc105f..8f2a6fd 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -25,12 +25,14 @@
 #include <linux/uprobes.h>
 #include <linux/livepatch.h>
 #include <linux/syscalls.h>
+#include <linux/sci.h>
 
 #include <asm/desc.h>
 #include <asm/traps.h>
 #include <asm/vdso.h>
 #include <linux/uaccess.h>
 #include <asm/cpufeature.h>
+#include <asm/tlbflush.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -269,6 +271,50 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_64
+
+#ifdef CONFIG_SYSCALL_ISOLATION
+static inline bool sci_required(unsigned long nr)	/* should syscall 'nr' run in the isolated (SCI) context? */
+{
+	return false;	/* always false and 'nr' is ignored, so the isolated path in do_syscall_64() is never taken as written */
+}
+
+static inline unsigned long sci_syscall_enter(unsigned long nr)	/* switch CR3 to current's SCI page table; returns the CR3 that was active on entry; 'nr' is unused */
+{
+	unsigned long sci_cr3, kernel_cr3;
+	unsigned long asid;
+
+	kernel_cr3 = __read_cr3();	/* CR3 in effect on entry; this is the value returned to the caller */
+	asid = kernel_cr3 & ~PAGE_MASK;	/* bits of the current CR3 that lie outside PAGE_MASK */
+
+	sci_cr3 = build_cr3(current->sci->pgd, 0) & PAGE_MASK;	/* build_cr3() result for the task's SCI pgd, restricted to PAGE_MASK */
+	sci_cr3 |= (asid | (1 << X86_CR3_SCI_PCID_BIT));	/* carry over the saved low bits and set bit X86_CR3_SCI_PCID_BIT */
+
+	current->in_isolated_syscall = 1;	/* per-task marker, set before the CR3 write below */
+	current->sci->cr3_offset = kernel_cr3 - sci_cr3;	/* difference between the original CR3 and the SCI CR3 */
+
+	this_cpu_write(cpu_sci.sci_syscall, 1);	/* mirror the task state into this CPU's cpu_sci */
+	this_cpu_write(cpu_sci.sci_cr3_offset, current->sci->cr3_offset);
+
+	write_cr3(sci_cr3);	/* load the SCI CR3 only after all bookkeeping above is in place */
+
+	return kernel_cr3;	/* do_syscall_64() passes this to sci_syscall_exit() */
+}
+
+static inline void sci_syscall_exit(unsigned long cr3)	/* undo sci_syscall_enter(); 'cr3' is its return value, 0 means there is nothing to undo */
+{
+	if (cr3) {
+		write_cr3(cr3);	/* restore the CR3 saved on entry before clearing the flags */
+		current->in_isolated_syscall = 0;
+		this_cpu_write(cpu_sci.sci_syscall, 0);
+		sci_clear_data();	/* NOTE(review): defined outside this patch; presumably discards per-syscall SCI state - confirm */
+	}
+}
+#else
+static inline bool sci_required(unsigned long nr) { return false; }	/* !CONFIG_SYSCALL_ISOLATION: no syscall is isolated */
+static inline unsigned long sci_syscall_enter(unsigned long nr) { return 0; }	/* no CR3 switch; 0 means nothing to restore */
+static inline void sci_syscall_exit(unsigned long cr3) {}	/* nothing to restore */
+#endif
+
 __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
 	struct thread_info *ti;
@@ -286,10 +332,25 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 	 */
 	nr &= __SYSCALL_MASK;
 	if (likely(nr < NR_syscalls)) {
+		unsigned long sci_cr3 = 0;
+
 		nr = array_index_nospec(nr, NR_syscalls);
+
+		if (sci_required(nr)) {
+			int err = sci_init(current);
+
+			if (err) {
+				regs->ax = err;
+				goto err_return_from_syscall;
+			}
+			sci_cr3 = sci_syscall_enter(nr);
+		}
+
 		regs->ax = sys_call_table[nr](regs);
+		sci_syscall_exit(sci_cr3);
 	}
 
+err_return_from_syscall:
 	syscall_return_slowpath(regs);
 }
 #endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a62f4a..b8aa624 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,6 +55,8 @@
 #include <asm/resctrl_sched.h>
 #include <asm/unistd.h>
 #include <asm/fsgsbase.h>
+#include <asm/sci.h>
+
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
 #include <asm/unistd_32_ia32.h>
@@ -581,6 +583,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	switch_to_extra(prev_p, next_p);
 
+	/* update syscall isolation per-cpu data */
+	sci_switch_to(next_p);
+
 #ifdef CONFIG_XEN_PV
 	/*
 	 * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
diff --git a/kernel/exit.c b/kernel/exit.c
index 2639a30..8e81353 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -62,6 +62,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/sci.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -859,6 +860,8 @@ void __noreturn do_exit(long code)
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
+	sci_exit(tsk);
+
 	exit_mm();
 
 	if (group_dead)
-- 
2.7.4



More information about the Linux-security-module-archive mailing list