Kernel Self Protection Project/Recommended Settings

From Linux Kernel Security Subsystem
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Sometimes people ask the Kernel Self Protection Project what a secure set of build CONFIGs and runtime settings are. This is a brain-dump of the various options for a particularly paranoid system.

Another place to find recommended kernel hardening settings is via the "kernel-hardening-checker" tool maintained by Alexander Popov.


CONFIGs

# Report BUG() conditions and kill the offending process.
CONFIG_BUG=y

# Make sure kernel page tables have safe permissions.
CONFIG_DEBUG_KERNEL=y (prior to v4.11, needed to select CONFIG_DEBUG_RODATA below)
CONFIG_DEBUG_RODATA=y (prior to v4.11)
CONFIG_STRICT_KERNEL_RWX=y (since v4.11)

# Report any dangerous memory permissions (not available on all archs).
CONFIG_DEBUG_WX=y

# Use -fstack-protector-strong (gcc 4.9+) for best stack canary coverage.
# Prior to v4.18, these are:
#  CONFIG_CC_STACKPROTECTOR=y
#  CONFIG_CC_STACKPROTECTOR_STRONG=y
CONFIG_STACKPROTECTOR=y
CONFIG_STACKPROTECTOR_STRONG=y

# Do not allow direct physical memory access (but if you must have it, at least enable STRICT mode...)
# CONFIG_DEVMEM is not set
CONFIG_STRICT_DEVMEM=y
CONFIG_IO_STRICT_DEVMEM=y

# Provides some protections against SYN flooding.
CONFIG_SYN_COOKIES=y

# Perform additional validation of various commonly targeted structures.
CONFIG_DEBUG_CREDENTIALS=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_VIRTUAL=y
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_SCHED_STACK_END_CHECK=y

# Provide userspace with seccomp BPF API for syscall attack surface reduction.
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y

# Make sure line disciplines can't be autoloaded (since v5.1).
# CONFIG_LDISC_AUTOLOAD is not set

# Provide userspace with ptrace ancestry protections.
# Make sure that "yama" is also present in the "CONFIG_LSM=yama,..." list.
CONFIG_SECURITY=y
CONFIG_SECURITY_YAMA=y

# Provide userspace with Landlock MAC interface.
# Make sure that "landlock" is also present in the "CONFIG_LSM=landlock,..." list.
CONFIG_SECURITY_LANDLOCK=y

# Make sure SELinux cannot be disabled trivially.
# CONFIG_SECURITY_SELINUX_BOOTPARAM is not set
# CONFIG_SECURITY_SELINUX_DEVELOP is not set
# CONFIG_SECURITY_WRITABLE_HOOKS is not set

# Enable "lockdown" LSM for bright line between the root user and kernel memory.
CONFIG_SECURITY_LOCKDOWN_LSM=y
CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY=y

# Perform usercopy bounds checking. (And disable fallback to gain full whitelist enforcement.)
CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set

# Randomize allocator freelists, harden metadata.
CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_SLAB_FREELIST_HARDENED=y

# Allow for randomization of high-order page allocation freelist. Must be enabled with
# the "page_alloc.shuffle=1" command line below).
CONFIG_SHUFFLE_PAGE_ALLOCATOR=y

# Allow allocator validation checking to be enabled (see "slub_debug=P" below).
CONFIG_SLUB_DEBUG=y

# Wipe higher-level memory allocations when they are freed (needs "page_poison=1" command line below).
# (If you can afford even more performance penalty, leave CONFIG_PAGE_POISONING_NO_SANITY=n)
CONFIG_PAGE_POISONING=y
CONFIG_PAGE_POISONING_NO_SANITY=y
CONFIG_PAGE_POISONING_ZERO=y

# Wipe slab and page allocations (since v5.3)
# Instead of "slub_debug=P" and "page_poison=1", a single place can control memory allocation wiping now.
# The init_on_free is only needed if there is concern about minimizing stale data lifetime.
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
CONFIG_INIT_ON_FREE_DEFAULT_ON=y

# Initialize all stack variables on function entry. (Clang and GCC 12+ builds only. For earlier GCC, see CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y below)
CONFIG_INIT_STACK_ALL_ZERO=y

# Adds guard pages to kernel stacks (not all architectures support this yet).
CONFIG_VMAP_STACK=y

# Perform extensive checks on reference counting.
CONFIG_REFCOUNT_FULL=y

# Check for memory copies that might overflow a structure in str*() and mem*() functions both at build-time and run-time.
CONFIG_FORTIFY_SOURCE=y

# Avoid kernel memory address exposures via dmesg (sets sysctl kernel.dmesg_restrict initial value to 1)
CONFIG_SECURITY_DMESG_RESTRICT=y

# Enable trapping bounds checking of array indexes (since v5.11). All the other UBSAN checks should be disabled.
CONFIG_UBSAN=y
CONFIG_UBSAN_TRAP=y
CONFIG_UBSAN_BOUNDS=y
CONFIG_UBSAN_SANITIZE_ALL=y
# CONFIG_UBSAN_SHIFT is not set
# CONFIG_UBSAN_DIV_ZERO is not set
# CONFIG_UBSAN_UNREACHABLE is not set
# CONFIG_UBSAN_BOOL is not set
# CONFIG_UBSAN_ENUM is not set
# CONFIG_UBSAN_ALIGNMENT is not set
# This is only available on Clang builds, and is likely already enabled if CONFIG_UBSAN_BOUNDS=y is set:
CONFIG_UBSAN_LOCAL_BOUNDS=y

# Enable sampling-based overflow detection (since v5.12). This is similar to KASAN coverage, but with almost zero runtime overhead.
CONFIG_KFENCE=y

# Randomize kernel stack offset on syscall entry (since v5.13).
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y

# Do not ignore compile-time warnings (since v5.15)
CONFIG_WERROR=y

# Disable DMA between EFI hand-off and the kernel's IOMMU setup.
CONFIG_EFI_DISABLE_PCI_DMA=y

# Force IOMMU TLB invalidation so devices will never be able to access stale data contents (or set "iommu.passthrough=0 iommu.strict=1" at boot)
CONFIG_IOMMU_SUPPORT=y
CONFIG_IOMMU_DEFAULT_DMA_STRICT=y

# Enable feeding RNG entropy from TPM, if available.
CONFIG_HW_RANDOM_TPM=y

# Get as much entropy as possible from external sources. The Chacha mixer isn't vulnerable to injected entropy, so even
# malicious sources should not cause problems.
CONFIG_RANDOM_TRUST_BOOTLOADER=y
CONFIG_RANDOM_TRUST_CPU=y

# Make scheduler aware of SMT Cores. Program needs to opt-in to using this feature with prctl(PR_SCHED_CORE).
CONFIG_SCHED_CORE=y

# Wipe all caller-used registers on exit from the function (reduces available ROP gadgets and
# minimizes stale data in registers). (Since v5.15)
CONFIG_ZERO_CALL_USED_REGS=y

# Wipe RAM at reboot via EFI.
# For more details, see:
# https://trustedcomputinggroup.org/resource/pc-client-work-group-platform-reset-attack-mitigation-specification/
# https://bugzilla.redhat.com/show_bug.cgi?id=1532058
CONFIG_RESET_ATTACK_MITIGATION=y

# This needs userspace support, and will break "regular" distros. See: https://github.com/tych0/huldufolk
CONFIG_STATIC_USERMODEHELPER=y

# Dangerous; enabling this allows direct physical memory writing.
# CONFIG_ACPI_CUSTOM_METHOD is not set

# Dangerous; enabling this disables brk ASLR.
# CONFIG_COMPAT_BRK is not set

# Dangerous; enabling this allows direct kernel memory writing.
# CONFIG_DEVKMEM is not set

# Dangerous; exposes kernel text image layout.
# CONFIG_PROC_KCORE is not set

# Dangerous; enabling this disables VDSO ASLR.
# CONFIG_COMPAT_VDSO is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_KEXEC is not set

# Dangerous; enabling this allows replacement of running kernel.
# CONFIG_HIBERNATION is not set

# Prior to v4.1, assists heap memory attacks; best to keep interface disabled.
# CONFIG_INET_DIAG is not set

# Easily confused by misconfigured userspace, keep off.
# CONFIG_BINFMT_MISC is not set

# Use the modern PTY interface (devpts) only.
# CONFIG_LEGACY_PTYS is not set

# Block TTY stuffing attacks (this will break screen readers, see "dev.tty.legacy_tiocsti" sysctl below).
# CONFIG_LEGACY_TIOCSTI is not set

# If SELinux can be disabled at runtime, the LSM structures cannot be read-only; keep off.
# CONFIG_SECURITY_SELINUX_DISABLE is not set

# Reboot devices immediately if kernel experiences an Oops.
CONFIG_PANIC_ON_OOPS=y
CONFIG_PANIC_TIMEOUT=-1

# Keep root from altering kernel memory via loadable modules.
# CONFIG_MODULES is not set

# But if CONFIG_MODULE=y is needed, at least they must be signed with a per-build key.
CONFIG_DEBUG_SET_MODULE_RONX=y (prior to v4.11)
CONFIG_STRICT_MODULE_RWX=y (since v4.11)
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_FORCE=y
CONFIG_MODULE_SIG_ALL=y
CONFIG_MODULE_SIG_SHA512=y
CONFIG_MODULE_SIG_HASH="sha512"
CONFIG_MODULE_SIG_KEY="certs/signing_key.pem"

GCC plugins

# Enable GCC Plugins
CONFIG_GCC_PLUGINS=y

# Gather additional entropy at boot time for systems that may not have appropriate entropy sources.
CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y

# Force all structures to be initialized before they are passed to other functions.
# When building with GCC:
CONFIG_GCC_PLUGIN_STRUCTLEAK=y
CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL=y

# Wipe stack contents on syscall exit (reduces stale data lifetime in stack)
CONFIG_GCC_PLUGIN_STACKLEAK=y
# CONFIG_STACKLEAK_METRICS is not set
# CONFIG_STACKLEAK_RUNTIME_DISABLE is not set

# Randomize the layout of system structures. This may have dramatic performance impact, so
# use with caution or also use CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE=y
CONFIG_GCC_PLUGIN_RANDSTRUCT=y
# CONFIG_GCC_PLUGIN_RANDSTRUCT_PERFORMANCE is not set

x86_64

# Full 64-bit means PAE and NX bit.
CONFIG_X86_64=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Disable Model-Specific Register writes.
# CONFIG_X86_MSR is not set

# Randomize position of kernel and memory.
CONFIG_RANDOMIZE_BASE=y
CONFIG_RANDOMIZE_MEMORY=y

# Modern libc no longer needs a fixed-position mapping in userspace, remove it as a possible target.
# CONFIG_X86_VSYSCALL_EMULATION is not set
CONFIG_LEGACY_VSYSCALL_NONE=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Remove additional (32-bit) attack surface, unless you really need them.
# CONFIG_COMPAT is not set
# CONFIG_IA32_EMULATION is not set
# CONFIG_X86_X32 is not set
# CONFIG_X86_X32_ABI is not set
# CONFIG_MODIFY_LDT_SYSCALL is not set

# Enable chip-specific IOMMU support. 
CONFIG_INTEL_IOMMU=y
CONFIG_INTEL_IOMMU_DEFAULT_ON=y
CONFIG_INTEL_IOMMU_SVM=y
CONFIG_AMD_IOMMU=y
CONFIG_AMD_IOMMU_V2=y

# Straight-Line-Speculation
CONFIG_SLS=y

# Enable Control Flow Integrity (since v6.1)
CONFIG_CFI_CLANG=y
# CONFIG_CFI_PERMISSIVE is not set

arm64

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# Randomize position of kernel (requires UEFI RNG or bootloader support for /chosen/kaslr-seed DT property).
CONFIG_RANDOMIZE_BASE=y

# Remove arm32 support to reduce syscall attack surface.
# CONFIG_COMPAT is not set

# Make sure PAN emulation is enabled.
CONFIG_ARM64_SW_TTBR0_PAN=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_UNMAP_KERNEL_AT_EL0=y

# Software Shadow Stack or PAC
CONFIG_SHADOW_CALL_STACK=y

# Pointer authentication (ARMv8.3 and later). If hardware actually supports it, one can
# turn off CONFIG_STACKPROTECTOR_STRONG with this enabled.
CONFIG_ARM64_PTR_AUTH=y
CONFIG_ARM64_PTR_AUTH_KERNEL=y

# Available in ARMv8.5 and later.
CONFIG_ARM64_BTI=y
CONFIG_ARM64_BTI_KERNEL=y
CONFIG_ARM64_MTE=y
CONFIG_KASAN_HW_TAGS=y
CONFIG_ARM64_E0PD=y

# Available in ARMv8.7 and later.
CONFIG_ARM64_EPAN=y

# Enable Control Flow Integrity
CONFIG_CFI_CLANG=y
# CONFIG_CFI_PERMISSIVE is not set

x86_32

# On 32-bit kernels, require PAE for NX bit support.
# CONFIG_M486 is not set
# CONFIG_HIGHMEM4G is not set
CONFIG_HIGHMEM64G=y
CONFIG_X86_PAE=y

# Disallow allocating the first 64k of memory.
CONFIG_DEFAULT_MMAP_MIN_ADDR=65536

# Disable Model-Specific Register writes.
# CONFIG_X86_MSR is not set

# Randomize position of kernel.
CONFIG_RANDOMIZE_BASE=y

# Enable Kernel Page Table Isolation to remove an entire class of cache timing side-channels.
CONFIG_PAGE_TABLE_ISOLATION=y

# Enable chip-specific IOMMU support. 
CONFIG_INTEL_IOMMU=y
CONFIG_INTEL_IOMMU_DEFAULT_ON=y

# Don't allow for 16-bit program emulation and associated LDT tricks.
# CONFIG_MODIFY_LDT_SYSCALL is not set

arm

# Disallow allocating the first 32k of memory (cannot be 64k due to ARM loader).
CONFIG_DEFAULT_MMAP_MIN_ADDR=32768

# For maximal userspace memory area (and maximum ASLR).
CONFIG_VMSPLIT_3G=y

# If building an old out-of-tree Qualcomm kernel, this is similar to CONFIG_STRICT_KERNEL_RWX.
CONFIG_STRICT_MEMORY_RWX=y

# Make sure PXN/PAN emulation is enabled.
CONFIG_CPU_SW_DOMAIN_PAN=y

# Dangerous; old interfaces and needless additional attack surface.
# CONFIG_OABI_COMPAT is not set

kernel command line options

# Make sure CONFIG_HARDENED_USERCOPY stays enabled.
hardened_usercopy=1

# Wipe slab and page allocations (Since v5.3; supersedes "slub_debug=P" and "page_poison=1" below)
# See CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y and CONFIG_INIT_ON_FREE_DEFAULT_ON=y above.
init_on_alloc=1
init_on_free=1

# Randomize kernel stack offset on syscall entry (since v5.13).
# See CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT above.
randomize_kstack_offset=on

# Randomize page allocator (needs CONFIG_SHUFFLE_PAGE_ALLOCATOR=y too).
page_alloc.shuffle=1

# Disable slab merging (makes many heap overflow attacks more difficult).
slab_nomerge

# Always enable Kernel Page Table Isolation, even if the CPU claims it is safe from Meltdown.
pti=on

# To prevent against L1TF, at the cost of losing hyper threading (slow).
nosmt

# Enable SLUB redzoning and sanity checking (slow; requires CONFIG_SLUB_DEBUG=y above).
slub_debug=ZF

# (Before v5.3 without "init_on_free=1") Enable slub/slab allocator free poisoning (requires CONFIG_SLUB_DEBUG=y above).
slub_debug=P

# (Before v5.3 without "init_on_free=1") Enable buddy allocator free poisoning (requires CONFIG_PAGE_POISONING=y above).
page_poison=1

# Force IOMMU TLB invalidation so devices will never be able to access stale data contents (see CONFIG_IOMMU_DEFAULT_DMA_STRICT=y above).
iommu.passthrough=0 iommu.strict=1

# Mitigates all known CPU vulnerabilities, disabling SMT *if needed*.
mitigations=auto,nosmt

x86_64

# Remove vsyscall entirely to avoid it being a fixed-position ROP target of any kind.
# (Same as CONFIG_LEGACY_VSYSCALL_NONE=y above.)
vsyscall=none

# Make sure COMPAT_VDSO stays disabled
vdso32=0

sysctls

# Try to keep kernel address exposures out of various /proc files (kallsyms, modules, etc). (There is no CONFIG for the changing the initial value.) If root absolutely needs values from /proc, use value "1".
kernel.kptr_restrict = 2

# Avoid kernel memory address exposures via dmesg (this value can also be set by CONFIG_SECURITY_DMESG_RESTRICT).
kernel.dmesg_restrict = 1

# Block non-uid-0 profiling (needs distro patch, otherwise this is the same as "= 2")
kernel.perf_event_paranoid = 3

# Turn off kexec, even if it's built in.
kernel.kexec_load_disabled = 1

# Make sure the expected default is enabled to enable full ASLR in userpsace.
kernel.randomize_va_space = 2

# Block all PTRACE_ATTACH. If you need ptrace to work, then avoid non-ancestor ptrace access to running processes and their credentials, and use value "1".
kernel.yama.ptrace_scope = 3

# Disable User Namespaces, as it opens up a large attack surface to unprivileged users.
user.max_user_namespaces = 0

# Disable tty line discipline autoloading (see CONFIG_LDISC_AUTOLOAD).
dev.tty.ldisc_autoload = 0

# Disable TIOCSTI which is used to inject keypresses. (This will, however, break screen readers.)
dev.tty.legacy_tiocsti = 0

# Turn off unprivileged eBPF access.
kernel.unprivileged_bpf_disabled = 1

# Turn on BPF JIT hardening, if the JIT is enabled.
net.core.bpf_jit_harden = 2

# Disable userfaultfd for unprivileged processes.
vm.unprivileged_userfaultfd = 0

# Disable POSIX symlink and hardlink corner cases that lead to lots of filesystem confusion attacks.
fs.protected_symlinks = 1
fs.protected_hardlinks = 1

# Disable POSIX corner cases with creating files and fifos unless the directory owner matches. Check your workloads!
fs.protected_fifos = 2
fs.protected_regular = 2

# Make sure the default process dumpability is set (processes that changed privileges aren't dumpable).
fs.suid_dumpable = 0