[PATCH kvm-next V11 4/7] KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes
Garg, Shivank
shivankg at amd.com
Thu Sep 25 11:44:15 UTC 2025
On 9/25/2025 8:20 AM, Sean Christopherson wrote:
> My apologies for the super late feedback. None of this is critical (mechanical
> things that can be cleaned up after the fact), so if there's any urgency to
> getting this series into 6.18, just ignore it.
>
> On Wed, Aug 27, 2025, Ackerley Tng wrote:
>> Shivank Garg <shivankg at amd.com> writes:
>> @@ -463,11 +502,70 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>> return true;
>> }
>>
>> +static struct inode *kvm_gmem_inode_create(const char *name, loff_t size,
>> + u64 flags)
>> +{
>> + struct inode *inode;
>> +
>> + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
>> + if (IS_ERR(inode))
>> + return inode;
>> +
>> + inode->i_private = (void *)(unsigned long)flags;
>> + inode->i_op = &kvm_gmem_iops;
>> + inode->i_mapping->a_ops = &kvm_gmem_aops;
>> + inode->i_mode |= S_IFREG;
>> + inode->i_size = size;
>> + mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> + mapping_set_inaccessible(inode->i_mapping);
>> + /* Unmovable mappings are supposed to be marked unevictable as well. */
>> + WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> +
>> + return inode;
>> +}
>> +
>> +static struct file *kvm_gmem_inode_create_getfile(void *priv, loff_t size,
>> + u64 flags)
>> +{
>> + static const char *name = "[kvm-gmem]";
>> + struct inode *inode;
>> + struct file *file;
>> + int err;
>> +
>> + err = -ENOENT;
>> + /* __fput() will take care of fops_put(). */
>> + if (!fops_get(&kvm_gmem_fops))
>> + goto err;
>> +
>> + inode = kvm_gmem_inode_create(name, size, flags);
>> + if (IS_ERR(inode)) {
>> + err = PTR_ERR(inode);
>> + goto err_fops_put;
>> + }
>> +
>> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR,
>> + &kvm_gmem_fops);
>> + if (IS_ERR(file)) {
>> + err = PTR_ERR(file);
>> + goto err_put_inode;
>> + }
>> +
>> + file->f_flags |= O_LARGEFILE;
>> + file->private_data = priv;
>> +
>> + return file;
>> +
>> +err_put_inode:
>> + iput(inode);
>> +err_fops_put:
>> + fops_put(&kvm_gmem_fops);
>> +err:
>> + return ERR_PTR(err);
>> +}
>
> I don't see any reason to add two helpers. It requires quite a bit more lines
> of code due to adding more error paths and local variables, and IMO doesn't make
> the code any easier to read.
>
> Passing in "gmem" as @priv is especially ridiculous, as it adds code and
> obfuscates what file->private_data is set to.
>
> I get the sense that the code was written to be a "replacement" for common APIs,
> but that is nonsensical (no pun intended).
>
>> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>> {
>> - const char *anon_name = "[kvm-gmem]";
>> struct kvm_gmem *gmem;
>> - struct inode *inode;
>> struct file *file;
>> int fd, err;
>>
>> @@ -481,32 +579,16 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>> goto err_fd;
>> }
>>
>> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
>> - O_RDWR, NULL);
>> + file = kvm_gmem_inode_create_getfile(gmem, size, flags);
>> if (IS_ERR(file)) {
>> err = PTR_ERR(file);
>> goto err_gmem;
>> }
>>
>> - file->f_flags |= O_LARGEFILE;
>> -
>> - inode = file->f_inode;
>> - WARN_ON(file->f_mapping != inode->i_mapping);
>> -
>> - inode->i_private = (void *)(unsigned long)flags;
>> - inode->i_op = &kvm_gmem_iops;
>> - inode->i_mapping->a_ops = &kvm_gmem_aops;
>> - inode->i_mode |= S_IFREG;
>> - inode->i_size = size;
>> - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
>> - mapping_set_inaccessible(inode->i_mapping);
>> - /* Unmovable mappings are supposed to be marked unevictable as well. */
>> - WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>> -
>> kvm_get_kvm(kvm);
>> gmem->kvm = kvm;
>> xa_init(&gmem->bindings);
>> - list_add(&gmem->entry, &inode->i_mapping->i_private_list);
>> + list_add(&gmem->entry, &file_inode(file)->i_mapping->i_private_list);
>
> I don't understand this change? Isn't file_inode(file) == inode?
>
> Compile tested only, and again not critical, but it's -40 LoC...
>
>
Thanks.
I did functional testing of the version below and it works fine.
> ---
> include/uapi/linux/magic.h | 1 +
> virt/kvm/guest_memfd.c | 75 ++++++++++++++++++++++++++++++++------
> virt/kvm/kvm_main.c | 7 +++-
> virt/kvm/kvm_mm.h | 9 +++--
> 4 files changed, 76 insertions(+), 16 deletions(-)
>
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..638ca21b7a90 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
> #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
> #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
> #define PID_FS_MAGIC 0x50494446 /* "PIDF" */
> +#define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */
>
> #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 08a6bc7d25b6..73c9791879d5 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -1,12 +1,16 @@
> // SPDX-License-Identifier: GPL-2.0
> +#include <linux/anon_inodes.h>
> #include <linux/backing-dev.h>
> #include <linux/falloc.h>
> +#include <linux/fs.h>
> #include <linux/kvm_host.h>
> +#include <linux/pseudo_fs.h>
> #include <linux/pagemap.h>
> -#include <linux/anon_inodes.h>
>
> #include "kvm_mm.h"
>
> +static struct vfsmount *kvm_gmem_mnt;
> +
> struct kvm_gmem {
> struct kvm *kvm;
> struct xarray bindings;
> @@ -385,9 +389,45 @@ static struct file_operations kvm_gmem_fops = {
> .fallocate = kvm_gmem_fallocate,
> };
>
> -void kvm_gmem_init(struct module *module)
> +static int kvm_gmem_init_fs_context(struct fs_context *fc)
> +{
> + if (!init_pseudo(fc, GUEST_MEMFD_MAGIC))
> + return -ENOMEM;
> +
> + fc->s_iflags |= SB_I_NOEXEC;
> + fc->s_iflags |= SB_I_NODEV;
> +
> + return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs = {
> + .name = "guest_memfd",
> + .init_fs_context = kvm_gmem_init_fs_context,
> + .kill_sb = kill_anon_super,
> +};
> +
> +static int kvm_gmem_init_mount(void)
> +{
> + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs);
> +
> + if (IS_ERR(kvm_gmem_mnt))
> + return PTR_ERR(kvm_gmem_mnt);
> +
> + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC;
> + return 0;
> +}
> +
> +int kvm_gmem_init(struct module *module)
> {
> kvm_gmem_fops.owner = module;
> +
> + return kvm_gmem_init_mount();
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> + kern_unmount(kvm_gmem_mnt);
> + kvm_gmem_mnt = NULL;
> }
>
> static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -465,7 +505,7 @@ bool __weak kvm_arch_supports_gmem_mmap(struct kvm *kvm)
>
> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> {
> - const char *anon_name = "[kvm-gmem]";
> + static const char *name = "[kvm-gmem]";
> struct kvm_gmem *gmem;
> struct inode *inode;
> struct file *file;
> @@ -481,17 +521,17 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> goto err_fd;
> }
>
> - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> - O_RDWR, NULL);
> - if (IS_ERR(file)) {
> - err = PTR_ERR(file);
> + /* __fput() will take care of fops_put(). */
> + if (!fops_get(&kvm_gmem_fops)) {
> + err = -ENOENT;
> goto err_gmem;
> }
>
> - file->f_flags |= O_LARGEFILE;
> -
> - inode = file->f_inode;
> - WARN_ON(file->f_mapping != inode->i_mapping);
> + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL);
> + if (IS_ERR(inode)) {
> + err = PTR_ERR(inode);
> + goto err_fops;
> + }
>
> inode->i_private = (void *)(unsigned long)flags;
> inode->i_op = &kvm_gmem_iops;
> @@ -503,6 +543,15 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> /* Unmovable mappings are supposed to be marked unevictable as well. */
> WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping));
>
> + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops);
> + if (IS_ERR(file)) {
> + err = PTR_ERR(file);
> + goto err_inode;
> + }
> +
> + file->f_flags |= O_LARGEFILE;
> + file->private_data = gmem;
> +
> kvm_get_kvm(kvm);
> gmem->kvm = kvm;
> xa_init(&gmem->bindings);
> @@ -511,6 +560,10 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
> fd_install(fd, file);
> return fd;
>
> +err_inode:
> + iput(inode);
> +err_fops:
> + fops_put(&kvm_gmem_fops);
> err_gmem:
> kfree(gmem);
> err_fd:
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 18f29ef93543..301d48d6e00d 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6489,7 +6489,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> if (WARN_ON_ONCE(r))
> goto err_vfio;
>
> - kvm_gmem_init(module);
> + r = kvm_gmem_init(module);
> + if (r)
> + goto err_gmem;
>
> r = kvm_init_virtualization();
> if (r)
> @@ -6510,6 +6512,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
> err_register:
> kvm_uninit_virtualization();
> err_virt:
> + kvm_gmem_exit();
> +err_gmem:
> kvm_vfio_ops_exit();
> err_vfio:
> kvm_async_pf_deinit();
> @@ -6541,6 +6545,7 @@ void kvm_exit(void)
> for_each_possible_cpu(cpu)
> free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
> kmem_cache_destroy(kvm_vcpu_cache);
> + kvm_gmem_exit();
> kvm_vfio_ops_exit();
> kvm_async_pf_deinit();
> kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 31defb08ccba..9fcc5d5b7f8d 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -68,17 +68,18 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
> #endif /* HAVE_KVM_PFNCACHE */
>
> #ifdef CONFIG_KVM_GUEST_MEMFD
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
> int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
> int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
> unsigned int fd, loff_t offset);
> void kvm_gmem_unbind(struct kvm_memory_slot *slot);
> #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline int kvm_gmem_init(struct module *module)
> {
> -
> + return 0;
> }
> -
> +static inline void kvm_gmem_exit(void) {};
> static inline int kvm_gmem_bind(struct kvm *kvm,
> struct kvm_memory_slot *slot,
> unsigned int fd, loff_t offset)
>
> base-commit: d133892dddd6607de651b7e32510359a6af97c4c
> --
More information about the Linux-security-module-archive
mailing list