sst-linux/fs
Johannes Thumshirn a40de0330a btrfs: fix use-after-free in btrfs_encoded_read_endio()
commit 05b36b04d74a517d6675bf2f90829ff1ac7e28dc upstream.

Shinichiro reported the following use-after free that sometimes is
happening in our CI system when running fstests' btrfs/284 on a TCMU
runner device:

  BUG: KASAN: slab-use-after-free in lock_release+0x708/0x780
  Read of size 8 at addr ffff888106a83f18 by task kworker/u80:6/219

  CPU: 8 UID: 0 PID: 219 Comm: kworker/u80:6 Not tainted 6.12.0-rc6-kts+ #15
  Hardware name: Supermicro Super Server/X11SPi-TF, BIOS 3.3 02/21/2020
  Workqueue: btrfs-endio btrfs_end_bio_work [btrfs]
  Call Trace:
   <TASK>
   dump_stack_lvl+0x6e/0xa0
   ? lock_release+0x708/0x780
   print_report+0x174/0x505
   ? lock_release+0x708/0x780
   ? __virt_addr_valid+0x224/0x410
   ? lock_release+0x708/0x780
   kasan_report+0xda/0x1b0
   ? lock_release+0x708/0x780
   ? __wake_up+0x44/0x60
   lock_release+0x708/0x780
   ? __pfx_lock_release+0x10/0x10
   ? __pfx_do_raw_spin_lock+0x10/0x10
   ? lock_is_held_type+0x9a/0x110
   _raw_spin_unlock_irqrestore+0x1f/0x60
   __wake_up+0x44/0x60
   btrfs_encoded_read_endio+0x14b/0x190 [btrfs]
   btrfs_check_read_bio+0x8d9/0x1360 [btrfs]
   ? lock_release+0x1b0/0x780
   ? trace_lock_acquire+0x12f/0x1a0
   ? __pfx_btrfs_check_read_bio+0x10/0x10 [btrfs]
   ? process_one_work+0x7e3/0x1460
   ? lock_acquire+0x31/0xc0
   ? process_one_work+0x7e3/0x1460
   process_one_work+0x85c/0x1460
   ? __pfx_process_one_work+0x10/0x10
   ? assign_work+0x16c/0x240
   worker_thread+0x5e6/0xfc0
   ? __pfx_worker_thread+0x10/0x10
   kthread+0x2c3/0x3a0
   ? __pfx_kthread+0x10/0x10
   ret_from_fork+0x31/0x70
   ? __pfx_kthread+0x10/0x10
   ret_from_fork_asm+0x1a/0x30
   </TASK>

  Allocated by task 3661:
   kasan_save_stack+0x30/0x50
   kasan_save_track+0x14/0x30
   __kasan_kmalloc+0xaa/0xb0
   btrfs_encoded_read_regular_fill_pages+0x16c/0x6d0 [btrfs]
   send_extent_data+0xf0f/0x24a0 [btrfs]
   process_extent+0x48a/0x1830 [btrfs]
   changed_cb+0x178b/0x2ea0 [btrfs]
   btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs]
   _btrfs_ioctl_send+0x117/0x330 [btrfs]
   btrfs_ioctl+0x184a/0x60a0 [btrfs]
   __x64_sys_ioctl+0x12e/0x1a0
   do_syscall_64+0x95/0x180
   entry_SYSCALL_64_after_hwframe+0x76/0x7e

  Freed by task 3661:
   kasan_save_stack+0x30/0x50
   kasan_save_track+0x14/0x30
   kasan_save_free_info+0x3b/0x70
   __kasan_slab_free+0x4f/0x70
   kfree+0x143/0x490
   btrfs_encoded_read_regular_fill_pages+0x531/0x6d0 [btrfs]
   send_extent_data+0xf0f/0x24a0 [btrfs]
   process_extent+0x48a/0x1830 [btrfs]
   changed_cb+0x178b/0x2ea0 [btrfs]
   btrfs_ioctl_send+0x3bf9/0x5c20 [btrfs]
   _btrfs_ioctl_send+0x117/0x330 [btrfs]
   btrfs_ioctl+0x184a/0x60a0 [btrfs]
   __x64_sys_ioctl+0x12e/0x1a0
   do_syscall_64+0x95/0x180
   entry_SYSCALL_64_after_hwframe+0x76/0x7e

  The buggy address belongs to the object at ffff888106a83f00
   which belongs to the cache kmalloc-rnd-07-96 of size 96
  The buggy address is located 24 bytes inside of
   freed 96-byte region [ffff888106a83f00, ffff888106a83f60)

  The buggy address belongs to the physical page:
  page: refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888106a83800 pfn:0x106a83
  flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
  page_type: f5(slab)
  raw: 0017ffffc0000000 ffff888100053680 ffffea0004917200 0000000000000004
  raw: ffff888106a83800 0000000080200019 00000001f5000000 0000000000000000
  page dumped because: kasan: bad access detected

  Memory state around the buggy address:
   ffff888106a83e00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
   ffff888106a83e80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
  >ffff888106a83f00: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
                              ^
   ffff888106a83f80: fa fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
   ffff888106a84000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  ==================================================================

Further analyzing the trace and the crash dump's vmcore file shows that
the wake_up() call in btrfs_encoded_read_endio() is calling wake_up() on
the wait_queue that is in the private data passed to the end_io handler.

Commit 4ff47df40447 ("btrfs: move priv off stack in
btrfs_encoded_read_regular_fill_pages()") moved 'struct
btrfs_encoded_read_private' off the stack.

Before that commit one can see a corruption of the private data when
analyzing the vmcore after a crash:

*(struct btrfs_encoded_read_private *)0xffff88815626eec8 = {
	.wait = (wait_queue_head_t){
		.lock = (spinlock_t){
			.rlock = (struct raw_spinlock){
				.raw_lock = (arch_spinlock_t){
					.val = (atomic_t){
						.counter = (int)-2005885696,
					},
					.locked = (u8)0,
					.pending = (u8)157,
					.locked_pending = (u16)40192,
					.tail = (u16)34928,
				},
				.magic = (unsigned int)536325682,
				.owner_cpu = (unsigned int)29,
				.owner = (void *)__SCT__tp_func_btrfs_transaction_commit+0x0 = 0x0,
				.dep_map = (struct lockdep_map){
					.key = (struct lock_class_key *)0xffff8881575a3b6c,
					.class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 },
					.name = (const char *)0xffff88815626f100 = "",
					.wait_type_outer = (u8)37,
					.wait_type_inner = (u8)178,
					.lock_type = (u8)154,
				},
			},
			.__padding = (u8 [24]){ 0, 157, 112, 136, 50, 174, 247, 31, 29 },
			.dep_map = (struct lockdep_map){
				.key = (struct lock_class_key *)0xffff8881575a3b6c,
				.class_cache = (struct lock_class *[2]){ 0xffff8882a71985c0, 0xffffea00066f5d40 },
				.name = (const char *)0xffff88815626f100 = "",
				.wait_type_outer = (u8)37,
				.wait_type_inner = (u8)178,
				.lock_type = (u8)154,
			},
		},
		.head = (struct list_head){
			.next = (struct list_head *)0x112cca,
			.prev = (struct list_head *)0x47,
		},
	},
	.pending = (atomic_t){
		.counter = (int)-1491499288,
	},
	.status = (blk_status_t)130,
}

Here we can see several indicators of in-memory data corruption, e.g. the
large negative atomic values of ->pending or
->wait->lock->rlock->raw_lock->val, as well as the bogus spinlock magic
0x1ff7ae32 (decimal 536325682 above) instead of 0xdead4ead or the bogus
pointer values for ->wait->head.

To fix this, change atomic_dec_return() to atomic_dec_and_test() to fix the
corruption, as atomic_dec_return() is defined as two instructions on
x86_64, whereas atomic_dec_and_test() is defined as a single atomic
operation. This can lead to a situation where counter value is already
decremented but the if statement in btrfs_encoded_read_endio() is not
completely processed, i.e. the 0 test has not completed. If another thread
continues executing btrfs_encoded_read_regular_fill_pages() the
atomic_dec_return() there can see an already updated ->pending counter and
continues by freeing the private data. Continuing in the endio handler the
test for 0 succeeds and the wait_queue is woken up, resulting in a
use-after-free.

Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Suggested-by: Damien Le Moal <Damien.LeMoal@wdc.com>
Fixes: 1881fba89b ("btrfs: add BTRFS_IOC_ENCODED_READ ioctl")
CC: stable@vger.kernel.org # 6.1+
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Alva Lan <alvalan9@foxmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2025-01-09 13:29:56 +01:00
..
9p fs/9p: fix uninitialized values during inode evict 2024-11-22 15:37:34 +01:00
adfs
affs
afs afs: Fix missing subdir edit when renamed between parent dirs 2024-11-08 16:26:43 +01:00
autofs
befs
bfs
btrfs btrfs: fix use-after-free in btrfs_encoded_read_endio() 2025-01-09 13:29:56 +01:00
cachefiles cachefiles: Fix missing pos updates in cachefiles_ondemand_fd_write_iter() 2024-12-14 19:53:15 +01:00
ceph ceph: validate snapdirname option length when mounting 2024-12-27 13:53:01 +01:00
coda
configfs
cramfs
crypto fs: Create a generic is_dot_dotdot() utility 2024-10-17 15:21:17 +02:00
debugfs
devpts
dlm
ecryptfs fs: Create a generic is_dot_dotdot() utility 2024-10-17 15:21:17 +02:00
efivarfs efivarfs: Fix error on non-existent file 2024-12-27 13:52:55 +01:00
efs
erofs erofs: fix incorrect symlink detection in fast symlink 2024-10-17 15:22:09 +02:00
exfat exfat: fix potential deadlock on __exfat_get_dentry_set 2024-12-19 18:08:51 +01:00
exportfs
ext2 ext2: Verify bitmap and itable block numbers before using them 2024-08-03 08:49:32 +02:00
ext4 ext4: fix FS_IOC_GETFSMAP handling 2024-12-14 19:53:58 +01:00
f2fs f2fs: fix f2fs_bug_on when uninstalling filesystem call f2fs_evict_inode. 2024-12-14 19:54:47 +01:00
fat fat: fix uninitialized variable 2024-10-22 15:56:43 +02:00
freevxfs
fscache netfs/fscache: Add a memory barrier for FSCACHE_VOLUME_CREATING 2024-12-14 19:53:15 +01:00
fuse fuse: add feature flag for expire-only 2024-09-12 11:10:30 +02:00
gfs2 gfs2: Remove freeze_go_demote_ok 2024-08-29 17:30:56 +02:00
hfs hfs: fix to initialize fields of hfs_inode_info after hfs_alloc_inode() 2024-08-03 08:49:32 +02:00
hfsplus hfsplus: don't query the device logical block size multiple times 2024-12-14 19:53:16 +01:00
hostfs
hpfs
hugetlbfs mm/hugetlb: add hugetlb_folio_subpool() helpers 2024-05-17 11:55:51 +02:00
iomap iomap: turn iomap_want_unshare_iter into an inline function 2024-11-08 16:26:42 +01:00
isofs
jbd2 jbd2: correctly compare tids with tid_geq function in jbd2_fc_begin_commit 2024-10-17 15:21:56 +02:00
jffs2 jffs2: Fix rtime decompressor 2024-12-14 19:54:52 +01:00
jfs jfs: add a check to prevent array-index-out-of-bounds in dbAdjTree 2024-12-14 19:54:43 +01:00
kernfs kernfs: fix false-positive WARN(nr_mmapped) in kernfs_drain_open_files 2024-08-29 17:30:35 +02:00
lockd nfsd: stop setting ->pg_stats for unused stats 2024-08-19 06:00:04 +02:00
minix
netfs
nfs NFS/pnfs: Fix a live lock between recalled layouts and layoutget 2024-12-27 13:53:00 +01:00
nfs_common
nfsd nfsd: cancel nfsd_shrinker_work using sync mode in nfs4_state_shutdown_net 2025-01-02 10:30:55 +01:00
nilfs2 nilfs2: prevent use of deleted inode 2024-12-27 13:53:01 +01:00
nls
notify fsnotify: fix sending inotify event with unexpected filename 2024-12-14 19:53:59 +01:00
ntfs
ntfs3 fs/ntfs3: Sequential field availability check in mi_enum_attr() 2024-12-14 19:54:55 +01:00
ocfs2 ocfs2: Revert "ocfs2: fix the la space leak when unmounting an ocfs2 volume" 2024-12-14 19:54:52 +01:00
omfs
openpromfs openpromfs: finish conversion to the new mount API 2024-06-12 11:03:03 +02:00
orangefs orangefs: fix out-of-bounds fsid access 2024-07-11 12:47:08 +02:00
overlayfs ovl: properly handle large files in ovl_security_fileattr 2024-12-14 19:54:16 +01:00
proc fs/proc/kcore.c: Clear ret value in read_kcore_iter after successful iov_iter_zero 2024-12-14 19:54:15 +01:00
pstore
qnx4
qnx6
quota quota: flush quota_release_work upon quota writeback 2024-12-14 19:54:10 +01:00
ramfs
reiserfs reiserfs: fix uninit-value in comp_keys 2024-08-29 17:30:20 +02:00
romfs
smb smb: server: Fix building with GCC 15 2025-01-02 10:30:52 +01:00
squashfs Squashfs: sanity check symbolic link size 2024-09-12 11:10:25 +02:00
sysfs
sysv
tracefs
ubifs ubifs: authentication: Fix use-after-free in ubifs_tnc_end_commit 2024-12-14 19:54:08 +01:00
udf udf: Fix directory iteration for longer tail extents 2024-12-27 13:53:01 +01:00
ufs
unicode Revert "unicode: Don't special case ignorable code points" 2024-12-14 19:54:50 +01:00
vboxsf
verity
xfs xfs: only run precommits once per transaction object 2024-12-19 18:08:50 +01:00
zonefs
aio.c
anon_inodes.c
attr.c
bad_inode.c
binfmt_elf_fdpic.c fs: binfmt_elf_efpic: don't use missing interpreter's properties 2024-08-29 17:30:35 +02:00
binfmt_elf_test.c
binfmt_elf.c ELF: fix kernel.randomize_va_space double read 2024-09-12 11:10:19 +02:00
binfmt_flat.c binfmt_flat: Fix corruption when not offsetting data start 2024-08-19 06:00:07 +02:00
binfmt_misc.c binfmt_misc: cleanup on filesystem umount 2024-08-29 17:30:30 +02:00
binfmt_script.c
buffer.c
char_dev.c
compat_binfmt_elf.c
coredump.c
d_path.c
dax.c fsdax: dax_unshare_iter needs to copy entire blocks 2024-11-08 16:26:42 +01:00
dcache.c fs: better handle deep ancestor chains in is_subdir() 2024-07-25 09:49:18 +02:00
direct-io.c
drop_caches.c
eventfd.c
eventpoll.c epoll: Add synchronous wakeup support for ep_poll_callback 2024-12-27 13:53:01 +01:00
exec.c exec: don't WARN for racy path_noexec check 2024-11-01 01:56:02 +01:00
fcntl.c fs: Fix file_set_fowner LSM hook inconsistencies 2024-10-17 15:21:23 +02:00
fhandle.c
file_table.c
file.c close_range(): fix the logics in descriptor table trimming 2024-10-17 15:22:03 +02:00
filesystems.c
fs_context.c
fs_parser.c
fs_pin.c
fs_struct.c
fs_types.c
fs-writeback.c
fsopen.c
init.c
inode.c fs/inode: Prevent dump_mapping() accessing invalid dentry.d_name.name 2024-12-14 19:53:13 +01:00
internal.h
ioctl.c
Kconfig
Kconfig.binfmt
kernel_read_file.c
libfs.c
locks.c filelock: Fix fcntl/close race recovery compat path 2024-07-27 11:32:19 +02:00
Makefile
mbcache.c
mount.h
mpage.c
namei.c fs: Create a generic is_dot_dotdot() utility 2024-10-17 15:21:17 +02:00
namespace.c mount: handle OOM on mnt_warn_timestamp_expiry 2024-10-17 15:20:37 +02:00
no-block.c
nsfs.c
open.c openat2: explicitly return -E2BIG for (usize > PAGE_SIZE) 2024-11-01 01:56:06 +01:00
pipe.c
pnode.c
pnode.h
posix_acl.c
proc_namespace.c
read_write.c
readdir.c
remap_range.c
select.c
seq_file.c
signalfd.c
splice.c
stack.c
stat.c
statfs.c
super.c fs: don't allow non-init s_user_ns for filesystems without FS_USERNS_MOUNT 2024-08-03 08:49:53 +02:00
sync.c
sysctls.c
timerfd.c
userfaultfd.c Fix userfaultfd_api to return EINVAL as expected 2024-07-18 13:18:41 +02:00
utimes.c
xattr.c