Skip to content

Commit 9f6c61f

Browse files
author
Miklos Szeredi
committed
proc/mounts: add cursor
If mounts are deleted after a read(2) call on /proc/self/mounts (or its kin), the subsequent read(2) could miss a mount that comes after the deleted one in the list. This is because the file position is interpreted as the number mount entries from the start of the list. E.g. first read gets entries #0 to #9; the seq file index will be 10. Then entry #5 is deleted, resulting in #10 becoming #9 and #11 becoming #10, etc... The next read will continue from entry #10, and #9 is missed. Solve this by adding a cursor entry for each open instance. Taking the global namespace_sem for write seems excessive, since we are only dealing with a per-namespace list. Instead add a per-namespace spinlock and use that together with namespace_sem taken for read to protect against concurrent modification of the mount list. This may reduce parallelism of is_local_mountpoint(), but it's hardly a big contention point. We could also use RCU freeing of cursors to make traversal not need additional locks, if that turns out to be neceesary. Only move the cursor once for each read (cursor is not added on open) to minimize cacheline invalidation. When EOF is reached, the cursor is taken off the list, in order to prevent an excessive number of cursors due to inactive open file descriptors. Reported-by: Karel Zak <[email protected]> Signed-off-by: Miklos Szeredi <[email protected]>
1 parent 530f32f commit 9f6c61f

File tree

4 files changed

+90
-21
lines changed

4 files changed

+90
-21
lines changed

fs/mount.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@ struct mnt_namespace {
99
atomic_t count;
1010
struct ns_common ns;
1111
struct mount * root;
12+
/*
13+
* Traversal and modification of .list is protected by either
14+
* - taking namespace_sem for write, OR
15+
* - taking namespace_sem for read AND taking .ns_lock.
16+
*/
1217
struct list_head list;
18+
spinlock_t ns_lock;
1319
struct user_namespace *user_ns;
1420
struct ucounts *ucounts;
1521
u64 seq; /* Sequence number to prevent loops */
@@ -133,9 +139,7 @@ struct proc_mounts {
133139
struct mnt_namespace *ns;
134140
struct path root;
135141
int (*show)(struct seq_file *, struct vfsmount *);
136-
void *cached_mount;
137-
u64 cached_event;
138-
loff_t cached_index;
142+
struct mount cursor;
139143
};
140144

141145
extern const struct seq_operations mounts_op;
@@ -153,3 +157,5 @@ static inline bool is_anon_ns(struct mnt_namespace *ns)
153157
{
154158
return ns->seq == 0;
155159
}
160+
161+
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);

fs/namespace.c

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,21 @@ struct vfsmount *lookup_mnt(const struct path *path)
648648
return m;
649649
}
650650

651+
static inline void lock_ns_list(struct mnt_namespace *ns)
652+
{
653+
spin_lock(&ns->ns_lock);
654+
}
655+
656+
static inline void unlock_ns_list(struct mnt_namespace *ns)
657+
{
658+
spin_unlock(&ns->ns_lock);
659+
}
660+
661+
static inline bool mnt_is_cursor(struct mount *mnt)
662+
{
663+
return mnt->mnt.mnt_flags & MNT_CURSOR;
664+
}
665+
651666
/*
652667
* __is_local_mountpoint - Test to see if dentry is a mountpoint in the
653668
* current mount namespace.
@@ -673,11 +688,15 @@ bool __is_local_mountpoint(struct dentry *dentry)
673688
goto out;
674689

675690
down_read(&namespace_sem);
691+
lock_ns_list(ns);
676692
list_for_each_entry(mnt, &ns->list, mnt_list) {
693+
if (mnt_is_cursor(mnt))
694+
continue;
677695
is_covered = (mnt->mnt_mountpoint == dentry);
678696
if (is_covered)
679697
break;
680698
}
699+
unlock_ns_list(ns);
681700
up_read(&namespace_sem);
682701
out:
683702
return is_covered;
@@ -1245,46 +1264,71 @@ struct vfsmount *mnt_clone_internal(const struct path *path)
12451264
}
12461265

12471266
#ifdef CONFIG_PROC_FS
1267+
static struct mount *mnt_list_next(struct mnt_namespace *ns,
1268+
struct list_head *p)
1269+
{
1270+
struct mount *mnt, *ret = NULL;
1271+
1272+
lock_ns_list(ns);
1273+
list_for_each_continue(p, &ns->list) {
1274+
mnt = list_entry(p, typeof(*mnt), mnt_list);
1275+
if (!mnt_is_cursor(mnt)) {
1276+
ret = mnt;
1277+
break;
1278+
}
1279+
}
1280+
unlock_ns_list(ns);
1281+
1282+
return ret;
1283+
}
1284+
12481285
/* iterator; we want it to have access to namespace_sem, thus here... */
12491286
static void *m_start(struct seq_file *m, loff_t *pos)
12501287
{
12511288
struct proc_mounts *p = m->private;
1289+
struct list_head *prev;
12521290

12531291
down_read(&namespace_sem);
1254-
if (p->cached_event == p->ns->event) {
1255-
void *v = p->cached_mount;
1256-
if (*pos == p->cached_index)
1257-
return v;
1258-
if (*pos == p->cached_index + 1) {
1259-
v = seq_list_next(v, &p->ns->list, &p->cached_index);
1260-
return p->cached_mount = v;
1261-
}
1292+
if (!*pos) {
1293+
prev = &p->ns->list;
1294+
} else {
1295+
prev = &p->cursor.mnt_list;
1296+
1297+
/* Read after we'd reached the end? */
1298+
if (list_empty(prev))
1299+
return NULL;
12621300
}
12631301

1264-
p->cached_event = p->ns->event;
1265-
p->cached_mount = seq_list_start(&p->ns->list, *pos);
1266-
p->cached_index = *pos;
1267-
return p->cached_mount;
1302+
return mnt_list_next(p->ns, prev);
12681303
}
12691304

12701305
static void *m_next(struct seq_file *m, void *v, loff_t *pos)
12711306
{
12721307
struct proc_mounts *p = m->private;
1308+
struct mount *mnt = v;
12731309

1274-
p->cached_mount = seq_list_next(v, &p->ns->list, pos);
1275-
p->cached_index = *pos;
1276-
return p->cached_mount;
1310+
++*pos;
1311+
return mnt_list_next(p->ns, &mnt->mnt_list);
12771312
}
12781313

12791314
static void m_stop(struct seq_file *m, void *v)
12801315
{
1316+
struct proc_mounts *p = m->private;
1317+
struct mount *mnt = v;
1318+
1319+
lock_ns_list(p->ns);
1320+
if (mnt)
1321+
list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
1322+
else
1323+
list_del_init(&p->cursor.mnt_list);
1324+
unlock_ns_list(p->ns);
12811325
up_read(&namespace_sem);
12821326
}
12831327

12841328
static int m_show(struct seq_file *m, void *v)
12851329
{
12861330
struct proc_mounts *p = m->private;
1287-
struct mount *r = list_entry(v, struct mount, mnt_list);
1331+
struct mount *r = v;
12881332
return p->show(m, &r->mnt);
12891333
}
12901334

@@ -1294,6 +1338,15 @@ const struct seq_operations mounts_op = {
12941338
.stop = m_stop,
12951339
.show = m_show,
12961340
};
1341+
1342+
void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
1343+
{
1344+
down_read(&namespace_sem);
1345+
lock_ns_list(ns);
1346+
list_del(&cursor->mnt_list);
1347+
unlock_ns_list(ns);
1348+
up_read(&namespace_sem);
1349+
}
12971350
#endif /* CONFIG_PROC_FS */
12981351

12991352
/**
@@ -3202,6 +3255,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
32023255
atomic_set(&new_ns->count, 1);
32033256
INIT_LIST_HEAD(&new_ns->list);
32043257
init_waitqueue_head(&new_ns->poll);
3258+
spin_lock_init(&new_ns->ns_lock);
32053259
new_ns->user_ns = get_user_ns(user_ns);
32063260
new_ns->ucounts = ucounts;
32073261
return new_ns;
@@ -3842,10 +3896,14 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
38423896
bool visible = false;
38433897

38443898
down_read(&namespace_sem);
3899+
lock_ns_list(ns);
38453900
list_for_each_entry(mnt, &ns->list, mnt_list) {
38463901
struct mount *child;
38473902
int mnt_flags;
38483903

3904+
if (mnt_is_cursor(mnt))
3905+
continue;
3906+
38493907
if (mnt->mnt.mnt_sb->s_type != sb->s_type)
38503908
continue;
38513909

@@ -3893,6 +3951,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
38933951
next: ;
38943952
}
38953953
found:
3954+
unlock_ns_list(ns);
38963955
up_read(&namespace_sem);
38973956
return visible;
38983957
}

fs/proc_namespace.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,8 @@ static int mounts_open_common(struct inode *inode, struct file *file,
279279
p->ns = ns;
280280
p->root = root;
281281
p->show = show;
282-
p->cached_event = ~0ULL;
282+
INIT_LIST_HEAD(&p->cursor.mnt_list);
283+
p->cursor.mnt.mnt_flags = MNT_CURSOR;
283284

284285
return 0;
285286

@@ -296,6 +297,7 @@ static int mounts_release(struct inode *inode, struct file *file)
296297
struct seq_file *m = file->private_data;
297298
struct proc_mounts *p = m->private;
298299
path_put(&p->root);
300+
mnt_cursor_del(p->ns, &p->cursor);
299301
put_mnt_ns(p->ns);
300302
return seq_release_private(inode, file);
301303
}

include/linux/mount.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ struct fs_context;
5050
#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME )
5151

5252
#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \
53-
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED)
53+
MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED | \
54+
MNT_CURSOR)
5455

5556
#define MNT_INTERNAL 0x4000
5657

@@ -64,6 +65,7 @@ struct fs_context;
6465
#define MNT_SYNC_UMOUNT 0x2000000
6566
#define MNT_MARKED 0x4000000
6667
#define MNT_UMOUNT 0x8000000
68+
#define MNT_CURSOR 0x10000000
6769

6870
struct vfsmount {
6971
struct dentry *mnt_root; /* root of the mounted tree */

0 commit comments

Comments
 (0)