From 4082f985456507cb05e4ea79b8fe44b34259d3d6 Mon Sep 17 00:00:00 2001 From: hoewweken <44616757+hoewweken@users.noreply.github.com> Date: Wed, 12 Jul 2023 10:26:30 +0200 Subject: [PATCH] rebuild libebml with gcc-10 (#12787 --- components/library/libebml/Makefile | 1 components/openindiana/illumos-gate/patches/0004-OS-3294-add-support-for-inotify.patch | 3046 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 3,047 insertions(+), 0 deletions(-) diff --git a/components/library/libebml/Makefile b/components/library/libebml/Makefile index 620f37f..a1fe248 100644 --- a/components/library/libebml/Makefile +++ b/components/library/libebml/Makefile @@ -20,6 +20,7 @@ COMPONENT_NAME= libebml COMPONENT_VERSION= 1.4.4 +COMPONENT_REVISION= 1 COMPONENT_SUMMARY= Extensible Binary Markup Language COMPONENT_PROJECT_URL= https://matroska-org.github.io/libebml/ COMPONENT_SRC= $(COMPONENT_NAME)-$(COMPONENT_VERSION) diff --git a/components/openindiana/illumos-gate/patches/0004-OS-3294-add-support-for-inotify.patch b/components/openindiana/illumos-gate/patches/0004-OS-3294-add-support-for-inotify.patch new file mode 100644 index 0000000..29d9330 --- /dev/null +++ b/components/openindiana/illumos-gate/patches/0004-OS-3294-add-support-for-inotify.patch @@ -0,0 +1,3046 @@ +From a9a246c0c49e192616e7499eaa2362b21fde8f5e Mon Sep 17 00:00:00 2001 +From: Bryan Cantrill <bryan@joyent.com> +Date: Thu, 18 Sep 2014 06:22:00 +0000 +Subject: [PATCH] OS-3294 add support for inotify Reviewed by: Jerry Jelinek + <jerry.jelinek@joyent.com> Reviewed by: Robert Mustacchi <rm@joyent.com> + +--- + usr/src/cmd/devfsadm/misc_link.c | 5 +- + usr/src/lib/libc/amd64/Makefile | 1 + + usr/src/lib/libc/i386/Makefile.com | 1 + + usr/src/lib/libc/port/mapfile-vers | 4 + + usr/src/lib/libc/port/sys/inotify.c | 142 +++ + usr/src/lib/libc/sparc/Makefile.com | 1 + + usr/src/man/man3c/Makefile | 3 + + usr/src/man/man3c/inotify_add_watch.3c | 120 ++ + usr/src/man/man3c/inotify_init.3c | 107 ++ + 
usr/src/man/man3c/inotify_rm_watch.3c | 81 ++ + usr/src/man/man5/Makefile | 1 + + usr/src/man/man5/inotify.5 | 305 +++++ + usr/src/uts/common/Makefile.files | 2 + + usr/src/uts/common/fs/nfs/nfs3_vnops.c | 5 +- + usr/src/uts/common/fs/nfs/nfs4_vnops.c | 30 +- + usr/src/uts/common/fs/nfs/nfs_vnops.c | 6 +- + usr/src/uts/common/fs/pcfs/pc_dir.c | 12 +- + usr/src/uts/common/fs/tmpfs/tmp_vnops.c | 6 +- + usr/src/uts/common/fs/udfs/udf_dir.c | 6 +- + usr/src/uts/common/fs/ufs/ufs_vnops.c | 7 +- + usr/src/uts/common/fs/vnode.c | 8 +- + usr/src/uts/common/fs/zfs/zfs_vnops.c | 6 +- + usr/src/uts/common/io/inotify.c | 1480 +++++++++++++++++++++++ + usr/src/uts/common/io/inotify.conf | 16 + + usr/src/uts/common/sys/Makefile | 1 + + usr/src/uts/common/sys/inotify.h | 153 +++ + usr/src/uts/common/sys/vnode.h | 8 +- + usr/src/uts/intel/Makefile.intel | 1 + + usr/src/uts/intel/inotify/Makefile | 70 ++ + usr/src/uts/sparc/Makefile.sparc | 2 + + usr/src/uts/sparc/inotify/Makefile | 70 ++ + 32 files changed, 2617 insertions(+), 51 deletions(-) + create mode 100644 usr/src/lib/libc/port/sys/inotify.c + create mode 100644 usr/src/man/man3c/inotify_add_watch.3c + create mode 100644 usr/src/man/man3c/inotify_init.3c + create mode 100644 usr/src/man/man3c/inotify_rm_watch.3c + create mode 100644 usr/src/man/man5/inotify.5 + create mode 100644 usr/src/uts/common/io/inotify.c + create mode 100644 usr/src/uts/common/io/inotify.conf + create mode 100644 usr/src/uts/common/sys/inotify.h + create mode 100644 usr/src/uts/intel/inotify/Makefile + create mode 100644 usr/src/uts/sparc/inotify/Makefile + +diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c +index b7aef8b00d..70599d6039 100644 +--- a/usr/src/cmd/devfsadm/misc_link.c ++++ a/usr/src/cmd/devfsadm/misc_link.c +@@ -108,6 +108,9 @@ static devfsadm_create_t misc_cbt[] = { + "(^nca$)|(^rds$)|(^sdp$)|(^ipnet$)|(^dlpistub$)|(^bpf$)", + TYPE_EXACT | DRV_RE, ILEVEL_1, minor_name + }, ++ { "pseudo", "ddi_pseudo", 
"inotify", ++ TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name ++ }, + { "pseudo", "ddi_pseudo", "ipd", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, minor_name + }, +diff --git a/usr/src/lib/libc/amd64/Makefile b/usr/src/lib/libc/amd64/Makefile +index 4afe40c01b..a7cfc36ddb 100644 +--- a/usr/src/lib/libc/amd64/Makefile ++++ b/usr/src/lib/libc/amd64/Makefile +@@ -860,6 +860,7 @@ PORTSYS= \ + fcntl.o \ + getpagesizes.o \ + getpeerucred.o \ ++ inotify.o \ + inst_sync.o \ + issetugid.o \ + label.o \ +diff --git a/usr/src/lib/libc/i386/Makefile.com b/usr/src/lib/libc/i386/Makefile.com +index f229dec61d..e8b72336f3 100644 +--- a/usr/src/lib/libc/i386/Makefile.com ++++ b/usr/src/lib/libc/i386/Makefile.com +@@ -898,6 +898,7 @@ PORTSYS= \ + fcntl.o \ + getpagesizes.o \ + getpeerucred.o \ ++ inotify.o \ + inst_sync.o \ + issetugid.o \ + label.o \ +diff --git a/usr/src/lib/libc/port/mapfile-vers b/usr/src/lib/libc/port/mapfile-vers +index ecbb16fe85..70d37d04be 100644 +--- a/usr/src/lib/libc/port/mapfile-vers ++++ b/usr/src/lib/libc/port/mapfile-vers +@@ -2858,6 +2858,10 @@ $endif + __idmap_unreg; + __init_daemon_priv; + __init_suid_priv; ++ inotify_init; ++ inotify_init1; ++ inotify_add_watch; ++ inotify_rm_watch; + _insert; + inst_sync; + _iswctype; +diff --git a/usr/src/lib/libc/port/sys/inotify.c b/usr/src/lib/libc/port/sys/inotify.c +new file mode 100644 +index 0000000000..6bfe988735 +--- /dev/null ++++ b/usr/src/lib/libc/port/sys/inotify.c +@@ -0,0 +1,142 @@ ++/* ++ * This file and its contents are supplied under the terms of the ++ * Common Development and Distribution License ("CDDL"), version 1.0. ++ * You may only use this file in accordance with the terms of version ++ * 1.0 of the CDDL. ++ * ++ * A full copy of the text of the CDDL should have accompanied this ++ * source. A copy of the CDDL is also available via the Internet at ++ * http://www.illumos.org/license/CDDL. ++ */ ++ ++/* ++ * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
++ */ ++ ++#include <sys/inotify.h> ++#include <sys/stat.h> ++#include <unistd.h> ++#include <errno.h> ++#include <fcntl.h> ++#include <strings.h> ++#include <dirent.h> ++ ++int ++inotify_init() ++{ ++ return (open("/dev/inotify", O_RDWR)); ++} ++ ++int ++inotify_init1(int flags) ++{ ++ int oflags = O_RDWR; ++ ++ if (flags & IN_NONBLOCK) ++ oflags |= O_NONBLOCK; ++ ++ if (flags & IN_CLOEXEC) ++ oflags |= O_CLOEXEC; ++ ++ return (open("/dev/inotify", oflags)); ++} ++ ++int ++inotify_add_watch(int fd, const char *pathname, uint32_t mask) ++{ ++ inotify_addwatch_t ioc; ++ inotify_addchild_t cioc; ++ struct stat buf; ++ int dirfd, wd; ++ DIR *dir; ++ struct dirent *dp; ++ int oflags = O_RDONLY; ++ ++ if (mask & IN_DONT_FOLLOW) ++ oflags |= O_NOFOLLOW; ++ ++ if ((dirfd = open(pathname, oflags)) < 0) ++ return (-1); ++ ++ if (fstat(dirfd, &buf) != 0) { ++ (void) close(dirfd); ++ return (-1); ++ } ++ ++ if ((mask & IN_ONLYDIR) && !(buf.st_mode & S_IFDIR)) { ++ (void) close(dirfd); ++ errno = ENOTDIR; ++ return (-1); ++ } ++ ++ bzero(&ioc, sizeof (ioc)); ++ ioc.inaw_fd = dirfd; ++ ioc.inaw_mask = mask; ++ ++ if ((wd = ioctl(fd, INOTIFYIOC_ADD_WATCH, &ioc)) < 0) { ++ (void) close(dirfd); ++ return (-1); ++ } ++ ++ if (!(buf.st_mode & S_IFDIR) || !(mask & IN_CHILD_EVENTS)) { ++ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd); ++ (void) close(dirfd); ++ return (wd); ++ } ++ ++ /* ++ * If we have a directory and we have a mask that denotes child events, ++ * we need to manually add a child watch to every directory entry. ++ * (Because our watch is in place, it will automatically be added to ++ * files that are newly created after this point.) 
++ */ ++ if ((dir = fdopendir(dirfd)) == NULL) { ++ (void) inotify_rm_watch(fd, wd); ++ (void) close(dirfd); ++ return (-1); ++ } ++ ++ bzero(&cioc, sizeof (cioc)); ++ cioc.inac_fd = dirfd; ++ ++ while ((dp = readdir(dir)) != NULL) { ++ if (strcmp(dp->d_name, ".") == 0) ++ continue; ++ ++ if (strcmp(dp->d_name, "..") == 0) ++ continue; ++ ++ cioc.inac_name = dp->d_name; ++ ++ if (ioctl(fd, INOTIFYIOC_ADD_CHILD, &cioc) != 0) { ++ /* ++ * If we get an error that indicates clear internal ++ * malfunctioning, we propagate the error. Otherwise ++ * we eat it: this could be a file that no longer ++ * exists or a symlink or something else that we ++ * can't lookup. ++ */ ++ switch (errno) { ++ case ENXIO: ++ case EFAULT: ++ case EBADF: ++ (void) closedir(dir); ++ inotify_rm_watch(fd, wd); ++ return (-1); ++ default: ++ break; ++ } ++ } ++ } ++ ++ (void) closedir(dir); ++ (void) ioctl(fd, INOTIFYIOC_ACTIVATE, wd); ++ ++ return (wd); ++} ++ ++int ++inotify_rm_watch(int fd, int wd) ++{ ++ return (ioctl(fd, INOTIFYIOC_RM_WATCH, wd)); ++} +diff --git a/usr/src/lib/libc/sparc/Makefile.com b/usr/src/lib/libc/sparc/Makefile.com +index b6f50d3263..4c5bbcfc96 100644 +--- a/usr/src/lib/libc/sparc/Makefile.com ++++ b/usr/src/lib/libc/sparc/Makefile.com +@@ -933,6 +933,7 @@ PORTSYS= \ + fcntl.o \ + getpagesizes.o \ + getpeerucred.o \ ++ inotify.o \ + inst_sync.o \ + issetugid.o \ + label.o \ +diff --git a/usr/src/man/man3c/Makefile b/usr/src/man/man3c/Makefile +index 8c9186bc0a..a2e6e99cbc 100644 +--- a/usr/src/man/man3c/Makefile ++++ b/usr/src/man/man3c/Makefile +@@ -221,6 +221,9 @@ MANFILES= __fbufsize.3c \ + index.3c \ + inet.3c \ + initgroups.3c \ ++ inotify_init.3c \ ++ inotify_add_watch.3c \ ++ inotify_rm_watch.3c \ + insque.3c \ + is_system_labeled.3c \ + isaexec.3c \ +diff --git a/usr/src/man/man3c/inotify_add_watch.3c b/usr/src/man/man3c/inotify_add_watch.3c +new file mode 100644 +index 0000000000..4f79e03c82 +--- /dev/null ++++ b/usr/src/man/man3c/inotify_add_watch.3c +@@ 
-0,0 +1,120 @@ ++'\" te ++.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. ++.\" This file and its contents are supplied under the terms of the ++.\" Common Development and Distribution License ("CDDL"), version 1.0. ++.\" You may only use this file in accordance with the terms of version ++.\" 1.0 of the CDDL. ++.\" ++.\" A full copy of the text of the CDDL should have accompanied this ++.\" source. A copy of the CDDL is also available via the Internet at ++.\" http://www.illumos.org/license/CDDL. ++.TH INOTIFY_ADD_WATCH 3C "Sep 17, 2014" ++.SH NAME ++inotify_add_watch \- add a watch to an inotify instance ++.SH SYNOPSIS ++ ++.LP ++.nf ++#include <sys/inotify.h> ++ ++\fBint\fR \fBinotify_add_watch\fR(\fBint\fR \fIfd\fR, \fBconst char *\fR\fIpathname\fR, \fBuint32_t\fR \fImask\fR); ++.fi ++ ++.SH DESCRIPTION ++.sp ++.LP ++The \fBinotify_add_watch()\fR function adds a watch for the file or ++directory specified by \fIpathname\fR to the inotify instance ++specified by \fIfd\fR for the events specified by \fImask\fR. See ++\fBinotify\fR(5) for details on the meaning of \fImask\fR, how ++it affects the interpretation of \fIpathname\fR, and how ++events for the watched file or directory are subsequently ++retrieved via \fBread\fR(2). ++ ++.SH RETURN VALUES ++.sp ++.LP ++Upon successful completion, \fBinotify_add_watch()\fR returns the ++watch descriptor associated with the new watch. ++If an error occurs, -1 is returned and errno is set to indicate ++the error. ++ ++.SH ERRORS ++.sp ++.LP ++\fBinotify_add_watch()\fR will fail if: ++.sp ++.ne 2 ++.na ++\fB\fBEACCES\fR\fR ++.ad ++.RS 10n ++\fIpathname\fR could not be opened for reading. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBEBADF\fR\fR ++.ad ++.RS 10n ++The \fIfd\fR argument is not a valid open file descriptor. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBEFAULT\fR\fR ++.ad ++.RS 10n ++The memory associated with \fIpathname\fR was not mapped.
++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBEINVAL\fR\fR ++.ad ++.RS 10n ++The \fIfd\fR argument does not correspond to an ++\fBinotify\fR(5) instance as initialized with ++\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C). ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBENOSPC\fR\fR ++.ad ++.RS 10n ++The number of watches on the specified instance would exceed the ++maximum number of watches per \fBinotify\fR(5) instance. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBENOTDIR\fR\fR ++.ad ++.RS 10n ++\fIpathname\fR does not correspond to a directory and ++\fBIN_ONLYDIR\fR was specified in \fImask\fR. ++.RE ++ ++.sp ++.SH NOTES ++.sp ++.LP ++ ++While the \fBinotify\fR(5) facility is implemented for purposes of ++offering compatibility for Linux-borne applications, native ++applications may opt to use it instead of (or in addition to) the ++\fBPORT_SOURCE_FILE\fR capability of event ports. See ++\fBinotify\fR(5) for details and restrictions. ++ ++.SH SEE ALSO ++.sp ++.LP ++\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C), ++\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C), ++\fBinotify\fR(5) +diff --git a/usr/src/man/man3c/inotify_init.3c b/usr/src/man/man3c/inotify_init.3c +new file mode 100644 +index 0000000000..a091df2c26 +--- /dev/null ++++ b/usr/src/man/man3c/inotify_init.3c +@@ -0,0 +1,107 @@ ++'\" te ++.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. ++.\" This file and its contents are supplied under the terms of the ++.\" Common Development and Distribution License ("CDDL"), version 1.0. ++.\" You may only use this file in accordance with the terms of version ++.\" 1.0 of the CDDL. ++.\" ++.\" A full copy of the text of the CDDL should have accompanied this ++.\" source. A copy of the CDDL is also available via the Internet at ++.\" http://www.illumos.org/license/CDDL. 
++.TH INOTIFY_INIT 3C "Sep 17, 2014" ++.SH NAME ++inotify_init, inotify_init1 \- initialize an inotify instance ++.SH SYNOPSIS ++ ++.LP ++.nf ++#include <sys/inotify.h> ++ ++\fBint\fR \fBinotify_init\fR(\fBvoid\fR); ++.fi ++ ++.LP ++.nf ++\fBint\fR \fBinotify_init1\fR(\fBint\fR \fIflags\fR); ++.fi ++ ++.SH DESCRIPTION ++.sp ++.LP ++The \fBinotify_init()\fR and \fBinotify_init1()\fR functions both create an ++\fBinotify\fR(5) instance that can be operated upon via ++\fBinotify_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C) and \fBread\fR(2). ++\fBinotify\fR instances are ++represented as file descriptors, and should be closed via \fBclose\fR(2). ++ ++The only difference between the two functions is their signature; ++\fBinotify_init()\fR takes no arguments, ++while \fBinotify_init1()\fR takes a \fIflags\fR argument that can have ++any of the following values: ++ ++.sp ++.ne 2 ++.na ++\fBIN_CLOEXEC\fR ++.ad ++.RS 12n ++Instance should be closed upon an ++\fBexec\fR(2); see \fBopen\fR(2)'s description of \fBO_CLOEXEC\fR. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_NONBLOCK\fR ++.ad ++.RS 12n ++Instance should be set to be non-blocking. A \fBread\fR(2) on an ++\fBinotify\fR instance that has been initialized with ++\fBIN_NONBLOCK\fR will return \fBEAGAIN\fR if there are ++no events enqueued in lieu of blocking. ++.RE ++ ++.SH RETURN VALUES ++.sp ++.LP ++Upon successful completion, a file descriptor for the new \fBinotify\fR instance ++is returned. Otherwise, -1 is returned and errno is set to indicate the error. ++.SH ERRORS ++.sp ++.LP ++The \fBinotify_init()\fR and \fBinotify_init1()\fR functions will fail if: ++.sp ++.ne 2 ++.na ++\fB\fBEINVAL\fR\fR ++.ad ++.RS 10n ++The \fIflags\fR are invalid (\fBinotify_init1()\fR). ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBEMFILE\fR\fR ++.ad ++.RS 10n ++There are currently {\fBOPEN_MAX\fR} file descriptors open in the calling ++process, or the maximum number of \fBinotify\fR instances for the user ++would be exceeded.
++.RE ++ ++.sp ++.SH NOTES ++.sp ++.LP ++ ++While the \fBinotify\fR(5) facility is implemented for purposes of ++offering compatibility for Linux-borne applications, native ++applications may opt to use it instead of (or in addition to) the ++\fBPORT_SOURCE_FILE\fR capability of event ports. See ++\fBinotify\fR(5) for details and restrictions. ++ ++.SH SEE ALSO ++.sp ++.LP ++\fBinotify_add_watch\fR(3C), \fBinotify_rm_watch\fR(3C), \fBinotify\fR(5) +diff --git a/usr/src/man/man3c/inotify_rm_watch.3c b/usr/src/man/man3c/inotify_rm_watch.3c +new file mode 100644 +index 0000000000..de568f8e24 +--- /dev/null ++++ b/usr/src/man/man3c/inotify_rm_watch.3c +@@ -0,0 +1,81 @@ ++'\" te ++.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. ++.\" This file and its contents are supplied under the terms of the ++.\" Common Development and Distribution License ("CDDL"), version 1.0. ++.\" You may only use this file in accordance with the terms of version ++.\" 1.0 of the CDDL. ++.\" ++.\" A full copy of the text of the CDDL should have accompanied this ++.\" source. A copy of the CDDL is also available via the Internet at ++.\" http://www.illumos.org/license/CDDL. ++.TH INOTIFY_RM_WATCH 3C "Sep 17, 2014" ++.SH NAME ++inotify_rm_watch \- remove a watch from an inotify instance ++.SH SYNOPSIS ++ ++.LP ++.nf ++#include <sys/inotify.h> ++ ++\fBint\fR \fBinotify_rm_watch\fR(\fBint\fR \fIfd\fR, \fBint\fR \fIwd\fR); ++.fi ++ ++.SH DESCRIPTION ++.sp ++.LP ++The \fBinotify_rm_watch()\fR function removes the watch specified ++by \fIwd\fR from the inotify instance associated with \fIfd\fR. ++Removing a watch will induce an \fBIN_IGNORED\fR event; see ++\fBinotify\fR(5) for details. ++ ++.SH RETURN VALUES ++.sp ++.LP ++Upon successful completion, \fBinotify_rm_watch()\fR removes the ++specified watch and returns 0. ++If an error occurs, -1 is returned and errno is set to indicate ++the error.
++ ++.SH ERRORS ++.sp ++.LP ++\fBinotify_rm_watch()\fR will fail if: ++.sp ++.ne 2 ++.na ++\fB\fBEBADF\fR\fR ++.ad ++.RS 10n ++The \fIfd\fR argument is not a valid open file descriptor. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fB\fBEINVAL\fR\fR ++.ad ++.RS 10n ++The \fIfd\fR argument does not correspond to an ++\fBinotify\fR(5) instance as initialized with ++\fBinotify_init\fR(3C) or \fBinotify_init1\fR(3C), or ++\fIwd\fR is not a valid watch for the specified inotify ++instance. ++.RE ++ ++.sp ++.SH NOTES ++.sp ++.LP ++ ++While the \fBinotify\fR(5) facility is implemented for purposes of ++offering compatibility for Linux-borne applications, native ++applications may opt to use it instead of (or in addition to) the ++\fBPORT_SOURCE_FILE\fR capability of event ports. See ++\fBinotify\fR(5) for details and restrictions. ++ ++.SH SEE ALSO ++.sp ++.LP ++\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C), ++\fBport_create\fR(3C), \fBport_associate\fR(3C), \fBport_get\fR(3C), ++\fBinotify\fR(5) +diff --git a/usr/src/man/man5/Makefile b/usr/src/man/man5/Makefile +index 624e0aeb48..a8015a3dd3 100644 +--- a/usr/src/man/man5/Makefile ++++ b/usr/src/man/man5/Makefile +@@ -89,6 +89,7 @@ MANFILES= Intro.5 \ + ib.5 \ + ike.config.5 \ + ike.preshared.5 \ ++ inotify.5 \ + ipf.5 \ + ipmon.5 \ + ipnat.5 \ +diff --git a/usr/src/man/man5/inotify.5 b/usr/src/man/man5/inotify.5 +new file mode 100644 +index 0000000000..810e889d74 +--- /dev/null ++++ b/usr/src/man/man5/inotify.5 +@@ -0,0 +1,305 @@ ++'\" te ++.\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved. ++.\" This file and its contents are supplied under the terms of the ++.\" Common Development and Distribution License ("CDDL"), version 1.0. ++.\" You may only use this file in accordance with the terms of version ++.\" 1.0 of the CDDL. ++.\" ++.\" A full copy of the text of the CDDL should have accompanied this ++.\" source. A copy of the CDDL is also available via the Internet at ++.\" http://www.illumos.org/license/CDDL. 
++.TH INOTIFY 5 "Sep 17, 2014" ++.SH NAME ++inotify \- Linux-compatible file event notification facility ++.SH SYNOPSIS ++ ++.LP ++.nf ++#include <sys/inotify.h> ++.fi ++ ++.SH DESCRIPTION ++.sp ++.LP ++ ++\fBinotify\fR is a facility for receiving file system events on specified ++files or directories. When monitoring a directory, \fBinotify\fR can be ++used to retrieve events not only on the directory, but also on any files ++that the directory contains. \fBinotify\fR originated with Linux, and ++this facility is designed to be binary-compatible with the Linux facility, ++including the following interfaces: ++ ++.RS +4 ++.TP ++.ie t \(bu ++.el o ++\fBinotify_init\fR(3C) creates an \fBinotify\fR instance, returning a file ++descriptor associated with the in-kernel event queue. ++.RE ++.RS +4 ++.TP ++.ie t \(bu ++.el o ++\fBinotify_init1\fR(3C) also creates an \fBinotify\fR instance, but allows ++for a flags argument that controls some attributes of the returned file ++descriptor. ++.RE ++.RS +4 ++.TP ++.ie t \(bu ++.el o ++\fBinotify_add_watch\fR(3C) allows a watch of a particular file or directory ++to be added to a watch list associated with the specified \fBinotify\fR ++instance. \fBinotify_add_watch\fR(3C) returns a watch descriptor that will ++be reflected in the \fIwd\fR member of the \fIinotify_event\fR structure ++returned via a \fBread\fR(2) of the instance. ++.RE ++.RS +4 ++.TP ++.ie t \(bu ++.el o ++\fBinotify_rm_watch\fR(3C) removes the watch that corresponds to the specified ++watch descriptor. ++.RE ++ ++When all file descriptors referring to a particular \fBinotify\fR instance ++are closed, the instance and all watches associated with that instance are ++freed. ++ ++To consume events on an \fBinotify\fR instance, an application should ++issue a \fBread\fR(2) to the instance. 
If no events are available ++(and the \fBinotify\fR instance has not been explicitly made non-blocking ++via \fBinotify_init1\fR(3C)) the \fBread\fR(2) will block until a ++watched event occurs. If and when events are available, \fBread\fR(2) will ++return an array of the following structures: ++ ++.sp ++.in +2 ++.nf ++struct inotify_event { ++ int wd; /* watch descriptor */ ++ uint32_t mask; /* mask of event */ ++ uint32_t cookie; /* cookie for associating renames */ ++ uint32_t len; /* size of name field */ ++ char name[]; /* optional name */ ++}; ++.fi ++.in -2 ++ ++\fIwd\fR contains the watch descriptor that corresponds to the event, ++as returned by \fBinotify_add_watch\fR(3C). ++ ++\fImask\fR is a bitwise \fBOR\fR of event masks (see below) that ++describes the event. ++ ++\fIcookie\fR is an opaque value that can be used to associate different ++events into a single logical event. In particular, it allows consumers to ++associate \fBIN_MOVED_FROM\fR events with subsequent \fBIN_MOVED_TO\fR ++events. ++ ++\fIlen\fR denotes the length of the \fIname\fR field, including any padding ++required for trailing null bytes and alignment. The size of the entire ++event is therefore the size of the \fIinotify_event\fR structure plus the ++value of \fIlen\fR. ++ ++\fIname\fR contains the name of the file associated with the event, if any. ++This field is only present when the watched entity is a directory and ++the event corresponds to a file that was contained by the watched directory ++(though see \fBNOTES\fR and \fBWARNINGS\fR for details and limitations). ++When present, \fIname\fR is null terminated, and may contain additional ++zero bytes ++to pad for alignment. (The length of this field -- including any bytes ++for alignment -- is denoted by the \fIlen\fR field.) ++ ++.SS "Events" ++ ++The events that can be generated on a watched entity are as follows: ++ ++.sp ++.in +2 ++.TS ++c c ++l l . 
++\fIEvent\fR \fIDescription\fR ++\fBIN_ACCESS\fR File/directory was accessed ++\fBIN_ATTRIB\fR File/directory attributes were changed ++\fBIN_CLOSE_WRITE\fR File/directory opened for writing was closed ++\fBIN_CLOSE_NOWRITE\fR File/directory not opened for writing was closed ++\fBIN_CREATE\fR File/directory created in watched directory ++\fBIN_DELETE\fR File/directory deleted from watched directory ++\fBIN_DELETE_SELF\fR Watched file/directory was deleted ++\fBIN_MODIFY\fR File/directory was modified ++\fBIN_MOVE_SELF\fR Watched file/directory was moved ++\fBIN_MOVED_FROM\fR File was renamed from entity in watched directory ++\fBIN_MOVED_TO\fR File was renamed to entity in watched directory ++\fBIN_OPEN\fR File/directory was opened ++.TE ++.in -2 ++ ++Of these, all events except \fBIN_MOVE_SELF\fR and \fBIN_DELETE_SELF\fR ++can refer to either the watched entity or (if the watched entity ++is a directory) a file or directory contained by the watched directory. ++(See \fBNOTES\fR and \fBWARNINGS\fR below for details on this ++mechanism and its limitations.) ++If the event corresponds to a contained entity, ++\fIname\fR will be set to the name of the affected ++entity. ++ ++In addition to specifying events of interest, watched events may ++be modified by potentially setting any of the following when adding a ++watch via \fBinotify_add_watch\fR(3C): ++ ++.sp ++.ne 2 ++.na ++\fBIN_DONT_FOLLOW\fR ++.ad ++.RS 12n ++Don't follow the specified pathname if it is a symbolic link. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_EXCL_UNLINK\fR ++.ad ++.RS 12n ++If watching a directory and a contained entity becomes unlinked, cease ++generating events for that entity. (By default, contained entities will ++continue to generate events on their former parent directory.)
++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_MASK_ADD\fR ++.ad ++.RS 12n ++If the specified pathname is already being watched, the specified events ++will be added to the watched events instead of the default behavior of ++replacing them. (If one ++may forgive the editorializing, this particular interface gewgaw ++seems entirely superfluous, and a canonical example of ++feasibility trumping wisdom.) ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_ONESHOT\fR ++.ad ++.RS 12n ++Once an event has been generated for the watched entity, remove the ++watch from the watch list as if \fBinotify_rm_watch\fR(3C) had been called ++on it (thereby inducing an \fBIN_IGNORED\fR event). ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_ONLYDIR\fR ++.ad ++.RS 12n ++Only watch the specified pathname if it is a directory. ++.RE ++ ++In addition to the specified events, the following bits may be specified ++in the \fImask\fR field as returned from \fBread\fR(2): ++ ++.sp ++.ne 2 ++.na ++\fBIN_IGNORED\fR ++.ad ++.RS 12n ++A watch was removed explicitly (i.e, via \fBinotify_rm_watch\fR(3C)) or ++implicitly (e.g., because \fBIN_ONESHOT\fR was set or because the watched ++entity was deleted). ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_ISDIR\fR ++.ad ++.RS 12n ++The entity inducing the event is a directory. ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_Q_OVERFLOW\fR ++.ad ++.RS 12n ++The event queue exceeded the maximum event queue length per instance. ++(By default, this is 16384, but it can be tuned by setting ++\fBinotify_maxevents\fR via \fB/etc/system\fR.) ++.RE ++ ++.sp ++.ne 2 ++.na ++\fBIN_UNMOUNT\fR ++.ad ++.RS 12n ++The filesystem containing the watched entity was unmounted. ++.RE ++ ++.sp ++.SH NOTES ++.sp ++.LP ++ ++\fBinotify\fR instances can be monitored via \fBpoll\fR(2), ++\fBport_get\fR(3C), \fBepoll\fR(5), etc. ++ ++The event queue associated with an \fBinotify\fR instance is serialized ++and ordered: events will be placed on the tail of the queue in the order ++that they occur. 
++ ++If at the time an event occurs the tail of the event queue is identical ++to the newly received event, the newly received event will be dropped, ++effectively coalescing the two events. ++ ++When watching a directory and receiving events on contained elements ++(i.e., a contained file or subdirectory), note that the information ++received in the \fIname\fR field may be stale: the file may have been ++renamed between the event and its processing. If a file has been unlinked ++(and if \fBIN_EXCL_UNLINK\fR has not been set), ++the \fIname\fR will reflect the last name that resolved to the file. ++If a new file is created in the same directory with the old name, events ++on the new file and the old (unlinked) file will become indistinguishable. ++ ++The number of bytes that are available to be read on an \fBinotify\fR ++instance can be determined via a \fBFIONREAD\fR \fBioctl\fR(2). ++ ++.sp ++.SH WARNINGS ++.sp ++.LP ++ ++While a best effort has been made to mimic the Linux semantics, there ++remains a fundamental difference with respect to hard links: on Linux, ++if a file has multiple hard links to it, a notification on a watched ++directory or file will be received if and only if that event was received ++via a watched directory or file. For events that are induced by open files ++(such as \fBIN_MODIFY\fR), these semantics seem peculiar: the watched ++file is in fact changing, but because it is not changing via the watched ++path, no notification is received. By contrast, the implementation here ++will always yield an event in this case -- even if the event was induced ++by an \fBopen\fR(2) via an unwatched path. If an event occurs within a ++watched directory on a file for which there exist multiple hard links within ++the same (watched) directory, the event's \fIname\fR will correspond to one ++of the links to the file.
If multiple hard links exist to the ++same file in the same watched directory and one of the links is removed, ++notifications may not necessarily continue to be received for the file, ++despite the (remaining) link in the watched directory; users of ++\fBinotify\fR should exercise extreme caution when watching directories ++that contain files with multiple hard links in the same directory. ++ ++.SH SEE ALSO ++.sp ++.LP ++\fBinotify_init\fR(3C), \fBinotify_init1\fR(3C), \fBinotify_add_watch\fR(3C), ++\fBinotify_rm_watch\fR(3C), \fBport_get\fR(3C), \fBepoll\fR(5) +diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files +index ad37a2d6b5..b4ea210cac 100644 +--- a/usr/src/uts/common/Makefile.files ++++ b/usr/src/uts/common/Makefile.files +@@ -1007,6 +1007,8 @@ DEVPOOL_OBJS += devpool.o + + I8042_OBJS += i8042.o + ++INOTIFY_OBJS += inotify.o ++ + KB8042_OBJS += \ + at_keyprocess.o \ + kb8042.o \ +diff --git a/usr/src/uts/common/fs/nfs/nfs3_vnops.c b/usr/src/uts/common/fs/nfs/nfs3_vnops.c +index 291e5cd337..450cc22683 100644 +--- a/usr/src/uts/common/fs/nfs/nfs3_vnops.c ++++ b/usr/src/uts/common/fs/nfs/nfs3_vnops.c +@@ -3352,10 +3352,9 @@ nfs3rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, + if (nvp) + vnevent_rename_dest(nvp, ndvp, nnm, ct); + +- if (odvp != ndvp) +- vnevent_rename_dest_dir(ndvp, ct); + ASSERT(ovp != NULL); + vnevent_rename_src(ovp, odvp, onm, ct); ++ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct); + } + + if (nvp) { +diff --git a/usr/src/uts/common/fs/nfs/nfs4_vnops.c b/usr/src/uts/common/fs/nfs/nfs4_vnops.c +index b9ba9a6ead..31d922e4be 100644 +--- a/usr/src/uts/common/fs/nfs/nfs4_vnops.c ++++ b/usr/src/uts/common/fs/nfs/nfs4_vnops.c +@@ -8059,8 +8059,9 @@ link_call: + * vnode if it already existed. + */ + if (error == 0) { +- vnode_t *tvp; ++ vnode_t *tvp, *tovp; + rnode4_t *trp; ++ + /* + * Notify the vnode. Each links is represented by + * a different vnode, in nfsv4. 
+@@ -8073,23 +8074,20 @@ link_call: + vnevent_rename_dest(tvp, ndvp, nnm, ct); + } + +- /* +- * if the source and destination directory are not the +- * same notify the destination directory. +- */ +- if (VTOR4(odvp) != VTOR4(ndvp)) { +- trp = VTOR4(ndvp); +- tvp = ndvp; +- if (IS_SHADOW(ndvp, trp)) +- tvp = RTOV4(trp); +- vnevent_rename_dest_dir(tvp, ct); +- } +- + trp = VTOR4(ovp); +- tvp = ovp; ++ tovp = ovp; + if (IS_SHADOW(ovp, trp)) +- tvp = RTOV4(trp); ++ tovp = RTOV4(trp); ++ + vnevent_rename_src(tvp, odvp, onm, ct); ++ ++ trp = VTOR4(ndvp); ++ tvp = ndvp; ++ ++ if (IS_SHADOW(ndvp, trp)) ++ tvp = RTOV4(trp); ++ ++ vnevent_rename_dest_dir(tvp, tovp, nnm, ct); + } + + if (nvp) { +diff --git a/usr/src/uts/common/fs/nfs/nfs_vnops.c b/usr/src/uts/common/fs/nfs/nfs_vnops.c +index 4ac6450381..77d3541208 100644 +--- a/usr/src/uts/common/fs/nfs/nfs_vnops.c ++++ b/usr/src/uts/common/fs/nfs/nfs_vnops.c +@@ -2687,11 +2687,9 @@ nfsrename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, cred_t *cr, + if (nvp) + vnevent_rename_dest(nvp, ndvp, nnm, ct); + +- if (odvp != ndvp) +- vnevent_rename_dest_dir(ndvp, ct); +- + ASSERT(ovp != NULL); + vnevent_rename_src(ovp, odvp, onm, ct); ++ vnevent_rename_dest_dir(ndvp, ovp, nnm, ct); + } + + if (nvp) { +diff --git a/usr/src/uts/common/fs/pcfs/pc_dir.c b/usr/src/uts/common/fs/pcfs/pc_dir.c +index a9ee604b7c..21a0b1a4bd 100644 +--- a/usr/src/uts/common/fs/pcfs/pc_dir.c ++++ b/usr/src/uts/common/fs/pcfs/pc_dir.c +@@ -24,6 +24,10 @@ + * Use is subject to license terms. + */ + ++/* ++ * Copyright (c) 2014, Joyent, Inc. All rights reserved. ++ */ ++ + #include <sys/param.h> + #include <sys/errno.h> + #include <sys/systm.h> +diff --git a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c +index 624ea30b7f..c404433edd 100644 +--- a/usr/src/uts/common/fs/tmpfs/tmp_vnops.c ++++ b/usr/src/uts/common/fs/tmpfs/tmp_vnops.c +@@ -1307,6 +1307,5 @@ tmp_rename( + * vnevent_rename_dest is called in tdirenter(). 
+ * Notify the target dir if not same as source dir. + */ +- if (ndvp != odvp) +- vnevent_rename_dest_dir(ndvp, ct); ++ vnevent_rename_dest_dir(ndvp, TNTOV(fromtp), nnm, ct); + } + + done: +diff --git a/usr/src/uts/common/fs/udfs/udf_dir.c b/usr/src/uts/common/fs/udfs/udf_dir.c +index c1e2c74a87..def046a0bf 100644 +--- a/usr/src/uts/common/fs/udfs/udf_dir.c ++++ b/usr/src/uts/common/fs/udfs/udf_dir.c +@@ -562,9 +563,8 @@ out: + namep, ctp); + } + +- if (sdp != tdp) { +- vnevent_rename_dest_dir(ITOV(tdp), ctp); +- } ++ vnevent_rename_dest_dir(ITOV(tdp), ITOV(tip), ++ namep, ctp); + } + + /* +diff --git a/usr/src/uts/common/fs/ufs/ufs_vnops.c b/usr/src/uts/common/fs/ufs/ufs_vnops.c +index 3fcfda1ab6..c77872b11d 100644 +--- a/usr/src/uts/common/fs/ufs/ufs_vnops.c ++++ b/usr/src/uts/common/fs/ufs/ufs_vnops.c +@@ -3705,12 +3705,7 @@ retry_firstlock: + error = 0; + + if (error == 0) { + vnevent_rename_src(ITOV(sip), sdvp, snm, ct); + /* + * Notify the target directory of the rename event + * if source and target directories are not the same. + */ +- if (sdvp != tdvp) +- vnevent_rename_dest_dir(tdvp, ct); ++ vnevent_rename_dest_dir(tdvp, ITOV(sip), tnm, ct); + + errout: + if (slot.fbp) +diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c +index 846c343a4f..561fb1bd02 100644 +--- a/usr/src/uts/common/fs/vnode.c ++++ b/usr/src/uts/common/fs/vnode.c +@@ -21,7 +21,7 @@ + + /* + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2013, Joyent, Inc. All rights reserved. ++ * Copyright (c) 2014, Joyent, Inc. All rights reserved. 
+ */ + + /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +@@ -2522,6 +2522,7 @@ vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct) + if (vp == NULL || vp->v_femhead == NULL) { + return; + } ++ (void) VOP_VNEVENT(dvp, VE_RENAME_SRC_DIR, vp, name, ct); + (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct); + } + +@@ -2536,12 +2537,13 @@ vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name, + } + + void +-vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct) ++vnevent_rename_dest_dir(vnode_t *vp, vnode_t *nvp, char *name, ++ caller_context_t *ct) + { + if (vp == NULL || vp->v_femhead == NULL) { + return; + } +- (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct); ++ (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, nvp, name, ct); + } + + void +diff --git a/usr/src/uts/common/fs/zfs/zfs_vnops.c b/usr/src/uts/common/fs/zfs/zfs_vnops.c +index f74bd50bbc..6f661d6cf8 100644 +--- a/usr/src/uts/common/fs/zfs/zfs_vnops.c ++++ b/usr/src/uts/common/fs/zfs/zfs_vnops.c +@@ -22,7 +22,7 @@ + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2014 by Delphix. All rights reserved. + * Copyright 2014 Nexenta Systems, Inc. All rights reserved. +- * Copyright 2013 Joyent, Inc. All rights reserved. ++ * Copyright 2014 Joyent, Inc. All rights reserved. 
+ */ + + /* Portions Copyright 2007 Jeremy Teo */ +@@ -3698,9 +3698,7 @@ top: + + if (error == 0) { + vnevent_rename_src(ZTOV(szp), sdvp, snm, ct); +- /* notify the target dir if it is not the same as source dir */ +- if (tdvp != sdvp) +- vnevent_rename_dest_dir(tdvp, ct); ++ vnevent_rename_dest_dir(tdvp, ZTOV(szp), tnm, ct); + } + out: + if (zl != NULL) +diff --git a/usr/src/uts/common/io/inotify.c b/usr/src/uts/common/io/inotify.c +new file mode 100644 +index 0000000000..b8dfa1223b +--- /dev/null ++++ b/usr/src/uts/common/io/inotify.c +@@ -0,0 +1,1480 @@ ++/* ++ * This file and its contents are supplied under the terms of the ++ * Common Development and Distribution License ("CDDL"), version 1.0. ++ * You may only use this file in accordance with the terms of version ++ * 1.0 of the CDDL. ++ * ++ * A full copy of the text of the CDDL should have accompanied this ++ * source. A copy of the CDDL is also available via the Internet at ++ * http://www.illumos.org/license/CDDL. ++ */ ++ ++/* ++ * Copyright (c) 2014 Joyent, Inc. All rights reserved. ++ */ ++ ++/* ++ * Support for the inotify facility, a Linux-borne facility for asynchronous ++ * notification of certain events on specified files or directories. Our ++ * implementation broadly leverages the file event monitoring facility, and ++ * would actually be quite straightforward were it not for a very serious ++ * blunder in the inotify interface: in addition to allowing for one to be ++ * notified on events on a particular file or directory, inotify also allows ++ * for one to be notified on certain events on files _within_ a watched ++ * directory -- even though those events have absolutely nothing to do with ++ * the directory itself. This leads to all sorts of madness because file ++ * operations are (of course) not undertaken on paths but rather on open ++ * files -- and the relationships between open files and the paths that resolve ++ * to those files are neither static nor isomorphic. 
We implement this ++ * concept by having _child watches_ when directories are watched with events ++ * in IN_CHILD_EVENTS. We add child watches when a watch on a directory is ++ * first added, and we modify those child watches dynamically as files are ++ * created, deleted, moved into or moved out of the specified directory. This ++ * mechanism works well, absent hard links. Hard links, unfortunately, break ++ * this rather badly, and the user is warned that watches on directories that ++ * have multiple directory entries referring to the same file may behave ++ * unexpectedly. ++ */ ++ ++#include <sys/ddi.h> ++#include <sys/sunddi.h> ++#include <sys/inotify.h> ++#include <sys/fem.h> ++#include <sys/conf.h> ++#include <sys/stat.h> ++#include <sys/vfs_opreg.h> ++#include <sys/vmem.h> ++#include <sys/avl.h> ++#include <sys/sysmacros.h> ++#include <sys/cyclic.h> ++#include <sys/filio.h> ++ ++struct inotify_state; ++struct inotify_kevent; ++ ++typedef struct inotify_watch inotify_watch_t; ++typedef struct inotify_state inotify_state_t; ++typedef struct inotify_kevent inotify_kevent_t; ++ ++struct inotify_watch { ++ kmutex_t inw_lock; /* lock protecting ref count */ ++ int inw_refcnt; /* reference count */ ++ uint8_t inw_zombie:1; /* boolean: is zombie */ ++ uint8_t inw_fired:1; /* boolean: fired one-shot */ ++ uint8_t inw_active:1; /* boolean: watch is active */ ++ uint8_t inw_orphaned:1; /* boolean: orphaned */ ++ kcondvar_t inw_cv; /* condvar for zombifier */ ++ uint32_t inw_mask; /* mask of watch */ ++ int32_t inw_wd; /* watch descriptor */ ++ vnode_t *inw_vp; /* underlying vnode */ ++ inotify_watch_t *inw_parent; /* parent, if a child */ ++ avl_node_t inw_byvp; /* watches by vnode */ ++ avl_node_t inw_bywd; /* watches by descriptor */ ++ avl_tree_t inw_children; /* children, if a parent */ ++ char *inw_name; /* name, if a child */ ++ list_node_t inw_orphan; /* orphan list */ ++ inotify_state_t *inw_state; /* corresponding state */ ++}; ++ ++struct inotify_kevent { 
++ inotify_kevent_t *ine_next; /* next event in queue */ ++ struct inotify_event ine_event; /* event (variable size) */ ++}; ++ ++#define INOTIFY_EVENT_LENGTH(ev) \ ++ (sizeof (inotify_kevent_t) + (ev)->ine_event.len) ++ ++struct inotify_state { ++ kmutex_t ins_lock; /* lock protecting state */ ++ avl_tree_t ins_byvp; /* watches by vnode */ ++ avl_tree_t ins_bywd; /* watches by descriptor */ ++ vmem_t *ins_wds; /* watch identifier arena */ ++ int ins_maxwatches; /* maximum number of watches */ ++ int ins_maxevents; /* maximum number of events */ ++ int ins_nevents; /* current # of events */ ++ int32_t ins_size; /* total size of events */ ++ inotify_kevent_t *ins_head; /* head of event queue */ ++ inotify_kevent_t *ins_tail; /* tail of event queue */ ++ pollhead_t ins_pollhd; /* poll head */ ++ kcondvar_t ins_cv; /* condvar for reading */ ++ list_t ins_orphans; /* orphan list */ ++ cyclic_id_t ins_cleaner; /* cyclic for cleaning */ ++ inotify_watch_t *ins_zombies; /* zombie watch list */ ++ cred_t *ins_cred; /* creator's credentials */ ++ inotify_state_t *ins_next; /* next state on global list */ ++}; ++ ++/* ++ * Tunables (exported read-only in lx-branded zones via /proc). ++ */ ++int inotify_maxwatches = 8192; /* max watches per instance */ ++int inotify_maxevents = 16384; /* max events */ ++int inotify_maxinstances = 128; /* max instances per user */ ++ ++/* ++ * Internal global variables. 
++ */ ++static kmutex_t inotify_lock; /* lock protecting state */ ++static dev_info_t *inotify_devi; /* device info */ ++static fem_t *inotify_femp; /* FEM pointer */ ++static vmem_t *inotify_minor; /* minor number arena */ ++static void *inotify_softstate; /* softstate pointer */ ++static inotify_state_t *inotify_state; /* global list if state */ ++ ++static void inotify_watch_event(inotify_watch_t *, uint64_t, char *); ++static void inotify_watch_insert(inotify_watch_t *, vnode_t *, char *); ++static void inotify_watch_delete(inotify_watch_t *, uint32_t); ++static void inotify_watch_remove(inotify_state_t *state, ++ inotify_watch_t *watch); ++ ++static int ++inotify_fop_close(femarg_t *vf, int flag, int count, offset_t offset, ++ cred_t *cr, caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_close(vf, flag, count, offset, cr, ct)) == 0) { ++ inotify_watch_event(watch, flag & FWRITE ? ++ IN_CLOSE_WRITE : IN_CLOSE_NOWRITE, NULL); ++ } ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_create(femarg_t *vf, char *name, vattr_t *vap, vcexcl_t excl, ++ int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, ++ vsecattr_t *vsecp) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_create(vf, name, vap, excl, mode, ++ vpp, cr, flag, ct, vsecp)) == 0) { ++ inotify_watch_insert(watch, *vpp, name); ++ inotify_watch_event(watch, IN_CREATE, name); ++ } ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_link(femarg_t *vf, vnode_t *svp, char *tnm, cred_t *cr, ++ caller_context_t *ct, int flags) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_link(vf, svp, tnm, cr, ct, flags)) == 0) { ++ inotify_watch_insert(watch, svp, tnm); ++ inotify_watch_event(watch, IN_CREATE, tnm); ++ } ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_mkdir(femarg_t *vf, char *name, vattr_t *vap, vnode_t **vpp, ++ 
cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_mkdir(vf, name, vap, vpp, cr, ++ ct, flags, vsecp)) == 0) { ++ inotify_watch_insert(watch, *vpp, name); ++ inotify_watch_event(watch, IN_CREATE | IN_ISDIR, name); ++ } ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_open(femarg_t *vf, int mode, cred_t *cr, caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_open(vf, mode, cr, ct)) == 0) ++ inotify_watch_event(watch, IN_OPEN, NULL); ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_read(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr, ++ caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval = vnext_read(vf, uiop, ioflag, cr, ct); ++ inotify_watch_event(watch, IN_ACCESS, NULL); ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_readdir(femarg_t *vf, uio_t *uiop, cred_t *cr, int *eofp, ++ caller_context_t *ct, int flags) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval = vnext_readdir(vf, uiop, cr, eofp, ct, flags); ++ inotify_watch_event(watch, IN_ACCESS | IN_ISDIR, NULL); ++ ++ return (rval); ++} ++ ++int ++inotify_fop_remove(femarg_t *vf, char *nm, cred_t *cr, caller_context_t *ct, ++ int flags) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_remove(vf, nm, cr, ct, flags)) == 0) ++ inotify_watch_event(watch, IN_DELETE, nm); ++ ++ return (rval); ++} ++ ++int ++inotify_fop_rmdir(femarg_t *vf, char *nm, vnode_t *cdir, cred_t *cr, ++ caller_context_t *ct, int flags) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_rmdir(vf, nm, cdir, cr, ct, flags)) == 0) ++ inotify_watch_event(watch, IN_DELETE | IN_ISDIR, nm); ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_setattr(femarg_t *vf, vattr_t *vap, int flags, 
cred_t *cr, ++ caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval; ++ ++ if ((rval = vnext_setattr(vf, vap, flags, cr, ct)) == 0) ++ inotify_watch_event(watch, IN_ATTRIB, NULL); ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_write(femarg_t *vf, struct uio *uiop, int ioflag, struct cred *cr, ++ caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ int rval = vnext_write(vf, uiop, ioflag, cr, ct); ++ inotify_watch_event(watch, IN_MODIFY, NULL); ++ ++ return (rval); ++} ++ ++static int ++inotify_fop_vnevent(femarg_t *vf, vnevent_t vnevent, vnode_t *dvp, char *name, ++ caller_context_t *ct) ++{ ++ inotify_watch_t *watch = vf->fa_fnode->fn_available; ++ ++ switch (vnevent) { ++ case VE_RENAME_SRC: ++ inotify_watch_event(watch, IN_MOVE_SELF, NULL); ++ inotify_watch_delete(watch, IN_MOVE_SELF); ++ break; ++ case VE_REMOVE: ++ /* ++ * Linux will apparently fire an IN_ATTRIB event when the link ++ * count changes (including when it drops to 0 on a remove). ++ * This is merely somewhat odd; what is amazing is that this ++ * IN_ATTRIB event is not visible on an inotify watch on the ++ * parent directory. (IN_ATTRIB events are normally sent to ++ * watches on the parent directory). While it's hard to ++ * believe that this constitutes desired semantics, ltp ++ * unfortunately tests this case (if implicitly); in the name ++ * of bug-for-bug compatibility, we fire IN_ATTRIB iff we are ++ * explicitly watching the file that has been removed. ++ */ ++ if (watch->inw_parent == NULL) ++ inotify_watch_event(watch, IN_ATTRIB, NULL); ++ ++ /*FALLTHROUGH*/ ++ case VE_RENAME_DEST: ++ inotify_watch_event(watch, IN_DELETE_SELF, NULL); ++ inotify_watch_delete(watch, IN_DELETE_SELF); ++ break; ++ case VE_RMDIR: ++ /* ++ * It seems that IN_ISDIR should really be OR'd in here, but ++ * Linux doesn't seem to do that in this case; for the sake of ++ * bug-for-bug compatibility, we don't do it either. 
++ */ ++ inotify_watch_event(watch, IN_DELETE_SELF, NULL); ++ inotify_watch_delete(watch, IN_DELETE_SELF); ++ break; ++ case VE_CREATE: ++ inotify_watch_event(watch, IN_MODIFY | IN_ATTRIB, NULL); ++ break; ++ case VE_LINK: ++ inotify_watch_event(watch, IN_ATTRIB, NULL); ++ break; ++ case VE_RENAME_SRC_DIR: ++ inotify_watch_event(watch, IN_MOVED_FROM, name); ++ break; ++ case VE_RENAME_DEST_DIR: ++ if (name == NULL) ++ name = dvp->v_path; ++ ++ inotify_watch_insert(watch, dvp, name); ++ inotify_watch_event(watch, IN_MOVED_TO, name); ++ break; ++ case VE_SUPPORT: ++ case VE_MOUNTEDOVER: ++ case VE_TRUNCATE: ++ break; ++ } ++ ++ return (vnext_vnevent(vf, vnevent, dvp, name, ct)); ++} ++ ++const fs_operation_def_t inotify_vnodesrc_template[] = { ++ VOPNAME_CLOSE, { .femop_close = inotify_fop_close }, ++ VOPNAME_CREATE, { .femop_create = inotify_fop_create }, ++ VOPNAME_LINK, { .femop_link = inotify_fop_link }, ++ VOPNAME_MKDIR, { .femop_mkdir = inotify_fop_mkdir }, ++ VOPNAME_OPEN, { .femop_open = inotify_fop_open }, ++ VOPNAME_READ, { .femop_read = inotify_fop_read }, ++ VOPNAME_READDIR, { .femop_readdir = inotify_fop_readdir }, ++ VOPNAME_REMOVE, { .femop_remove = inotify_fop_remove }, ++ VOPNAME_RMDIR, { .femop_rmdir = inotify_fop_rmdir }, ++ VOPNAME_SETATTR, { .femop_setattr = inotify_fop_setattr }, ++ VOPNAME_WRITE, { .femop_write = inotify_fop_write }, ++ VOPNAME_VNEVENT, { .femop_vnevent = inotify_fop_vnevent }, ++ NULL, NULL ++}; ++ ++static int ++inotify_watch_cmpwd(inotify_watch_t *lhs, inotify_watch_t *rhs) ++{ ++ if (lhs->inw_wd < rhs->inw_wd) ++ return (-1); ++ ++ if (lhs->inw_wd > rhs->inw_wd) ++ return (1); ++ ++ return (0); ++} ++ ++static int ++inotify_watch_cmpvp(inotify_watch_t *lhs, inotify_watch_t *rhs) ++{ ++ uintptr_t lvp = (uintptr_t)lhs->inw_vp, rvp = (uintptr_t)rhs->inw_vp; ++ ++ if (lvp < rvp) ++ return (-1); ++ ++ if (lvp > rvp) ++ return (1); ++ ++ return (0); ++} ++ ++static void ++inotify_watch_hold(inotify_watch_t *watch) ++{ ++ 
mutex_enter(&watch->inw_lock); ++ VERIFY(watch->inw_refcnt > 0); ++ watch->inw_refcnt++; ++ mutex_exit(&watch->inw_lock); ++} ++ ++static void ++inotify_watch_release(inotify_watch_t *watch) ++{ ++ mutex_enter(&watch->inw_lock); ++ VERIFY(watch->inw_refcnt > 1); ++ ++ if (--watch->inw_refcnt == 1 && watch->inw_zombie) { ++ /* ++ * We're down to our last reference; kick anyone that might be ++ * waiting. ++ */ ++ cv_signal(&watch->inw_cv); ++ } ++ ++ mutex_exit(&watch->inw_lock); ++} ++ ++static void ++inotify_watch_event(inotify_watch_t *watch, uint64_t mask, char *name) ++{ ++ inotify_kevent_t *event, *tail; ++ inotify_state_t *state = watch->inw_state; ++ uint32_t wd = watch->inw_wd, cookie = 0, len; ++ int align = sizeof (uintptr_t) - 1; ++ boolean_t removal = mask & IN_REMOVAL ? B_TRUE : B_FALSE; ++ inotify_watch_t *source = watch; ++ ++ if (!(mask &= watch->inw_mask) || mask == IN_ISDIR) ++ return; ++ ++ if (watch->inw_parent != NULL) { ++ /* ++ * This is an event on the child; if this isn't a valid child ++ * event, return. Otherwise, we move our watch to be our ++ * parent (which we know is around because we have a hold on ++ * it) and continue. ++ */ ++ if (!(mask & IN_CHILD_EVENTS)) ++ return; ++ ++ name = watch->inw_name; ++ watch = watch->inw_parent; ++ } ++ ++ if (!removal) { ++ mutex_enter(&state->ins_lock); ++ ++ if (watch->inw_zombie || ++ watch->inw_fired || !watch->inw_active) { ++ mutex_exit(&state->ins_lock); ++ return; ++ } ++ } else { ++ if (!watch->inw_active) ++ return; ++ ++ VERIFY(MUTEX_HELD(&state->ins_lock)); ++ } ++ ++ /* ++ * If this is an operation on a directory and it's a child event ++ * (event if it's not on a child), we specify IN_ISDIR. 
++	 */
++	if (source->inw_vp->v_type == VDIR && (mask & IN_CHILD_EVENTS))
++		mask |= IN_ISDIR;
++
++	if (mask & (IN_MOVED_FROM | IN_MOVED_TO))
++		cookie = (uint32_t)curthread->t_did;
++
++	if (state->ins_nevents >= state->ins_maxevents) {
++		/*
++		 * We're at our maximum number of events -- turn our event
++		 * into a nameless IN_Q_OVERFLOW event, which will be coalesced
++		 * if it's already the tail event.
++		 */
++		mask = IN_Q_OVERFLOW;
++		wd = (uint32_t)-1;
++		cookie = 0;
++		name = NULL;
++	}
++
++	if (name != NULL) {
++		if ((len = strlen(name) + 1) & align)
++			len += (align + 1) - (len & align);
++	} else {
++		len = 0;
++	}
++
++	if ((tail = state->ins_tail) != NULL && tail->ine_event.wd == wd &&
++	    tail->ine_event.mask == mask && tail->ine_event.cookie == cookie &&
++	    ((tail->ine_event.len == 0 && len == 0) ||
++	    (name != NULL && tail->ine_event.len != 0 &&
++	    strcmp(tail->ine_event.name, name) == 0))) {
++		/*
++		 * This is an implicitly coalesced event; we're done.
++		 */
++		if (!removal)
++			mutex_exit(&state->ins_lock);
++		return;
++	}
++
++	event = kmem_zalloc(sizeof (inotify_kevent_t) + len, KM_SLEEP);
++	event->ine_event.wd = wd;
++	event->ine_event.mask = (uint32_t)mask;
++	event->ine_event.cookie = cookie;
++	event->ine_event.len = len;
++
++	if (name != NULL)
++		strcpy(event->ine_event.name, name);
++
++	if (tail != NULL) {
++		tail->ine_next = event;
++	} else {
++		VERIFY(state->ins_head == NULL);
++		state->ins_head = event;
++		cv_broadcast(&state->ins_cv);
++	}
++
++	state->ins_tail = event;
++	state->ins_nevents++;
++	state->ins_size += sizeof (inotify_kevent_t) + len;
++
++	if ((watch->inw_mask & IN_ONESHOT) && !watch->inw_fired) {
++		/*
++		 * If this is a one-shot, we need to remove the watch. (Note
++		 * that this will recurse back into inotify_watch_event() to
++		 * fire the IN_IGNORED event -- but with "removal" set.)
++ */ ++ watch->inw_fired = 1; ++ inotify_watch_remove(state, watch); ++ } ++ ++ if (removal) ++ return; ++ ++ mutex_exit(&state->ins_lock); ++ pollwakeup(&state->ins_pollhd, POLLRDNORM | POLLIN); ++} ++ ++/* ++ * Destroy a watch. By the time we're in here, the watch must have exactly ++ * one reference. ++ */ ++static void ++inotify_watch_destroy(inotify_watch_t *watch) ++{ ++ VERIFY(MUTEX_HELD(&watch->inw_lock)); ++ ++ if (watch->inw_name != NULL) ++ kmem_free(watch->inw_name, strlen(watch->inw_name) + 1); ++ ++ kmem_free(watch, sizeof (inotify_watch_t)); ++} ++ ++/* ++ * Zombify a watch. By the time we come in here, it must be true that the ++ * watch has already been fem_uninstall()'d -- the only reference should be ++ * in the state's data structure. If we can get away with freeing it, we'll ++ * do that -- but if the reference count is greater than one due to an active ++ * vnode operation, we'll put this watch on the zombie list on the state ++ * structure. ++ */ ++static void ++inotify_watch_zombify(inotify_watch_t *watch) ++{ ++ inotify_state_t *state = watch->inw_state; ++ ++ VERIFY(MUTEX_HELD(&state->ins_lock)); ++ VERIFY(!watch->inw_zombie); ++ ++ watch->inw_zombie = 1; ++ ++ if (watch->inw_parent != NULL) { ++ inotify_watch_release(watch->inw_parent); ++ } else { ++ avl_remove(&state->ins_byvp, watch); ++ avl_remove(&state->ins_bywd, watch); ++ vmem_free(state->ins_wds, (void *)(uintptr_t)watch->inw_wd, 1); ++ watch->inw_wd = -1; ++ } ++ ++ mutex_enter(&watch->inw_lock); ++ ++ if (watch->inw_refcnt == 1) { ++ /* ++ * There are no operations in flight and there is no way ++ * for anyone to discover this watch -- we can destroy it. ++ */ ++ inotify_watch_destroy(watch); ++ } else { ++ /* ++ * There are operations in flight; we will need to enqueue ++ * this for later destruction. 
++ */ ++ watch->inw_parent = state->ins_zombies; ++ state->ins_zombies = watch; ++ mutex_exit(&watch->inw_lock); ++ } ++} ++ ++static inotify_watch_t * ++inotify_watch_add(inotify_state_t *state, inotify_watch_t *parent, ++ const char *name, vnode_t *vp, uint32_t mask) ++{ ++ inotify_watch_t *watch; ++ int err; ++ ++ VERIFY(MUTEX_HELD(&state->ins_lock)); ++ ++ watch = kmem_zalloc(sizeof (inotify_watch_t), KM_SLEEP); ++ ++ watch->inw_vp = vp; ++ watch->inw_mask = mask; ++ watch->inw_state = state; ++ watch->inw_refcnt = 1; ++ ++ if (parent == NULL) { ++ watch->inw_wd = (int)(uintptr_t)vmem_alloc(state->ins_wds, ++ 1, VM_BESTFIT | VM_SLEEP); ++ avl_add(&state->ins_byvp, watch); ++ avl_add(&state->ins_bywd, watch); ++ ++ avl_create(&watch->inw_children, ++ (int(*)(const void *, const void *))inotify_watch_cmpvp, ++ sizeof (inotify_watch_t), ++ offsetof(inotify_watch_t, inw_byvp)); ++ } else { ++ VERIFY(name != NULL); ++ inotify_watch_hold(parent); ++ watch->inw_mask &= IN_CHILD_EVENTS; ++ watch->inw_parent = parent; ++ watch->inw_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); ++ strcpy(watch->inw_name, name); ++ ++ avl_add(&parent->inw_children, watch); ++ } ++ ++ /* ++ * Add our monitor to the vnode. We must not have the watch lock held ++ * when we do this, as it will immediately hold our watch. ++ */ ++ err = fem_install(vp, inotify_femp, watch, OPARGUNIQ, ++ (void (*)(void *))inotify_watch_hold, ++ (void (*)(void *))inotify_watch_release); ++ ++ VERIFY(err == 0); ++ ++ return (watch); ++} ++ ++/* ++ * Remove a (non-child) watch. This is called from either synchronous context ++ * via inotify_rm_watch() or monitor context via either a vnevent or a ++ * one-shot. 
++ */
++static void
++inotify_watch_remove(inotify_state_t *state, inotify_watch_t *watch)
++{
++	inotify_watch_t *child;
++	int err;
++
++	VERIFY(MUTEX_HELD(&state->ins_lock));
++	VERIFY(watch->inw_parent == NULL);
++
++	err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
++	VERIFY(err == 0);
++
++	/*
++	 * If we have children, we're going to remove them all and set them
++	 * all to be zombies.
++	 */
++	while ((child = avl_first(&watch->inw_children)) != NULL) {
++		VERIFY(child->inw_parent == watch);
++		avl_remove(&watch->inw_children, child);
++
++		err = fem_uninstall(child->inw_vp, inotify_femp, child);
++		VERIFY(err == 0);
++
++		/*
++		 * If this child watch has been orphaned, remove it from the
++		 * state's list of orphans.
++		 */
++		if (child->inw_orphaned)
++			list_remove(&state->ins_orphans, child);
++
++		VN_RELE(child->inw_vp);
++
++		/*
++		 * We're down (or should be down) to a single reference to
++		 * this child watch; it's safe to zombify it.
++		 */
++		inotify_watch_zombify(child);
++	}
++
++	inotify_watch_event(watch, IN_IGNORED | IN_REMOVAL, NULL);
++	VN_RELE(watch->inw_vp);
++
++	/*
++	 * It's now safe to zombify the watch -- we know that the only reference
++	 * can come from operations in flight.
++	 */
++	inotify_watch_zombify(watch);
++}
++
++/*
++ * Delete a watch. Should only be called from VOP context.
++ */
++static void
++inotify_watch_delete(inotify_watch_t *watch, uint32_t event)
++{
++	inotify_state_t *state = watch->inw_state;
++	inotify_watch_t cmp = { .inw_vp = watch->inw_vp }, *parent;
++	int err;
++
++	if (event != IN_DELETE_SELF && !(watch->inw_mask & IN_CHILD_EVENTS))
++		return;
++
++	mutex_enter(&state->ins_lock);
++
++	if (watch->inw_zombie) {
++		mutex_exit(&state->ins_lock);
++		return;
++	}
++
++	if ((parent = watch->inw_parent) == NULL) {
++		if (event == IN_DELETE_SELF) {
++			/*
++			 * If we're here because we're being deleted and we
++			 * are not a child watch, we need to delete the entire
++			 * watch, children and all.
++ */ ++ inotify_watch_remove(state, watch); ++ } ++ ++ mutex_exit(&state->ins_lock); ++ return; ++ } else { ++ if (event == IN_DELETE_SELF && ++ !(parent->inw_mask & IN_EXCL_UNLINK)) { ++ /* ++ * This is a child watch for a file that is being ++ * removed and IN_EXCL_UNLINK has not been specified; ++ * indicate that it is orphaned and add it to the list ++ * of orphans. (This list will be checked by the ++ * cleaning cyclic to determine when the watch has ++ * become the only hold on the vnode, at which point ++ * the watch can be zombified.) Note that we check ++ * if the watch is orphaned before we orphan it: hard ++ * links make it possible for VE_REMOVE to be called ++ * multiple times on the same vnode. (!) ++ */ ++ if (!watch->inw_orphaned) { ++ watch->inw_orphaned = 1; ++ list_insert_head(&state->ins_orphans, watch); ++ } ++ ++ mutex_exit(&state->ins_lock); ++ return; ++ } ++ ++ if (watch->inw_orphaned) { ++ /* ++ * If we're here, a file was orphaned and then later ++ * moved -- which almost certainly means that hard ++ * links are on the scene. We choose the orphan over ++ * the move because we don't want to spuriously ++ * drop events if we can avoid it. ++ */ ++ list_remove(&state->ins_orphans, watch); ++ } ++ } ++ ++ if (avl_find(&parent->inw_children, &cmp, NULL) == NULL) { ++ /* ++ * This watch has already been deleted from the parent. ++ */ ++ mutex_exit(&state->ins_lock); ++ return; ++ } ++ ++ avl_remove(&parent->inw_children, watch); ++ err = fem_uninstall(watch->inw_vp, inotify_femp, watch); ++ VERIFY(err == 0); ++ ++ VN_RELE(watch->inw_vp); ++ ++ /* ++ * It's now safe to zombify the watch -- which won't actually delete ++ * it as we know that the reference count is greater than 1. ++ */ ++ inotify_watch_zombify(watch); ++ mutex_exit(&state->ins_lock); ++} ++ ++/* ++ * Insert a new child watch. Should only be called from VOP context when ++ * a child is created in a watched directory. 
++ */ ++static void ++inotify_watch_insert(inotify_watch_t *watch, vnode_t *vp, char *name) ++{ ++ inotify_state_t *state = watch->inw_state; ++ inotify_watch_t cmp = { .inw_vp = vp }; ++ ++ if (!(watch->inw_mask & IN_CHILD_EVENTS)) ++ return; ++ ++ mutex_enter(&state->ins_lock); ++ ++ if (watch->inw_zombie || watch->inw_parent != NULL || vp == NULL) { ++ mutex_exit(&state->ins_lock); ++ return; ++ } ++ ++ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) { ++ mutex_exit(&state->ins_lock); ++ return; ++ } ++ ++ VN_HOLD(vp); ++ watch = inotify_watch_add(state, watch, name, vp, watch->inw_mask); ++ VERIFY(watch != NULL); ++ ++ mutex_exit(&state->ins_lock); ++} ++ ++ ++static int ++inotify_add_watch(inotify_state_t *state, vnode_t *vp, uint32_t mask, ++ int32_t *wdp) ++{ ++ inotify_watch_t *watch, cmp = { .inw_vp = vp }; ++ uint32_t set; ++ ++ set = (mask & (IN_ALL_EVENTS | IN_MODIFIERS)) | IN_UNMASKABLE; ++ ++ /* ++ * Lookup our vnode to determine if we already have a watch on it. ++ */ ++ mutex_enter(&state->ins_lock); ++ ++ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) { ++ /* ++ * We don't have this watch; allocate a new one, provided that ++ * we have fewer than our limit. ++ */ ++ if (avl_numnodes(&state->ins_bywd) >= state->ins_maxwatches) { ++ mutex_exit(&state->ins_lock); ++ return (ENOSPC); ++ } ++ ++ VN_HOLD(vp); ++ watch = inotify_watch_add(state, NULL, NULL, vp, set); ++ *wdp = watch->inw_wd; ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++ } ++ ++ VERIFY(!watch->inw_zombie); ++ ++ if (!(mask & IN_MASK_ADD)) { ++ /* ++ * Note that if we're resetting our event mask and we're ++ * transitioning from an event mask that includes child events ++ * to one that doesn't, there will be potentially some stale ++ * child watches. This is basically fine: they won't fire, ++ * and they will correctly be removed when the watch is ++ * removed. 
++ */ ++ watch->inw_mask = 0; ++ } ++ ++ watch->inw_mask |= set; ++ ++ *wdp = watch->inw_wd; ++ ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++} ++ ++static int ++inotify_add_child(inotify_state_t *state, vnode_t *vp, char *name) ++{ ++ inotify_watch_t *watch, cmp = { .inw_vp = vp }; ++ vnode_t *cvp; ++ int err; ++ ++ /* ++ * Verify that the specified child doesn't have a directory component ++ * within it. ++ */ ++ if (strchr(name, '/') != NULL) ++ return (EINVAL); ++ ++ /* ++ * Lookup the underlying file. Note that this will succeed even if ++ * we don't have permissions to actually read the file. ++ */ ++ if ((err = lookupnameat(name, ++ UIO_SYSSPACE, NO_FOLLOW, NULL, &cvp, vp)) != 0) { ++ return (err); ++ } ++ ++ /* ++ * Use our vnode to find our watch, and then add our child watch to it. ++ */ ++ mutex_enter(&state->ins_lock); ++ ++ if ((watch = avl_find(&state->ins_byvp, &cmp, NULL)) == NULL) { ++ /* ++ * This is unexpected -- it means that we don't have the ++ * watch that we thought we had. ++ */ ++ mutex_exit(&state->ins_lock); ++ VN_RELE(cvp); ++ return (ENXIO); ++ } ++ ++ /* ++ * Now lookup the child vnode in the watch; we'll only add it if it ++ * isn't already there. 
++ */ ++ cmp.inw_vp = cvp; ++ ++ if (avl_find(&watch->inw_children, &cmp, NULL) != NULL) { ++ mutex_exit(&state->ins_lock); ++ VN_RELE(cvp); ++ return (0); ++ } ++ ++ watch = inotify_watch_add(state, watch, name, cvp, watch->inw_mask); ++ VERIFY(watch != NULL); ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++} ++ ++static int ++inotify_rm_watch(inotify_state_t *state, int32_t wd) ++{ ++ inotify_watch_t *watch, cmp = { .inw_wd = wd }; ++ ++ mutex_enter(&state->ins_lock); ++ ++ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) { ++ mutex_exit(&state->ins_lock); ++ return (EINVAL); ++ } ++ ++ inotify_watch_remove(state, watch); ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++} ++ ++static int ++inotify_activate(inotify_state_t *state, int32_t wd) ++{ ++ inotify_watch_t *watch, cmp = { .inw_wd = wd }; ++ ++ mutex_enter(&state->ins_lock); ++ ++ if ((watch = avl_find(&state->ins_bywd, &cmp, NULL)) == NULL) { ++ mutex_exit(&state->ins_lock); ++ return (EINVAL); ++ } ++ ++ watch->inw_active = 1; ++ ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++} ++ ++/* ++ * Called periodically as a cyclic to process the orphans and zombies. 
++ */
++static void
++inotify_clean(void *arg)
++{
++	inotify_state_t *state = arg;
++	inotify_watch_t *watch, *parent, *next, **prev;
++	int err;
++
++	mutex_enter(&state->ins_lock);
++
++	for (watch = list_head(&state->ins_orphans);
++	    watch != NULL; watch = next) {
++		next = list_next(&state->ins_orphans, watch);
++
++		VERIFY(!watch->inw_zombie);
++		VERIFY((parent = watch->inw_parent) != NULL);
++
++		if (watch->inw_vp->v_count > 1)
++			continue;
++
++		avl_remove(&parent->inw_children, watch);
++		err = fem_uninstall(watch->inw_vp, inotify_femp, watch);
++		VERIFY(err == 0);
++
++		list_remove(&state->ins_orphans, watch);
++
++		VN_RELE(watch->inw_vp);
++		inotify_watch_zombify(watch);
++	}
++
++	prev = &state->ins_zombies;
++
++	while ((watch = *prev) != NULL) {
++		mutex_enter(&watch->inw_lock);
++
++		if (watch->inw_refcnt == 1) {
++			*prev = watch->inw_parent;
++			inotify_watch_destroy(watch);
++			continue;
++		}
++
++		prev = &watch->inw_parent;
++		mutex_exit(&watch->inw_lock);
++	}
++
++	mutex_exit(&state->ins_lock);
++}
++
++/*ARGSUSED*/
++static int
++inotify_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
++{
++	inotify_state_t *state;
++	major_t major = getemajor(*devp);
++	minor_t minor = getminor(*devp);
++	int instances = 0;
++	cyc_handler_t hdlr;
++	cyc_time_t when;
++	char c[64];
++
++	if (minor != INOTIFYMNRN_INOTIFY)
++		return (ENXIO);
++
++	mutex_enter(&inotify_lock);
++
++	for (state = inotify_state; state != NULL; state = state->ins_next) {
++		if (state->ins_cred == cred_p)
++			instances++;
++	}
++
++	if (instances >= inotify_maxinstances) {
++		mutex_exit(&inotify_lock);
++		return (EMFILE);
++	}
++
++	minor = (minor_t)(uintptr_t)vmem_alloc(inotify_minor, 1,
++	    VM_BESTFIT | VM_SLEEP);
++
++	if (ddi_soft_state_zalloc(inotify_softstate, minor) != DDI_SUCCESS) {
++		vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1);
++		mutex_exit(&inotify_lock);
++		return (ENOMEM);
++	}
++
++	state = ddi_get_soft_state(inotify_softstate, minor);
++	*devp =
makedevice(major, minor); ++ ++ crhold(cred_p); ++ state->ins_cred = cred_p; ++ state->ins_next = inotify_state; ++ inotify_state = state; ++ ++ (void) snprintf(c, sizeof (c), "inotify_watchid_%d", minor); ++ state->ins_wds = vmem_create(c, (void *)1, UINT32_MAX, 1, ++ NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); ++ ++ avl_create(&state->ins_bywd, ++ (int(*)(const void *, const void *))inotify_watch_cmpwd, ++ sizeof (inotify_watch_t), ++ offsetof(inotify_watch_t, inw_bywd)); ++ ++ avl_create(&state->ins_byvp, ++ (int(*)(const void *, const void *))inotify_watch_cmpvp, ++ sizeof (inotify_watch_t), ++ offsetof(inotify_watch_t, inw_byvp)); ++ ++ list_create(&state->ins_orphans, sizeof (inotify_watch_t), ++ offsetof(inotify_watch_t, inw_orphan)); ++ ++ state->ins_maxwatches = inotify_maxwatches; ++ state->ins_maxevents = inotify_maxevents; ++ ++ mutex_exit(&inotify_lock); ++ ++ mutex_enter(&cpu_lock); ++ ++ hdlr.cyh_func = inotify_clean; ++ hdlr.cyh_level = CY_LOW_LEVEL; ++ hdlr.cyh_arg = state; ++ ++ when.cyt_when = 0; ++ when.cyt_interval = NANOSEC; ++ ++ state->ins_cleaner = cyclic_add(&hdlr, &when); ++ mutex_exit(&cpu_lock); ++ ++ return (0); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_read(dev_t dev, uio_t *uio, cred_t *cr) ++{ ++ inotify_state_t *state; ++ inotify_kevent_t *event; ++ minor_t minor = getminor(dev); ++ int err = 0, nevents = 0; ++ size_t len; ++ ++ state = ddi_get_soft_state(inotify_softstate, minor); ++ ++ mutex_enter(&state->ins_lock); ++ ++ while (state->ins_head == NULL) { ++ if (uio->uio_fmode & (FNDELAY|FNONBLOCK)) { ++ mutex_exit(&state->ins_lock); ++ return (EAGAIN); ++ } ++ ++ if (!cv_wait_sig_swap(&state->ins_cv, &state->ins_lock)) { ++ mutex_exit(&state->ins_lock); ++ return (EINTR); ++ } ++ } ++ ++ /* ++ * We have events and we have our lock; return as many as we can. 
++ */ ++ while ((event = state->ins_head) != NULL) { ++ len = sizeof (event->ine_event) + event->ine_event.len; ++ ++ if (uio->uio_resid < len) { ++ if (nevents == 0) ++ err = EINVAL; ++ break; ++ } ++ ++ nevents++; ++ ++ if ((err = uiomove(&event->ine_event, len, UIO_READ, uio)) != 0) ++ break; ++ ++ VERIFY(state->ins_nevents > 0); ++ state->ins_nevents--; ++ ++ VERIFY(state->ins_size > 0); ++ state->ins_size -= INOTIFY_EVENT_LENGTH(event); ++ ++ if ((state->ins_head = event->ine_next) == NULL) { ++ VERIFY(event == state->ins_tail); ++ VERIFY(state->ins_nevents == 0); ++ state->ins_tail = NULL; ++ } ++ ++ kmem_free(event, INOTIFY_EVENT_LENGTH(event)); ++ } ++ ++ mutex_exit(&state->ins_lock); ++ ++ return (err); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_poll(dev_t dev, short events, int anyyet, short *reventsp, ++ struct pollhead **phpp) ++{ ++ inotify_state_t *state; ++ minor_t minor = getminor(dev); ++ ++ state = ddi_get_soft_state(inotify_softstate, minor); ++ ++ mutex_enter(&state->ins_lock); ++ ++ if (state->ins_head != NULL) { ++ *reventsp = POLLRDNORM | POLLIN; ++ } else { ++ *reventsp = 0; ++ ++ if (!anyyet) ++ *phpp = &state->ins_pollhd; ++ } ++ ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) ++{ ++ inotify_state_t *state; ++ minor_t minor = getminor(dev); ++ file_t *fp; ++ int rval; ++ ++ state = ddi_get_soft_state(inotify_softstate, minor); ++ ++ switch (cmd) { ++ case INOTIFYIOC_ADD_WATCH: { ++ inotify_addwatch_t addwatch; ++ file_t *fp; ++ ++ if (copyin((void *)arg, &addwatch, sizeof (addwatch)) != 0) ++ return (EFAULT); ++ ++ if ((fp = getf(addwatch.inaw_fd)) == NULL) ++ return (EBADF); ++ ++ rval = inotify_add_watch(state, fp->f_vnode, ++ addwatch.inaw_mask, rv); ++ ++ releasef(addwatch.inaw_fd); ++ return (rval); ++ } ++ ++ case INOTIFYIOC_ADD_CHILD: { ++ inotify_addchild_t addchild; ++ char name[MAXPATHLEN]; ++ ++ if (copyin((void 
*)arg, &addchild, sizeof (addchild)) != 0) ++ return (EFAULT); ++ ++ if (copyinstr(addchild.inac_name, name, MAXPATHLEN, NULL) != 0) ++ return (EFAULT); ++ ++ if ((fp = getf(addchild.inac_fd)) == NULL) ++ return (EBADF); ++ ++ rval = inotify_add_child(state, fp->f_vnode, name); ++ ++ releasef(addchild.inac_fd); ++ return (rval); ++ } ++ ++ case INOTIFYIOC_RM_WATCH: ++ return (inotify_rm_watch(state, arg)); ++ ++ case INOTIFYIOC_ACTIVATE: ++ return (inotify_activate(state, arg)); ++ ++ case FIONREAD: ++ mutex_enter(&state->ins_lock); ++ *rv = state->ins_size; ++ mutex_exit(&state->ins_lock); ++ ++ return (0); ++ ++ default: ++ break; ++ } ++ ++ return (ENOTTY); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_close(dev_t dev, int flag, int otyp, cred_t *cred_p) ++{ ++ inotify_state_t *state, **sp; ++ inotify_watch_t *watch, *zombies; ++ inotify_kevent_t *event; ++ minor_t minor = getminor(dev); ++ ++ state = ddi_get_soft_state(inotify_softstate, minor); ++ ++ mutex_enter(&state->ins_lock); ++ ++ /* ++ * First, destroy all of our watches. ++ */ ++ while ((watch = avl_first(&state->ins_bywd)) != NULL) ++ inotify_watch_remove(state, watch); ++ ++ /* ++ * And now destroy our event queue. ++ */ ++ while ((event = state->ins_head) != NULL) { ++ state->ins_head = event->ine_next; ++ kmem_free(event, INOTIFY_EVENT_LENGTH(event)); ++ } ++ ++ zombies = state->ins_zombies; ++ state->ins_zombies = NULL; ++ mutex_exit(&state->ins_lock); ++ ++ /* ++ * Now that our state lock is dropped, we can synchronously wait on ++ * any zombies. ++ */ ++ while ((watch = zombies) != NULL) { ++ zombies = zombies->inw_parent; ++ ++ mutex_enter(&watch->inw_lock); ++ ++ while (watch->inw_refcnt > 1) ++ cv_wait(&watch->inw_cv, &watch->inw_lock); ++ ++ inotify_watch_destroy(watch); ++ } ++ ++ mutex_enter(&cpu_lock); ++ cyclic_remove(state->ins_cleaner); ++ mutex_exit(&cpu_lock); ++ ++ mutex_enter(&inotify_lock); ++ ++ /* ++ * Remove our state from our global list, and release our hold on ++ * the cred. 
++ */ ++ for (sp = &inotify_state; *sp != state; sp = &((*sp)->ins_next)) ++ VERIFY(*sp != NULL); ++ ++ *sp = (*sp)->ins_next; ++ crfree(state->ins_cred); ++ ++ /* ++ * Tear down the per-instance watch descriptor arena created in ++ * inotify_open(); without this, every open/close cycle leaks a ++ * vmem arena. ++ */ ++ vmem_destroy(state->ins_wds); ++ ++ ddi_soft_state_free(inotify_softstate, minor); ++ vmem_free(inotify_minor, (void *)(uintptr_t)minor, 1); ++ ++ mutex_exit(&inotify_lock); ++ ++ return (0); ++} ++ ++/* ++ * attach(9E) entry point. For DDI_ATTACH this creates the soft state ++ * anchor, the "inotify" minor node, the FEM operations (inotify_femp) and ++ * the inotify_minor identifier arena, undoing whatever was already set up ++ * if a later step fails. DDI_RESUME succeeds without doing any work, ++ * mirroring the way inotify_detach() treats DDI_SUSPEND; any other command ++ * fails. ++ */ ++static int ++inotify_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ++{ ++ switch (cmd) { ++ case DDI_ATTACH: ++ break; ++ ++ case DDI_RESUME: ++ /* ++ * Nothing is torn down for DDI_SUSPEND, so there is nothing ++ * to rebuild; re-running the initialization below would ++ * clobber live global state. ++ */ ++ return (DDI_SUCCESS); ++ ++ default: ++ return (DDI_FAILURE); ++ } ++ ++ mutex_enter(&inotify_lock); ++ ++ if (ddi_soft_state_init(&inotify_softstate, ++ sizeof (inotify_state_t), 0) != 0) { ++ cmn_err(CE_NOTE, "/dev/inotify failed to create soft state"); ++ mutex_exit(&inotify_lock); ++ return (DDI_FAILURE); ++ } ++ ++ if (ddi_create_minor_node(devi, "inotify", S_IFCHR, ++ INOTIFYMNRN_INOTIFY, DDI_PSEUDO, NULL) == DDI_FAILURE) { ++ cmn_err(CE_NOTE, "/dev/inotify couldn't create minor node"); ++ ddi_soft_state_fini(&inotify_softstate); ++ mutex_exit(&inotify_lock); ++ return (DDI_FAILURE); ++ } ++ ++ if (fem_create("inotify_fem", ++ inotify_vnodesrc_template, &inotify_femp) != 0) { ++ cmn_err(CE_NOTE, "/dev/inotify couldn't create FEM state"); ++ ddi_remove_minor_node(devi, NULL); ++ ddi_soft_state_fini(&inotify_softstate); ++ mutex_exit(&inotify_lock); ++ return (DDI_FAILURE); ++ } ++ ++ ddi_report_dev(devi); ++ inotify_devi = devi; ++ ++ inotify_minor = vmem_create("inotify_minor", (void *)INOTIFYMNRN_CLONE, ++ UINT32_MAX - INOTIFYMNRN_CLONE, 1, NULL, NULL, NULL, 0, ++ VM_SLEEP | VMC_IDENTIFIER); ++ ++ mutex_exit(&inotify_lock); ++ ++ return (DDI_SUCCESS); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) ++{ ++ switch (cmd) { ++ case DDI_DETACH: ++ break; ++ ++ case DDI_SUSPEND: ++ return (DDI_SUCCESS); ++ ++ default: ++ return (DDI_FAILURE); ++ } ++ ++ mutex_enter(&inotify_lock); ++ fem_free(inotify_femp); ++ vmem_destroy(inotify_minor); ++ ++ ddi_remove_minor_node(inotify_devi, NULL); ++ inotify_devi = NULL; ++ ++ ddi_soft_state_fini(&inotify_softstate); ++ mutex_exit(&inotify_lock);
++ ++ return (DDI_SUCCESS); ++} ++ ++/*ARGSUSED*/ ++static int ++inotify_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) ++{ ++ int error; ++ ++ switch (infocmd) { ++ case DDI_INFO_DEVT2DEVINFO: ++ *result = (void *)inotify_devi; ++ error = DDI_SUCCESS; ++ break; ++ case DDI_INFO_DEVT2INSTANCE: ++ *result = (void *)0; ++ error = DDI_SUCCESS; ++ break; ++ default: ++ error = DDI_FAILURE; ++ } ++ return (error); ++} ++ ++static struct cb_ops inotify_cb_ops = { ++ inotify_open, /* open */ ++ inotify_close, /* close */ ++ nulldev, /* strategy */ ++ nulldev, /* print */ ++ nodev, /* dump */ ++ inotify_read, /* read */ ++ nodev, /* write */ ++ inotify_ioctl, /* ioctl */ ++ nodev, /* devmap */ ++ nodev, /* mmap */ ++ nodev, /* segmap */ ++ inotify_poll, /* poll */ ++ ddi_prop_op, /* cb_prop_op */ ++ 0, /* streamtab */ ++ D_NEW | D_MP /* Driver compatibility flag */ ++}; ++ ++static struct dev_ops inotify_ops = { ++ DEVO_REV, /* devo_rev */ ++ 0, /* refcnt */ ++ inotify_info, /* get_dev_info */ ++ nulldev, /* identify */ ++ nulldev, /* probe */ ++ inotify_attach, /* attach */ ++ inotify_detach, /* detach */ ++ nodev, /* reset */ ++ &inotify_cb_ops, /* driver operations */ ++ NULL, /* bus operations */ ++ nodev, /* dev power */ ++ ddi_quiesce_not_needed, /* quiesce */ ++}; ++ ++static struct modldrv modldrv = { ++ &mod_driverops, /* module type (this is a pseudo driver) */ ++ "inotify support", /* name of module */ ++ &inotify_ops, /* driver ops */ ++}; ++ ++static struct modlinkage modlinkage = { ++ MODREV_1, ++ (void *)&modldrv, ++ NULL ++}; ++ ++int ++_init(void) ++{ ++ return (mod_install(&modlinkage)); ++} ++ ++int ++_info(struct modinfo *modinfop) ++{ ++ return (mod_info(&modlinkage, modinfop)); ++} ++ ++int ++_fini(void) ++{ ++ return (mod_remove(&modlinkage)); ++} +diff --git a/usr/src/uts/common/io/inotify.conf b/usr/src/uts/common/io/inotify.conf +new file mode 100644 +index 0000000000..ce9da6180f +--- /dev/null ++++ 
b/usr/src/uts/common/io/inotify.conf +@@ -0,0 +1,16 @@ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2014 Joyent, Inc. All rights reserved. ++# ++ ++name="inotify" parent="pseudo" instance=0; +diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile +index 08b2488b97..ce449efd20 100644 +--- a/usr/src/uts/common/sys/Makefile ++++ b/usr/src/uts/common/sys/Makefile +@@ -271,6 +271,7 @@ CHKHDRS= \ + idmap.h \ + ieeefp.h \ + id_space.h \ ++ inotify.h \ + instance.h \ + int_const.h \ + int_fmtio.h \ +diff --git a/usr/src/uts/common/sys/inotify.h b/usr/src/uts/common/sys/inotify.h +new file mode 100644 +index 0000000000..8acc1a7280 +--- /dev/null ++++ b/usr/src/uts/common/sys/inotify.h +@@ -0,0 +1,153 @@ ++/* ++ * This file and its contents are supplied under the terms of the ++ * Common Development and Distribution License ("CDDL"), version 1.0. ++ * You may only use this file in accordance with the terms of version ++ * 1.0 of the CDDL. ++ * ++ * A full copy of the text of the CDDL should have accompanied this ++ * source. A copy of the CDDL is also available via the Internet at ++ * http://www.illumos.org/license/CDDL. ++ */ ++ ++/* ++ * Copyright (c) 2014 Joyent, Inc. All rights reserved. ++ */ ++ ++/* ++ * Header file to support the inotify facility. Note that this facility ++ * is designed to be binary compatible with the Linux inotify facility; values ++ * for constants here should therefore exactly match those found in Linux, and ++ * this facility shouldn't be extended independently of Linux.
++ */ ++ ++#ifndef _SYS_INOTIFY_H ++#define _SYS_INOTIFY_H ++ ++#include <sys/types.h> ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* ++ * Events that can be explicitly requested on any inotify watch. ++ */ ++#define IN_ACCESS 0x00000001 ++#define IN_MODIFY 0x00000002 ++#define IN_ATTRIB 0x00000004 ++#define IN_CLOSE_WRITE 0x00000008 ++#define IN_CLOSE_NOWRITE 0x00000010 ++#define IN_OPEN 0x00000020 ++#define IN_MOVED_FROM 0x00000040 ++#define IN_MOVED_TO 0x00000080 ++#define IN_CREATE 0x00000100 ++#define IN_DELETE 0x00000200 ++#define IN_DELETE_SELF 0x00000400 ++#define IN_MOVE_SELF 0x00000800 ++ ++/* ++ * Events that can be sent to an inotify watch -- requested or not. ++ */ ++#define IN_UNMOUNT 0x00002000 ++#define IN_Q_OVERFLOW 0x00004000 ++#define IN_IGNORED 0x00008000 ++ ++/* ++ * Flags that can modify an inotify event. ++ */ ++#define IN_ONLYDIR 0x01000000 ++#define IN_DONT_FOLLOW 0x02000000 ++#define IN_EXCL_UNLINK 0x04000000 ++#define IN_MASK_ADD 0x20000000 ++#define IN_ISDIR 0x40000000 ++#define IN_ONESHOT 0x80000000 ++ ++/* ++ * Helpful constants. ++ */ ++#define IN_CLOSE (IN_CLOSE_WRITE | IN_CLOSE_NOWRITE) ++#define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) ++#define IN_ALL_EVENTS \ ++ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ ++ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | IN_MOVED_TO | \ ++ IN_DELETE | IN_CREATE | IN_DELETE_SELF | IN_MOVE_SELF) ++ ++/* ++ * The six low-order event bits (0x3f). NOTE(review): presumably the events ++ * that are delivered through watches on a directory's children -- confirm ++ * against the consumers in inotify.c. ++ */ ++#define IN_CHILD_EVENTS \ ++ (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ ++ IN_CLOSE_NOWRITE | IN_OPEN) ++ ++/* ++ * To assure binary compatibility with Linux, these values are fixed at their ++ * Linux equivalents, not their native ones.
++ */ ++#define IN_CLOEXEC 02000000 /* LX_O_CLOEXEC */ ++#define IN_NONBLOCK 04000 /* LX_O_NONBLOCK */ ++ ++struct inotify_event { ++ int32_t wd; /* watch descriptor */ ++ uint32_t mask; /* mask of events */ ++ uint32_t cookie; /* event association cookie, if any */ ++ uint32_t len; /* size of name field */ ++ char name[]; /* optional NUL-terminated name */ ++}; ++ ++/* ++ * These ioctl values are specific to the native implementation; applications ++ * shouldn't be using them directly, and they should therefore be safe to ++ * change without breaking apps. ++ */ ++#define INOTIFYIOC (('i' << 24) | ('n' << 16) | ('y' << 8)) ++#define INOTIFYIOC_ADD_WATCH (INOTIFYIOC | 1) /* add watch */ ++#define INOTIFYIOC_RM_WATCH (INOTIFYIOC | 2) /* remove watch */ ++#define INOTIFYIOC_ADD_CHILD (INOTIFYIOC | 3) /* add child watch */ ++#define INOTIFYIOC_ACTIVATE (INOTIFYIOC | 4) /* activate watch */ ++ ++#ifndef _LP64 ++#ifndef _LITTLE_ENDIAN ++#define INOTIFY_PTR(type, name) uint32_t name##pad; type *name ++#else ++#define INOTIFY_PTR(type, name) type *name; uint32_t name##pad ++#endif ++#else ++#define INOTIFY_PTR(type, name) type *name ++#endif ++ ++typedef struct inotify_addwatch { ++ int inaw_fd; /* open fd for object */ ++ uint32_t inaw_mask; /* desired mask */ ++} inotify_addwatch_t; ++ ++typedef struct inotify_addchild { ++ INOTIFY_PTR(char, inac_name); /* pointer to name */ ++ int inac_fd; /* open fd for parent */ ++} inotify_addchild_t; ++ ++#ifndef _KERNEL ++ ++extern int inotify_init(void); ++extern int inotify_init1(int); ++extern int inotify_add_watch(int, const char *, uint32_t); ++extern int inotify_rm_watch(int, int); ++ ++#else ++ ++#define IN_UNMASKABLE \ ++ (IN_UNMOUNT | IN_Q_OVERFLOW | IN_IGNORED | IN_ISDIR) ++ ++#define IN_MODIFIERS \ ++ (IN_EXCL_UNLINK | IN_ONESHOT) ++ ++#define IN_FLAGS \ ++ (IN_ONLYDIR | IN_DONT_FOLLOW | IN_MASK_ADD) ++ ++#define IN_REMOVAL (1ULL << 32) ++#define INOTIFYMNRN_INOTIFY 0 ++#define INOTIFYMNRN_CLONE 1 ++ ++#endif /* _KERNEL 
*/ ++ ++#ifdef __cplusplus ++} ++#endif ++ ++#endif /* _SYS_INOTIFY_H */ +diff --git a/usr/src/uts/common/sys/vnode.h b/usr/src/uts/common/sys/vnode.h +index af9516fe52..c1c12a084e 100644 +--- a/usr/src/uts/common/sys/vnode.h ++++ b/usr/src/uts/common/sys/vnode.h +@@ -21,7 +21,7 @@ + + /* + * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved. +- * Copyright (c) 2013, Joyent, Inc. All rights reserved. ++ * Copyright (c) 2014, Joyent, Inc. All rights reserved. + */ + + /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ +@@ -735,7 +735,8 @@ typedef enum vnevent { + VE_LINK = 6, /* Link with vnode's name as source */ + VE_RENAME_DEST_DIR = 7, /* Rename with vnode as target dir */ + VE_MOUNTEDOVER = 8, /* File or Filesystem got mounted over vnode */ +- VE_TRUNCATE = 9 /* Truncate */ ++ VE_TRUNCATE = 9, /* Truncate */ ++ VE_RENAME_SRC_DIR = 10 /* Rename with vnode as source dir */ + } vnevent_t; + + /* +@@ -1290,7 +1291,8 @@ void vnevent_remove(vnode_t *, vnode_t *, char *, caller_context_t *); + void vnevent_rmdir(vnode_t *, vnode_t *, char *, caller_context_t *); + void vnevent_create(vnode_t *, caller_context_t *); + void vnevent_link(vnode_t *, caller_context_t *); +-void vnevent_rename_dest_dir(vnode_t *, caller_context_t *ct); ++void vnevent_rename_dest_dir(vnode_t *, vnode_t *, char *, ++ caller_context_t *ct); + void vnevent_mountedover(vnode_t *, caller_context_t *); + void vnevent_truncate(vnode_t *, caller_context_t *); + int vnevent_support(vnode_t *, caller_context_t *); +diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel +index 0abcdcb8c4..e534120477 100644 +--- a/usr/src/uts/intel/Makefile.intel ++++ b/usr/src/uts/intel/Makefile.intel +@@ -251,6 +251,7 @@ DRV_KMODS += i8042 + DRV_KMODS += i915 + DRV_KMODS += icmp + DRV_KMODS += icmp6 ++DRV_KMODS += inotify + DRV_KMODS += intel_nb5000 + DRV_KMODS += intel_nhm + DRV_KMODS += ip +diff --git a/usr/src/uts/intel/inotify/Makefile 
b/usr/src/uts/intel/inotify/Makefile +new file mode 100644 +index 0000000000..80e7a80404 +--- /dev/null ++++ b/usr/src/uts/intel/inotify/Makefile +@@ -0,0 +1,70 @@ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2014 Joyent, Inc. All rights reserved. ++# ++ ++# ++# Path to the base of the uts directory tree (usually /usr/src/uts). ++# ++UTSBASE = ../.. ++ ++# ++# Define the module and object file sets. ++# ++MODULE = inotify ++OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%) ++LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln) ++ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) ++CONF_SRCDIR = $(UTSBASE)/common/io ++ ++# ++# Include common rules. ++# ++include $(UTSBASE)/intel/Makefile.intel ++ ++LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR ++CERRWARN += -_gcc=-Wno-parentheses ++LDFLAGS += -dy -Nfs/specfs ++ ++# ++# Define targets ++# ++ALL_TARGET = $(BINARY) $(SRC_CONFILE) ++LINT_TARGET = $(MODULE).lint ++INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) ++ ++# ++# Default build targets. ++# ++.KEEP_STATE: ++ ++def: $(DEF_DEPS) ++ ++all: $(ALL_DEPS) ++ ++clean: $(CLEAN_DEPS) ++ ++clobber: $(CLOBBER_DEPS) ++ ++lint: $(LINT_DEPS) ++ ++modlintlib: $(MODLINTLIB_DEPS) ++ ++clean.lint: $(CLEAN_LINT_DEPS) ++ ++install: $(INSTALL_DEPS) ++ ++# ++# Include common targets. ++# ++include $(UTSBASE)/intel/Makefile.targ +diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc +index 5a1639a692..b989364998 100644 +--- a/usr/src/uts/sparc/Makefile.sparc ++++ b/usr/src/uts/sparc/Makefile.sparc +@@ -21,6 +21,7 @@ + + # Copyright (c) 2005, 2010, Oracle and/or its affiliates. 
All rights reserved. + # Copyright (c) 2013 Andrew Stormont. All rights reserved. ++# Copyright (c) 2014, Joyent, Inc. All rights reserved. + + + # +@@ -236,6 +237,7 @@ DRV_KMODS += nulldriver + DRV_KMODS += bridge trill + DRV_KMODS += bpf + DRV_KMODS += dca ++DRV_KMODS += inotify + + # + # Hardware Drivers in common space +diff --git a/usr/src/uts/sparc/inotify/Makefile b/usr/src/uts/sparc/inotify/Makefile +new file mode 100644 +index 0000000000..ce2b956955 +--- /dev/null ++++ b/usr/src/uts/sparc/inotify/Makefile +@@ -0,0 +1,70 @@ ++# ++# This file and its contents are supplied under the terms of the ++# Common Development and Distribution License ("CDDL"), version 1.0. ++# You may only use this file in accordance with the terms of version ++# 1.0 of the CDDL. ++# ++# A full copy of the text of the CDDL should have accompanied this ++# source. A copy of the CDDL is also available via the Internet at ++# http://www.illumos.org/license/CDDL. ++# ++ ++# ++# Copyright (c) 2014 Joyent, Inc. All rights reserved. ++# ++ ++# ++# Path to the base of the uts directory tree (usually /usr/src/uts). ++# ++UTSBASE = ../.. ++ ++# ++# Define the module and object file sets. ++# ++MODULE = inotify ++OBJECTS = $(INOTIFY_OBJS:%=$(OBJS_DIR)/%) ++LINTS = $(INOTIFY_OBJS:%.o=$(LINTS_DIR)/%.ln) ++ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) ++CONF_SRCDIR = $(UTSBASE)/common/io ++ ++# ++# Include common rules. ++# ++include $(UTSBASE)/sparc/Makefile.sparc ++ ++LINTTAGS += -erroff=E_STRUCT_DERIVED_FROM_FLEX_MBR ++CERRWARN += -_gcc=-Wno-parentheses ++LDFLAGS += -dy -Nfs/specfs ++ ++# ++# Define targets ++# ++ALL_TARGET = $(BINARY) $(SRC_CONFILE) ++LINT_TARGET = $(MODULE).lint ++INSTALL_TARGET = $(BINARY) $(ROOTMODULE) $(ROOT_CONFFILE) ++ ++# ++# Default build targets. 
++# ++.KEEP_STATE: ++ ++def: $(DEF_DEPS) ++ ++all: $(ALL_DEPS) ++ ++clean: $(CLEAN_DEPS) ++ ++clobber: $(CLOBBER_DEPS) ++ ++lint: $(LINT_DEPS) ++ ++modlintlib: $(MODLINTLIB_DEPS) ++ ++clean.lint: $(CLEAN_LINT_DEPS) ++ ++install: $(INSTALL_DEPS) ++ ++# ++# Include common targets. ++# ++include $(UTSBASE)/sparc/Makefile.targ +-- +2.40.1 + -- Gitblit v1.9.3