[ghak90,V8,07/16] audit: add contid support for signalling the audit daemon

Submitted by Richard Guy Briggs on Dec. 31, 2019, 7:48 p.m.

Details

Message ID 7d7933d742fdf4a94c84b791906a450b16f2e81f.1577736799.git.rgb@redhat.com
State New
Series "audit: implement container identifier"
Headers show

Commit Message

Richard Guy Briggs Dec. 31, 2019, 7:48 p.m.
Add audit container identifier support to the action of signalling the
audit daemon.

Since this would need to add an element to the audit_sig_info struct,
a new record type AUDIT_SIGNAL_INFO2 was created with a new
audit_sig_info2 struct.  Corresponding support is required in the
userspace code to reflect the new record request and reply type.
An older userspace won't break since it won't know to request this
record type.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 include/linux/audit.h       |  7 +++++++
 include/uapi/linux/audit.h  |  1 +
 kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
 kernel/audit.h              |  1 +
 security/selinux/nlmsgtab.c |  1 +
 5 files changed, 45 insertions(+)

Patch hide | download patch | download mbox

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2636b0ad0011..6929a02080f7 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -22,6 +22,13 @@  struct audit_sig_info {
 	char		ctx[0];
 };
 
+struct audit_sig_info2 {
+	uid_t		uid;
+	pid_t		pid;
+	u64		cid;
+	char		ctx[0];
+};
+
 struct audit_buffer;
 struct audit_context;
 struct inode;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 93417a8af9d0..4f87b06f0acd 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -72,6 +72,7 @@ 
 #define AUDIT_SET_FEATURE	1018	/* Turn an audit feature on or off */
 #define AUDIT_GET_FEATURE	1019	/* Get which features are enabled */
 #define AUDIT_CONTAINER_OP	1020	/* Define the container id and info */
+#define AUDIT_SIGNAL_INFO2	1021	/* Get info auditd signal sender */
 
 #define AUDIT_FIRST_USER_MSG	1100	/* Userspace messages mostly uninteresting to kernel */
 #define AUDIT_USER_AVC		1107	/* We filter this differently */
diff --git a/kernel/audit.c b/kernel/audit.c
index 0871c3e5d6df..51159c94041c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -126,6 +126,14 @@  struct auditd_connection {
 kuid_t		audit_sig_uid = INVALID_UID;
 pid_t		audit_sig_pid = -1;
 u32		audit_sig_sid = 0;
+/* Since the signal information is stored in the record buffer at the
+ * time of the signal, but not retrieved until later, there is a chance
+ * that the last process in the container could terminate before the
+ * signal record is delivered.  In this circumstance, there is a chance
+ * the orchestrator could reuse the audit container identifier, causing
+ * an overlap of audit records that refer to the same audit container
+ * identifier, but a different container instance.  */
+u64		audit_sig_cid = AUDIT_CID_UNSET;
 
 /* Records can be lost in several ways:
    0) [suppressed in audit_alloc]
@@ -1123,6 +1131,7 @@  static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
 	case AUDIT_ADD_RULE:
 	case AUDIT_DEL_RULE:
 	case AUDIT_SIGNAL_INFO:
+	case AUDIT_SIGNAL_INFO2:
 	case AUDIT_TTY_GET:
 	case AUDIT_TTY_SET:
 	case AUDIT_TRIM:
@@ -1286,6 +1295,7 @@  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	struct audit_buffer	*ab;
 	u16			msg_type = nlh->nlmsg_type;
 	struct audit_sig_info   *sig_data;
+	struct audit_sig_info2  *sig_data2;
 	char			*ctx = NULL;
 	u32			len;
 
@@ -1545,6 +1555,30 @@  static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				 sig_data, sizeof(*sig_data) + len);
 		kfree(sig_data);
 		break;
+	case AUDIT_SIGNAL_INFO2:
+		len = 0;
+		if (audit_sig_sid) {
+			err = security_secid_to_secctx(audit_sig_sid, &ctx, &len);
+			if (err)
+				return err;
+		}
+		sig_data2 = kmalloc(sizeof(*sig_data2) + len, GFP_KERNEL);
+		if (!sig_data2) {
+			if (audit_sig_sid)
+				security_release_secctx(ctx, len);
+			return -ENOMEM;
+		}
+		sig_data2->uid = from_kuid(&init_user_ns, audit_sig_uid);
+		sig_data2->pid = audit_sig_pid;
+		if (audit_sig_sid) {
+			memcpy(sig_data2->ctx, ctx, len);
+			security_release_secctx(ctx, len);
+		}
+		sig_data2->cid = audit_sig_cid;
+		audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO2, 0, 0,
+				 sig_data2, sizeof(*sig_data2) + len);
+		kfree(sig_data2);
+		break;
 	case AUDIT_TTY_GET: {
 		struct audit_tty_status s;
 		unsigned int t;
@@ -2414,6 +2448,7 @@  int audit_signal_info(int sig, struct task_struct *t)
 		else
 			audit_sig_uid = uid;
 		security_task_getsecid(current, &audit_sig_sid);
+		audit_sig_cid = audit_get_contid(current);
 	}
 
 	return audit_signal_info_syscall(t);
diff --git a/kernel/audit.h b/kernel/audit.h
index 162de8366b32..de358ac61587 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -346,6 +346,7 @@  static inline int audit_signal_info_syscall(struct task_struct *t)
 extern pid_t audit_sig_pid;
 extern kuid_t audit_sig_uid;
 extern u32 audit_sig_sid;
+extern u64 audit_sig_cid;
 
 extern int audit_filter(int msgtype, unsigned int listtype);
 
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index c97fdae8f71b..f006d8b70b65 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -134,6 +134,7 @@  struct nlmsg_perm {
 	{ AUDIT_DEL_RULE,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_USER,		NETLINK_AUDIT_SOCKET__NLMSG_RELAY    },
 	{ AUDIT_SIGNAL_INFO,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
+	{ AUDIT_SIGNAL_INFO2,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },
 	{ AUDIT_TRIM,		NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_MAKE_EQUIV,	NETLINK_AUDIT_SOCKET__NLMSG_WRITE    },
 	{ AUDIT_TTY_GET,	NETLINK_AUDIT_SOCKET__NLMSG_READ     },

Comments

Paul Moore Jan. 22, 2020, 9:28 p.m.
On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
>
> Add audit container identifier support to the action of signalling the
> audit daemon.
>
> Since this would need to add an element to the audit_sig_info struct,
> a new record type AUDIT_SIGNAL_INFO2 was created with a new
> audit_sig_info2 struct.  Corresponding support is required in the
> userspace code to reflect the new record request and reply type.
> An older userspace won't break since it won't know to request this
> record type.
>
> Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> ---
>  include/linux/audit.h       |  7 +++++++
>  include/uapi/linux/audit.h  |  1 +
>  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
>  kernel/audit.h              |  1 +
>  security/selinux/nlmsgtab.c |  1 +
>  5 files changed, 45 insertions(+)

...

> diff --git a/kernel/audit.c b/kernel/audit.c
> index 0871c3e5d6df..51159c94041c 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -126,6 +126,14 @@ struct auditd_connection {
>  kuid_t         audit_sig_uid = INVALID_UID;
>  pid_t          audit_sig_pid = -1;
>  u32            audit_sig_sid = 0;
> +/* Since the signal information is stored in the record buffer at the
> + * time of the signal, but not retrieved until later, there is a chance
> + * that the last process in the container could terminate before the
> + * signal record is delivered.  In this circumstance, there is a chance
> + * the orchestrator could reuse the audit container identifier, causing
> + * an overlap of audit records that refer to the same audit container
> + * identifier, but a different container instance.  */
> +u64            audit_sig_cid = AUDIT_CID_UNSET;

I believe we could prevent the case mentioned above by taking an
additional reference to the audit container ID object when the signal
information is collected, dropping it only after the signal
information is collected by userspace or another process signals the
audit daemon.  Yes, it would block that audit container ID from being
reused immediately, but since we are talking about one number out of
2^64 that seems like a reasonable tradeoff.

--
paul moore
www.paul-moore.com
Richard Guy Briggs Jan. 23, 2020, 4:29 p.m.
On 2020-01-22 16:28, Paul Moore wrote:
> On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> >
> > Add audit container identifier support to the action of signalling the
> > audit daemon.
> >
> > Since this would need to add an element to the audit_sig_info struct,
> > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > audit_sig_info2 struct.  Corresponding support is required in the
> > userspace code to reflect the new record request and reply type.
> > An older userspace won't break since it won't know to request this
> > record type.
> >
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  include/linux/audit.h       |  7 +++++++
> >  include/uapi/linux/audit.h  |  1 +
> >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> >  kernel/audit.h              |  1 +
> >  security/selinux/nlmsgtab.c |  1 +
> >  5 files changed, 45 insertions(+)
> 
> ...
> 
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index 0871c3e5d6df..51159c94041c 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -126,6 +126,14 @@ struct auditd_connection {
> >  kuid_t         audit_sig_uid = INVALID_UID;
> >  pid_t          audit_sig_pid = -1;
> >  u32            audit_sig_sid = 0;
> > +/* Since the signal information is stored in the record buffer at the
> > + * time of the signal, but not retrieved until later, there is a chance
> > + * that the last process in the container could terminate before the
> > + * signal record is delivered.  In this circumstance, there is a chance
> > + * the orchestrator could reuse the audit container identifier, causing
> > + * an overlap of audit records that refer to the same audit container
> > + * identifier, but a different container instance.  */
> > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> 
> I believe we could prevent the case mentioned above by taking an
> additional reference to the audit container ID object when the signal
> information is collected, dropping it only after the signal
> information is collected by userspace or another process signals the
> audit daemon.  Yes, it would block that audit container ID from being
> reused immediately, but since we are talking about one number out of
> 2^64 that seems like a reasonable tradeoff.

I had thought that through and should have been more explicit about that
situation when I documented it.  We could do that, but then the syscall
records would be connected with the call from auditd on shutdown to
request that signal information, rather than the exit of that last
process that was using that container.  This strikes me as misleading.
Is that really what we want?

> paul moore

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore Jan. 23, 2020, 5:09 p.m.
On Thu, Jan 23, 2020 at 11:29 AM Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2020-01-22 16:28, Paul Moore wrote:
> > On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > >
> > > Add audit container identifier support to the action of signalling the
> > > audit daemon.
> > >
> > > Since this would need to add an element to the audit_sig_info struct,
> > > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > > audit_sig_info2 struct.  Corresponding support is required in the
> > > userspace code to reflect the new record request and reply type.
> > > An older userspace won't break since it won't know to request this
> > > record type.
> > >
> > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > ---
> > >  include/linux/audit.h       |  7 +++++++
> > >  include/uapi/linux/audit.h  |  1 +
> > >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> > >  kernel/audit.h              |  1 +
> > >  security/selinux/nlmsgtab.c |  1 +
> > >  5 files changed, 45 insertions(+)
> >
> > ...
> >
> > > diff --git a/kernel/audit.c b/kernel/audit.c
> > > index 0871c3e5d6df..51159c94041c 100644
> > > --- a/kernel/audit.c
> > > +++ b/kernel/audit.c
> > > @@ -126,6 +126,14 @@ struct auditd_connection {
> > >  kuid_t         audit_sig_uid = INVALID_UID;
> > >  pid_t          audit_sig_pid = -1;
> > >  u32            audit_sig_sid = 0;
> > > +/* Since the signal information is stored in the record buffer at the
> > > + * time of the signal, but not retrieved until later, there is a chance
> > > + * that the last process in the container could terminate before the
> > > + * signal record is delivered.  In this circumstance, there is a chance
> > > + * the orchestrator could reuse the audit container identifier, causing
> > > + * an overlap of audit records that refer to the same audit container
> > > + * identifier, but a different container instance.  */
> > > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> >
> > I believe we could prevent the case mentioned above by taking an
> > additional reference to the audit container ID object when the signal
> > information is collected, dropping it only after the signal
> > information is collected by userspace or another process signals the
> > audit daemon.  Yes, it would block that audit container ID from being
> > reused immediately, but since we are talking about one number out of
> > 2^64 that seems like a reasonable tradeoff.
>
> I had thought that through and should have been more explicit about that
> situation when I documented it.  We could do that, but then the syscall
> records would be connected with the call from auditd on shutdown to
> request that signal information, rather than the exit of that last
> process that was using that container.  This strikes me as misleading.
> Is that really what we want?

 ???

I think one of us is not understanding the other; maybe it's me, maybe
it's you, maybe it's both of us.

Anyway, here is what I was trying to convey with my original comment
... When we record the audit container ID in audit_signal_info() we
take an extra reference to the audit container ID object so that it
will not disappear (and get reused) until after we respond with an
AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took in
audit_signal_info().  Unless I'm missing some other change you made,
this *shouldn't* affect the syscall records, all it does is preserve
the audit container ID object in the kernel's ACID store so it doesn't
get reused.

(We do need to do some extra housekeeping in audit_signal_info() to
deal with the case where nobody asks for AUDIT_SIGNAL_INFO2 -
basically if audit_sig_cid is not NULL we should drop a reference
before assigning it a new object pointer, and of course we would need
to set audit_sig_cid to NULL in audit_receive_msg() after sending it
up to userspace and dropping the extra ref.)
Richard Guy Briggs Jan. 23, 2020, 8:04 p.m.
On 2020-01-23 12:09, Paul Moore wrote:
> On Thu, Jan 23, 2020 at 11:29 AM Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 2020-01-22 16:28, Paul Moore wrote:
> > > On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > >
> > > > Add audit container identifier support to the action of signalling the
> > > > audit daemon.
> > > >
> > > > Since this would need to add an element to the audit_sig_info struct,
> > > > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > > > audit_sig_info2 struct.  Corresponding support is required in the
> > > > userspace code to reflect the new record request and reply type.
> > > > An older userspace won't break since it won't know to request this
> > > > record type.
> > > >
> > > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > > ---
> > > >  include/linux/audit.h       |  7 +++++++
> > > >  include/uapi/linux/audit.h  |  1 +
> > > >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> > > >  kernel/audit.h              |  1 +
> > > >  security/selinux/nlmsgtab.c |  1 +
> > > >  5 files changed, 45 insertions(+)
> > >
> > > ...
> > >
> > > > diff --git a/kernel/audit.c b/kernel/audit.c
> > > > index 0871c3e5d6df..51159c94041c 100644
> > > > --- a/kernel/audit.c
> > > > +++ b/kernel/audit.c
> > > > @@ -126,6 +126,14 @@ struct auditd_connection {
> > > >  kuid_t         audit_sig_uid = INVALID_UID;
> > > >  pid_t          audit_sig_pid = -1;
> > > >  u32            audit_sig_sid = 0;
> > > > +/* Since the signal information is stored in the record buffer at the
> > > > + * time of the signal, but not retrieved until later, there is a chance
> > > > + * that the last process in the container could terminate before the
> > > > + * signal record is delivered.  In this circumstance, there is a chance
> > > > + * the orchestrator could reuse the audit container identifier, causing
> > > > + * an overlap of audit records that refer to the same audit container
> > > > + * identifier, but a different container instance.  */
> > > > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> > >
> > > I believe we could prevent the case mentioned above by taking an
> > > additional reference to the audit container ID object when the signal
> > > information is collected, dropping it only after the signal
> > > information is collected by userspace or another process signals the
> > > audit daemon.  Yes, it would block that audit container ID from being
> > > reused immediately, but since we are talking about one number out of
> > > 2^64 that seems like a reasonable tradeoff.
> >
> > I had thought that through and should have been more explicit about that
> > situation when I documented it.  We could do that, but then the syscall
> > records would be connected with the call from auditd on shutdown to
> > request that signal information, rather than the exit of that last
> > process that was using that container.  This strikes me as misleading.
> > Is that really what we want?
> 
>  ???
> 
> I think one of us is not understanding the other; maybe it's me, maybe
> it's you, maybe it's both of us.
> 
> Anyway, here is what I was trying to convey with my original comment
> ... When we record the audit container ID in audit_signal_info() we
> take an extra reference to the audit container ID object so that it
> will not disappear (and get reused) until after we respond with an
> AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took in
> audit_signal_info().  Unless I'm missing some other change you made,
> this *shouldn't* affect the syscall records, all it does is preserve
> the audit container ID object in the kernel's ACID store so it doesn't
> get reused.

This is exactly what I had understood.  I hadn't considered the extra
details below in detail due to my original syscall concern, but they
make sense.

The syscall I refer to is the one connected with the drop of the
audit container identifier by the last process that was in that
container in patch 5/16.  The production of this record is contingent on
the last ref in a contobj being dropped.  So if it is due to that ref
being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
record it fetched, then it will appear that the fetch action closed the
container rather than the last process in the container to exit.

Does this make sense?

> (We do need to do some extra housekeeping in audit_signal_info() to
> deal with the case where nobody asks for AUDIT_SIGNAL_INFO2 -
> basically if audit_sig_cid is not NULL we should drop a reference
> before assigning it a new object pointer, and of course we would need
> to set audit_sig_cid to NULL in audit_receive_msg() after sending it
> up to userspace and dropping the extra ref.)
> 
> paul moore

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore Jan. 23, 2020, 9:35 p.m.
On Thu, Jan 23, 2020 at 3:04 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2020-01-23 12:09, Paul Moore wrote:
> > On Thu, Jan 23, 2020 at 11:29 AM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > On 2020-01-22 16:28, Paul Moore wrote:
> > > > On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > > >
> > > > > Add audit container identifier support to the action of signalling the
> > > > > audit daemon.
> > > > >
> > > > > Since this would need to add an element to the audit_sig_info struct,
> > > > > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > > > > audit_sig_info2 struct.  Corresponding support is required in the
> > > > > userspace code to reflect the new record request and reply type.
> > > > > An older userspace won't break since it won't know to request this
> > > > > record type.
> > > > >
> > > > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > > > ---
> > > > >  include/linux/audit.h       |  7 +++++++
> > > > >  include/uapi/linux/audit.h  |  1 +
> > > > >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> > > > >  kernel/audit.h              |  1 +
> > > > >  security/selinux/nlmsgtab.c |  1 +
> > > > >  5 files changed, 45 insertions(+)
> > > >
> > > > ...
> > > >
> > > > > diff --git a/kernel/audit.c b/kernel/audit.c
> > > > > index 0871c3e5d6df..51159c94041c 100644
> > > > > --- a/kernel/audit.c
> > > > > +++ b/kernel/audit.c
> > > > > @@ -126,6 +126,14 @@ struct auditd_connection {
> > > > >  kuid_t         audit_sig_uid = INVALID_UID;
> > > > >  pid_t          audit_sig_pid = -1;
> > > > >  u32            audit_sig_sid = 0;
> > > > > +/* Since the signal information is stored in the record buffer at the
> > > > > + * time of the signal, but not retrieved until later, there is a chance
> > > > > + * that the last process in the container could terminate before the
> > > > > + * signal record is delivered.  In this circumstance, there is a chance
> > > > > + * the orchestrator could reuse the audit container identifier, causing
> > > > > + * an overlap of audit records that refer to the same audit container
> > > > > + * identifier, but a different container instance.  */
> > > > > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> > > >
> > > > I believe we could prevent the case mentioned above by taking an
> > > > additional reference to the audit container ID object when the signal
> > > > information is collected, dropping it only after the signal
> > > > information is collected by userspace or another process signals the
> > > > audit daemon.  Yes, it would block that audit container ID from being
> > > > reused immediately, but since we are talking about one number out of
> > > > 2^64 that seems like a reasonable tradeoff.
> > >
> > > I had thought that through and should have been more explicit about that
> > > situation when I documented it.  We could do that, but then the syscall
> > > records would be connected with the call from auditd on shutdown to
> > > request that signal information, rather than the exit of that last
> > > process that was using that container.  This strikes me as misleading.
> > > Is that really what we want?
> >
> >  ???
> >
> > I think one of us is not understanding the other; maybe it's me, maybe
> > it's you, maybe it's both of us.
> >
> > Anyway, here is what I was trying to convey with my original comment
> > ... When we record the audit container ID in audit_signal_info() we
> > take an extra reference to the audit container ID object so that it
> > will not disappear (and get reused) until after we respond with an
> > AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> > AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took in
> > audit_signal_info().  Unless I'm missing some other change you made,
> > this *shouldn't* affect the syscall records, all it does is preserve
> > the audit container ID object in the kernel's ACID store so it doesn't
> > get reused.
>
> This is exactly what I had understood.  I hadn't considered the extra
> details below in detail due to my original syscall concern, but they
> make sense.
>
> The syscall I refer to is the one connected with the drop of the
> audit container identifier by the last process that was in that
> container in patch 5/16.  The production of this record is contingent on
> the last ref in a contobj being dropped.  So if it is due to that ref
> being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
> record it fetched, then it will appear that the fetch action closed the
> container rather than the last process in the container to exit.
>
> Does this make sense?

More so than your original reply, at least to me anyway.

It makes sense that the audit container ID wouldn't be marked as
"dead" since it would still be very much alive and available for use
by the orchestrator, the question is if that is desirable or not.  I
think the answer to this comes down the preserving the correctness of
the audit log.

If the audit container ID reported by AUDIT_SIGNAL_INFO2 has been
reused then I think there is a legitimate concern that the audit log
is not correct, and could be misleading.  If we solve that by grabbing
an extra reference, then there could also be some confusion as
userspace considers a container to be "dead" while the audit container
ID still exists in the kernel, and the kernel generated audit
container ID death record will not be generated until much later (and
possibly be associated with a different event, but that could be
solved by unassociating the container death record).  Of the two
approaches, I think the latter is safer in that it preserves the
correctness of the audit log, even though it could result in a delay
of the container death record.

Neither way is perfect, so if you have any other ideas I'm all ears.

> > (We do need to do some extra housekeeping in audit_signal_info() to
> > deal with the case where nobody asks for AUDIT_SIGNAL_INFO2 -
> > basically if audit_sig_cid is not NULL we should drop a reference
> > before assigning it a new object pointer, and of course we would need
> > to set audit_sig_cid to NULL in audit_receive_msg() after sending it
> > up to userspace and dropping the extra ref.)
Richard Guy Briggs Feb. 4, 2020, 11:14 p.m.
On 2020-01-23 16:35, Paul Moore wrote:
> On Thu, Jan 23, 2020 at 3:04 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 2020-01-23 12:09, Paul Moore wrote:
> > > On Thu, Jan 23, 2020 at 11:29 AM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > > On 2020-01-22 16:28, Paul Moore wrote:
> > > > > On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > > > >
> > > > > > Add audit container identifier support to the action of signalling the
> > > > > > audit daemon.
> > > > > >
> > > > > > Since this would need to add an element to the audit_sig_info struct,
> > > > > > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > > > > > audit_sig_info2 struct.  Corresponding support is required in the
> > > > > > userspace code to reflect the new record request and reply type.
> > > > > > An older userspace won't break since it won't know to request this
> > > > > > record type.
> > > > > >
> > > > > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > > > > ---
> > > > > >  include/linux/audit.h       |  7 +++++++
> > > > > >  include/uapi/linux/audit.h  |  1 +
> > > > > >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> > > > > >  kernel/audit.h              |  1 +
> > > > > >  security/selinux/nlmsgtab.c |  1 +
> > > > > >  5 files changed, 45 insertions(+)
> > > > >
> > > > > ...
> > > > >
> > > > > > diff --git a/kernel/audit.c b/kernel/audit.c
> > > > > > index 0871c3e5d6df..51159c94041c 100644
> > > > > > --- a/kernel/audit.c
> > > > > > +++ b/kernel/audit.c
> > > > > > @@ -126,6 +126,14 @@ struct auditd_connection {
> > > > > >  kuid_t         audit_sig_uid = INVALID_UID;
> > > > > >  pid_t          audit_sig_pid = -1;
> > > > > >  u32            audit_sig_sid = 0;
> > > > > > +/* Since the signal information is stored in the record buffer at the
> > > > > > + * time of the signal, but not retrieved until later, there is a chance
> > > > > > + * that the last process in the container could terminate before the
> > > > > > + * signal record is delivered.  In this circumstance, there is a chance
> > > > > > + * the orchestrator could reuse the audit container identifier, causing
> > > > > > + * an overlap of audit records that refer to the same audit container
> > > > > > + * identifier, but a different container instance.  */
> > > > > > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> > > > >
> > > > > I believe we could prevent the case mentioned above by taking an
> > > > > additional reference to the audit container ID object when the signal
> > > > > information is collected, dropping it only after the signal
> > > > > information is collected by userspace or another process signals the
> > > > > audit daemon.  Yes, it would block that audit container ID from being
> > > > > reused immediately, but since we are talking about one number out of
> > > > > 2^64 that seems like a reasonable tradeoff.
> > > >
> > > > I had thought that through and should have been more explicit about that
> > > > situation when I documented it.  We could do that, but then the syscall
> > > > records would be connected with the call from auditd on shutdown to
> > > > request that signal information, rather than the exit of that last
> > > > process that was using that container.  This strikes me as misleading.
> > > > Is that really what we want?
> > >
> > >  ???
> > >
> > > I think one of us is not understanding the other; maybe it's me, maybe
> > > it's you, maybe it's both of us.
> > >
> > > Anyway, here is what I was trying to convey with my original comment
> > > ... When we record the audit container ID in audit_signal_info() we
> > > take an extra reference to the audit container ID object so that it
> > > will not disappear (and get reused) until after we respond with an
> > > AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> > > AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took in
> > > audit_signal_info().  Unless I'm missing some other change you made,
> > > this *shouldn't* affect the syscall records, all it does is preserve
> > > the audit container ID object in the kernel's ACID store so it doesn't
> > > get reused.
> >
> > This is exactly what I had understood.  I hadn't considered the extra
> > details below in detail due to my original syscall concern, but they
> > make sense.
> >
> > The syscall I refer to is the one connected with the drop of the
> > audit container identifier by the last process that was in that
> > container in patch 5/16.  The production of this record is contingent on
> > the last ref in a contobj being dropped.  So if it is due to that ref
> > being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
> > record it fetched, then it will appear that the fetch action closed the
> > container rather than the last process in the container to exit.
> >
> > Does this make sense?
> 
> More so than your original reply, at least to me anyway.
> 
> It makes sense that the audit container ID wouldn't be marked as
> "dead" since it would still be very much alive and available for use
> by the orchestrator, the question is if that is desirable or not.  I
> think the answer to this comes down the preserving the correctness of
> the audit log.
> 
> If the audit container ID reported by AUDIT_SIGNAL_INFO2 has been
> reused then I think there is a legitimate concern that the audit log
> is not correct, and could be misleading.  If we solve that by grabbing
> an extra reference, then there could also be some confusion as
> userspace considers a container to be "dead" while the audit container
> ID still exists in the kernel, and the kernel generated audit
> container ID death record will not be generated until much later (and
> possibly be associated with a different event, but that could be
> solved by unassociating the container death record).

How does syscall association of the death record with AUDIT_SIGNAL_INFO2
possibly get associated with another event?  Or is the syscall
association with the fetch for the AUDIT_SIGNAL_INFO2 the other event?

Another idea might be to bump the refcount in audit_signal_info() but
mark tht contid as dead so it can't be reused if we are concerned that
the dead contid be reused?

There is still the problem later that the reported contid is incomplete
compared to the rest of the contid reporting cycle wrt nesting since
AUDIT_SIGNAL_INFO2 will need to be more complex w/2 variable length
fields to accommodate a nested contid list.

>  Of the two
> approaches, I think the latter is safer in that it preserves the
> correctness of the audit log, even though it could result in a delay
> of the container death record.

I prefer the former since it strongly indicates last task in the
container.  The AUDIT_SIGNAL_INFO2 msg has the pid and other subject
attributes and the contid to strongly link the responsible party.

> Neither way is perfect, so if you have any other ideas I'm all ears.
> 
> > > (We do need to do some extra housekeeping in audit_signal_info() to
> > > deal with the case where nobody asks for AUDIT_SIGNAL_INFO2 -
> > > basically if audit_sig_cid is not NULL we should drop a reference
> > > before assigning it a new object pointer, and of course we would need
> > > to set audit_sig_cid to NULL in audit_receive_msg() after sending it
> > > up to userspace and dropping the extra ref.)
> 
> -- 
> paul moore
> www.paul-moore.com
> 

- RGB

--
Richard Guy Briggs <rgb@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore Feb. 5, 2020, 10:50 p.m.
On Tue, Feb 4, 2020 at 6:15 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> On 2020-01-23 16:35, Paul Moore wrote:
> > On Thu, Jan 23, 2020 at 3:04 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > On 2020-01-23 12:09, Paul Moore wrote:
> > > > On Thu, Jan 23, 2020 at 11:29 AM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > > > On 2020-01-22 16:28, Paul Moore wrote:
> > > > > > On Tue, Dec 31, 2019 at 2:50 PM Richard Guy Briggs <rgb@redhat.com> wrote:
> > > > > > >
> > > > > > > Add audit container identifier support to the action of signalling the
> > > > > > > audit daemon.
> > > > > > >
> > > > > > > Since this would need to add an element to the audit_sig_info struct,
> > > > > > > a new record type AUDIT_SIGNAL_INFO2 was created with a new
> > > > > > > audit_sig_info2 struct.  Corresponding support is required in the
> > > > > > > userspace code to reflect the new record request and reply type.
> > > > > > > An older userspace won't break since it won't know to request this
> > > > > > > record type.
> > > > > > >
> > > > > > > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > > > > > > ---
> > > > > > >  include/linux/audit.h       |  7 +++++++
> > > > > > >  include/uapi/linux/audit.h  |  1 +
> > > > > > >  kernel/audit.c              | 35 +++++++++++++++++++++++++++++++++++
> > > > > > >  kernel/audit.h              |  1 +
> > > > > > >  security/selinux/nlmsgtab.c |  1 +
> > > > > > >  5 files changed, 45 insertions(+)
> > > > > >
> > > > > > ...
> > > > > >
> > > > > > > diff --git a/kernel/audit.c b/kernel/audit.c
> > > > > > > index 0871c3e5d6df..51159c94041c 100644
> > > > > > > --- a/kernel/audit.c
> > > > > > > +++ b/kernel/audit.c
> > > > > > > @@ -126,6 +126,14 @@ struct auditd_connection {
> > > > > > >  kuid_t         audit_sig_uid = INVALID_UID;
> > > > > > >  pid_t          audit_sig_pid = -1;
> > > > > > >  u32            audit_sig_sid = 0;
> > > > > > > +/* Since the signal information is stored in the record buffer at the
> > > > > > > + * time of the signal, but not retrieved until later, there is a chance
> > > > > > > + * that the last process in the container could terminate before the
> > > > > > > + * signal record is delivered.  In this circumstance, there is a chance
> > > > > > > + * the orchestrator could reuse the audit container identifier, causing
> > > > > > > + * an overlap of audit records that refer to the same audit container
> > > > > > > + * identifier, but a different container instance.  */
> > > > > > > +u64            audit_sig_cid = AUDIT_CID_UNSET;
> > > > > >
> > > > > > I believe we could prevent the case mentioned above by taking an
> > > > > > additional reference to the audit container ID object when the signal
> > > > > > information is collected, dropping it only after the signal
> > > > > > information is collected by userspace or another process signals the
> > > > > > audit daemon.  Yes, it would block that audit container ID from being
> > > > > > reused immediately, but since we are talking about one number out of
> > > > > > 2^64 that seems like a reasonable tradeoff.
> > > > >
> > > > > I had thought that through and should have been more explicit about that
> > > > > situation when I documented it.  We could do that, but then the syscall
> > > > > records would be connected with the call from auditd on shutdown to
> > > > > request that signal information, rather than the exit of that last
> > > > > process that was using that container.  This strikes me as misleading.
> > > > > Is that really what we want?
> > > >
> > > >  ???
> > > >
> > > > I think one of us is not understanding the other; maybe it's me, maybe
> > > > it's you, maybe it's both of us.
> > > >
> > > > Anyway, here is what I was trying to convey with my original comment
> > > > ... When we record the audit container ID in audit_signal_info() we
> > > > take an extra reference to the audit container ID object so that it
> > > > will not disappear (and get reused) until after we respond with an
> > > > AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> > > > AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took in
> > > > audit_signal_info().  Unless I'm missing some other change you made,
> > > > this *shouldn't* affect the syscall records, all it does is preserve
> > > > the audit container ID object in the kernel's ACID store so it doesn't
> > > > get reused.
> > >
> > > This is exactly what I had understood.  I hadn't considered the extra
> > > details below in detail due to my original syscall concern, but they
> > > make sense.
> > >
> > > The syscall I refer to is the one connected with the drop of the
> > > audit container identifier by the last process that was in that
> > > container in patch 5/16.  The production of this record is contingent on
> > > the last ref in a contobj being dropped.  So if it is due to that ref
> > > being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
> > > record it fetched, then it will appear that the fetch action closed the
> > > container rather than the last process in the container to exit.
> > >
> > > Does this make sense?
> >
> > More so than your original reply, at least to me anyway.
> >
> > It makes sense that the audit container ID wouldn't be marked as
> > "dead" since it would still be very much alive and available for use
> > by the orchestrator, the question is if that is desirable or not.  I
> > think the answer to this comes down the preserving the correctness of
> > the audit log.
> >
> > If the audit container ID reported by AUDIT_SIGNAL_INFO2 has been
> > reused then I think there is a legitimate concern that the audit log
> > is not correct, and could be misleading.  If we solve that by grabbing
> > an extra reference, then there could also be some confusion as
> > userspace considers a container to be "dead" while the audit container
> > ID still exists in the kernel, and the kernel generated audit
> > container ID death record will not be generated until much later (and
> > possibly be associated with a different event, but that could be
> > solved by unassociating the container death record).
>
> How does syscall association of the death record with AUDIT_SIGNAL_INFO2
> possibly get associated with another event?  Or is the syscall
> association with the fetch for the AUDIT_SIGNAL_INFO2 the other event?

The issue is when does the audit container ID "die".  If it is when
the last task in the container exits, then the death record will be
associated when the task's exit.  If the audit container ID lives on
until the last reference of it in the audit logs, including the
SIGNAL_INFO2 message, the death record will be associated with the
related SIGNAL_INFO2 syscalls, or perhaps unassociated depending on
the details of the syscalls/netlink.

> Another idea might be to bump the refcount in audit_signal_info() but
> mark tht contid as dead so it can't be reused if we are concerned that
> the dead contid be reused?

Ooof.  Yes, maybe, but that would be ugly.

> There is still the problem later that the reported contid is incomplete
> compared to the rest of the contid reporting cycle wrt nesting since
> AUDIT_SIGNAL_INFO2 will need to be more complex w/2 variable length
> fields to accommodate a nested contid list.

Do we really care about the full nested audit container ID list in the
SIGNAL_INFO2 record?

> >  Of the two
> > approaches, I think the latter is safer in that it preserves the
> > correctness of the audit log, even though it could result in a delay
> > of the container death record.
>
> I prefer the former since it strongly indicates last task in the
> container.  The AUDIT_SIGNAL_INFO2 msg has the pid and other subject
> attributes and the contid to strongly link the responsible party.

Steve is the only one who really tracks the security certifications
that are relevant to audit, see what the certification requirements
have to say and we can revisit this.
Steve Grubb Feb. 12, 2020, 10:38 p.m.
On Wednesday, February 5, 2020 5:50:28 PM EST Paul Moore wrote:
> > > > > ... When we record the audit container ID in audit_signal_info() we
> > > > > take an extra reference to the audit container ID object so that it
> > > > > will not disappear (and get reused) until after we respond with an
> > > > > AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> > > > > AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took
> > > > > in
> > > > > audit_signal_info().  Unless I'm missing some other change you
> > > > > made,
> > > > > this *shouldn't* affect the syscall records, all it does is
> > > > > preserve
> > > > > the audit container ID object in the kernel's ACID store so it
> > > > > doesn't
> > > > > get reused.
> > > > 
> > > > This is exactly what I had understood.  I hadn't considered the extra
> > > > details below in detail due to my original syscall concern, but they
> > > > make sense.
> > > > 
> > > > The syscall I refer to is the one connected with the drop of the
> > > > audit container identifier by the last process that was in that
> > > > container in patch 5/16.  The production of this record is contingent
> > > > on
> > > > the last ref in a contobj being dropped.  So if it is due to that ref
> > > > being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
> > > > record it fetched, then it will appear that the fetch action closed
> > > > the
> > > > container rather than the last process in the container to exit.
> > > > 
> > > > Does this make sense?
> > > 
> > > More so than your original reply, at least to me anyway.
> > > 
> > > It makes sense that the audit container ID wouldn't be marked as
> > > "dead" since it would still be very much alive and available for use
> > > by the orchestrator, the question is if that is desirable or not.  I
> > > think the answer to this comes down the preserving the correctness of
> > > the audit log.
> > > 
> > > If the audit container ID reported by AUDIT_SIGNAL_INFO2 has been
> > > reused then I think there is a legitimate concern that the audit log
> > > is not correct, and could be misleading.  If we solve that by grabbing
> > > an extra reference, then there could also be some confusion as
> > > userspace considers a container to be "dead" while the audit container
> > > ID still exists in the kernel, and the kernel generated audit
> > > container ID death record will not be generated until much later (and
> > > possibly be associated with a different event, but that could be
> > > solved by unassociating the container death record).
> > 
> > How does syscall association of the death record with AUDIT_SIGNAL_INFO2
> > possibly get associated with another event?  Or is the syscall
> > association with the fetch for the AUDIT_SIGNAL_INFO2 the other event?
> 
> The issue is when does the audit container ID "die".  If it is when
> the last task in the container exits, then the death record will be
> associated when the task's exit.  If the audit container ID lives on
> until the last reference of it in the audit logs, including the
> SIGNAL_INFO2 message, the death record will be associated with the
> related SIGNAL_INFO2 syscalls, or perhaps unassociated depending on
> the details of the syscalls/netlink.
> 
> > Another idea might be to bump the refcount in audit_signal_info() but
> > mark tht contid as dead so it can't be reused if we are concerned that
> > the dead contid be reused?
> 
> Ooof.  Yes, maybe, but that would be ugly.
> 
> > There is still the problem later that the reported contid is incomplete
> > compared to the rest of the contid reporting cycle wrt nesting since
> > AUDIT_SIGNAL_INFO2 will need to be more complex w/2 variable length
> > fields to accommodate a nested contid list.
> 
> Do we really care about the full nested audit container ID list in the
> SIGNAL_INFO2 record?
> 
> > > Of the two
> > > approaches, I think the latter is safer in that it preserves the
> > > correctness of the audit log, even though it could result in a delay
> > > of the container death record.
> > 
> > I prefer the former since it strongly indicates last task in the
> > container.  The AUDIT_SIGNAL_INFO2 msg has the pid and other subject
> > attributes and the contid to strongly link the responsible party.
> 
> Steve is the only one who really tracks the security certifications
> that are relevant to audit, see what the certification requirements
> have to say and we can revisit this.

Sever Virtualization Protection Profile is the closest applicable standard

https://www.niap-ccevs.org/Profile/Info.cfm?PPID=408&id=408

It is silent on audit requirements for the lifecycle of a VM. I assume that 
all that is needed is what the orchestrator says its doing at the high level. 
So, if an orchestrator wants to shutdown a container, the orchestrator must 
log that intent and its results. In a similar fashion, systemd logs that it's 
killing a service and we don't actually hook the exit syscall of the service 
to record that.

Now, if a container was being used as a VPS, and it had a fully functioning 
userspace, it's own services, and its very own audit daemon, then in this 
case it would care who sent a signal to its auditd. The tenant of that 
container may have to comply with PCI-DSS or something else. It would log the 
audit service is being terminated and systemd would record that its tearing 
down the environment. The OS doesn't need to do anything.

-Steve
Paul Moore Feb. 13, 2020, 12:09 a.m.
On Wed, Feb 12, 2020 at 5:39 PM Steve Grubb <sgrubb@redhat.com> wrote:
> On Wednesday, February 5, 2020 5:50:28 PM EST Paul Moore wrote:
> > > > > > ... When we record the audit container ID in audit_signal_info() we
> > > > > > take an extra reference to the audit container ID object so that it
> > > > > > will not disappear (and get reused) until after we respond with an
> > > > > > AUDIT_SIGNAL_INFO2.  In audit_receive_msg() when we do the
> > > > > > AUDIT_SIGNAL_INFO2 processing we drop the extra reference we took
> > > > > > in
> > > > > > audit_signal_info().  Unless I'm missing some other change you
> > > > > > made,
> > > > > > this *shouldn't* affect the syscall records, all it does is
> > > > > > preserve
> > > > > > the audit container ID object in the kernel's ACID store so it
> > > > > > doesn't
> > > > > > get reused.
> > > > >
> > > > > This is exactly what I had understood.  I hadn't considered the extra
> > > > > details below in detail due to my original syscall concern, but they
> > > > > make sense.
> > > > >
> > > > > The syscall I refer to is the one connected with the drop of the
> > > > > audit container identifier by the last process that was in that
> > > > > container in patch 5/16.  The production of this record is contingent
> > > > > on
> > > > > the last ref in a contobj being dropped.  So if it is due to that ref
> > > > > being maintained by audit_signal_info() until the AUDIT_SIGNAL_INFO2
> > > > > record it fetched, then it will appear that the fetch action closed
> > > > > the
> > > > > container rather than the last process in the container to exit.
> > > > >
> > > > > Does this make sense?
> > > >
> > > > More so than your original reply, at least to me anyway.
> > > >
> > > > It makes sense that the audit container ID wouldn't be marked as
> > > > "dead" since it would still be very much alive and available for use
> > > > by the orchestrator, the question is if that is desirable or not.  I
> > > > think the answer to this comes down the preserving the correctness of
> > > > the audit log.
> > > >
> > > > If the audit container ID reported by AUDIT_SIGNAL_INFO2 has been
> > > > reused then I think there is a legitimate concern that the audit log
> > > > is not correct, and could be misleading.  If we solve that by grabbing
> > > > an extra reference, then there could also be some confusion as
> > > > userspace considers a container to be "dead" while the audit container
> > > > ID still exists in the kernel, and the kernel generated audit
> > > > container ID death record will not be generated until much later (and
> > > > possibly be associated with a different event, but that could be
> > > > solved by unassociating the container death record).
> > >
> > > How does syscall association of the death record with AUDIT_SIGNAL_INFO2
> > > possibly get associated with another event?  Or is the syscall
> > > association with the fetch for the AUDIT_SIGNAL_INFO2 the other event?
> >
> > The issue is when does the audit container ID "die".  If it is when
> > the last task in the container exits, then the death record will be
> > associated when the task's exit.  If the audit container ID lives on
> > until the last reference of it in the audit logs, including the
> > SIGNAL_INFO2 message, the death record will be associated with the
> > related SIGNAL_INFO2 syscalls, or perhaps unassociated depending on
> > the details of the syscalls/netlink.
> >
> > > Another idea might be to bump the refcount in audit_signal_info() but
> > > mark tht contid as dead so it can't be reused if we are concerned that
> > > the dead contid be reused?
> >
> > Ooof.  Yes, maybe, but that would be ugly.
> >
> > > There is still the problem later that the reported contid is incomplete
> > > compared to the rest of the contid reporting cycle wrt nesting since
> > > AUDIT_SIGNAL_INFO2 will need to be more complex w/2 variable length
> > > fields to accommodate a nested contid list.
> >
> > Do we really care about the full nested audit container ID list in the
> > SIGNAL_INFO2 record?
> >
> > > > Of the two
> > > > approaches, I think the latter is safer in that it preserves the
> > > > correctness of the audit log, even though it could result in a delay
> > > > of the container death record.
> > >
> > > I prefer the former since it strongly indicates last task in the
> > > container.  The AUDIT_SIGNAL_INFO2 msg has the pid and other subject
> > > attributes and the contid to strongly link the responsible party.
> >
> > Steve is the only one who really tracks the security certifications
> > that are relevant to audit, see what the certification requirements
> > have to say and we can revisit this.
>
> Sever Virtualization Protection Profile is the closest applicable standard
>
> https://www.niap-ccevs.org/Profile/Info.cfm?PPID=408&id=408
>
> It is silent on audit requirements for the lifecycle of a VM. I assume that
> all that is needed is what the orchestrator says its doing at the high level.
> So, if an orchestrator wants to shutdown a container, the orchestrator must
> log that intent and its results. In a similar fashion, systemd logs that it's
> killing a service and we don't actually hook the exit syscall of the service
> to record that.
>
> Now, if a container was being used as a VPS, and it had a fully functioning
> userspace, it's own services, and its very own audit daemon, then in this
> case it would care who sent a signal to its auditd. The tenant of that
> container may have to comply with PCI-DSS or something else. It would log the
> audit service is being terminated and systemd would record that its tearing
> down the environment. The OS doesn't need to do anything.

This latter case is the case of interest here, since the host auditd
should only be killed from a process on the host itself, not a process
running in a container.  If we work under the assumption (and this may
be a break in our approach to not defining "container") that an auditd
instance is only ever signaled by a process with the same audit
container ID (ACID), is this really even an issue?  Right now it isn't
as even with this patchset we will still really only support one
auditd instance, presumably on the host, so this isn't a significant
concern.  Moving forward, once we add support for multiple auditd
instances we will likely need to move the signal info into
(potentially) s per-ACID struct, a struct whose lifetime would match
that of the associated container by definition; as the auditd
container died, the struct would die, the refcounts dropped, and any
ACID held only the signal info refcount would be dropped/killed.

However, making this assumption would mean that we are expecting a
"container" to provide some level of isolation such that processes
with a different audit container ID do not signal each other.  From a
practical perspective I think that fits with the most (all?)
definitions of "container", but I can't say that for certain.  In
those cases where the assumption is not correct and processes can
signal each other across audit container ID boundaries, perhaps it is
enough to explain that an audit container ID may not fully disappear
until it has been fetched with a SIGNAL_INFO2 message.
Paul Moore Feb. 13, 2020, 9:44 p.m.
This is a bit of a thread-hijack, and for that I apologize, but
another thought crossed my mind while thinking about this issue
further ... Once we support multiple auditd instances, including the
necessary record routing and duplication/multiple-sends (the host
always sees *everything*), we will likely need to find a way to "trim"
the audit container ID (ACID) lists we send in the records.  The
auditd instance running on the host/initns will always see everything,
so it will want the full container ACID list; however an auditd
instance running inside a container really should only see the ACIDs
of any child containers.

For example, imagine a system where the host has containers 1 and 2,
each running an auditd instance.  Inside container 1 there are
containers A and B.  Inside container 2 there are containers Y and Z.
If an audit event is generated in container Z, I would expect the
host's auditd to see a ACID list of "1,Z" but container 1's auditd
should only see an ACID list of "Z".  The auditd running in container
2 should not see the record at all (that will be relatively
straightforward).  Does that make sense?  Do we have the record
formats properly designed to handle this without too much problem (I'm
not entirely sure we do)?