View Issue Details

IDProjectCategoryView StatusLast Update
0007538CentOS-6kernelpublic2015-10-08 04:29
Reportersoos Assigned To 
PriorityimmediateSeveritycrashReproducibilityrandom
Status resolvedResolutionfixed 
PlatformDELL PowerEdge R610OSCentOSOS Version6.5
Product Version6.5 
Summary0007538: kernel crash
DescriptionHi, the kernel crashed in the cgroup_monitor process (Not tainted, kernel 2.6.32-431.11.2.el6.x86_64). cgroup_monitor is a cgroup event listener that works fine under CentOS 6.4 (kernel 2.6.32-358.el6.x86_64). Is this a kernel bug?

crash vmcore-dmesg.txt

<4>general protection fault: 0000 [#1] SMP
<4>last sysfs file: /sys/devices/system/cpu/online
<4>CPU 16
<4>Modules linked in: ipv6 iptable_filter ip_tables ext3 jbd power_meter microcode iTCO_wdt iTCO_vendor_support dcdbas serio_raw lpc_ich mfd_core sg ses enclosure i7core_edac edac_core bnx2 ext4 jbd2 mbcache sd_mod crc_t10dif megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan]
<4>
<4>Pid: 10106, comm: cgroup_monitor Not tainted 2.6.32-431.11.2.el6.x86_64 #1 Dell Inc. PowerEdge R610/08GXHX
<4>RIP: 0010:[<ffffffff81294610>] [<ffffffff81294610>] list_del+0x10/0xa0
<4>RSP: 0018:ffff880314615dc8 EFLAGS: 00010092
<4>RAX: dead000000200200 RBX: ffff880310ee5c98 RCX: 0000000000000010
<4>RDX: 0000000000000002 RSI: 0000000000000003 RDI: ffff880310ee5c98
<4>RBP: ffff880314615dd8 R08: 0000000000000010 R09: 0000000000000000
<4>R10: 00007fffd55e35e0 R11: 0000000000000246 R12: ffff880310ee5c80
<4>R13: ffff880303bfc718 R14: 0000000000000010 R15: 0000000000000000
<4>FS: 00007fbe7ec11700(0000) GS:ffff8801b5900000(0000) knlGS:0000000000000000
<4>CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>CR2: 00007fbe7ec25000 CR3: 00000003038a6000 CR4: 00000000000007e0
<4>DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
<4>DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
<4>Process cgroup_monitor (pid: 10106, threadinfo ffff880314614000, task ffff88031e604aa0)
<4>Stack:
<4> ffff880315df5900 ffff880310ee5cc0 ffff880314615e08 ffffffff810c9fd2
<4><d> ffff88030851e700 ffff8802fc98feb8 0000000000000000 ffff8802fc98fed0
<4><d> ffff880314615e58 ffffffff81054839 ffff880314615f58 0000000300000001
<4>Call Trace:
<4> [<ffffffff810c9fd2>] cgroup_event_wake+0x42/0x70
<4> [<ffffffff81054839>] __wake_up_common+0x59/0x90
<4> [<ffffffff81058d48>] __wake_up+0x48/0x70
<4> [<ffffffff811d4dfd>] eventfd_release+0x2d/0x40
<4> [<ffffffff8118a7f5>] __fput+0xf5/0x210
<4> [<ffffffff8118a935>] fput+0x25/0x30
<4> [<ffffffff81185c5d>] filp_close+0x5d/0x90
<4> [<ffffffff81185d35>] sys_close+0xa5/0x100
<4> [<ffffffff8100b072>] system_call_fastpath+0x16/0x1b
<4>Code: 89 95 fc fe ff ff e9 ab fd ff ff 4c 8b ad e8 fe ff ff e9 db fd ff ff 90 90 90 90 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 47 08 <4c> 8b 00 4c 39 c7 75 39 48 8b 03 4c 8b 40 08 4c 39 c3 75 4c 48
<1>RIP [<ffffffff81294610>] list_del+0x10/0xa0
<4> RSP <ffff880314615dc8>



TagsNo tags attached.

Activities

soos

soos

2014-08-28 09:48

reporter  

cgroup_monitor.c (4,957 bytes)   
/*
 * fork from cgroup_event_listener.c - Simple listener of cgroup events
 *
 * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
 */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <syslog.h>
#include <stdlib.h>

#include <sys/eventfd.h>
#include <sys/types.h>

#define USAGE_STR "Usage: cgroup_monitor <path-to-control-file> <args>\n"

/*
 * Parse a decimal pid from one line of a cgroup "tasks" file or a pid file.
 *
 * Accepts an optional single trailing newline. Returns the pid (> 0) on
 * success and -1 for anything else. Zero and negative values are rejected
 * on purpose: kill(0, sig) signals the caller's whole process group and
 * kill(-1, sig) signals every process the caller may signal.
 */
static pid_t parse_pid(const char *s)
{
	char *end;
	long v;

	errno = 0;
	v = strtol(s, &end, 10);
	if (end == s || errno == ERANGE || v <= 0 || v > INT_MAX)
		return -1;
	if (*end != '\0' && *end != '\n')
		return -1;
	return (pid_t)v;
}

/*
 * cgroup_monitor <path-to-control-file> <args>
 *
 * Registers an eventfd for the given cgroup control file by writing
 * "<efd> <cfd> <args>" to cgroup.event_control in the same directory, then
 * loops forever waiting for events.  On each event it:
 *   1. SIGKILLs every pid listed in the cgroup's "tasks" file,
 *   2. sleeps 30 seconds and restarts php-fpm using the configuration under
 *      /usr/local/etc/fpm.d/<cgroup-name>,
 *   3. writes the pid from php-fpm's pid file back into the "tasks" file.
 *
 * Returns 0 when cgroup.event_control has disappeared (cgroup removed),
 * 1 on usage errors or any other failure.
 */
int main(int argc, char **argv)
{
	int efd = -1;
	int cfd = -1;
	int event_control = -1;
	char event_control_path[PATH_MAX];
	char tasks_path[PATH_MAX];
	char line[LINE_MAX];
	char confdir[100], conf[100], pidfile[100];
	char cmd[300];
	const char *fpmbin = "/usr/local/sbin/php-fpm";
	char *dirc = NULL, *basec = NULL;
	char *dname, *bname;
	char *task = NULL;
	size_t len = 0;
	ssize_t r;
	pid_t pid;
	FILE *fp;
	int ok;
	/* Default to failure so every early "goto out" returns non-zero. */
	int ret = -1;

	if (argc != 3) {
		fputs(USAGE_STR, stderr);
		return 1;
	}

	/*
	 * dname is the cgroup directory, bname the cgroup's own name (last
	 * component of that directory).  dirname()/basename() may modify
	 * their argument, so each gets its own private copy.
	 */
	dirc = strdup(argv[1]);
	if (dirc == NULL) {
		perror("strdup() failed");
		goto out;
	}
	dname = dirname(dirc);
	basec = strdup(dname);
	if (basec == NULL) {
		perror("strdup() failed");
		goto out;
	}
	bname = basename(basec);

	if (snprintf(confdir, sizeof(confdir), "/usr/local/etc/fpm.d/%s",
			bname) >= (int)sizeof(confdir) ||
	    snprintf(conf, sizeof(conf), "%s/php-fpm.conf",
			confdir) >= (int)sizeof(conf) ||
	    snprintf(pidfile, sizeof(pidfile), "%s/var/run/php-fpm.pid",
			confdir) >= (int)sizeof(pidfile)) {
		fputs("Path to php-fpm configuration is too long\n", stderr);
		goto out;
	}

	cfd = open(argv[1], O_RDONLY);
	if (cfd == -1) {
		fprintf(stderr, "Cannot open %s: %s\n", argv[1],
				strerror(errno));
		goto out;
	}

	ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
			dname);
	if (ret >= PATH_MAX) {
		fputs("Path to cgroup.event_control is too long\n", stderr);
		goto out;
	}

	event_control = open(event_control_path, O_WRONLY);
	if (event_control == -1) {
		fprintf(stderr, "Cannot open %s: %s\n", event_control_path,
				strerror(errno));
		goto out;
	}

	efd = eventfd(0, 0);
	if (efd == -1) {
		perror("eventfd() failed");
		goto out;
	}

	ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
	if (ret >= LINE_MAX) {
		fputs("Arguments string is too long\n", stderr);
		goto out;
	}

	ret = write(event_control, line, strlen(line) + 1);
	if (ret == -1) {
		perror("Cannot write to cgroup.event_control");
		goto out;
	}

	ret = snprintf(tasks_path, PATH_MAX, "%s/tasks", dname);
	if (ret >= PATH_MAX) {
		fputs("Path to tasks is too long\n", stderr);
		goto out;
	}

	openlog("cgroup_monitor", LOG_PID, LOG_USER);

	while (1) {
		uint64_t result;

		/* Block until the kernel signals the registered event. */
		ret = read(efd, &result, sizeof(result));
		if (ret == -1) {
			if (errno == EINTR)
				continue;
			perror("Cannot read from eventfd");
			break;
		}
		assert(ret == sizeof(result));

		/* The eventfd is also signalled when the cgroup goes away. */
		ret = access(event_control_path, W_OK);
		if ((ret == -1) && (errno == ENOENT)) {
			puts("The cgroup seems to have removed.");
			ret = 0;
			break;
		}

		if (ret == -1) {
			perror("cgroup.event_control "
					"is not accessible any more");
			break;
		}

		syslog(LOG_INFO, "%s %s: under oom\n", argv[1], argv[2]);

		/* Step 1: kill every task currently in the cgroup. */
		fp = fopen(tasks_path, "r");
		if (fp == NULL) {
			syslog(LOG_INFO, "%s tasks file not found\n", bname);
			sleep(10);
			continue;
		}
		while ((r = getline(&task, &len, fp)) != -1) {
			pid = parse_pid(task);
			if (pid <= 0) {
				/* Never pass 0 or a negative pid to kill(). */
				syslog(LOG_INFO, "%s skip invalid task entry\n",
						bname);
				continue;
			}
			if (kill(pid, SIGKILL) == 0)
				syslog(LOG_INFO, "%s kill pid:%d OK\n",
						bname, (int)pid);
			else
				syslog(LOG_INFO, "%s kill pid:%d FAIL\n",
						bname, (int)pid);
		}
		fclose(fp);

		sleep(30);

		/*
		 * Step 2: restart php-fpm.
		 * NOTE(review): the command is built from argv[1] and run
		 * through the shell via system(); only invoke this tool with
		 * trusted paths.
		 */
		snprintf(cmd, sizeof(cmd), "%s -p %s -y %s", fpmbin, confdir,
				conf);
		if (system(cmd) == 0)
			syslog(LOG_INFO, "%s start OK\n", bname);
		else
			syslog(LOG_INFO, "%s start FAIL\n", bname);

		/* Step 3: move the new php-fpm master into the cgroup. */
		fp = fopen(pidfile, "r");
		if (fp == NULL) {
			syslog(LOG_INFO, "%s fpm pid file not found\n", bname);
			sleep(10);
			continue;
		}
		r = getline(&task, &len, fp);
		fclose(fp);

		pid = (r == -1) ? -1 : parse_pid(task);
		if (pid <= 0) {
			syslog(LOG_INFO, "%s add task FAIL: no valid pid in %s\n",
					bname, pidfile);
			continue;
		}

		fp = fopen(tasks_path, "w");
		if (fp == NULL) {
			syslog(LOG_INFO, "%s add task:%d FAIL\n", bname,
					(int)pid);
			continue;
		}
		ok = fprintf(fp, "%d\n", (int)pid) > 0;
		/* Buffered write errors only surface when the stream is flushed. */
		if (fclose(fp) != 0)
			ok = 0;
		if (ok)
			syslog(LOG_INFO, "%s add task:%d OK\n", bname,
					(int)pid);
		else
			syslog(LOG_INFO, "%s add task:%d FAIL\n", bname,
					(int)pid);
	}

	closelog();

out:
	if (efd >= 0)
		close(efd);
	if (event_control >= 0)
		close(event_control);
	if (cfd >= 0)
		close(cfd);
	free(task);
	free(basec);
	free(dirc);

	return (ret != 0);
}
cgroup_monitor.c (4,957 bytes)   
james-p

james-p

2014-11-25 13:05

reporter   ~0021801

We have a similar problem - which we think can be fixed using a patch based on https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/kernel/cgroup.c?id=810cbee4fad570ff167132d4ecf247d99c48f71d

I've created Red Hat BZ 1167772, but because it is a kernel bug it is marked as private by default.
james-p

james-p

2014-11-25 13:05

reporter  

kernel-2.6.32-431-cgroup.patch (2,537 bytes)   
--- ./kernel/cgroup.c.dist	2014-07-27 12:22:04.000000000 -0700
+++ ./kernel/cgroup.c	2014-11-25 03:04:12.870416711 -0800
@@ -193,6 +193,9 @@ struct cgroup_event {
 	 * Each of these stored in a list by the cgroup.
 	 */
 	struct list_head list;
+	/* Need to notify userspace when this event is removed?
+	 */
+	bool signal_on_remove;
 	/*
 	 * All fields below needed to unregister event when
 	 * userspace closes eventfd.
@@ -802,11 +805,13 @@ static int cgroup_call_pre_destroy(struc
 
 	/*
 	 * Unregister events and notify userspace.
+	 * Notify userspace about cgroup removing only after rmdir of cgroup
+	 * directory to avoid race between userspace and kernelspace.
 	 */
 	spin_lock(&cgrp->event_list_lock);
 	list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) {
-		list_del(&event->list);
-		eventfd_signal(event->eventfd, 1);
+		event->signal_on_remove = true;
+		list_del_init(&event->list);
 		schedule_work(&event->remove);
 	}
 	spin_unlock(&cgrp->event_list_lock);
@@ -3310,11 +3315,19 @@ static void cgroup_event_remove(struct w
 			remove);
 	struct cgroup *cgrp = event->cgrp;
 
+	remove_wait_queue(event->wqh, &event->wait);
+
 	/* TODO: check return code */
 	event->cft->unregister_event(cgrp, event->cft, event->eventfd);
 
+	/*
+	 * If this event is to be removed due to cgroup removal,
+	 * we notify userspace.
+	 */
+	if (event->signal_on_remove)
+		eventfd_signal(event->eventfd, 1);
+
 	eventfd_ctx_put(event->eventfd);
-	remove_wait_queue(event->wqh, &event->wait);
 	kfree(event);
 }
 
@@ -3332,14 +3345,25 @@ static int cgroup_event_wake(wait_queue_
 	unsigned long flags = (unsigned long)key;
 
 	if (flags & POLLHUP) {
-		spin_lock(&cgrp->event_list_lock);
-		list_del(&event->list);
-		spin_unlock(&cgrp->event_list_lock);
 		/*
-		 * We are in atomic context, but cgroup_event_remove() may
-		 * sleep, so we have to call it in workqueue.
+		 * If the event has been detached at cgroup removal, we
+		 * can simply return knowing the other side will cleanup
+		 * for us.
+		 *
+		 * We can't race against event freeing since the other
+		 * side will require wqh->lock via remove_wait_queue(),
+		 * which we hold.
 		 */
-		schedule_work(&event->remove);
+		spin_lock(&cgrp->event_list_lock);
+		if (!list_empty(&event->list)) {
+			list_del_init(&event->list);
+			/*
+			 * We are in atomic context, but cgroup_event_remove()
+			 * may sleep, so we have to call it in workqueue.
+			 */
+			schedule_work(&event->remove);
+		}
+		spin_unlock(&cgrp->event_list_lock);
 	}
 
 	return 0;
kernel-2.6.32-431-cgroup.patch (2,537 bytes)   
toracat

toracat

2014-11-25 13:23

manager   ~0021804

Last edited: 2014-11-25 13:24

@james-p

Please update the status of the upstream BZ here as it progresses. In the meantime, I will try and see if the patch can be added to the centosplus kernel.

toracat

toracat

2014-12-16 17:22

manager   ~0021971

The patch in commit 810cbee4fad570ff167132d4ecf247d99c48f71d does not cleanly apply in the current RHEL/CentOS kernel source code. We have to wait for RH to fix the problem at this point.
toracat

toracat

2014-12-16 18:30

manager   ~0021974

Sorry, I was looking in the wrong place. The submitted patch looks good. I will try to add it to the next update.
toracat

toracat

2014-12-17 22:04

manager   ~0021987

The centosplus kernel 2.6.32-504.3.3.el6 has been released. It now has the patch reported here.
james-p

james-p

2015-06-18 21:58

reporter   ~0023438

According to BZ 1167772, this problem will be fixed in kernel-2.6.32-564.el6
toracat

toracat

2015-06-19 07:21

manager   ~0023442

Thanks for the update. So, that'll be in CentOS 6.7, I suppose.
toracat

toracat

2015-10-08 04:29

manager   ~0024553

Fixed in the 6.7 kernel.

Issue History

Date Modified Username Field Change
2014-08-28 04:59 soos New Issue
2014-08-28 09:48 soos File Added: cgroup_monitor.c
2014-11-25 13:05 james-p Note Added: 0021801
2014-11-25 13:05 james-p File Added: kernel-2.6.32-431-cgroup.patch
2014-11-25 13:23 toracat Note Added: 0021804
2014-11-25 13:24 toracat Note Edited: 0021804
2014-12-16 17:22 toracat Note Added: 0021971
2014-12-16 18:30 toracat Note Added: 0021974
2014-12-16 18:30 toracat Status new => assigned
2014-12-17 22:04 toracat Note Added: 0021987
2015-06-18 21:58 james-p Note Added: 0023438
2015-06-19 07:21 toracat Note Added: 0023442
2015-10-08 04:29 toracat Note Added: 0024553
2015-10-08 04:29 toracat Status assigned => resolved
2015-10-08 04:29 toracat Resolution open => fixed