Home
Reading
Searching
Subscribe
Sponsors
Statistics
Posting
Contact
Spam
Lists
Links
About
Hosting
Filtering
Features Download
Marketing
Archives
FAQ
Blog
 
Gmane
From: Kiran <kiran <at> linux.vnet.ibm.com>
Subject: [PATCH] Scheduler Tapset based on kernel tracepoints
Newsgroups: gmane.linux.systemtap
Date: Thursday 17th September 2009 09:10:28 UTC (over 8 years ago)
Hi,

This patch adds kernel tracepoints based probes to the scheduler tapset
along with the testcase, scheduler-test-tracepoints.stp and an example
script, sched_switch.stp.

Signed-off-by: Kiran Prakash 

diff -Naur systemtap-0.9.9-orig/tapset/scheduler.stp
systemtap-0.9.9/tapset/scheduler.stp
--- systemtap-0.9.9-orig/tapset/scheduler.stp	2009-09-17 02:35:18.000000000
-0400
+++ systemtap-0.9.9/tapset/scheduler.stp	2009-09-17 02:32:49.000000000
-0400
@@ -33,7 +33,7 @@
  *  idle - boolean indicating whether current is the idle process
  */
 probe scheduler.cpu_off
-    = kernel.function("context_switch")
+    =  kernel.trace("sched_switch")
 {
     task_prev = $prev
     task_next = $next
@@ -124,6 +124,7 @@
 %( arch != "x86_64" && arch != "ia64" %?
 	kernel.function("__switch_to")
 %:
+	kernel.trace("sched_switch") ?,
 	kernel.function("context_switch")
 %)
 {
@@ -147,3 +148,167 @@
         prevtsk_state = $prev_p->state
 %) %)
 }
+
+/**
+ * probe scheduler.kthread_stop - A thread created by kthread_create is being stopped.
+ * @thread_pid: PID of the thread being stopped
+ * @thread_priority: priority of the thread being stopped
+ */
+probe scheduler.kthread_stop =
+	kernel.trace("sched_kthread_stop")
+{
+	thread_priority = $t->prio
+	thread_pid = $t->pid
+}
+
+
+/**
+ * probe scheduler.kthread_stop.return - A kthread has been stopped; exposes the stop result.
+ * @return_value: value returned once the thread was stopped
+ */
+probe scheduler.kthread_stop.return =
+	kernel.trace("sched_kthread_stop_ret")
+{
+	return_value = $ret
+}
+
+/**
+ * probe scheduler.wait_task - Fires when waiting on a task to unschedule.
+ *                             It waits till the task becomes inactive.
+ * @task_pid: pid of the task the scheduler is waiting on.
+ * @task_priority: priority of the task
+ */
+probe scheduler.wait_task
+ = kernel.trace("sched_wait_task") !,	/* "!": if the tracepoint resolves, skip the fallback so events are not reported twice */
+   kernel.function("wait_task_inactive")
+{
+        task_pid = $p->pid
+        task_priority = $p->prio
+}
+
+/**
+ * probe scheduler.wakeup - A task is being woken up.
+ * @task_pid: PID of the task being woken up
+ * @task_priority: priority of the task being woken up
+ * @success: 1 when the wakeup succeeded
+ */
+probe scheduler.wakeup =
+	kernel.trace("sched_wakeup")
+{
+	/* $p is the task being woken; $success is passed through unchanged. */
+	success = $success
+	task_priority = $p->prio
+	task_pid = $p->pid
+}
+
+/**
+ * probe scheduler.wakeup_new - A newly created task is woken up for the first time.
+ * @task_pid: PID of the new task being woken up
+ * @task_priority: priority of the new task
+ * @success: 1 when the wake-up succeeded
+ */
+probe scheduler.wakeup_new =
+	kernel.trace("sched_wakeup_new")
+{
+	success = $success
+	task_priority = $p->prio
+	task_pid = $p->pid
+}
+
+/**
+ * probe scheduler.sched_switch - Traces the context switches performed by the scheduler
+ * @prev_pid: pid of the task currently running (the one being switched out)
+ * @prev_priority: priority of the task being switched out
+ * @prev_state: state field of the task being switched out
+ * @prev_tid: thread id of the task being switched out, from task_tid()
+ * @prev_task_name: executable name of the task being switched out
+ * @next_pid: pid of the task that runs next
+ * @next_priority: priority of the task that runs next
+ * @next_state: state field of the task that runs next
+ * @next_tid: thread id of the task that runs next, from task_tid()
+ * @next_task_name: executable name of the task that runs next
+ */
+probe scheduler.sched_switch = kernel.trace("sched_switch")
+{
+	prev_pid = $prev->pid;                  next_pid = $next->pid
+	prev_priority = $prev->prio;            next_priority = $next->prio
+	prev_state = $prev->state;              next_state = $next->state
+	prev_tid = task_tid($prev);             next_tid = task_tid($next)
+	prev_task_name = task_execname($prev);  next_task_name = task_execname($next)
+}
+
+/**
+ * probe scheduler.migrate_task - The scheduler is migrating a task to another cpu.
+ * @pid: PID of the task being migrated
+ * @priority: priority of the task being migrated
+ * @original_cpu: cpu reported by task_cpu() for the task (the original cpu)
+ * @destination_cpu: cpu the task is being migrated to
+ */
+probe scheduler.migrate_task =
+	kernel.trace("sched_migrate_task")
+{
+	destination_cpu = $dest_cpu
+	original_cpu = task_cpu($p)
+	priority = $p->prio
+	pid = $p->pid
+}
+
+/**
+ * probe scheduler.process_free - A process is being freed.
+ * @pid: PID of the process being freed
+ * @priority: priority of the process being freed
+ */
+probe scheduler.process_free =
+	kernel.trace("sched_process_free")
+{
+	priority = $p->prio
+	pid = $p->pid
+}
+
+/**
+ * probe scheduler.process_exit - A process is exiting.
+ * @pid: PID of the exiting process
+ * @priority: priority of the exiting process
+ */
+probe scheduler.process_exit =
+	kernel.trace("sched_process_exit")
+{
+	priority = $p->prio
+	pid = $p->pid
+}
+
+/**
+ * probe scheduler.process_wait - Fires when scheduler waits on a process
+ * @pid: PID of the process scheduler is waiting on (0 when the tracepoint passes no struct pid)
+ */
+probe scheduler.process_wait
+ = kernel.trace("sched_process_wait")
+{
+        pid = $pid ? $pid->numbers[0]->nr : 0	/* $pid is a struct pid *, not a number; mirror pid_nr() */
+}
+
+
+/**
+ * probe scheduler.process_fork - A process is being forked.
+ * @parent_pid: PID of the parent process
+ * @child_pid: PID of the child process
+ */
+probe scheduler.process_fork =
+	kernel.trace("sched_process_fork")
+{
+	child_pid = $child->pid
+	parent_pid = $parent->pid
+}
+
+/**
+ * probe scheduler.signal_send - Probes the tracepoint for sending a signal
+ * @pid: PID of the task the tracepoint passes as $p (NOTE(review): confirm whether this is the sender or the target)
+ * @signal_number: number of the signal being sent
+ */
+probe scheduler.signal_send =
+	kernel.trace("sched_signal_send")
+{
+	signal_number = $sig
+	pid = $p->pid
+}
+
diff -Naur
systemtap-0.9.9-orig/testsuite/buildok/scheduler-test-tracepoints.stp
systemtap-0.9.9/testsuite/buildok/scheduler-test-tracepoints.stp
---
systemtap-0.9.9-orig/testsuite/buildok/scheduler-test-tracepoints.stp	1969-12-31
19:00:00.000000000 -0500
+++
systemtap-0.9.9/testsuite/buildok/scheduler-test-tracepoints.stp	2009-09-16
03:21:34.000000000 -0400
@@ -0,0 +1,53 @@
+#! stap -up4
+
+// Tests that every tracepoint-based scheduler probe resolves; each handler reads only variables its alias sets.
+
+probe scheduler.kthread_stop {
+	printf("pid = %d, priority = %d\n", thread_pid, thread_priority);
+}
+
+probe scheduler.kthread_stop.return {
+        printf("return value = %d\n", return_value);
+}
+
+probe scheduler.wait_task {
+        printf("pid = %d, priority = %d\n", task_pid, task_priority);
+}
+
+probe scheduler.wakeup {
+        printf("pid = %d, priority = %d, success = %d\n", task_pid, task_priority, success);
+}
+
+probe scheduler.wakeup_new {
+        printf("pid = %d, priority = %d, success = %d\n", task_pid, task_priority, success);
+}
+
+probe scheduler.sched_switch {
+        printf("prev_pid = %d, prev_priority = %d, prev_state = %d, prev_task_name = %s, prev_tid = %d, next_pid = %d, next_priority = %d, next_state = %d, next_task_name = %s, next_tid = %d\n", prev_pid, prev_priority, prev_state, prev_task_name, prev_tid, next_pid, next_priority, next_state, next_task_name, next_tid);
+}
+
+probe scheduler.migrate_task {
+        printf("pid = %d, priority = %d, original cpu = %d destination cpu = %d\n", pid, priority, original_cpu, destination_cpu);
+}
+
+probe scheduler.process_free {
+        printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_exit {
+        printf("pid = %d, priority = %d\n", pid, priority);
+}
+
+probe scheduler.process_wait {
+        printf("pid = %d\n", pid);
+}
+
+probe scheduler.process_fork {
+        printf("parent pid = %d, child pid = %d\n", parent_pid, child_pid);
+}
+
+probe scheduler.signal_send {
+        printf("pid = %d, signal = %d\n", pid, signal_number);
+}
+
diff -Naur
systemtap-0.9.9-orig/testsuite/systemtap.examples/profiling/sched_switch.meta
systemtap-0.9.9/testsuite/systemtap.examples/profiling/sched_switch.meta
---
systemtap-0.9.9-orig/testsuite/systemtap.examples/profiling/sched_switch.meta	1969-12-31
19:00:00.000000000 -0500
+++
systemtap-0.9.9/testsuite/systemtap.examples/profiling/sched_switch.meta	2009-09-16
03:21:51.000000000 -0400
@@ -0,0 +1,14 @@
+title: Display the task switches happening in the scheduler
+name: sched_switch.stp
+version: 1.0
+author: kiran
+keywords: profiling functions
+subsystem: kernel
+status: production
+exit: user-controlled
+output: sorted-list on-exit
+scope: system-wide
+description: The sched_switch.stp script takes two arguments, first
argument can be "pid" or "name" to indicate what is being passed as second
argument. The script will trace the process based on pid/name and print the
scheduler switches happening with the process. If no arguments are passed,
it displays all the scheduler switches.
+test_check: stap -p4 sched_switch.stp
+test_installcheck: stap  sched_switch.stp -c "sleep 1"
+
diff -Naur
systemtap-0.9.9-orig/testsuite/systemtap.examples/profiling/sched_switch.stp
systemtap-0.9.9/testsuite/systemtap.examples/profiling/sched_switch.stp
---
systemtap-0.9.9-orig/testsuite/systemtap.examples/profiling/sched_switch.stp	1969-12-31
19:00:00.000000000 -0500
+++
systemtap-0.9.9/testsuite/systemtap.examples/profiling/sched_switch.stp	2009-09-16
03:21:53.000000000 -0400
@@ -0,0 +1,71 @@
+/* This script works similarly to ftrace's sched_switch. It displays a list of
+ * processes which get switched in and out of the scheduler. The format of display
+ * is PROCESS_NAME PROCESS_PID CPU TIMESTAMP PID:PRIORITY:PROCESS_STATE ==>/+
+ *    NEXT_PID:NEXT_PRIORITY:NEXT_STATE NEXT_PROCESS_NAME
+ * ==> indicates that the previous process is scheduled out and the next process is
+ *    scheduled in.
+ * + indicates that the previous process has woken up the next process.
+ * The usage is sched_switch.stp <"pid"/"name"> pid/name
+ */
+
+global task_cpu_old[9999]	// next_tid -> cpu() stored at the end of each sched_switch handler run
+global pids[999]		// woken task_pid -> number of wakeups seen
+global processes		// woken task_pid -> $p from the wakeup probe
+global prev			// woken task_pid -> task_current() at wakeup time
+
+function state_calc(state) {	/* translate a numeric task state into the label printed by this script */
+	if (state == 0)
+		label = "R"
+	else if (state == 1)
+		label = "S"
+	else if (state == 2)
+		label = "D"
+	else if (state == 4)
+		label = "T"
+	else if (state == 8)
+		label = "T"
+	else if (state == 16)
+		label = "Z"
+	else if (state == 32)
+		label = "EXIT_DEAD"
+	return label
+}
+probe scheduler.wakeup
+{
+	// Record who woke whom, keyed by the pid of the task being woken.
+	prev[task_pid] = task_current()
+	processes[task_pid] = $p
+	pids[task_pid]++
+}
+probe scheduler.sched_switch
+{
+	tid = next_tid
+	tid1 = prev_tid
+	state = prev_state
+	// The final printf reads next_priority (set by the alias); the unused "state1" local was removed.
+
+	%( $# == 2 %?
+	// Filter mode: skip switches that do not involve the requested pid/name.
+	if(@1 == "pid")
+		if (tid != $2 && tid1 != $2)
+			next
+	if(@1 == "name")
+		if (task_execname(task_current()) != @2 && task_execname($next) != @2)
+			next
+	// Print the recorded wakeups ("+" lines) that involve the requested pid/name.
+	foreach (name in pids-) {
+		if ((@1 == "pid" && (name == $2 || task_pid(prev[name]) == $2)) ||
+		   (@1 == "name" && (task_execname(prev[name]) == @2 || task_execname(processes[name]) == @2)))
+			printf("%s\t\t%d\t%d\t%d\t%d:%d:%s + %d:%d:%s %s\n",
+				task_execname(prev[name]), task_pid(prev[name]), task_cpu(processes[name]), gettimeofday_ns(),
+				task_pid(prev[name]), task_prio(prev[name]), state_calc(task_state(prev[name])),
+				task_pid(processes[name]), task_prio(processes[name]), state_calc(task_state(processes[name])),
+				task_execname(processes[name]))
+	} %)
+
+	old_cpu = task_cpu_old[tid]
+	printf("%s\t\t%d\t%d\t%d\t%d:%d:%s ==> %d:%d:%s %s\n",task_execname(task_current()),tid1,
+		old_cpu,gettimeofday_ns(),tid1,task_prio(task_current()),state_calc(state),next_pid,
+		next_priority,state_calc(next_state),next_task_name )
+	task_cpu_old[next_tid] = cpu()
+}


Thanks,
Kiran
 
CD: 3ms