Ticket #1069: scheduler.diff

File scheduler.diff, 15.5 KB (added by Duggan, 13 years ago)

This is an intermediate patch to some work I'm doing on the extant affine scheduler. In theory, hyperthreaded cores should now share ready queues, and there is some initial support for soft affinities for teams. I can't test the hyperthreading code myself, so if you could test this on various Intel and AMD processors and provide some feedback (mostly syslogs, and backtraces if it crashes), I would appreciate it. Future work includes proper load balancing and affinities, API functions to support affinities, and possibly a thread class (a la BeAPI) that provides an OO wrapper around those API functions. Other changes I would like to make include shutting down unnecessary cores when the load is light and re-enabling them as the load increases, as well as potentially doing away with the global scheduler lock in favour of per-queue locks. Again, this is an intermediate patch, so there are likely a lot of problems with it besides bugs (but I did run it through the style checker and did the diff from trunk this time ;) ... not saying there are no style errors left, just that they're not in my code). Cheers!
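
To put the two ideas in code: each logical CPU now maps to a run-queue index so that hyperthread siblings on one physical core share a ready queue, and a team lazily picks a preferred CPU the first time one of its threads is enqueued without a usable previous CPU. Roughly like this (a condensed sketch of affine_enqueue_in_run_queue(); the helper name is made up, this is not the literal patch code):

    // Condensed illustration of how the patch picks a target CPU; the real
    // logic lives in affine_enqueue_in_run_queue() in scheduler_affine.cpp.
    static int32
    pick_target_cpu(Thread* thread)
    {
        if (thread->pinned_to_cpu > 0)
            return thread->previous_cpu->cpu_num;       // hard affinity wins
        if (thread->previous_cpu == NULL || thread->previous_cpu->disabled) {
            if (thread->team->preferred_cpu < 0)        // -1: not chosen yet
                thread->team->preferred_cpu = affine_get_most_idle_cpu();
            return thread->team->preferred_cpu;         // team soft affinity
        }
        return thread->previous_cpu->cpu_num;           // otherwise stay put
    }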

  • src/system/kernel/arch/x86/arch_cpu.cpp

     
    2828#include <arch_system_info.h>
    2929#include <arch/x86/selector.h>
    3030#include <boot/kernel_args.h>
     31#include <arch/x86/apic.h>
    3132
    3233#include "interrupts.h"
    3334#include "paging/X86PagingStructures.h"
     
    504505#endif  // DUMP_FEATURE_STRING
    505506
    506507
     508int32
     509round_to_pwr_of_2(int32 in)
     510{
     511    if (in > 64)
     512        return 128;
     513    else if (in > 32)
     514        return 64;
     515    else if (in > 16)
     516        return 32;
     517    else if (in > 8)
     518        return 16;
     519    else if (in > 4)
     520        return 8;
     521    else if (in > 2)
     522        return 4;
     523    else if (in > 1)
     524        return 2;
     525    else
     526        return in;
     527}
     528
     529
    507530static int
    508531detect_cpu(int currentCPU)
    509532{
     
    519542    cpu->arch.feature[FEATURE_EXT_AMD] = 0;
    520543    cpu->arch.model_name[0] = 0;
    521544
     545    // initialize the topology data
     546    // TODO: should this be negative since 0s are valid?
     547    cpu->cpu_num_in_core = 0;
     548    cpu->core_num = 0;
     549    cpu->package_num = 0;
     550    cpu->numa_num = 0;
     551
    522552    // print some fun data
    523553    get_current_cpuid(&cpuid, 0);
    524554
     
    608638        cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
    609639    }
    610640
     641    // determine processor topology for scheduler
     642    int32 logcpubits = 0;
     643    int32 corebits = 0;
     644    if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT) {
     645        // Has HT
     646        cpu->has_ht = true;
     647        get_current_cpuid(&cpuid, 0x00000001);
     648        int32 count = (cpuid.regs.ebx >> 16) & 255; // EBX[23:16]
     649        if (cpu->arch.vendor == VENDOR_INTEL) {
     650            // retrieve topology data for Intel chipsets
     651            get_current_cpuid_ex(&cpuid, 0x0000000B, 0);
     652            // check if leaf 0x0000000B exists
     653            if ((cpuid.regs.ebx & ((1<<15)-1)) != 0) { // EBX[15:0]
     654                dprintf("******* Intel Processor - Leaf 0x0000000B Exists!\n");
     655                logcpubits = cpuid.regs.eax & ((1<<4)-1); // EAX[4:0]
     656                get_current_cpuid_ex(&cpuid, 0x0000000B, 1);
     657                corebits = (cpuid.regs.eax & ((1<<4)-1)) - logcpubits;
     658            } else {
     659                // retrieve topology data without using leaf 0x0000000B
     660                dprintf("******* Intel Processor - Leaf 0x0000000B Does NOT Exist!\n");
     661                // NOTE: There are some cases where leaf 0x00000004 isn't
     662                // supported... I don't know how to test for that yet (Duggan)
     663                get_current_cpuid(&cpuid, 0x00000004);
     664                int32 temp = cpuid.regs.eax >> 26; // EAX[31:26]
     665                logcpubits = round_to_pwr_of_2(temp);
     666                temp = temp >> logcpubits;
     667                corebits = round_to_pwr_of_2(temp);
     668            }
     669        } else if (cpu->arch.vendor == VENDOR_AMD) {
     670            // retrieve topology data for AMD chipsets
     671            // NOTE: There are some cases where 0x80000008 isn't supported...
     672            // I have no clue how to test that right now (Duggan)
     673            dprintf("******* AMD Processor - Has Hyperthreading!\n");
     674            get_current_cpuid(&cpuid, 0x80000008);
     675            int32 temp = (cpuid.regs.ecx >> 12) & 15; // ECX[15:12]
     676            if (temp != 0)
     677                corebits = temp;
     678            else
     679                corebits = round_to_pwr_of_2(cpuid.regs.ecx & 255);
     680            logcpubits = round_to_pwr_of_2(count >> corebits);
     681        } else {
     682            // no clue how to retrieve information for non-Intel/AMD vendors
     683            dprintf("******* Cannot determine topology for non-Intel/AMD OEM!\n");
     684            cpu->has_ht = false;
     685        }
     686
     687        // Now determine the topology
     688        if (cpu->has_ht) {
     689            cpu->cpu_num_in_core = apic_local_id() & ((1 << logcpubits) - 1);
     690            cpu->core_num = (apic_local_id() >> logcpubits) &
     691                ((1 << corebits) -1);
     692            cpu->package_num = apic_local_id() &
     693                ~((1 << (logcpubits + corebits)) -1);
     694        }
     695    } else {
     696        // No HT
     697        dprintf("******* No Hyperthreading!\n");
     698        // set flag in cpu_ent to say there's no HT
     699        cpu->has_ht = false;
     700    }
     701
    611702#if DUMP_FEATURE_STRING
    612703    dump_feature_string(currentCPU, cpu);
    613704#endif
  • src/system/kernel/arch/x86/cpuid.S

     
    2828    ret
    2929FUNCTION_END(get_current_cpuid)
    3030
     31/* void get_current_cpuid_ex(cpuid_info *info, uint32 eaxRegister,
     32uint32 ecxRegister = 0) */
     33FUNCTION(get_current_cpuid_ex):
     34    pushl   %ebx
     35    pushl   %edi
     36    movl    12(%esp),%edi   /* first arg points to the cpuid_info structure */
     37    movl    16(%esp),%eax   /* second arg sets up eax */
     38    movl    20(%esp),%ecx   /* third arg sets up ecx */
     39    cpuid
     40    movl    %eax,0(%edi)    /* copy the regs into the cpuid_info structure */
     41    movl    %ebx,4(%edi)
     42    movl    %edx,8(%edi)
     43    movl    %ecx,12(%edi)
     44    popl    %edi
     45    popl    %ebx
     46    xorl    %eax, %eax      /* return B_OK */
     47    ret
     48FUNCTION_END(get_current_cpuid_ex)
    3149
     50
    3251/* unsigned int get_eflags(void) */
    3352FUNCTION(get_eflags):
    3453    pushfl
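
get_current_cpuid_ex() works like get_current_cpuid() but additionally lets the caller preset ECX, which leaf 0x0000000B needs to select the sub-leaf (topology level). Roughly how the detection code above uses it (masks per the Intel documentation; an illustration, not the literal patch code):

    cpuid_info cpuid;

    // Sub-leaf 0 describes the SMT level; EAX[4:0] is the number of APIC-ID
    // bits to shift to reach the next level, and EBX[15:0] is zero if the
    // leaf is not supported.
    get_current_cpuid_ex(&cpuid, 0x0000000B, 0);
    uint32 smtBits = cpuid.regs.eax & 0x1f;

    // Sub-leaf 1 describes the core level; subtracting gives the core width.
    get_current_cpuid_ex(&cpuid, 0x0000000B, 1);
    uint32 coreBits = (cpuid.regs.eax & 0x1f) - smtBits;
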
  • src/system/kernel/team.cpp

     
    447447    user_data_size = 0;
    448448    free_user_threads = NULL;
    449449
     450    // new team has no soft affinity
     451    preferred_cpu = -1;
     452
    450453    supplementary_groups = NULL;
    451454    supplementary_group_count = 0;
    452455
  • src/system/kernel/scheduler/scheduler.cpp

     
    6868        cpuCount != 1 ? "s" : "");
    6969
    7070    if (cpuCount > 1) {
    71 #if 0
     71#if 1
    7272        dprintf("scheduler_init: using affine scheduler\n");
    7373        scheduler_affine_init();
    7474#else
  • src/system/kernel/scheduler/scheduler_affine.cpp

     
    11/*
     2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com
    23 * Copyright 2009, Rene Gollent, rene@gollent.com.
    34 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
    45 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
     
    3738#   define TRACE(x) ;
    3839#endif
    3940
     41// Helper macros
     42#define RunQueue(x) sRunQueue[sCPUMap[x]]
     43#define RunQueueSize(x) sRunQueueSize[sCPUMap[x]]
     44
    4045// The run queues. Holds the threads ready to run ordered by priority.
    4146// One queue per schedulable target (CPU, core, etc.).
    4247// TODO: consolidate this such that HT/SMT entities on the same physical core
    4348// share a queue, once we have the necessary API for retrieving the topology
    4449// information
     50static int32 sCPUMap[B_MAX_CPU_COUNT];
    4551static Thread* sRunQueue[B_MAX_CPU_COUNT];
    4652static int32 sRunQueueSize[B_MAX_CPU_COUNT];
    4753static Thread* sIdleThreads;
     
    108114    Thread *thread = NULL;
    109115
    110116    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    111         thread = sRunQueue[i];
     117        thread = RunQueue(i);
    112118        kprintf("Run queue for cpu %ld (%ld threads)\n", i,
    113             sRunQueueSize[i]);
    114         if (sRunQueueSize[i] > 0) {
     119            RunQueueSize(i));
     120        if (RunQueueSize(i) > 0) {
    115121            kprintf("thread      id      priority  avg. quantum  name\n");
    116122            while (thread) {
    117123                kprintf("%p  %-7ld %-8ld  %-12ld  %s\n", thread, thread->id,
     
    126132}
    127133
    128134
     135void
     136display_topology()
     137{
     138    dprintf_no_syslog("Processor Topology Data\n");
     139    dprintf_no_syslog("Num\tCPU\tNumOnCore\tCore\tPackage\n");
     140    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
     141        dprintf_no_syslog("%ld\t%d\t%d\t%d\t%d\n",
     142            i, gCPU[i].cpu_num, gCPU[i].cpu_num_in_core, gCPU[i].core_num,
     143            gCPU[i].package_num);
     144    }
     145}
     146
     147
    129148/*! Returns the most idle CPU based on the active time counters.
    130149    Note: thread lock must be held when entering this function
    131150*/
     
    136155    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    137156        if (gCPU[i].disabled)
    138157            continue;
    139         if (targetCPU < 0 || sRunQueueSize[i] < sRunQueueSize[targetCPU])
     158        if (targetCPU < 0 || RunQueueSize(i) < RunQueueSize(targetCPU))
    140159            targetCPU = i;
    141160    }
    142161
     
    153172    int32 targetCPU = -1;
    154173    if (thread->pinned_to_cpu > 0)
    155174        targetCPU = thread->previous_cpu->cpu_num;
    156     else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled)
    157         targetCPU = affine_get_most_idle_cpu();
    158     else
     175    else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled) {
     176        if (thread->team->preferred_cpu < 0)
     177            thread->team->preferred_cpu = affine_get_most_idle_cpu();
     178        targetCPU = thread->team->preferred_cpu;
     179    } else
    159180        targetCPU = thread->previous_cpu->cpu_num;
    160181
    161182    thread->state = thread->next_state = B_THREAD_READY;
     
    165186        sIdleThreads = thread;
    166187    } else {
    167188        Thread *curr, *prev;
    168         for (curr = sRunQueue[targetCPU], prev = NULL; curr
     189        for (curr = RunQueue(targetCPU), prev = NULL; curr
    169190            && curr->priority >= thread->next_priority;
    170191            curr = curr->queue_next) {
    171192            if (prev)
    172193                prev = prev->queue_next;
    173194            else
    174                 prev = sRunQueue[targetCPU];
     195                prev = RunQueue(targetCPU);
    175196        }
    176197
    177198        T(EnqueueThread(thread, prev, curr));
    178         sRunQueueSize[targetCPU]++;
     199        RunQueueSize(targetCPU)++;
    179200        thread->queue_next = curr;
    180201        if (prev)
    181202            prev->queue_next = thread;
    182203        else
    183             sRunQueue[targetCPU] = thread;
     204            RunQueue(targetCPU) = thread;
    184205
    185206        thread->scheduler_data->fLastQueue = targetCPU;
    186207    }
     
    213234        resultThread = prevThread->queue_next;
    214235        prevThread->queue_next = resultThread->queue_next;
    215236    } else {
    216         resultThread = sRunQueue[currentCPU];
    217         sRunQueue[currentCPU] = resultThread->queue_next;
     237        resultThread = RunQueue(currentCPU);
     238        RunQueue(currentCPU) = resultThread->queue_next;
    218239    }
    219     sRunQueueSize[currentCPU]--;
     240    RunQueueSize(currentCPU)--;
    220241    resultThread->scheduler_data->fLastQueue = -1;
    221242
    222243    return resultThread;
     
    239260    int32 targetCPU = -1;
    240261    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    241262        // skip CPUs that have either no or only one thread
    242         if (i == currentCPU || sRunQueueSize[i] < 2)
     263        if (i == currentCPU || RunQueueSize(i) < 2)
    243264            continue;
    244265
    245266        // out of the CPUs with threads available to steal,
    246267        // pick whichever one is generally the most CPU bound.
    247268        if (targetCPU < 0
    248             || sRunQueue[i]->priority > sRunQueue[targetCPU]->priority
    249             || (sRunQueue[i]->priority == sRunQueue[targetCPU]->priority
    250                 && sRunQueueSize[i] > sRunQueueSize[targetCPU]))
     269            || RunQueue(i)->priority > RunQueue(targetCPU)->priority
     270            || (RunQueue(i)->priority == RunQueue(targetCPU)->priority
     271                && RunQueueSize(i) > RunQueueSize(targetCPU)))
    251272            targetCPU = i;
    252273    }
    253274
    254275    if (targetCPU < 0)
    255276        return NULL;
    256277
    257     Thread* nextThread = sRunQueue[targetCPU];
     278    Thread* nextThread = RunQueue(targetCPU);
    258279    Thread* prevThread = NULL;
    259280
    260281    while (nextThread != NULL) {
     
    302323    Thread *item = NULL, *prev = NULL;
    303324    targetCPU = thread->scheduler_data->fLastQueue;
    304325
    305     for (item = sRunQueue[targetCPU], prev = NULL; item && item != thread;
     326    for (item = RunQueue(targetCPU), prev = NULL; item && item != thread;
    306327            item = item->queue_next) {
    307328        if (prev)
    308329            prev = prev->queue_next;
     
    373394
    374395    Thread *nextThread, *prevThread;
    375396
    376     TRACE(("reschedule(): cpu %ld, cur_thread = %ld\n", currentCPU, oldThread->id));
     397    TRACE(("reschedule(): cpu %ld, cur_thread = %ld\n", currentCPU,
     398        oldThread->id));
    377399
    378400    oldThread->state = oldThread->next_state;
    379401    switch (oldThread->next_state) {
    380402        case B_THREAD_RUNNING:
    381403        case B_THREAD_READY:
    382             TRACE(("enqueueing thread %ld into run q. pri = %ld\n", oldThread->id, oldThread->priority));
     404            TRACE(("enqueueing thread %ld into run q. pri = %ld\n",
     405                oldThread->id, oldThread->priority));
    383406            affine_enqueue_in_run_queue(oldThread);
    384407            break;
    385408        case B_THREAD_SUSPENDED:
     
    388411        case THREAD_STATE_FREE_ON_RESCHED:
    389412            break;
    390413        default:
    391             TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n", oldThread->id, oldThread->next_state));
     414            TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n",
     415                oldThread->id, oldThread->next_state));
    392416            break;
    393417    }
    394418
    395     nextThread = sRunQueue[currentCPU];
     419    nextThread = RunQueue(currentCPU);
    396420    prevThread = NULL;
    397421
    398     if (sRunQueue[currentCPU] != NULL) {
     422    if (RunQueue(currentCPU) != NULL) {
    399423        TRACE(("dequeueing next thread from cpu %ld\n", currentCPU));
    400424        // select next thread from the run queue
    401425        while (nextThread->queue_next) {
     
    487511            cancel_timer(quantumTimer);
    488512        oldThread->cpu->preempted = 0;
    489513
    490         // we do not adjust the quantum for the idle thread as it is going to be
    491         // preempted most of the time and would likely get the longer quantum
    492         // over time, indeed we use a smaller quantum to avoid running idle too
    493         // long
     514        // we do not adjust the quantum for the idle thread as it is going to
     515        // be preempted most of the time and would likely get the longer
     516        // quantum over time, indeed we use a smaller quantum to avoid running
     517        // idle too long
    494518        bigtime_t quantum = kMinThreadQuantum;
    495519        // give CPU-bound background threads a larger quantum size
    496520        // to minimize unnecessary context switches if the system is idle
     
    574598    memset(sRunQueueSize, 0, sizeof(sRunQueueSize));
    575599    add_debugger_command_etc("run_queue", &dump_run_queue,
    576600        "List threads in run queue", "\nLists threads in run queue", 0);
     601    // TODO: get topology info to initialize sCPUMap
     602    // we're assuming a homogeneous topology for now
     603    // also, we're only worried about HT, not various levels of cache sharing
     604    if (gCPU[0].has_ht) {
     605        int maxcorenum = 0;
     606        for (int i = 0; i < smp_get_num_cpus(); i++) {
     607            if (gCPU[i].core_num > maxcorenum)
     608                maxcorenum = gCPU[i].core_num;
     609        }
     610        for (int i = 0; i < smp_get_num_cpus(); i++) {
     611            sCPUMap[i] = (maxcorenum + 1) * gCPU[i].package_num +
     612                gCPU[i].core_num;
     613        }
     614    } else {
     615        for (int i = 0; i < B_MAX_CPU_COUNT ; i++) {
     616            sCPUMap[i] = i;
     617        }
     618    }
     619#if 1
     620    display_topology();
     621#endif
    577622}
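
The queue sharing itself happens through sCPUMap: the RunQueue()/RunQueueSize() macros translate a logical CPU index to a queue index before touching the arrays, so hyperthread siblings end up on the same queue. A worked example for a single-package, dual-core machine with two hyperthreads per core (core numbering assumed to be 0 and 1):

    // gCPU[i].core_num    = {0, 0, 1, 1}   // HT siblings share a core number
    // gCPU[i].package_num = {0, 0, 0, 0}   // single package
    // maxcorenum = 1, so for every logical CPU i:
    //   sCPUMap[i] = (maxcorenum + 1) * gCPU[i].package_num + gCPU[i].core_num
    //              = {0, 0, 1, 1}
    //
    // CPUs 0 and 1 share sRunQueue[0], CPUs 2 and 3 share sRunQueue[1],
    // and RunQueue(x) expands to sRunQueue[sCPUMap[x]].
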
  • headers/private/kernel/arch/x86/arch_system_info.h

     
    1313#endif
    1414
    1515status_t get_current_cpuid(cpuid_info *info, uint32 eax);
     16status_t get_current_cpuid_ex(cpuid_info *info, uint32 eax, uint32 ecx = 0);
    1617uint32 get_eflags(void);
    1718void set_eflags(uint32 value);
    1819
  • headers/private/kernel/cpu.h

     
    3434/* CPU local data structure */
    3535
    3636typedef struct cpu_ent {
     37    // the logical cpu id
    3738    int             cpu_num;
    3839
     40    // the physical location of the logical cpu
     41    int             cpu_num_in_core;
     42    int             core_num;
     43    int             package_num;
     44    int             numa_num;
     45
     46    bool            has_ht;
     47
    3948    // thread.c: used to force a reschedule at quantum expiration time
    4049    int             preempted;
    4150    timer           quantum_timer;
  • headers/private/kernel/thread_types.h

     
    236236    struct list     dead_threads;
    237237    int             dead_threads_count;
    238238
     239    int32           preferred_cpu;  // soft affinity for the team (can be
     240                                    // overridden by setting a thread's hard
     241                                    // affinity)
     242
    239243    // protected by the team's fLock
    240244    team_dead_children dead_children;
    241245    team_job_control_children stopped_children;
     
    429433    struct cpu_ent  *previous_cpu;  // protected by scheduler lock
    430434    int32           pinned_to_cpu;  // only accessed by this thread or in the
    431435                                    // scheduler, when thread is not running
     436    bool            affinity_hard;  // I think this is what pinned_to_cpu is
     437                                    // supposed to be but I'm not sure (Duggan)
    432438
    433439    sigset_t        sig_block_mask; // protected by scheduler lock,
    434440                                    // only modified by the thread itself
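
The "API functions to support affinities" mentioned at the top don't exist yet; purely as an illustration of the direction (every name below is hypothetical, nothing like it is in this patch), a team-level soft affinity call might eventually look something like:

    // Hypothetical future API, not part of this patch: let callers set a
    // team's soft affinity. A thread's hard affinity (pinned_to_cpu /
    // affinity_hard) would still take precedence over this hint.
    status_t set_team_preferred_cpu(team_id team, int32 cpu);
    int32 get_team_preferred_cpu(team_id team);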