Ticket #1069: scheduler1.diff
File scheduler1.diff, 16.7 KB (attached 12 years ago)
---|
-
src/system/kernel/arch/x86/arch_cpu_specific.h
1 /* 2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com 3 * Distributed under the terms of the MIT license. 4 */ 5 #ifndef ARCH_CPU_SPECIFIC_H 6 #define ARCH_CPU_SPECIFIC_H 7 8 #include <cpu.h> 9 10 11 status_t get_topology_intel(cpu_ent*); 12 status_t get_topology_amd(cpu_ent*); 13 14 inline uint64 15 next_power_of_two(uint32 in) 16 { 17 if (in > 2147483648UL) 18 return 4294967296ULL; 19 else if (in > 1073741824U) 20 return 2147483648UL; 21 else if (in > 536870912) 22 return 1073741824; 23 else if (in > 268435456) 24 return 536870912; 25 else if (in > 134217728) 26 return 268435456; 27 else if (in > 67108864) 28 return 134217728; 29 else if (in > 33554432) 30 return 67108864; 31 else if (in > 16777216) 32 return 33554432; 33 else if (in > 8388608) 34 return 16777216; 35 else if (in > 4194304) 36 return 8388608; 37 else if (in > 2097152) 38 return 4194304; 39 else if (in > 1048576) 40 return 2097152; 41 else if (in > 524288) 42 return 1048576; 43 else if (in > 262144) 44 return 524288; 45 else if (in > 131072) 46 return 262144; 47 else if (in > 65536) 48 return 131072; 49 else if (in > 32768) 50 return 65536; 51 else if (in > 16384) 52 return 32768; 53 else if (in > 8192) 54 return 16384; 55 else if (in > 4096) 56 return 8192; 57 else if (in > 2048) 58 return 4096; 59 else if (in > 2048) 60 return 4096; 61 else if (in > 1024) 62 return 2048; 63 else if (in > 512) 64 return 1024; 65 else if (in > 256) 66 return 512; 67 else if (in > 128) 68 return 256; 69 else if (in > 64) 70 return 128; 71 else if (in > 32) 72 return 64; 73 else if (in > 16) 74 return 32; 75 else if (in > 8) 76 return 16; 77 else if (in > 4) 78 return 8; 79 else if (in > 2) 80 return 4; 81 else 82 return in; 83 } 84 85 86 inline uint32 87 log2(uint32 in) 88 { 89 if (in == 536870912) 90 return 31; 91 else if (in == 268435456) 92 return 30; 93 else if (in == 134217728) 94 return 29; 95 else if (in == 67108864) 96 return 28; 97 else if (in == 33554432) 98 return 27; 99 else if (in == 
16777216) 100 return 26; 101 else if (in == 8388608) 102 return 25; 103 else if (in == 4194304) 104 return 24; 105 else if (in == 2097152) 106 return 23; 107 else if (in == 1048576) 108 return 22; 109 else if (in == 524288) 110 return 21; 111 else if (in == 262144) 112 return 20; 113 else if (in == 131072) 114 return 19; 115 else if (in == 65536) 116 return 18; 117 else if (in == 32768) 118 return 17; 119 else if (in == 16384) 120 return 16; 121 else if (in == 8192) 122 return 15; 123 else if (in == 4096) 124 return 14; 125 else if (in == 2048) 126 return 13; 127 else if (in == 2048) 128 return 12; 129 else if (in == 1024) 130 return 11; 131 else if (in == 512) 132 return 10; 133 else if (in == 256) 134 return 9; 135 else if (in == 128) 136 return 8; 137 else if (in == 64) 138 return 7; 139 else if (in == 32) 140 return 6; 141 else if (in == 16) 142 return 5; 143 else if (in == 8) 144 return 4; 145 else if (in == 4) 146 return 3; 147 else if (in == 2) 148 return 2; 149 else if (in == 1) 150 return 1; 151 else 152 return 0; 153 } 154 155 156 #endif // ARCH_CPU_SPECIFIC_H -
src/system/kernel/arch/x86/arch_cpu_intel.cpp
1 /* 2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com 3 * Distributed under the terms of the MIT license. 4 */ 5 6 7 #include <arch_system_info.h> 8 #include <apic.h> 9 10 #include "arch_cpu_specific.h" 11 12 status_t 13 get_topology_intel(cpu_ent* cpu) 14 { 15 cpuid_info cpuid; 16 17 uint32 logicalCPUBits = 0; 18 uint32 coreBits = 0; 19 20 cpu->has_topology_data = false; 21 22 // We will NOT use cpuid leaf 11 because x2APIC isn't enabled, so use 23 // leaves 1 & 4 if they're available, even if leaf 11 is also available 24 get_current_cpuid(&cpuid, 0); 25 if (cpuid.regs.eax >= 4) { 26 // do leaf 1 & 4 crap... 27 uint32 apicID = apic_local_id(); 28 get_current_cpuid(&cpuid, 1); 29 uint32 temp1 = (cpuid.regs.ebx >> 16) & 255; 30 get_current_cpuid(&cpuid, 4, 0); 31 uint32 temp2 = cpuid.regs.eax >> 26; 32 logicalCPUBits = log2(next_power_of_two(temp1) / (temp2 + 1)); 33 coreBits = log2(temp2); 34 35 cpu->cpu_num_in_core = apicID & ((1 << logicalCPUBits) -1); 36 apicID = apicID >> logicalCPUBits; 37 cpu->core_num = apicID & ((1 << coreBits) -1); 38 apicID = apicID >> coreBits; 39 cpu->package_num = apicID; 40 cpu->has_topology_data = true; 41 42 return B_OK; 43 } else { 44 get_current_cpuid(&cpuid, 1); 45 if ((cpuid.regs.ebx & 0xFF0000) > 0) { 46 // 2 level topology 47 // 2 level topologies will not have HT/SMT enabled, so ultimately 48 // they don't matter 49 // --- I have no clue how to get this without leaves 1 & 4 50 cpu->package_num = cpu->cpu_num; 51 return B_OK; 52 } else { 53 // 1 level topology 54 // 1 level topologies are a 1:1 relationship between packages and 55 // logical cpu ids... no HT/SMT, so they don't matter either 56 cpu->package_num = cpu->cpu_num; 57 return B_OK; 58 } 59 } 60 } -
src/system/kernel/arch/x86/Jamfile
19 19 KernelMergeObject kernel_arch_x86.o : 20 20 arch_commpage.cpp 21 21 arch_cpu.cpp 22 arch_cpu_amd.cpp 23 arch_cpu_intel.cpp 22 24 arch_debug.cpp 23 25 arch_debug_console.cpp 24 26 arch_elf.cpp -
src/system/kernel/arch/x86/arch_cpu_amd.cpp
1 /* 2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com 3 * Distributed under the terms of the MIT license. 4 */ 5 6 #include "arch_cpu_specific.h" 7 8 9 status_t 10 get_topology_amd(cpu_ent* cpu) 11 { 12 //uint32 logicalCPUBits = 0; 13 //uint32 coreBits = 0; 14 15 // TODO: return true by default? 16 return !B_OK; 17 } -
src/system/kernel/arch/x86/cpuid.S
11 11 12 12 .text 13 13 14 /* void get_current_cpuid(cpuid_info *info, uint32 eaxRegister) */ 14 /* status_t get_current_cpuid(cpuid_info *info, uint32 eaxRegister, 15 uint32 ecxRegister = 0) */ 15 16 FUNCTION(get_current_cpuid): 16 17 pushl %ebx 17 18 pushl %edi 18 19 movl 12(%esp),%edi /* first arg points to the cpuid_info structure */ 19 20 movl 16(%esp),%eax /* second arg sets up eax */ 21 movl 20(%esp),%ecx /* third arg sets up ecx */ 20 22 cpuid 21 23 movl %eax,0(%edi) /* copy the regs into the cpuid_info structure */ 22 24 movl %ebx,4(%edi) -
src/system/kernel/arch/x86/arch_smp.cpp
26 26 #include <stdio.h> 27 27 28 28 29 #include "arch_cpu_specific.h" 30 29 31 //#define TRACE_ARCH_SMP 30 32 #ifdef TRACE_ARCH_SMP 31 33 # define TRACE(x) dprintf x … … 109 111 110 112 init_sse(); 111 113 114 115 // determine processor topology for scheduler 116 if (gCPU[cpu].arch.vendor == VENDOR_INTEL) { 117 if (get_topology_intel(&gCPU[cpu]) != B_OK) { 118 panic("detect_cpu(): unable to get topology for Intel chip\n"); 119 } 120 } else if (gCPU[cpu].arch.vendor == VENDOR_AMD) { 121 if (get_topology_amd(&gCPU[cpu]) != B_OK) { 122 panic("detect_cpu(): unable to get topology for AMD chip\n"); 123 } 124 } else { // Only Intel and AMD vendors currently supported. 125 gCPU[cpu].has_topology_data = false; 126 } 127 128 112 129 return B_OK; 113 130 } 114 131 -
src/system/kernel/team.cpp
447 447 user_data_size = 0; 448 448 free_user_threads = NULL; 449 449 450 // new team has no soft affinity 451 preferred_cpu = -1; 452 450 453 supplementary_groups = NULL; 451 454 supplementary_group_count = 0; 452 455 -
src/system/kernel/scheduler/scheduler.cpp
68 68 cpuCount != 1 ? "s" : ""); 69 69 70 70 if (cpuCount > 1) { 71 #if 071 #if 1 72 72 dprintf("scheduler_init: using affine scheduler\n"); 73 73 scheduler_affine_init(); 74 74 #else -
src/system/kernel/scheduler/scheduler_affine.cpp
1 1 /* 2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com 2 3 * Copyright 2009, Rene Gollent, rene@gollent.com. 3 4 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 4 5 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. … … 37 38 # define TRACE(x) ; 38 39 #endif 39 40 41 // Helper macros 42 #define RUN_QUEUE(x) sRunQueue[sCPUMap[x]] 43 #define RUN_QUEUE_SIZE(x) sRunQueueSize[sCPUMap[x]] 44 40 45 // The run queues. Holds the threads ready to run ordered by priority. 41 46 // One queue per schedulable target (CPU, core, etc.). 42 47 // TODO: consolidate this such that HT/SMT entities on the same physical core 43 48 // share a queue, once we have the necessary API for retrieving the topology 44 49 // information 50 static int32 sCPUMap[B_MAX_CPU_COUNT]; 45 51 static Thread* sRunQueue[B_MAX_CPU_COUNT]; 46 52 static int32 sRunQueueSize[B_MAX_CPU_COUNT]; 47 53 static Thread* sIdleThreads; … … 108 114 Thread *thread = NULL; 109 115 110 116 for (int32 i = 0; i < smp_get_num_cpus(); i++) { 111 thread = sRunQueue[i];117 thread = RUN_QUEUE(i); 112 118 kprintf("Run queue for cpu %ld (%ld threads)\n", i, 113 sRunQueueSize[i]);114 if ( sRunQueueSize[i]> 0) {119 RUN_QUEUE_SIZE(i)); 120 if (RUN_QUEUE_SIZE(i) > 0) { 115 121 kprintf("thread id priority avg. quantum name\n"); 116 122 while (thread) { 117 123 kprintf("%p %-7ld %-8ld %-12ld %s\n", thread, thread->id, … … 126 132 } 127 133 128 134 135 static int 136 dump_topology(int argc, char **argv) 137 { 138 kprintf("Processor Topology Data\n"); 139 kprintf("Num\tCPU\tNumOnCore\tCore\tPackage\n"); 140 for (int32 i = 0; i < smp_get_num_cpus(); i++) { 141 kprintf("%ld\t%d\t:\t%d\t%d\t%d\n", 142 i, gCPU[i].cpu_num, gCPU[i].cpu_num_in_core, gCPU[i].core_num, 143 gCPU[i].package_num); 144 } 145 return 0; 146 } 147 148 129 149 /*! Returns the most idle CPU based on the active time counters. 
130 150 Note: thread lock must be held when entering this function 131 151 */ … … 136 156 for (int32 i = 0; i < smp_get_num_cpus(); i++) { 137 157 if (gCPU[i].disabled) 138 158 continue; 139 if (targetCPU < 0 || sRunQueueSize[i] < sRunQueueSize[targetCPU])159 if (targetCPU < 0 || RUN_QUEUE_SIZE(i) < RUN_QUEUE_SIZE(targetCPU)) 140 160 targetCPU = i; 141 161 } 142 162 … … 153 173 int32 targetCPU = -1; 154 174 if (thread->pinned_to_cpu > 0) 155 175 targetCPU = thread->previous_cpu->cpu_num; 156 else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled) 157 targetCPU = affine_get_most_idle_cpu(); 158 else 176 else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled) { 177 if (thread->team->preferred_cpu < 0) 178 thread->team->preferred_cpu = affine_get_most_idle_cpu(); 179 targetCPU = thread->team->preferred_cpu; 180 } else 159 181 targetCPU = thread->previous_cpu->cpu_num; 160 182 161 183 thread->state = thread->next_state = B_THREAD_READY; … … 165 187 sIdleThreads = thread; 166 188 } else { 167 189 Thread *curr, *prev; 168 for (curr = sRunQueue[targetCPU], prev = NULL; curr190 for (curr = RUN_QUEUE(targetCPU), prev = NULL; curr 169 191 && curr->priority >= thread->next_priority; 170 192 curr = curr->queue_next) { 171 193 if (prev) 172 194 prev = prev->queue_next; 173 195 else 174 prev = sRunQueue[targetCPU];196 prev = RUN_QUEUE(targetCPU); 175 197 } 176 198 177 199 T(EnqueueThread(thread, prev, curr)); 178 sRunQueueSize[targetCPU]++;200 RUN_QUEUE_SIZE(targetCPU)++; 179 201 thread->queue_next = curr; 180 202 if (prev) 181 203 prev->queue_next = thread; 182 204 else 183 sRunQueue[targetCPU]= thread;205 RUN_QUEUE(targetCPU) = thread; 184 206 185 207 thread->scheduler_data->fLastQueue = targetCPU; 186 208 } … … 213 235 resultThread = prevThread->queue_next; 214 236 prevThread->queue_next = resultThread->queue_next; 215 237 } else { 216 resultThread = sRunQueue[currentCPU];217 sRunQueue[currentCPU]= resultThread->queue_next;238 resultThread = 
RUN_QUEUE(currentCPU); 239 RUN_QUEUE(currentCPU) = resultThread->queue_next; 218 240 } 219 sRunQueueSize[currentCPU]--;241 RUN_QUEUE_SIZE(currentCPU)--; 220 242 resultThread->scheduler_data->fLastQueue = -1; 221 243 222 244 return resultThread; … … 239 261 int32 targetCPU = -1; 240 262 for (int32 i = 0; i < smp_get_num_cpus(); i++) { 241 263 // skip CPUs that have either no or only one thread 242 if (i == currentCPU || sRunQueueSize[i]< 2)264 if (i == currentCPU || RUN_QUEUE_SIZE(i) < 2) 243 265 continue; 244 266 245 267 // out of the CPUs with threads available to steal, 246 268 // pick whichever one is generally the most CPU bound. 247 269 if (targetCPU < 0 248 || sRunQueue[i]->priority > sRunQueue[targetCPU]->priority249 || ( sRunQueue[i]->priority == sRunQueue[targetCPU]->priority250 && sRunQueueSize[i] > sRunQueueSize[targetCPU]))270 || RUN_QUEUE(i)->priority > RUN_QUEUE(targetCPU)->priority 271 || (RUN_QUEUE(i)->priority == RUN_QUEUE(targetCPU)->priority 272 && RUN_QUEUE_SIZE(i) > RUN_QUEUE_SIZE(targetCPU))) 251 273 targetCPU = i; 252 274 } 253 275 254 276 if (targetCPU < 0) 255 277 return NULL; 256 278 257 Thread* nextThread = sRunQueue[targetCPU];279 Thread* nextThread = RUN_QUEUE(targetCPU); 258 280 Thread* prevThread = NULL; 259 281 260 282 while (nextThread != NULL) { … … 302 324 Thread *item = NULL, *prev = NULL; 303 325 targetCPU = thread->scheduler_data->fLastQueue; 304 326 305 for (item = sRunQueue[targetCPU], prev = NULL; item && item != thread;327 for (item = RUN_QUEUE(targetCPU), prev = NULL; item && item != thread; 306 328 item = item->queue_next) { 307 329 if (prev) 308 330 prev = prev->queue_next; … … 392 414 break; 393 415 } 394 416 395 nextThread = sRunQueue[currentCPU];417 nextThread = RUN_QUEUE(currentCPU); 396 418 prevThread = NULL; 397 419 398 if ( sRunQueue[currentCPU]!= NULL) {420 if (RUN_QUEUE(currentCPU) != NULL) { 399 421 TRACE(("dequeueing next thread from cpu %ld\n", currentCPU)); 400 422 // select next thread from the run queue 
401 423 while (nextThread->queue_next) { … … 574 596 memset(sRunQueueSize, 0, sizeof(sRunQueueSize)); 575 597 add_debugger_command_etc("run_queue", &dump_run_queue, 576 598 "List threads in run queue", "\nLists threads in run queue", 0); 599 add_debugger_command_etc("topology", &dump_topology, 600 "List processor topology", "\nLists processor topology", 0); 601 // TODO: get topology info to initialize sCPUMap 602 // we're assuming a homogenous topology for now 603 // also we're just worried about HT not various levels of cache sharing 604 if (gCPU[0].has_topology_data) { 605 int maxcorenum = 0; 606 for (int i = 0; i < smp_get_num_cpus(); i++) { 607 if (gCPU[i].core_num > maxcorenum) 608 maxcorenum = gCPU[i].core_num; 609 } 610 for (int i = 0; i < smp_get_num_cpus(); i++) { 611 sCPUMap[i] = (maxcorenum + 1) * gCPU[i].package_num + 612 gCPU[i].core_num; 613 } 614 } else { 615 for (int i = 0; i < B_MAX_CPU_COUNT ; i++) { 616 sCPUMap[i] = i; 617 } 618 } 619 #if 0 620 //dump_topology(0, NULL); 621 #endif 577 622 } -
headers/private/kernel/arch/x86/arch_system_info.h
12 12 extern "C" { 13 13 #endif 14 14 15 status_t get_current_cpuid(cpuid_info *info, uint32 eax);15 status_t get_current_cpuid(cpuid_info* info, uint32 eax, uint32 ecx = 0); 16 16 uint32 get_eflags(void); 17 17 void set_eflags(uint32 value); 18 18 -
headers/private/kernel/cpu.h
34 34 /* CPU local data structure */ 35 35 36 36 typedef struct cpu_ent { 37 // the logical cpu id 37 38 int cpu_num; 38 39 40 // the physical location of the logical cpu 41 int cpu_num_in_core; 42 int core_num; 43 int package_num; 44 45 bool has_topology_data; 46 39 47 // thread.c: used to force a reschedule at quantum expiration time 40 48 int preempted; 41 49 timer quantum_timer; -
headers/private/kernel/thread_types.h
236 236 struct list dead_threads; 237 237 int dead_threads_count; 238 238 239 int32 preferred_cpu; // soft affinity for the team (can be 240 // overridden by setting a thread's hard 241 // affinity 242 239 243 // protected by the team's fLock 240 244 team_dead_children dead_children; 241 245 team_job_control_children stopped_children;