Ticket #1069: scheduler.diff

File scheduler.diff, 15.5 KB (added by Duggan, 8 years ago)

This is an intermediate patch for some work I'm doing on the existing affine scheduler. In theory, hyperthreaded cores now share ready queues, and there is a little support for soft affinities for teams. I can't test the hyperthreading code myself, so I would appreciate it if you could test this on various Intel and AMD processors and provide some feedback (mostly syslogs, and backtraces if it crashes). Future work includes proper load balancing and affinities, API functions to support affinities, and possibly a thread class (à la the BeAPI) that provides an OO wrapper around those API functions. Other changes I would like to make include shutting down unnecessary cores when the load is light and re-enabling them as the load increases, as well as potentially doing away with the global scheduler lock in favour of per-queue locks. Again, this is an intermediate patch, so there are likely a lot of problems with it besides bugs (but I did run it through the style checker and did the diff from trunk this time ;) ... not saying there aren't still style errors, just not in my code). Cheers!

  • src/system/kernel/arch/x86/arch_cpu.cpp

     
    2828#include <arch_system_info.h>
    2929#include <arch/x86/selector.h>
    3030#include <boot/kernel_args.h>
     31#include <arch/x86/apic.h>
    3132
    3233#include "interrupts.h"
    3334#include "paging/X86PagingStructures.h"
     
    504505#endif  // DUMP_FEATURE_STRING
    505506
    506507
     508int32
     509round_to_pwr_of_2(int32 in)
     510{
     511    if (in > 64)
     512        return 128;
     513    else if (in > 32)
     514        return 64;
     515    else if (in > 16)
     516        return 32;
     517    else if (in > 8)
     518        return 16;
     519    else if (in > 4)
     520        return 8;
     521    else if (in > 2)
     522        return 4;
     523    else if (in > 1)
     524        return 2;
     525    else
     526        return in;
     527}
     528
     529
    507530static int
    508531detect_cpu(int currentCPU)
    509532{
     
    519542    cpu->arch.feature[FEATURE_EXT_AMD] = 0;
    520543    cpu->arch.model_name[0] = 0;
    521544
     545    // initialize the topology data
     546    // TODO: should this be negative since 0s are valid?
     547    cpu->cpu_num_in_core = 0;
     548    cpu->core_num = 0;
     549    cpu->package_num = 0;
     550    cpu->numa_num = 0;
     551
    522552    // print some fun data
    523553    get_current_cpuid(&cpuid, 0);
    524554
     
    608638        cpu->arch.feature[FEATURE_EXT_AMD] = cpuid.regs.edx; // edx
    609639    }
    610640
     641    // determine processor topology for scheduler
     642    int32 logcpubits = 0;
     643    int32 corebits = 0;
     644    if (cpu->arch.feature[FEATURE_COMMON] & IA32_FEATURE_HTT) {
     645        // Has HT
     646        cpu->has_ht = true;
     647        get_current_cpuid(&cpuid, 0x00000001);
     648        int32 count = (cpuid.regs.ebx >> 16) & 255; // EBX[23:16]
     649        if (cpu->arch.vendor == VENDOR_INTEL) {
     650            // retrieve topology data for Intel chipsets
     651            get_current_cpuid_ex(&cpuid, 0x0000000B, 0);
     652            // check if leaf 0x0000000B exists
     653            if ((cpuid.regs.ebx & ((1<<15)-1)) != 0) { // EBX[15:0]
     654                dprintf("******* Intel Processor - Leaf 0x0000000B Exists!\n");
     655                logcpubits = cpuid.regs.eax & ((1<<4)-1); // EAX[4:0]
     656                get_current_cpuid_ex(&cpuid, 0x0000000B, 1);
     657                corebits = (cpuid.regs.eax & ((1<<4)-1)) - logcpubits;
     658            } else {
     659                // retrieve topology data without using leaf 0x0000000B
     660                dprintf("******* Intel Processor - Leaf 0x0000000B Does NOT Exist!\n");
     661                // NOTE: There are some cases where leaf 0x00000004 isn't
     662                // supported... I don't know how to test for that yet (Duggan)
     663                get_current_cpuid(&cpuid, 0x00000004);
     664                int32 temp = cpuid.regs.eax >> 26; // EAX[31:26]
     665                logcpubits = round_to_pwr_of_2(temp);
     666                temp = temp >> logcpubits;
     667                corebits = round_to_pwr_of_2(temp);
     668            }
     669        } else if (cpu->arch.vendor == VENDOR_AMD) {
     670            // retrieve topology data for AMD chipsets
     671            // NOTE: There are some cases where 0x80000008 isn't supported...
     672            // I have no clue how to test that right now (Duggan)
     673            dprintf("******* AMD Processor - Has Hyperthreading!\n");
     674            get_current_cpuid(&cpuid, 0x80000008);
     675            int32 temp = (cpuid.regs.ecx >> 12) & 15; // ECX[15:12]
     676            if (temp != 0)
     677                corebits = temp;
     678            else
     679                corebits = round_to_pwr_of_2(cpuid.regs.ecx & 255);
     680            logcpubits = round_to_pwr_of_2(count >> corebits);
     681        } else {
     682            // no clue how to retrieve information for non-Intel/AMD vendors
     683            dprintf("******* Cannot determine topology for non-Intel/AMD OEM!\n");
     684            cpu->has_ht = false;
     685        }
     686
     687        // Now determine the topology
     688        if (cpu->has_ht) {
     689            cpu->cpu_num_in_core = apic_local_id() & ((1 << logcpubits) - 1);
     690            cpu->core_num = (apic_local_id() >> logcpubits) &
     691                ((1 << corebits) -1);
     692            cpu->package_num = apic_local_id() &
     693                ~((1 << (logcpubits + corebits)) -1);
     694        }
     695    } else {
     696        // No HT
     697        dprintf("******* No Hyperthreading!\n");
     698        // set flag in cpu_ent to say there's no HT
     699        cpu->has_ht = false;
     700    }
     701
    611702#if DUMP_FEATURE_STRING
    612703    dump_feature_string(currentCPU, cpu);
    613704#endif
  • src/system/kernel/arch/x86/cpuid.S

     
    2828    ret
    2929FUNCTION_END(get_current_cpuid)
    3030
     31/* status_t get_current_cpuid_ex(cpuid_info *info, uint32 eaxRegister,
     32uint32 ecxRegister = 0)
          Executes CPUID with EAX and ECX loaded from the second and third
          arguments and stores the resulting EAX, EBX, EDX, ECX (in that
          order) at byte offsets 0, 4, 8 and 12 of *info. EAX is cleared
          before returning, so the call always reports B_OK. */
     33FUNCTION(get_current_cpuid_ex):
     34    pushl   %ebx            /* cpuid overwrites ebx; restored before returning */
     35    pushl   %edi            /* edi holds the info pointer below; restored before returning */
     36    movl    12(%esp),%edi   /* first arg points to the cpuid_info structure
                                      (offsets account for the two pushes above
                                      and the return address) */
     37    movl    16(%esp),%eax   /* second arg sets up eax */
     38    movl    20(%esp),%ecx   /* third arg sets up ecx */
     39    cpuid
     40    movl    %eax,0(%edi)    /* copy the regs into the cpuid_info structure */
     41    movl    %ebx,4(%edi)
     42    movl    %edx,8(%edi)    /* note: edx is stored before ecx */
     43    movl    %ecx,12(%edi)
     44    popl    %edi            /* restore in reverse order of the pushes */
     45    popl    %ebx
     46    xorl    %eax, %eax      /* return B_OK */
     47    ret
     48FUNCTION_END(get_current_cpuid_ex)
    3149
     50
    3251/* unsigned int get_eflags(void) */
    3352FUNCTION(get_eflags):
    3453    pushfl
  • src/system/kernel/team.cpp

     
    447447    user_data_size = 0;
    448448    free_user_threads = NULL;
    449449
     450    // new team has no soft affinity
     451    preferred_cpu = -1;
     452
    450453    supplementary_groups = NULL;
    451454    supplementary_group_count = 0;
    452455
  • src/system/kernel/scheduler/scheduler.cpp

     
    6868        cpuCount != 1 ? "s" : "");
    6969
    7070    if (cpuCount > 1) {
    71 #if 0
     71#if 1
    7272        dprintf("scheduler_init: using affine scheduler\n");
    7373        scheduler_affine_init();
    7474#else
  • src/system/kernel/scheduler/scheduler_affine.cpp

     
    11/*
     2 * Copyright 2011, James Dewey Taylor, james.dewey.taylor@gmail.com
    23 * Copyright 2009, Rene Gollent, rene@gollent.com.
    34 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
    45 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
     
    3738#   define TRACE(x) ;
    3839#endif
    3940
     41// Helper macros
     // Translate a logical CPU index into the run queue head (RunQueue) or
     // the queue length counter (RunQueueSize) that the CPU is mapped to via
     // sCPUMap. CPUs that have the same sCPUMap entry share one queue.
     // Both expand to plain array element expressions, so they can be
     // assigned to and incremented/decremented like the arrays themselves.
     42#define RunQueue(x) sRunQueue[sCPUMap[x]]
     43#define RunQueueSize(x) sRunQueueSize[sCPUMap[x]]
     44
    4045// The run queues. Holds the threads ready to run ordered by priority.
    4146// One queue per schedulable target (CPU, core, etc.).
    4247// TODO: consolidate this such that HT/SMT entities on the same physical core
    4348// share a queue, once we have the necessary API for retrieving the topology
    4449// information
     50static int32 sCPUMap[B_MAX_CPU_COUNT];
    4551static Thread* sRunQueue[B_MAX_CPU_COUNT];
    4652static int32 sRunQueueSize[B_MAX_CPU_COUNT];
    4753static Thread* sIdleThreads;
     
    108114    Thread *thread = NULL;
    109115
    110116    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    111         thread = sRunQueue[i];
     117        thread = RunQueue(i);
    112118        kprintf("Run queue for cpu %ld (%ld threads)\n", i,
    113             sRunQueueSize[i]);
    114         if (sRunQueueSize[i] > 0) {
     119            RunQueueSize(i));
     120        if (RunQueueSize(i) > 0) {
    115121            kprintf("thread      id      priority  avg. quantum  name\n");
    116122            while (thread) {
    117123                kprintf("%p  %-7ld %-8ld  %-12ld  %s\n", thread, thread->id,
     
    126132}
    127133
    128134
     135void
     136display_topology()
     137{
     138    dprintf_no_syslog("Processor Topology Data\n");
     139    dprintf_no_syslog("Num\tCPU\tNumOnCore\tCore\tPackage\n");
     140    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
     141        dprintf_no_syslog("%ld\t%d\t%d\t%d\t%d\n",
     142            i, gCPU[i].cpu_num, gCPU[i].cpu_num_in_core, gCPU[i].core_num,
     143            gCPU[i].package_num);
     144    }
     145}
     146
     147
    129148/*! Returns the most idle CPU based on the active time counters.
    130149    Note: thread lock must be held when entering this function
    131150*/
     
    136155    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    137156        if (gCPU[i].disabled)
    138157            continue;
    139         if (targetCPU < 0 || sRunQueueSize[i] < sRunQueueSize[targetCPU])
     158        if (targetCPU < 0 || RunQueueSize(i) < RunQueueSize(targetCPU))
    140159            targetCPU = i;
    141160    }
    142161
     
    153172    int32 targetCPU = -1;
    154173    if (thread->pinned_to_cpu > 0)
    155174        targetCPU = thread->previous_cpu->cpu_num;
    156     else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled)
    157         targetCPU = affine_get_most_idle_cpu();
    158     else
     175    else if (thread->previous_cpu == NULL || thread->previous_cpu->disabled) {
     176        if (thread->team->preferred_cpu < 0)
     177            thread->team->preferred_cpu = affine_get_most_idle_cpu();
     178        targetCPU = thread->team->preferred_cpu;
     179    } else
    159180        targetCPU = thread->previous_cpu->cpu_num;
    160181
    161182    thread->state = thread->next_state = B_THREAD_READY;
     
    165186        sIdleThreads = thread;
    166187    } else {
    167188        Thread *curr, *prev;
    168         for (curr = sRunQueue[targetCPU], prev = NULL; curr
     189        for (curr = RunQueue(targetCPU), prev = NULL; curr
    169190            && curr->priority >= thread->next_priority;
    170191            curr = curr->queue_next) {
    171192            if (prev)
    172193                prev = prev->queue_next;
    173194            else
    174                 prev = sRunQueue[targetCPU];
     195                prev = RunQueue(targetCPU);
    175196        }
    176197
    177198        T(EnqueueThread(thread, prev, curr));
    178         sRunQueueSize[targetCPU]++;
     199        RunQueueSize(targetCPU)++;
    179200        thread->queue_next = curr;
    180201        if (prev)
    181202            prev->queue_next = thread;
    182203        else
    183             sRunQueue[targetCPU] = thread;
     204            RunQueue(targetCPU) = thread;
    184205
    185206        thread->scheduler_data->fLastQueue = targetCPU;
    186207    }
     
    213234        resultThread = prevThread->queue_next;
    214235        prevThread->queue_next = resultThread->queue_next;
    215236    } else {
    216         resultThread = sRunQueue[currentCPU];
    217         sRunQueue[currentCPU] = resultThread->queue_next;
     237        resultThread = RunQueue(currentCPU);
     238        RunQueue(currentCPU) = resultThread->queue_next;
    218239    }
    219     sRunQueueSize[currentCPU]--;
     240    RunQueueSize(currentCPU)--;
    220241    resultThread->scheduler_data->fLastQueue = -1;
    221242
    222243    return resultThread;
     
    239260    int32 targetCPU = -1;
    240261    for (int32 i = 0; i < smp_get_num_cpus(); i++) {
    241262        // skip CPUs that have either no or only one thread
    242         if (i == currentCPU || sRunQueueSize[i] < 2)
     263        if (i == currentCPU || RunQueueSize(i) < 2)
    243264            continue;
    244265
    245266        // out of the CPUs with threads available to steal,
    246267        // pick whichever one is generally the most CPU bound.
    247268        if (targetCPU < 0
    248             || sRunQueue[i]->priority > sRunQueue[targetCPU]->priority
    249             || (sRunQueue[i]->priority == sRunQueue[targetCPU]->priority
    250                 && sRunQueueSize[i] > sRunQueueSize[targetCPU]))
     269            || RunQueue(i)->priority > RunQueue(targetCPU)->priority
     270            || (RunQueue(i)->priority == RunQueue(targetCPU)->priority
     271                && RunQueueSize(i) > RunQueueSize(targetCPU)))
    251272            targetCPU = i;
    252273    }
    253274
    254275    if (targetCPU < 0)
    255276        return NULL;
    256277
    257     Thread* nextThread = sRunQueue[targetCPU];
     278    Thread* nextThread = RunQueue(targetCPU);
    258279    Thread* prevThread = NULL;
    259280
    260281    while (nextThread != NULL) {
     
    302323    Thread *item = NULL, *prev = NULL;
    303324    targetCPU = thread->scheduler_data->fLastQueue;
    304325
    305     for (item = sRunQueue[targetCPU], prev = NULL; item && item != thread;
     326    for (item = RunQueue(targetCPU), prev = NULL; item && item != thread;
    306327            item = item->queue_next) {
    307328        if (prev)
    308329            prev = prev->queue_next;
     
    373394
    374395    Thread *nextThread, *prevThread;
    375396
    376     TRACE(("reschedule(): cpu %ld, cur_thread = %ld\n", currentCPU, oldThread->id));
     397    TRACE(("reschedule(): cpu %ld, cur_thread = %ld\n", currentCPU,
     398        oldThread->id));
    377399
    378400    oldThread->state = oldThread->next_state;
    379401    switch (oldThread->next_state) {
    380402        case B_THREAD_RUNNING:
    381403        case B_THREAD_READY:
    382             TRACE(("enqueueing thread %ld into run q. pri = %ld\n", oldThread->id, oldThread->priority));
     404            TRACE(("enqueueing thread %ld into run q. pri = %ld\n",
     405                oldThread->id, oldThread->priority));
    383406            affine_enqueue_in_run_queue(oldThread);
    384407            break;
    385408        case B_THREAD_SUSPENDED:
     
    388411        case THREAD_STATE_FREE_ON_RESCHED:
    389412            break;
    390413        default:
    391             TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n", oldThread->id, oldThread->next_state));
     414            TRACE(("not enqueueing thread %ld into run q. next_state = %ld\n",
     415                oldThread->id, oldThread->next_state));
    392416            break;
    393417    }
    394418
    395     nextThread = sRunQueue[currentCPU];
     419    nextThread = RunQueue(currentCPU);
    396420    prevThread = NULL;
    397421
    398     if (sRunQueue[currentCPU] != NULL) {
     422    if (RunQueue(currentCPU) != NULL) {
    399423        TRACE(("dequeueing next thread from cpu %ld\n", currentCPU));
    400424        // select next thread from the run queue
    401425        while (nextThread->queue_next) {
     
    487511            cancel_timer(quantumTimer);
    488512        oldThread->cpu->preempted = 0;
    489513
    490         // we do not adjust the quantum for the idle thread as it is going to be
    491         // preempted most of the time and would likely get the longer quantum
    492         // over time, indeed we use a smaller quantum to avoid running idle too
    493         // long
     514        // we do not adjust the quantum for the idle thread as it is going to
     515        // be preempted most of the time and would likely get the longer
     516        // quantum over time, indeed we use a smaller quantum to avoid running
     517        // idle too long
    494518        bigtime_t quantum = kMinThreadQuantum;
    495519        // give CPU-bound background threads a larger quantum size
    496520        // to minimize unnecessary context switches if the system is idle
     
    574598    memset(sRunQueueSize, 0, sizeof(sRunQueueSize));
    575599    add_debugger_command_etc("run_queue", &dump_run_queue,
    576600        "List threads in run queue", "\nLists threads in run queue", 0);
     601    // TODO: get topology info to initialize sCPUMap
     602    // we're assuming a homogenous topology for now
     603    // also we're just worried about HT not various levels of cache sharing
     604    if (gCPU[0].has_ht) {
     605        int maxcorenum = 0;
     606        for (int i = 0; i < smp_get_num_cpus(); i++) {
     607            if (gCPU[i].core_num > maxcorenum)
     608                maxcorenum = gCPU[i].core_num;
     609        }
     610        for (int i = 0; i < smp_get_num_cpus(); i++) {
     611            sCPUMap[i] = (maxcorenum + 1) * gCPU[i].package_num +
     612                gCPU[i].core_num;
     613        }
     614    } else {
     615        for (int i = 0; i < B_MAX_CPU_COUNT ; i++) {
     616            sCPUMap[i] = i;
     617        }
     618    }
     619#if 1
     620    display_topology();
     621#endif
    577622}
  • headers/private/kernel/arch/x86/arch_system_info.h

     
    1313#endif
    1414
    1515status_t get_current_cpuid(cpuid_info *info, uint32 eax);
     16status_t get_current_cpuid_ex(cpuid_info *info, uint32 eax, uint32 ecx = 0);
    1617uint32 get_eflags(void);
    1718void set_eflags(uint32 value);
    1819
  • headers/private/kernel/cpu.h

     
    3434/* CPU local data structure */
    3535
    3636typedef struct cpu_ent {
     37    // the logical cpu id
    3738    int             cpu_num;
    3839
     40    // the physical location of the logical cpu
     41    int             cpu_num_in_core;
     42    int             core_num;
     43    int             package_num;
     44    int             numa_num;
     45
     46    bool            has_ht;
     47
    3948    // thread.c: used to force a reschedule at quantum expiration time
    4049    int             preempted;
    4150    timer           quantum_timer;
  • headers/private/kernel/thread_types.h

     
    236236    struct list     dead_threads;
    237237    int             dead_threads_count;
    238238
     239    int32           preferred_cpu;  // soft affinity for the team (can be
     240                                    // overridden by setting a thread's hard
     241                                    // affinity
     242
    239243    // protected by the team's fLock
    240244    team_dead_children dead_children;
    241245    team_job_control_children stopped_children;
     
    429433    struct cpu_ent  *previous_cpu;  // protected by scheduler lock
    430434    int32           pinned_to_cpu;  // only accessed by this thread or in the
    431435                                    // scheduler, when thread is not running
     436    bool            affinity_hard;  // I think this is what pinned_to_cpu is
     437                                    // supposed to be but I'm not sure (Duggan)
    432438
    433439    sigset_t        sig_block_mask; // protected by scheduler lock,
    434440                                    // only modified by the thread itself