/****************************************************************************** * sched_arinc653.c * * An ARINC653-compatible scheduling algorithm for use in Xen. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2010, DornerWorks, Ltd. */ #include #include #include #include #include #include #include #include #include #include /************************************************************************** * Private Macros * **************************************************************************/ /** * Default timeslice for domain 0. */ #define DEFAULT_TIMESLICE MILLISECS(10) /** * Retrieve the idle VCPU for a given physical CPU */ #define IDLETASK(cpu) (idle_vcpu[cpu]) /** * Return a pointer to the ARINC 653-specific scheduler data information * associated with the given VCPU (vc) */ #define AVCPU(vc) ((arinc653_vcpu_t *)(vc)->sched_priv) /** * Return the global scheduler private data given the scheduler ops pointer */ #define SCHED_PRIV(s) ((a653sched_priv_t *)((s)->sched_data)) /************************************************************************** * Private Type Definitions * **************************************************************************/ /** * The arinc653_vcpu_t structure holds ARINC 653-scheduler-specific * information for all non-idle VCPUs */ typedef struct arinc653_vcpu_s { /* vc points to Xen's struct vcpu so we can get to it from an * arinc653_vcpu_t pointer. */ struct vcpu * vc; /* awake holds whether the VCPU has been woken with vcpu_wake() */ bool_t awake; /* list holds the linked list information for the list this VCPU * is stored in */ struct list_head list; } arinc653_vcpu_t; /** * The sched_entry_t structure holds a single entry of the * ARINC 653 schedule. */ typedef struct sched_entry_s { /* dom_handle holds the handle ("UUID") for the domain that this * schedule entry refers to. */ xen_domain_handle_t dom_handle; /* vcpu_id holds the VCPU number for the VCPU that this schedule * entry refers to. */ int vcpu_id; /* runtime holds the number of nanoseconds that the VCPU for this * schedule entry should be allowed to run per major frame. */ s_time_t runtime; /* vc holds a pointer to the Xen VCPU structure */ struct vcpu * vc; } sched_entry_t; /** * This structure defines data that is global to an instance of the scheduler */ typedef struct a653sched_priv_s { /* lock for the whole pluggable scheduler, nests inside cpupool_lock */ spinlock_t lock; /** * This array holds the active ARINC 653 schedule. * * When the system tries to start a new VCPU, this schedule is scanned * to look for a matching (handle, VCPU #) pair. If both the handle (UUID) * and VCPU number match, then the VCPU is allowed to run. Its run time * (per major frame) is given in the third entry of the schedule. */ sched_entry_t schedule[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; /** * This variable holds the number of entries that are valid in * the arinc653_schedule table. * * This is not necessarily the same as the number of domains in the * schedule. A domain could be listed multiple times within the schedule, * or a domain with multiple VCPUs could have a different * schedule entry for each VCPU. */ unsigned int num_schedule_entries; /** * the major frame time for the ARINC 653 schedule. */ s_time_t major_frame; /** * the time that the next major frame starts */ s_time_t next_major_frame; /** * pointers to all Xen VCPU structures for iterating through */ struct list_head vcpu_list; } a653sched_priv_t; /************************************************************************** * Helper functions * **************************************************************************/ /** * This function compares two domain handles. * * @param h1 Pointer to handle 1 * @param h2 Pointer to handle 2 * * @return
    *
  • <0: handle 1 is less than handle 2 *
  • 0: handle 1 is equal to handle 2 *
  • >0: handle 1 is greater than handle 2 *
*/ static int dom_handle_cmp(const xen_domain_handle_t h1, const xen_domain_handle_t h2) { return memcmp(h1, h2, sizeof(xen_domain_handle_t)); } /** * This function searches the vcpu list to find a VCPU that matches * the domain handle and VCPU ID specified. * * @param ops Pointer to this instance of the scheduler structure * @param handle Pointer to handler * @param vcpu_id VCPU ID * * @return
    *
  • Pointer to the matching VCPU if one is found *
  • NULL otherwise *
*/ static struct vcpu *find_vcpu( const struct scheduler *ops, xen_domain_handle_t handle, int vcpu_id) { arinc653_vcpu_t *avcpu; /* loop through the vcpu_list looking for the specified VCPU */ list_for_each_entry ( avcpu, &SCHED_PRIV(ops)->vcpu_list, list ) if ( (dom_handle_cmp(avcpu->vc->domain->handle, handle) == 0) && (vcpu_id == avcpu->vc->vcpu_id) ) return avcpu->vc; return NULL; } /** * This function updates the pointer to the Xen VCPU structure for each entry * in the ARINC 653 schedule. * * @param ops Pointer to this instance of the scheduler structure * @return */ static void update_schedule_vcpus(const struct scheduler *ops) { unsigned int i, n_entries = SCHED_PRIV(ops)->num_schedule_entries; for ( i = 0; i < n_entries; i++ ) SCHED_PRIV(ops)->schedule[i].vc = find_vcpu(ops, SCHED_PRIV(ops)->schedule[i].dom_handle, SCHED_PRIV(ops)->schedule[i].vcpu_id); } /** * This function is called by the adjust_global scheduler hook to put * in place a new ARINC653 schedule. * * @param ops Pointer to this instance of the scheduler structure * * @return
    *
  • 0 = success *
  • !0 = error *
*/ static int arinc653_sched_set( const struct scheduler *ops, struct xen_sysctl_arinc653_schedule *schedule) { a653sched_priv_t *sched_priv = SCHED_PRIV(ops); s_time_t total_runtime = 0; unsigned int i; unsigned long flags; int rc = -EINVAL; spin_lock_irqsave(&sched_priv->lock, flags); /* Check for valid major frame and number of schedule entries. */ if ( (schedule->major_frame <= 0) || (schedule->num_sched_entries < 1) || (schedule->num_sched_entries > ARINC653_MAX_DOMAINS_PER_SCHEDULE) ) goto fail; for ( i = 0; i < schedule->num_sched_entries; i++ ) { /* Check for a valid run time. */ if ( schedule->sched_entries[i].runtime <= 0 ) goto fail; /* Add this entry's run time to total run time. */ total_runtime += schedule->sched_entries[i].runtime; } /* * Error if the major frame is not large enough to run all entries as * indicated by comparing the total run time to the major frame length. */ if ( total_runtime > schedule->major_frame ) goto fail; /* Copy the new schedule into place. */ sched_priv->num_schedule_entries = schedule->num_sched_entries; sched_priv->major_frame = schedule->major_frame; for ( i = 0; i < schedule->num_sched_entries; i++ ) { memcpy(sched_priv->schedule[i].dom_handle, schedule->sched_entries[i].dom_handle, sizeof(sched_priv->schedule[i].dom_handle)); sched_priv->schedule[i].vcpu_id = schedule->sched_entries[i].vcpu_id; sched_priv->schedule[i].runtime = schedule->sched_entries[i].runtime; } update_schedule_vcpus(ops); /* * The newly-installed schedule takes effect immediately. We do not even * wait for the current major frame to expire. * * Signal a new major frame to begin. The next major frame is set up by * the do_schedule callback function when it is next invoked. */ sched_priv->next_major_frame = NOW(); rc = 0; fail: spin_unlock_irqrestore(&sched_priv->lock, flags); return rc; } /** * This function is called by the adjust_global scheduler hook to read the * current ARINC 653 schedule * * @param ops Pointer to this instance of the scheduler structure * @return
    *
  • 0 = success *
  • !0 = error *
*/ static int arinc653_sched_get( const struct scheduler *ops, struct xen_sysctl_arinc653_schedule *schedule) { a653sched_priv_t *sched_priv = SCHED_PRIV(ops); unsigned int i; unsigned long flags; spin_lock_irqsave(&sched_priv->lock, flags); schedule->num_sched_entries = sched_priv->num_schedule_entries; schedule->major_frame = sched_priv->major_frame; for ( i = 0; i < sched_priv->num_schedule_entries; i++ ) { memcpy(schedule->sched_entries[i].dom_handle, sched_priv->schedule[i].dom_handle, sizeof(sched_priv->schedule[i].dom_handle)); schedule->sched_entries[i].vcpu_id = sched_priv->schedule[i].vcpu_id; schedule->sched_entries[i].runtime = sched_priv->schedule[i].runtime; } spin_unlock_irqrestore(&sched_priv->lock, flags); return 0; } /************************************************************************** * Scheduler callback functions * **************************************************************************/ /** * This function performs initialization for an instance of the scheduler. * * @param ops Pointer to this instance of the scheduler structure * * @return
    *
  • 0 = success *
  • !0 = error *
*/ static int a653sched_init(struct scheduler *ops) { a653sched_priv_t *prv; prv = xzalloc(a653sched_priv_t); if ( prv == NULL ) return -ENOMEM; ops->sched_data = prv; prv->next_major_frame = 0; spin_lock_init(&prv->lock); INIT_LIST_HEAD(&prv->vcpu_list); return 0; } /** * This function performs deinitialization for an instance of the scheduler * * @param ops Pointer to this instance of the scheduler structure */ static void a653sched_deinit(struct scheduler *ops) { xfree(SCHED_PRIV(ops)); ops->sched_data = NULL; } /** * This function allocates scheduler-specific data for a VCPU * * @param ops Pointer to this instance of the scheduler structure * * @return Pointer to the allocated data */ static void * a653sched_alloc_vdata(const struct scheduler *ops, struct vcpu *vc, void *dd) { a653sched_priv_t *sched_priv = SCHED_PRIV(ops); arinc653_vcpu_t *svc; unsigned int entry; unsigned long flags; /* * Allocate memory for the ARINC 653-specific scheduler data information * associated with the given VCPU (vc). */ svc = xmalloc(arinc653_vcpu_t); if ( svc == NULL ) return NULL; spin_lock_irqsave(&sched_priv->lock, flags); /* * Add every one of dom0's vcpus to the schedule, as long as there are * slots available. */ if ( vc->domain->domain_id == 0 ) { entry = sched_priv->num_schedule_entries; if ( entry < ARINC653_MAX_DOMAINS_PER_SCHEDULE ) { sched_priv->schedule[entry].dom_handle[0] = '\0'; sched_priv->schedule[entry].vcpu_id = vc->vcpu_id; sched_priv->schedule[entry].runtime = DEFAULT_TIMESLICE; sched_priv->schedule[entry].vc = vc; sched_priv->major_frame += DEFAULT_TIMESLICE; ++sched_priv->num_schedule_entries; } } /* * Initialize our ARINC 653 scheduler-specific information for the VCPU. * The VCPU starts "asleep." When Xen is ready for the VCPU to run, it * will call the vcpu_wake scheduler callback function and our scheduler * will mark the VCPU awake. */ svc->vc = vc; svc->awake = 0; if ( !is_idle_vcpu(vc) ) list_add(&svc->list, &SCHED_PRIV(ops)->vcpu_list); update_schedule_vcpus(ops); spin_unlock_irqrestore(&sched_priv->lock, flags); return svc; } /** * This function frees scheduler-specific VCPU data * * @param ops Pointer to this instance of the scheduler structure */ static void a653sched_free_vdata(const struct scheduler *ops, void *priv) { arinc653_vcpu_t *av = priv; if (av == NULL) return; if ( !is_idle_vcpu(av->vc) ) list_del(&av->list); xfree(av); update_schedule_vcpus(ops); } /** * This function allocates scheduler-specific data for a domain * * We do not actually make use of any per-domain data but the hypervisor * expects a non-NULL return value * * @param ops Pointer to this instance of the scheduler structure * * @return Pointer to the allocated data */ static void * a653sched_alloc_domdata(const struct scheduler *ops, struct domain *dom) { /* return a non-NULL value to keep schedule.c happy */ return SCHED_PRIV(ops); } /** * This function frees scheduler-specific data for a domain * * @param ops Pointer to this instance of the scheduler structure */ static void a653sched_free_domdata(const struct scheduler *ops, void *data) { /* nop */ } /** * Xen scheduler callback function to sleep a VCPU * * @param ops Pointer to this instance of the scheduler structure * @param vc Pointer to the VCPU structure for the current domain */ static void a653sched_vcpu_sleep(const struct scheduler *ops, struct vcpu *vc) { if ( AVCPU(vc) != NULL ) AVCPU(vc)->awake = 0; /* * If the VCPU being put to sleep is the same one that is currently * running, raise a softirq to invoke the scheduler to switch domains. */ if ( per_cpu(schedule_data, vc->processor).curr == vc ) cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); } /** * Xen scheduler callback function to wake up a VCPU * * @param ops Pointer to this instance of the scheduler structure * @param vc Pointer to the VCPU structure for the current domain */ static void a653sched_vcpu_wake(const struct scheduler *ops, struct vcpu *vc) { if ( AVCPU(vc) != NULL ) AVCPU(vc)->awake = 1; cpu_raise_softirq(vc->processor, SCHEDULE_SOFTIRQ); } /** * Xen scheduler callback function to select a VCPU to run. * This is the main scheduler routine. * * @param ops Pointer to this instance of the scheduler structure * @param now Current time * * @return Address of the VCPU structure scheduled to be run next * Amount of time to execute the returned VCPU * Flag for whether the VCPU was migrated */ static struct task_slice a653sched_do_schedule( const struct scheduler *ops, s_time_t now, bool_t tasklet_work_scheduled) { struct task_slice ret; /* hold the chosen domain */ struct vcpu * new_task = NULL; static unsigned int sched_index = 0; static s_time_t next_switch_time; a653sched_priv_t *sched_priv = SCHED_PRIV(ops); const unsigned int cpu = smp_processor_id(); unsigned long flags; spin_lock_irqsave(&sched_priv->lock, flags); if ( sched_priv->num_schedule_entries < 1 ) sched_priv->next_major_frame = now + DEFAULT_TIMESLICE; else if ( now >= sched_priv->next_major_frame ) { /* time to enter a new major frame * the first time this function is called, this will be true */ /* start with the first domain in the schedule */ sched_index = 0; sched_priv->next_major_frame = now + sched_priv->major_frame; next_switch_time = now + sched_priv->schedule[0].runtime; } else { while ( (now >= next_switch_time) && (sched_index < sched_priv->num_schedule_entries) ) { /* time to switch to the next domain in this major frame */ sched_index++; next_switch_time += sched_priv->schedule[sched_index].runtime; } } /* * If we exhausted the domains in the schedule and still have time left * in the major frame then switch next at the next major frame. */ if ( sched_index >= sched_priv->num_schedule_entries ) next_switch_time = sched_priv->next_major_frame; /* * If there are more domains to run in the current major frame, set * new_task equal to the address of next domain's VCPU structure. * Otherwise, set new_task equal to the address of the idle task's VCPU * structure. */ new_task = (sched_index < sched_priv->num_schedule_entries) ? sched_priv->schedule[sched_index].vc : IDLETASK(cpu); /* Check to see if the new task can be run (awake & runnable). */ if ( !((new_task != NULL) && (AVCPU(new_task) != NULL) && AVCPU(new_task)->awake && vcpu_runnable(new_task)) ) new_task = IDLETASK(cpu); BUG_ON(new_task == NULL); /* * Check to make sure we did not miss a major frame. * This is a good test for robust partitioning. */ BUG_ON(now >= sched_priv->next_major_frame); spin_unlock_irqrestore(&sched_priv->lock, flags); /* Tasklet work (which runs in idle VCPU context) overrides all else. */ if ( tasklet_work_scheduled ) new_task = IDLETASK(cpu); /* Running this task would result in a migration */ if ( !is_idle_vcpu(new_task) && (new_task->processor != cpu) ) new_task = IDLETASK(cpu); /* * Return the amount of time the next domain has to run and the address * of the selected task's VCPU structure. */ ret.time = next_switch_time - now; ret.task = new_task; ret.migrated = 0; BUG_ON(ret.time <= 0); return ret; } /** * Xen scheduler callback function to select a CPU for the VCPU to run on * * @param ops Pointer to this instance of the scheduler structure * @param v Pointer to the VCPU structure for the current domain * * @return Number of selected physical CPU */ static int a653sched_pick_cpu(const struct scheduler *ops, struct vcpu *vc) { cpumask_t *online; unsigned int cpu; /* * If present, prefer vc's current processor, else * just find the first valid vcpu . */ online = cpupool_domain_cpumask(vc->domain); cpu = cpumask_first(online); if ( cpumask_test_cpu(vc->processor, online) || (cpu >= nr_cpu_ids) ) cpu = vc->processor; return cpu; } /** * Xen scheduler callback to change the scheduler of a cpu * * @param new_ops Pointer to this instance of the scheduler structure * @param cpu The cpu that is changing scheduler * @param pdata scheduler specific PCPU data (we don't have any) * @param vdata scheduler specific VCPU data of the idle vcpu */ static void a653_switch_sched(struct scheduler *new_ops, unsigned int cpu, void *pdata, void *vdata) { struct schedule_data *sd = &per_cpu(schedule_data, cpu); arinc653_vcpu_t *svc = vdata; ASSERT(!pdata && svc && is_idle_vcpu(svc->vc)); idle_vcpu[cpu]->sched_priv = vdata; per_cpu(scheduler, cpu) = new_ops; per_cpu(schedule_data, cpu).sched_priv = NULL; /* no pdata */ /* * (Re?)route the lock to its default location. We actually do not use * it, but if we leave it pointing to where it does now (i.e., the * runqueue lock for this PCPU in the default scheduler), we'd be * causing unnecessary contention on that lock (in cases where it is * shared among multiple PCPUs, like in Credit2 and RTDS). */ sd->schedule_lock = &sd->_lock; } /** * Xen scheduler callback function to perform a global (not domain-specific) * adjustment. It is used by the ARINC 653 scheduler to put in place a new * ARINC 653 schedule or to retrieve the schedule currently in place. * * @param ops Pointer to this instance of the scheduler structure * @param sc Pointer to the scheduler operation specified by Domain 0 */ static int a653sched_adjust_global(const struct scheduler *ops, struct xen_sysctl_scheduler_op *sc) { struct xen_sysctl_arinc653_schedule local_sched; int rc = -EINVAL; switch ( sc->cmd ) { case XEN_SYSCTL_SCHEDOP_putinfo: if ( copy_from_guest(&local_sched, sc->u.sched_arinc653.schedule, 1) ) { rc = -EFAULT; break; } rc = arinc653_sched_set(ops, &local_sched); break; case XEN_SYSCTL_SCHEDOP_getinfo: memset(&local_sched, -1, sizeof(local_sched)); rc = arinc653_sched_get(ops, &local_sched); if ( rc ) break; if ( copy_to_guest(sc->u.sched_arinc653.schedule, &local_sched, 1) ) rc = -EFAULT; break; } return rc; } /** * This structure defines our scheduler for Xen. * The entries tell Xen where to find our scheduler-specific * callback functions. * The symbol must be visible to the rest of Xen at link time. */ static const struct scheduler sched_arinc653_def = { .name = "ARINC 653 Scheduler", .opt_name = "arinc653", .sched_id = XEN_SCHEDULER_ARINC653, .sched_data = NULL, .init = a653sched_init, .deinit = a653sched_deinit, .free_vdata = a653sched_free_vdata, .alloc_vdata = a653sched_alloc_vdata, .free_domdata = a653sched_free_domdata, .alloc_domdata = a653sched_alloc_domdata, .init_domain = NULL, .destroy_domain = NULL, .insert_vcpu = NULL, .remove_vcpu = NULL, .sleep = a653sched_vcpu_sleep, .wake = a653sched_vcpu_wake, .yield = NULL, .context_saved = NULL, .do_schedule = a653sched_do_schedule, .pick_cpu = a653sched_pick_cpu, .switch_sched = a653_switch_sched, .adjust = NULL, .adjust_global = a653sched_adjust_global, .dump_settings = NULL, .dump_cpu_state = NULL, .tick_suspend = NULL, .tick_resume = NULL, }; REGISTER_SCHEDULER(sched_arinc653_def); /* * Local variables: * mode: C * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */