/******************************************************************************
 * wait.c
 *
 * Sleep in hypervisor context for some event to occur.
 *
 * Copyright (c) 2010, Keir Fraser <keir@xen.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/wait.h>
#include <xen/errno.h>

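/*
 * Typical waiter, as wrapped by the wait_event() macro in xen/wait.h
 * (sketch only; see that header for the canonical definition):
 *
 *     for ( ; ; )
 *     {
 *         prepare_to_wait(&wq);
 *         if ( condition )
 *             break;
 *         wait();
 *     }
 *     finish_wait(&wq);
 *
 * The waker makes the condition true and then calls wake_up_one(&wq) or
 * wake_up_all(&wq).
 */
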
struct waitqueue_vcpu {
    struct list_head list;
    struct vcpu *vcpu;
#ifdef CONFIG_X86
    /*
     * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
     * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
     */
    void *esp;
    char *stack;
    cpumask_t saved_affinity;
    unsigned int wakeup_cpu;
#endif
};

int init_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = xzalloc(struct waitqueue_vcpu);
    if ( wqv == NULL )
        return -ENOMEM;

#ifdef CONFIG_X86
    wqv->stack = alloc_xenheap_page();
    if ( wqv->stack == NULL )
    {
        xfree(wqv);
        return -ENOMEM;
    }
#endif

    INIT_LIST_HEAD(&wqv->list);
    wqv->vcpu = v;

    v->waitqueue_vcpu = wqv;

    return 0;
}

void destroy_waitqueue_vcpu(struct vcpu *v)
{
    struct waitqueue_vcpu *wqv;

    wqv = v->waitqueue_vcpu;
    if ( wqv == NULL )
        return;

    BUG_ON(!list_empty(&wqv->list));
#ifdef CONFIG_X86
    free_xenheap_page(wqv->stack);
#endif
    xfree(wqv);

    v->waitqueue_vcpu = NULL;
}

void init_waitqueue_head(struct waitqueue_head *wq)
{
    spin_lock_init(&wq->lock);
    INIT_LIST_HEAD(&wq->list);
}

void destroy_waitqueue_head(struct waitqueue_head *wq)
{
    wake_up_all(wq);
}

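/*
 * Wake up to @nr waiters: each is removed from the queue and unpaused, and
 * the domain reference taken by prepare_to_wait() is dropped again.
 */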
void wake_up_nr(struct waitqueue_head *wq, unsigned int nr)
{
    struct waitqueue_vcpu *wqv;

    spin_lock(&wq->lock);

    while ( !list_empty(&wq->list) && nr-- )
    {
        wqv = list_entry(wq->list.next, struct waitqueue_vcpu, list);
        list_del_init(&wqv->list);
        vcpu_unpause(wqv->vcpu);
        put_domain(wqv->vcpu->domain);
    }

    spin_unlock(&wq->lock);
}

void wake_up_one(struct waitqueue_head *wq)
{
    wake_up_nr(wq, 1);
}

void wake_up_all(struct waitqueue_head *wq)
{
    wake_up_nr(wq, UINT_MAX);
}

#ifdef CONFIG_X86

static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
{
    struct cpu_info *cpu_info = get_cpu_info();
    struct vcpu *curr = current;
    unsigned long dummy;
    u32 entry_vector = cpu_info->guest_cpu_user_regs.entry_vector;

    cpu_info->guest_cpu_user_regs.entry_vector &= ~TRAP_regs_partial;
    ASSERT(wqv->esp == 0);

    /* Save current VCPU affinity; force wakeup on *this* CPU only. */
    wqv->wakeup_cpu = smp_processor_id();
    cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
    if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
    {
        gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
        domain_crash_synchronous();
    }

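    /*
     * Save the general purpose registers and record the current stack
     * pointer in wqv->esp, then copy the live hypervisor stack (from the
     * stack pointer up to the cpu_info block at the top of the stack page)
     * into the wqv->stack buffer.  The "call 1f" / "addq $2f-1b" sequence
     * leaves the address of label 2 at the base of that saved image, so
     * check_wakeup_from_wait() can later jump there with source and
     * destination swapped and have the same "rep movsb" restore the stack
     * before the registers are popped again.  If more than PAGE_SIZE bytes
     * would need saving, the copy is skipped and wqv->esp stays zero to
     * signal failure.
     */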
    asm volatile (
        "push %%rax; push %%rbx; push %%rdx; "
        "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
        "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
        "1: addq $2f-1b,(%%rsp); sub %%esp,%%ecx; cmp %3,%%ecx; ja 3f; "
        "mov %%rsp,%%rsi; 2: rep movsb; mov %%rsp,%%rsi; 3: pop %%rax; "
        "pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
        "pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
        "pop %%rbp; pop %%rdx; pop %%rbx; pop %%rax"
        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
        : "i" (PAGE_SIZE), "0" (0), "1" (cpu_info), "2" (wqv->stack)
        : "memory" );

    if ( unlikely(wqv->esp == 0) )
    {
        gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
        domain_crash_synchronous();
    }

    cpu_info->guest_cpu_user_regs.entry_vector = entry_vector;
}

static void __finish_wait(struct waitqueue_vcpu *wqv)
{
    wqv->esp = NULL;
    (void)vcpu_set_hard_affinity(current, &wqv->saved_affinity);
}

void check_wakeup_from_wait(void)
{
    struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;

    ASSERT(list_empty(&wqv->list));

    if ( likely(wqv->esp == NULL) )
        return;

    /* Check if we woke up on the wrong CPU. */
    if ( unlikely(smp_processor_id() != wqv->wakeup_cpu) )
    {
        /* Re-set VCPU affinity and re-enter the scheduler. */
        struct vcpu *curr = current;
        cpumask_copy(&wqv->saved_affinity, curr->cpu_hard_affinity);
        if ( vcpu_set_hard_affinity(curr, cpumask_of(wqv->wakeup_cpu)) )
        {
            gdprintk(XENLOG_ERR, "Unable to set vcpu affinity\n");
            domain_crash_synchronous();
        }
        wait(); /* takes us back into the scheduler */
    }

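    /*
     * Resume the saved context: switch the stack pointer back to the value
     * recorded in wqv->esp and jump to the address stored at the base of
     * wqv->stack (label 2 inside __prepare_to_wait()'s asm).  The
     * "rep movsb" there copies the saved stack image (%rcx holds its size)
     * from wqv->stack back onto the real stack, the registers are popped,
     * and execution continues inside __prepare_to_wait() as if its asm
     * block had just completed.
     */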
    asm volatile (
        "mov %1,%%"__OP"sp; jmp *(%0)"
        : : "S" (wqv->stack), "D" (wqv->esp),
        "c" ((char *)get_cpu_info() - (char *)wqv->esp)
        : "memory" );
}

#else /* !CONFIG_X86 */

#define __prepare_to_wait(wqv) ((void)0)
#define __finish_wait(wqv) ((void)0)

#endif

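/*
 * Queue the current vcpu on @wq.  The vcpu is paused asynchronously (it
 * keeps running until it next enters the scheduler) and a reference is
 * taken on its domain so the domain cannot be torn down while it sits on
 * the queue; wake_up_nr() or finish_wait() drops both again.
 */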
void prepare_to_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    ASSERT_NOT_IN_ATOMIC();
    __prepare_to_wait(wqv);

    ASSERT(list_empty(&wqv->list));
    spin_lock(&wq->lock);
    list_add_tail(&wqv->list, &wq->list);
    vcpu_pause_nosync(curr);
    get_knownalive_domain(curr->domain);
    spin_unlock(&wq->lock);
}

void finish_wait(struct waitqueue_head *wq)
{
    struct vcpu *curr = current;
    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;

    __finish_wait(wqv);

    if ( list_empty(&wqv->list) )
        return;

    spin_lock(&wq->lock);
    if ( !list_empty(&wqv->list) )
    {
        list_del_init(&wqv->list);
        vcpu_unpause(curr);
        put_domain(curr->domain);
    }
    spin_unlock(&wq->lock);
}