From: Jan Beulich Subject: evtchn: arrange for preemption in evtchn_reset() Like for evtchn_destroy() looping over all possible event channels to close them can take a significant amount of time. Unlike done there, we can't alter domain properties (i.e. d->valid_evtchns) here. Borrow, in a lightweight form, the paging domctl continuation concept, redirecting the continuations to different sub-ops. Just like there this is to be able to allow for predictable overall results of the involved sub-ops: Racing requests should either complete or be refused. Note that a domain can't interfere with an already started (by a remote domain) reset, due to being paused. It can prevent a remote reset from happening by leaving a reset unfinished, but that's only going to affect itself. This is part of XSA-344. Signed-off-by: Jan Beulich Acked-by: Julien Grall Reviewed-by: Stefano Stabellini --- a/xen/common/domain.c +++ b/xen/common/domain.c @@ -1214,7 +1214,7 @@ void domain_unpause_except_self(struct d domain_unpause(d); } -int domain_soft_reset(struct domain *d) +int domain_soft_reset(struct domain *d, bool resuming) { struct vcpu *v; int rc; @@ -1228,7 +1228,7 @@ int domain_soft_reset(struct domain *d) } spin_unlock(&d->shutdown_lock); - rc = evtchn_reset(d); + rc = evtchn_reset(d, resuming); if ( rc ) return rc; --- a/xen/common/domctl.c +++ b/xen/common/domctl.c @@ -572,12 +572,22 @@ long do_domctl(XEN_GUEST_HANDLE_PARAM(xe } case XEN_DOMCTL_soft_reset: + case XEN_DOMCTL_soft_reset_cont: if ( d == current->domain ) /* no domain_pause() */ { ret = -EINVAL; break; } - ret = domain_soft_reset(d); + ret = domain_soft_reset(d, op->cmd == XEN_DOMCTL_soft_reset_cont); + if ( ret == -ERESTART ) + { + op->cmd = XEN_DOMCTL_soft_reset_cont; + if ( !__copy_field_to_guest(u_domctl, op, cmd) ) + ret = hypercall_create_continuation(__HYPERVISOR_domctl, + "h", u_domctl); + else + ret = -EFAULT; + } break; case XEN_DOMCTL_destroydomain: --- a/xen/common/event_channel.c +++ b/xen/common/event_channel.c @@ -1057,7 +1057,7 @@ int evtchn_unmask(unsigned int port) return 0; } -int evtchn_reset(struct domain *d) +int evtchn_reset(struct domain *d, bool resuming) { unsigned int i; int rc = 0; @@ -1065,11 +1065,40 @@ int evtchn_reset(struct domain *d) if ( d != current->domain && !d->controller_pause_count ) return -EINVAL; - for ( i = 0; port_is_valid(d, i); i++ ) + spin_lock(&d->event_lock); + + /* + * If we are resuming, then start where we stopped. Otherwise, check + * that a reset operation is not already in progress, and if none is, + * record that this is now the case. + */ + i = resuming ? d->next_evtchn : !d->next_evtchn; + if ( i > d->next_evtchn ) + d->next_evtchn = i; + + spin_unlock(&d->event_lock); + + if ( !i ) + return -EBUSY; + + for ( ; port_is_valid(d, i); i++ ) + { evtchn_close(d, i, 1); + /* NB: Choice of frequency is arbitrary. */ + if ( !(i & 0x3f) && hypercall_preempt_check() ) + { + spin_lock(&d->event_lock); + d->next_evtchn = i; + spin_unlock(&d->event_lock); + return -ERESTART; + } + } + spin_lock(&d->event_lock); + d->next_evtchn = 0; + if ( d->active_evtchns > d->xen_evtchns ) rc = -EAGAIN; else if ( d->evtchn_fifo ) @@ -1204,7 +1233,8 @@ long do_event_channel_op(int cmd, XEN_GU break; } - case EVTCHNOP_reset: { + case EVTCHNOP_reset: + case EVTCHNOP_reset_cont: { struct evtchn_reset reset; struct domain *d; @@ -1217,9 +1247,13 @@ long do_event_channel_op(int cmd, XEN_GU rc = xsm_evtchn_reset(XSM_TARGET, current->domain, d); if ( !rc ) - rc = evtchn_reset(d); + rc = evtchn_reset(d, cmd == EVTCHNOP_reset_cont); rcu_unlock_domain(d); + + if ( rc == -ERESTART ) + rc = hypercall_create_continuation(__HYPERVISOR_event_channel_op, + "ih", EVTCHNOP_reset_cont, arg); break; } --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -1152,7 +1152,10 @@ struct xen_domctl { #define XEN_DOMCTL_iomem_permission 20 #define XEN_DOMCTL_ioport_permission 21 #define XEN_DOMCTL_hypercall_init 22 -#define XEN_DOMCTL_arch_setup 23 /* Obsolete IA64 only */ +#ifdef __XEN__ +/* #define XEN_DOMCTL_arch_setup 23 Obsolete IA64 only */ +#define XEN_DOMCTL_soft_reset_cont 23 +#endif #define XEN_DOMCTL_settimeoffset 24 #define XEN_DOMCTL_getvcpuaffinity 25 #define XEN_DOMCTL_real_mode_area 26 /* Obsolete PPC only */ --- a/xen/include/public/event_channel.h +++ b/xen/include/public/event_channel.h @@ -74,6 +74,9 @@ #define EVTCHNOP_init_control 11 #define EVTCHNOP_expand_array 12 #define EVTCHNOP_set_priority 13 +#ifdef __XEN__ +#define EVTCHNOP_reset_cont 14 +#endif /* ` } */ typedef uint32_t evtchn_port_t; --- a/xen/include/xen/event.h +++ b/xen/include/xen/event.h @@ -171,7 +171,7 @@ void evtchn_check_pollers(struct domain void evtchn_2l_init(struct domain *d); /* Close all event channels and reset to 2-level ABI. */ -int evtchn_reset(struct domain *d); +int evtchn_reset(struct domain *d, bool resuming); /* * Low-level event channel port ops. --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -394,6 +394,8 @@ struct domain * EVTCHNOP_reset). Read/write access like for active_evtchns. */ unsigned int xen_evtchns; + /* Port to resume from in evtchn_reset(), when in a continuation. */ + unsigned int next_evtchn; spinlock_t event_lock; const struct evtchn_port_ops *evtchn_port_ops; struct evtchn_fifo_domain *evtchn_fifo; @@ -663,7 +665,7 @@ int domain_shutdown(struct domain *d, u8 void domain_resume(struct domain *d); void domain_pause_for_debugger(void); -int domain_soft_reset(struct domain *d); +int domain_soft_reset(struct domain *d, bool resuming); int vcpu_start_shutdown_deferral(struct vcpu *v); void vcpu_end_shutdown_deferral(struct vcpu *v);