diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c
index c419fb68abf..155d8669e91 100644
--- a/libcilkrts/runtime/os-unix.c
+++ b/libcilkrts/runtime/os-unix.c
@@ -55,6 +55,7 @@
 #if defined __linux__
 #   include <sys/sysinfo.h>
 #   include <sys/syscall.h>
+#   include <sched.h>
 
 #elif defined __APPLE__
 #   include <sys/sysctl.h>
@@ -452,28 +453,19 @@ COMMON_SYSDEP void __cilkrts_sleep(void)
 
 COMMON_SYSDEP void __cilkrts_yield(void)
 {
-#if defined(__ANDROID__)  || \
-    defined(__APPLE__)    || \
-    defined(__CYGWIN__)   || \
-    defined(__FreeBSD__)  || \
-    defined(__VXWORKS__)  || \
-    (defined(__sun__) && defined(__svr4__))
-    // Call sched_yield to yield quantum.  I'm not sure why we
-    // don't do this on Linux also.
-    sched_yield();
-#elif defined(__MIC__)
+#if defined(__MIC__)
     // On MIC, pthread_yield() really trashes things.  Arch's measurements
     // showed that calling _mm_delay_32() (or doing nothing) was a better
     // option.  Delaying 1024 clock cycles is a reasonable compromise between
     // giving up the processor and latency starting up when work becomes
     // available
     _mm_delay_32(1024);
-#elif defined(__linux__)
-    // On Linux, call pthread_yield (which in turn will call sched_yield)
-    // to yield quantum.
+#elif defined(__sun__) && !defined(__svr4__)
+    // Old (non-SVR4) SunOS: call pthread_yield() to yield a quantum.
     pthread_yield();
 #else
-# error "Unsupported architecture"
+    // All remaining platforms: call sched_yield() to yield a quantum.
+    sched_yield();
 #endif
 }