//===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file provides the Win32 specific implementation of Threading functions. // //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Windows/WindowsSupport.h" #include #include // Windows will at times define MemoryFence. #ifdef MemoryFence #undef MemoryFence #endif static unsigned __stdcall threadFuncSync(void *Arg) { SyncThreadInfo *TI = static_cast(Arg); TI->UserFn(TI->UserData); return 0; } static unsigned __stdcall threadFuncAsync(void *Arg) { std::unique_ptr Info(static_cast(Arg)); (*Info)(); return 0; } static void llvm_execute_on_thread_impl(unsigned (__stdcall *ThreadFunc)(void *), void *Arg, llvm::Optional StackSizeInBytes, JoiningPolicy JP) { HANDLE hThread = (HANDLE)::_beginthreadex( NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL); if (!hThread) { ReportLastErrorFatal("_beginthreadex failed"); } if (JP == JoiningPolicy::Join) { if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) { ReportLastErrorFatal("WaitForSingleObject failed"); } } if (::CloseHandle(hThread) == FALSE) { ReportLastErrorFatal("CloseHandle failed"); } } uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); } uint32_t llvm::get_max_thread_name_length() { return 0; } #if defined(_MSC_VER) static void SetThreadName(DWORD Id, LPCSTR Name) { constexpr DWORD MS_VC_EXCEPTION = 0x406D1388; #pragma pack(push, 8) struct THREADNAME_INFO { DWORD dwType; // Must be 0x1000. LPCSTR szName; // Pointer to thread name DWORD dwThreadId; // Thread ID (-1 == current thread) DWORD dwFlags; // Reserved. Do not use. }; #pragma pack(pop) THREADNAME_INFO info; info.dwType = 0x1000; info.szName = Name; info.dwThreadId = Id; info.dwFlags = 0; __try { ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR *)&info); } __except (EXCEPTION_EXECUTE_HANDLER) { } } #endif void llvm::set_thread_name(const Twine &Name) { #if defined(_MSC_VER) // Make sure the input is null terminated. SmallString<64> Storage; StringRef NameStr = Name.toNullTerminatedStringRef(Storage); SetThreadName(::GetCurrentThreadId(), NameStr.data()); #endif } void llvm::get_thread_name(SmallVectorImpl &Name) { // "Name" is not an inherent property of a thread on Windows. In fact, when // you "set" the name, you are only firing a one-time message to a debugger // which it interprets as a program setting its threads' name. We may be // able to get fancy by creating a TLS entry when someone calls // set_thread_name so that subsequent calls to get_thread_name return this // value. Name.clear(); } SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) { // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority // Begin background processing mode. The system lowers the resource scheduling // priorities of the thread so that it can perform background work without // significantly affecting activity in the foreground. // End background processing mode. The system restores the resource scheduling // priorities of the thread as they were before the thread entered background // processing mode. return SetThreadPriority(GetCurrentThread(), Priority == ThreadPriority::Background ? THREAD_MODE_BACKGROUND_BEGIN : THREAD_MODE_BACKGROUND_END) ? SetThreadPriorityResult::SUCCESS : SetThreadPriorityResult::FAILURE; } struct ProcessorGroup { unsigned ID; unsigned AllThreads; unsigned UsableThreads; unsigned ThreadsPerCore; uint64_t Affinity; unsigned useableCores() const { return std::max(1U, UsableThreads / ThreadsPerCore); } }; template static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) { DWORD Len = 0; BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len); if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) { return false; } auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len); R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len); if (R) { auto *End = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len); for (auto *Curr = Info; Curr < End; Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr + Curr->Size)) { if (Curr->Relationship != Relationship) continue; Fn(Curr); } } free(Info); return true; } static ArrayRef getProcessorGroups() { auto computeGroups = []() { SmallVector Groups; auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { GROUP_RELATIONSHIP &El = ProcInfo->Group; for (unsigned J = 0; J < El.ActiveGroupCount; ++J) { ProcessorGroup G; G.ID = Groups.size(); G.AllThreads = El.GroupInfo[J].MaximumProcessorCount; G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount; assert(G.UsableThreads <= 64); G.Affinity = El.GroupInfo[J].ActiveProcessorMask; Groups.push_back(G); } }; if (!IterateProcInfo(RelationGroup, HandleGroup)) return std::vector(); auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) { PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor; assert(El.GroupCount == 1); unsigned NumHyperThreads = 1; // If the flag is set, each core supports more than one hyper-thread. if (El.Flags & LTP_PC_SMT) NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count(); unsigned I = El.GroupMask[0].Group; Groups[I].ThreadsPerCore = NumHyperThreads; }; if (!IterateProcInfo(RelationProcessorCore, HandleProc)) return std::vector(); // If there's an affinity mask set, assume the user wants to constrain the // current process to only a single CPU group. On Windows, it is not // possible for affinity masks to cross CPU group boundaries. DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, &SystemAffinityMask) && ProcessAffinityMask != SystemAffinityMask) { // We don't expect more that 4 CPU groups on Windows (256 processors). USHORT GroupCount = 4; USHORT GroupArray[4]{}; if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, GroupArray)) { assert(GroupCount == 1 && "On startup, a program is expected to be assigned only to " "one processor group!"); unsigned CurrentGroupID = GroupArray[0]; ProcessorGroup NewG{Groups[CurrentGroupID]}; NewG.Affinity = ProcessAffinityMask; NewG.UsableThreads = countPopulation(ProcessAffinityMask); Groups.clear(); Groups.push_back(NewG); } } return std::vector(Groups.begin(), Groups.end()); }; static auto Groups = computeGroups(); return ArrayRef(Groups); } template static unsigned aggregate(R &&Range, UnaryPredicate P) { unsigned I{}; for (const auto &It : Range) I += P(It); return I; } // for sys::getHostNumPhysicalCores int computeHostNumPhysicalCores() { static unsigned Cores = aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { return G.UsableThreads / G.ThreadsPerCore; }); return Cores; } int computeHostNumHardwareThreads() { static unsigned Threads = aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { return G.UsableThreads; }); return Threads; } // Finds the proper CPU socket where a thread number should go. Returns 'None' // if the thread shall remain on the actual CPU socket. Optional llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const { ArrayRef Groups = getProcessorGroups(); // Only one CPU socket in the system or process affinity was set, no need to // move the thread(s) to another CPU socket. if (Groups.size() <= 1) return None; // We ask for less threads than there are hardware threads per CPU socket, no // need to dispatch threads to other CPU sockets. unsigned MaxThreadsPerSocket = UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores(); if (compute_thread_count() <= MaxThreadsPerSocket) return None; assert(ThreadPoolNum < compute_thread_count() && "The thread index is not within thread strategy's range!"); // Assumes the same number of hardware threads per CPU socket. return (ThreadPoolNum * Groups.size()) / compute_thread_count(); } // Assign the current thread to a more appropriate CPU socket or CPU group void llvm::ThreadPoolStrategy::apply_thread_strategy( unsigned ThreadPoolNum) const { Optional Socket = compute_cpu_socket(ThreadPoolNum); if (!Socket) return; ArrayRef Groups = getProcessorGroups(); GROUP_AFFINITY Affinity{}; Affinity.Group = Groups[*Socket].ID; Affinity.Mask = Groups[*Socket].Affinity; SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr); } llvm::BitVector llvm::get_thread_affinity_mask() { GROUP_AFFINITY Affinity{}; GetThreadGroupAffinity(GetCurrentThread(), &Affinity); static unsigned All = aggregate(getProcessorGroups(), [](const ProcessorGroup &G) { return G.AllThreads; }); unsigned StartOffset = aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) { return G.ID < Affinity.Group ? G.AllThreads : 0; }); llvm::BitVector V; V.resize(All); for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) { if ((Affinity.Mask >> I) & 1) V.set(StartOffset + I); } return V; } unsigned llvm::get_cpus() { return getProcessorGroups().size(); }