From 94fafe580e31ad6b3a83b343205b5ddfcae08745 Mon Sep 17 00:00:00 2001 From: Zu-shi Date: Tue, 22 Oct 2024 17:53:48 -0400 Subject: [PATCH] For NPU Sample, add flexibility in device creation options and fix Generic ML Device logic (#624) * For NPU Sample, add flexibility in device creation options and fix Generic ML Device logic Expand NPU sample's capabilities for creating devices based on attributes. More specifically, allow options to filter based on allowed, unallowed, and required attributes. Then add some flags for the most commonly needed options for this sample. The logic for how each interacts with CreateAdapterList is a bit unconventional since CreateAdapterList ANDs the passed in attributes. Tested locally on Intel NPU. * Fix comment typo in NPU Sample * Set D3D Feature Level Requirement to Generic, Optimize Adapter Selection * Add flags to filter for GENERIC_ML only without COMPUTE * Improve labelling of flags & lists for developer friendliness * Minor smaller PR feedback in comments & loop optimization * address initial PR feedback * Simplify adapter discovery logic * Update Package Versions, Simplify Device-Querying Logic * Address PR feedback * Additional PR feedback * Improve documentation to add context for NPU creation * minor fix to sample output * Address Initial PR feedback * Create adapter under COMPUTE feature level if CORE adapters are not available * Add comments noting that HMODULEs should be freed after usage --------- Co-authored-by: Zu Shi --- README.md | 1 + .../DirectMLNpuInference.vcxproj | 8 +- Samples/DirectMLNpuInference/main.cpp | 110 +++++++++++++----- Samples/DirectMLNpuInference/packages.config | 2 +- 4 files changed, 86 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 5be72073..a198b0b1 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ See the following sections for more information: DirectML C++ sample code is available under [Samples](./Samples). 
* [HelloDirectML](./Samples/HelloDirectML): A minimal "hello world" application that executes a single DirectML operator. +* [DirectMLNpuInference](./Samples/DirectMLNpuInference): A sample that showcases how to utilize NPU hardware with DirectML. * [DirectMLSuperResolution](./Samples/DirectMLSuperResolution): A sample that uses DirectML to execute a basic super-resolution model to upscale video from 540p to 1080p in real time. * [yolov4](./Samples/yolov4): YOLOv4 is an object detection model capable of recognizing up to 80 different classes of objects in an image. This sample contains a complete end-to-end implementation of the model using DirectML, and is able to run in real time on a user-provided video stream. diff --git a/Samples/DirectMLNpuInference/DirectMLNpuInference.vcxproj b/Samples/DirectMLNpuInference/DirectMLNpuInference.vcxproj index b55bce82..9e6c2da2 100644 --- a/Samples/DirectMLNpuInference/DirectMLNpuInference.vcxproj +++ b/Samples/DirectMLNpuInference/DirectMLNpuInference.vcxproj @@ -1,6 +1,6 @@ - + @@ -444,7 +444,7 @@ - + @@ -453,7 +453,7 @@ - - + + \ No newline at end of file diff --git a/Samples/DirectMLNpuInference/main.cpp b/Samples/DirectMLNpuInference/main.cpp index b958d373..ef68500b 100644 --- a/Samples/DirectMLNpuInference/main.cpp +++ b/Samples/DirectMLNpuInference/main.cpp @@ -1,4 +1,4 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
#include "pch.h" @@ -13,13 +13,52 @@ using Microsoft::WRL::ComPtr; -void InitializeDirectML(ID3D12Device1** d3dDeviceOut, ID3D12CommandQueue** commandQueueOut, IDMLDevice** dmlDeviceOut) { - // Whether to skip adapters which support Graphics in order to target NPU for testing - bool forceComputeOnlyDevice = true; - bool forceGenericMLDevice = false; - +bool TryGetProperty(IDXCoreAdapter* adapter, DXCoreAdapterProperty prop, std::string& outputValue) +{ + if (adapter->IsPropertySupported(prop)) + { + size_t propSize; + THROW_IF_FAILED(adapter->GetPropertySize(prop, &propSize)); + + outputValue.resize(propSize); + THROW_IF_FAILED(adapter->GetProperty(prop, propSize, outputValue.data())); + + // Trim any trailing nul characters. + while (!outputValue.empty() && outputValue.back() == '\0') + { + outputValue.pop_back(); + } + + return true; + } + return false; +} + +// Returns nullptr if not found. +void GetNonGraphicsAdapter(IDXCoreAdapterList* adapterList, IDXCoreAdapter** outAdapter) +{ + for (uint32_t i = 0, adapterCount = adapterList->GetAdapterCount(); i < adapterCount; i++) + { + ComPtr possibleAdapter; + THROW_IF_FAILED(adapterList->GetAdapter(i, IID_PPV_ARGS(&possibleAdapter))); + + if (!possibleAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GRAPHICS)) + { + *outAdapter = possibleAdapter.Detach(); + return; + } + } + *outAdapter = nullptr; +} + +void InitializeDirectML(ID3D12Device1** d3dDeviceOut, ID3D12CommandQueue** commandQueueOut, IDMLDevice** dmlDeviceOut) +{ + // Create Adapter Factory ComPtr factory; + + // Note: this module is not currently properly freed. Outside of sample usage, this module should be freed e.g. with an explicit free or through wil::unique_hmodule. 
HMODULE dxCoreModule = LoadLibraryW(L"DXCore.dll"); + if (dxCoreModule) { auto dxcoreCreateAdapterFactory = reinterpret_cast( @@ -30,40 +69,47 @@ void InitializeDirectML(ID3D12Device1** d3dDeviceOut, ID3D12CommandQueue** comma dxcoreCreateAdapterFactory(IID_PPV_ARGS(&factory)); } } - // Create the DXCore Adapter + + // Create the DXCore Adapter, for the purposes of selecting NPU we look for (!GRAPHICS && (GENERIC_ML || CORE_COMPUTE)) ComPtr adapter; + ComPtr adapterList; + D3D_FEATURE_LEVEL featureLevel = D3D_FEATURE_LEVEL_1_0_GENERIC; + if (factory) { - const GUID dxGUIDs[] = { DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE }; - ComPtr adapterList; - THROW_IF_FAILED(factory->CreateAdapterList(ARRAYSIZE(dxGUIDs), dxGUIDs, IID_PPV_ARGS(&adapterList))); - for (uint32_t i = 0, adapterCount = adapterList->GetAdapterCount(); i < adapterCount; i++) + THROW_IF_FAILED(factory->CreateAdapterList(1, &DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML, IID_PPV_ARGS(&adapterList))); + + if (adapterList->GetAdapterCount() > 0) { - ComPtr currentGpuAdapter; - THROW_IF_FAILED(adapterList->GetAdapter(static_cast(i), IID_PPV_ARGS(&currentGpuAdapter))); + GetNonGraphicsAdapter(adapterList.Get(), adapter.GetAddressOf()); + } + + if (!adapter) + { + featureLevel = D3D_FEATURE_LEVEL_1_0_CORE; + THROW_IF_FAILED(factory->CreateAdapterList(1, &DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE, IID_PPV_ARGS(&adapterList))); + GetNonGraphicsAdapter(adapterList.Get(), adapter.GetAddressOf()); + } + } - if (!forceComputeOnlyDevice && !forceGenericMLDevice) - { - // No device restrictions - adapter = std::move(currentGpuAdapter); - break; - } - else if (forceComputeOnlyDevice && currentGpuAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_CORE_COMPUTE)) - { - adapter = std::move(currentGpuAdapter); - break; - } - else if (forceGenericMLDevice && currentGpuAdapter->IsAttributeSupported(DXCORE_ADAPTER_ATTRIBUTE_D3D12_GENERIC_ML)) - { - adapter = std::move(currentGpuAdapter); - break; - } + if (adapter) + { + 
std::string adapterName; + if (TryGetProperty(adapter.Get(), DXCoreAdapterProperty::DriverDescription, adapterName)) + { + printf("Successfully found adapter %s\n", adapterName.c_str()); + } + else + { + printf("Failed to get adapter description.\n"); } } + // Create the D3D12 Device ComPtr d3dDevice; if (adapter) { + // Note: this module is not currently properly freed. Outside of sample usage, this module should be freed e.g. with an explicit free or through wil::unique_hmodule. HMODULE d3d12Module = LoadLibraryW(L"d3d12.dll"); if (d3d12Module) { @@ -72,10 +118,12 @@ void InitializeDirectML(ID3D12Device1** d3dDeviceOut, ID3D12CommandQueue** comma ); if (d3d12CreateDevice) { - THROW_IF_FAILED(d3d12CreateDevice(adapter.Get(), D3D_FEATURE_LEVEL_1_0_CORE, IID_PPV_ARGS(&d3dDevice))); + // The GENERIC feature level minimum allows for the creation of both compute only and generic ML devices. + THROW_IF_FAILED(d3d12CreateDevice(adapter.Get(), featureLevel, IID_PPV_ARGS(&d3dDevice))); } } } + // Create the DML Device and D3D12 Command Queue ComPtr dmlDevice; ComPtr commandQueue; @@ -86,6 +134,8 @@ void InitializeDirectML(ID3D12Device1** d3dDeviceOut, ID3D12CommandQueue** comma THROW_IF_FAILED(d3dDevice->CreateCommandQueue( &queueDesc, IID_PPV_ARGS(commandQueue.ReleaseAndGetAddressOf()))); + + // Note: this module is not currently properly freed. Outside of sample usage, this module should be freed e.g. with an explicit free or through wil::unique_hmodule. HMODULE dmlModule = LoadLibraryW(L"DirectML.dll"); if (dmlModule) { diff --git a/Samples/DirectMLNpuInference/packages.config b/Samples/DirectMLNpuInference/packages.config index 5eca217a..b5c8e677 100644 --- a/Samples/DirectMLNpuInference/packages.config +++ b/Samples/DirectMLNpuInference/packages.config @@ -1,6 +1,6 @@  - + \ No newline at end of file