转载:UE5 中的computer shader使用 - 知乎 (zhihu.com)
目标
- 通过蓝图输入参数,经过Compute Shader做矩阵运算
流程
- 新建插件
- 插件设置
- 声明和GPU内存对齐的参数结构
- 声明Compute Shader结构
- 参数绑定
- 着色器实现
- 分配 work groups
- 计算和输出
- 额外添加参数
1. 新建插件
新建空白插件即可,按正常的插件创建流程操作,具体可参考官方文档。
2. 插件设置
XXX.Build.cs
// Registers the modules listed below as private dependencies of this module.
PrivateDependencyModuleNames.AddRange(
new string[]
{
"CoreUObject",
"Engine",
"Renderer",
"RenderCore",
"RHI",
"Projects"
// ... add private dependencies that you statically link with here ...
}
);
XXX.uplugin
"Modules": [
{
"Name": "CS_Test",
"Type": "Runtime",
"LoadingPhase": "PostConfigInit"
}
]
3. 声明和GPU内存对齐的参数结构
/**
 * CPU-side parameters for one dispatch of MySimpleComputeShader.
 *
 * The constructor only sets the dispatch extents; Input and Output are
 * zero-initialized so that a freshly constructed instance never exposes
 * indeterminate values.
 */
struct CS_TEST_API FMySimpleComputeShaderDispatchParams
{
    // Dispatch extents along each axis; the dispatcher turns them into a group count.
    int X;
    int Y;
    int Z;

    // The two integers uploaded to the shader's Input buffer.
    int Input[2] = {0, 0};

    // Zero by default; the dispatch code shown in this file does not read this field.
    int Output = 0;

    FMySimpleComputeShaderDispatchParams(int x, int y, int z)
        : X(x)
        , Y(y)
        , Z(z)
    {
    }
};
4. 声明Compute Shader结构和参数绑定
MySimpleComputeShader.cpp
#include "MySimpleComputeShader.h"
#include "../../../Shaders/Public/MySimpleComputeShader.h"
#include "PixelShaderUtils.h"
#include "RenderCore/Public/RenderGraphUtils.h"
#include "MeshPassProcessor.inl"
#include "StaticMeshResources.h"
#include "DynamicMeshBuilder.h"
#include "RenderGraphResources.h"
#include "GlobalShader.h"
#include "UnifiedBuffer.h"
#include "CanvasTypes.h"
#include "MaterialShader.h"
// Stats group and cycle counter for this shader. The cycle counter is the one
// referenced by SCOPE_CYCLE_COUNTER inside DispatchRenderThread.
DECLARE_STATS_GROUP(TEXT("MySimpleComputeShader"), STATGROUP_MySimpleComputeShader, STATCAT_Advanced);
DECLARE_CYCLE_STAT(TEXT("MySimpleComputeShader Execute"), STAT_MySimpleComputeShader_Execute, STATGROUP_MySimpleComputeShader);
// Global compute shader type for MySimpleComputeShader.
// This class carries our parameter declarations and acts as the bridge between cpp and HLSL:
// it derives from FGlobalShader, declares the permutation domain, and declares the FParameters
// struct (Input SRV + Output UAV) that DispatchRenderThread allocates and fills in.
class CS_TEST_API FMySimpleComputeShader : public FGlobalShader
{
public:
DECLARE_GLOBAL_SHADER(FMySimpleComputeShader);
SHADER_USE_PARAMETER_STRUCT(FMySimpleComputeShader, FGlobalShader);
// Integer permutation dimension named "TEST"; it is the only dimension of FPermutationDomain.
class FMySimpleComputeShader_Perm_TEST : SHADER_PERMUTATION_INT("TEST", 1);
using FPermutationDomain = TShaderPermutationDomain<
FMySimpleComputeShader_Perm_TEST
>;
// Shader parameter struct. Only the two RDG buffer bindings at the end are active;
// everything above them is commented-out example syntax.
BEGIN_SHADER_PARAMETER_STRUCT(FParameters, )
/*
* Here's where you define one or more of the input parameters for your shader.
* Some examples:
*/
// SHADER_PARAMETER(uint32, MyUint32) // On the shader side: uint32 MyUint32;
// SHADER_PARAMETER(FVector3f, MyVector) // On the shader side: float3 MyVector;
// SHADER_PARAMETER_TEXTURE(Texture2D, MyTexture) // On the shader side: Texture2D<float4> MyTexture; (float4 should be whatever you expect each pixel in the texture to be, in this case float4(R,G,B,A) for 4 channels)
// SHADER_PARAMETER_SAMPLER(SamplerState, MyTextureSampler) // On the shader side: SamplerState MySampler; // CPP side: TStaticSamplerState<ESamplerFilter::SF_Bilinear>::GetRHI();
// SHADER_PARAMETER_ARRAY(float, MyFloatArray, [3]) // On the shader side: float MyFloatArray[3];
// SHADER_PARAMETER_UAV(RWTexture2D<FVector4f>, MyTextureUAV) // On the shader side: RWTexture2D<float4> MyTextureUAV;
// SHADER_PARAMETER_UAV(RWStructuredBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: RWStructuredBuffer<FMyCustomStruct> MyCustomStructs;
// SHADER_PARAMETER_UAV(RWBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: RWBuffer<FMyCustomStruct> MyCustomStructs;
// SHADER_PARAMETER_SRV(StructuredBuffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: StructuredBuffer<FMyCustomStruct> MyCustomStructs;
// SHADER_PARAMETER_SRV(Buffer<FMyCustomStruct>, MyCustomStructs) // On the shader side: Buffer<FMyCustomStruct> MyCustomStructs;
// SHADER_PARAMETER_SRV(Texture2D<FVector4f>, MyReadOnlyTexture) // On the shader side: Texture2D<float4> MyReadOnlyTexture;
// SHADER_PARAMETER_STRUCT_REF(FMyCustomStruct, MyCustomStruct)
// Read-only input buffer; matches `Buffer<int> Input;` in MySimpleComputeShader.usf.
SHADER_PARAMETER_RDG_BUFFER_SRV(Buffer<int>, Input)
// Writable output buffer; matches `RWBuffer<int> Output;` in MySimpleComputeShader.usf.
SHADER_PARAMETER_RDG_BUFFER_UAV(RWBuffer<int>, Output)
END_SHADER_PARAMETER_STRUCT()
public:
// Always returns true, so no permutation is excluded from compilation.
// NOTE(review): PermutationVector is constructed but never read here.
static bool ShouldCompilePermutation(const FGlobalShaderPermutationParameters& Parameters)
{
const FPermutationDomain PermutationVector(Parameters.PermutationId);
return true;
}
// Calls the FGlobalShader implementation, then adds the THREADS_X/Y/Z defines that the
// [numthreads(...)] attribute in the .usf file refers to.
// NOTE(review): PermutationVector is constructed but never read here.
static void ModifyCompilationEnvironment(const FGlobalShaderPermutationParameters& Parameters, FShaderCompilerEnvironment& OutEnvironment)
{
FGlobalShader::ModifyCompilationEnvironment(Parameters, OutEnvironment);
const FPermutationDomain PermutationVector(Parameters.PermutationId);
/*
* Here you define constants that can be used statically in the shader code.
* Example:
*/
// OutEnvironment.SetDefine(TEXT("MY_CUSTOM_CONST"), TEXT("1"));
/*
* These defines are used in the thread count section of our shader
*/
// NOTE(review): the NUM_THREADS_MySimpleComputeShader_* macros are not defined in the code
// shown here; presumably they come from one of the included headers - confirm.
OutEnvironment.SetDefine(TEXT("THREADS_X"), NUM_THREADS_MySimpleComputeShader_X);
OutEnvironment.SetDefine(TEXT("THREADS_Y"), NUM_THREADS_MySimpleComputeShader_Y);
OutEnvironment.SetDefine(TEXT("THREADS_Z"), NUM_THREADS_MySimpleComputeShader_Z);
// This shader must support typed UAV load and we are testing if it is supported at runtime using RHIIsTypedUAVLoadSupported
//OutEnvironment.CompilerFlags.Add(CFLAG_AllowTypedUAVLoads);
// FForwardLightingParameters::ModifyCompilationEnvironment(Parameters.Platform, OutEnvironment);
}
private:
};
// This will tell the engine to create the shader and where the shader entry point is.
// ShaderType ShaderPath Shader function name Type
IMPLEMENT_GLOBAL_SHADER(FMySimpleComputeShader, "/Plugin/CS_Test/Private/MySimpleComputeShader.usf", "MySimpleComputeShader", SF_Compute);
/**
 * Builds and executes a render graph that runs FMySimpleComputeShader once and
 * reports the single int32 it writes through AsyncCallback.
 *
 * @param RHICmdList     Immediate command list the FRDGBuilder is constructed on.
 * @param Params         Dispatch extents (X/Y/Z) and the two input integers.
 * @param AsyncCallback  Invoked on the game thread with the value read back from the
 *                       output buffer. It is NOT invoked when the shader is unavailable.
 */
void FMySimpleComputeShaderInterface::DispatchRenderThread(FRHICommandListImmediate& RHICmdList, FMySimpleComputeShaderDispatchParams Params, TFunction<void(int OutputVal)> AsyncCallback) {
    FRDGBuilder GraphBuilder(RHICmdList);

    {
        SCOPE_CYCLE_COUNTER(STAT_MySimpleComputeShader_Execute);
        DECLARE_GPU_STAT(MySimpleComputeShader)
        RDG_EVENT_SCOPE(GraphBuilder, "MySimpleComputeShader");
        RDG_GPU_STAT_SCOPE(GraphBuilder, MySimpleComputeShader);

        typename FMySimpleComputeShader::FPermutationDomain PermutationVector;

        // Add any static permutation options here
        // PermutationVector.Set<FMySimpleComputeShader::FMyPermutationName>(12345);

        TShaderMapRef<FMySimpleComputeShader> ComputeShader(GetGlobalShaderMap(GMaxRHIFeatureLevel), PermutationVector);

        bool bIsShaderValid = ComputeShader.IsValid();

        if (bIsShaderValid) {
            FMySimpleComputeShader::FParameters* PassParameters = GraphBuilder.AllocParameters<FMySimpleComputeShader::FParameters>();

            // Upload the input integers. Element size and count are derived from the
            // array itself so they cannot drift from the declaration of Params.Input.
            const void* RawData = Params.Input;
            const uint32 InputSize = static_cast<uint32>(sizeof(Params.Input[0]));
            const uint32 NumInputs = static_cast<uint32>(sizeof(Params.Input) / sizeof(Params.Input[0]));
            FRDGBufferRef InputBuffer = CreateUploadBuffer(GraphBuilder, TEXT("InputBuffer"), InputSize, NumInputs, RawData, sizeof(Params.Input));

            PassParameters->Input = GraphBuilder.CreateSRV(FRDGBufferSRVDesc(InputBuffer, PF_R32_SINT));

            // One int32 element receives the result.
            FRDGBufferRef OutputBuffer = GraphBuilder.CreateBuffer(
                FRDGBufferDesc::CreateBufferDesc(sizeof(int32), 1),
                TEXT("OutputBuffer"));

            PassParameters->Output = GraphBuilder.CreateUAV(FRDGBufferUAVDesc(OutputBuffer, PF_R32_SINT));

            // NOTE(review): the group count is derived with kGolden2DGroupSize, while the
            // shader itself is compiled with THREADS_X/Y/Z taken from the
            // NUM_THREADS_MySimpleComputeShader_* macros - confirm the two are meant to differ.
            auto GroupCount = FComputeShaderUtils::GetGroupCount(FIntVector(Params.X, Params.Y, Params.Z), FComputeShaderUtils::kGolden2DGroupSize);

            // PassParameters is captured BY VALUE. The pointer variable lives in this
            // if-block, but GraphBuilder.Execute() is only called after the block has
            // closed, so a by-reference capture would refer to a dead local by the time
            // the pass lambda is invoked from there.
            GraphBuilder.AddPass(
                RDG_EVENT_NAME("ExecuteMySimpleComputeShader"),
                PassParameters,
                ERDGPassFlags::AsyncCompute,
                [PassParameters, ComputeShader, GroupCount](FRHIComputeCommandList& RHICmdList)
                {
                    FComputeShaderUtils::Dispatch(RHICmdList, ComputeShader, *PassParameters, GroupCount);
                });

            // Heap-allocated because it has to outlive this function; it is deleted by
            // RunnerFunc once the result has been read.
            FRHIGPUBufferReadback* GPUBufferReadback = new FRHIGPUBufferReadback(TEXT("ExecuteMySimpleComputeShaderOutput"));
            AddEnqueueCopyPass(GraphBuilder, GPUBufferReadback, OutputBuffer, 0u);

            // Self-rescheduling poll: while the readback is not ready, queue another check
            // on the rendering thread; once ready, read the value, forward it to the game
            // thread and release the readback object.
            auto RunnerFunc = [GPUBufferReadback, AsyncCallback](auto&& RunnerFunc) -> void {
                if (GPUBufferReadback->IsReady()) {
                    // Lock exactly the bytes that are read below (one int32).
                    const int32* Buffer = static_cast<const int32*>(GPUBufferReadback->Lock(sizeof(int32)));
                    int OutVal = Buffer[0];

                    GPUBufferReadback->Unlock();

                    AsyncTask(ENamedThreads::GameThread, [AsyncCallback, OutVal]() {
                        AsyncCallback(OutVal);
                    });

                    delete GPUBufferReadback;
                } else {
                    AsyncTask(ENamedThreads::ActualRenderingThread, [RunnerFunc]() {
                        RunnerFunc(RunnerFunc);
                    });
                }
            };

            AsyncTask(ENamedThreads::ActualRenderingThread, [RunnerFunc]() {
                RunnerFunc(RunnerFunc);
            });
        } else {
            // We silently exit here as we don't want to crash the game if the shader is not found or has an error.
        }
    }

    GraphBuilder.Execute();
}
MySimpleComputeShader.h
#pragma once
#include "CoreMinimal.h"
#include "GenericPlatform/GenericPlatformMisc.h"
#include "Kismet/BlueprintAsyncActionBase.h"
#include "MySimpleComputeShader.generated.h"
/**
 * CPU-side parameters for one dispatch of MySimpleComputeShader.
 *
 * The constructor only sets the dispatch extents; Input and Output are
 * zero-initialized so that a freshly constructed instance never exposes
 * indeterminate values.
 */
struct CS_TEST_API FMySimpleComputeShaderDispatchParams
{
    // Dispatch extents along each axis; the dispatcher turns them into a group count.
    int X;
    int Y;
    int Z;

    // The two integers uploaded to the shader's Input buffer.
    int Input[2] = {0, 0};

    // Zero by default; the dispatch code shown in this file does not read this field.
    int Output = 0;

    FMySimpleComputeShaderDispatchParams(int x, int y, int z)
        : X(x)
        , Y(y)
        , Z(z)
    {
    }
};
/**
 * Public entry points that let outside code run the MySimpleComputeShader compute shader.
 */
class CS_TEST_API FMySimpleComputeShaderInterface {
public:
    /**
     * Runs the shader using the supplied immediate command list.
     * Defined in MySimpleComputeShader.cpp.
     */
    static void DispatchRenderThread(
        FRHICommandListImmediate& RHICmdList,
        FMySimpleComputeShaderDispatchParams Params,
        TFunction<void(int OutputVal)> AsyncCallback
    );

    /**
     * Enqueues a render command that forwards copies of Params and AsyncCallback
     * to DispatchRenderThread.
     */
    static void DispatchGameThread(
        FMySimpleComputeShaderDispatchParams Params,
        TFunction<void(int OutputVal)> AsyncCallback
    )
    {
        ENQUEUE_RENDER_COMMAND(SceneDrawCompletion)(
            [Params, AsyncCallback](FRHICommandListImmediate& CmdList)
            {
                DispatchRenderThread(CmdList, Params, AsyncCallback);
            });
    }

    /**
     * Thread-agnostic entry point: dispatches directly when already on the rendering
     * thread, otherwise goes through DispatchGameThread.
     */
    static void Dispatch(
        FMySimpleComputeShaderDispatchParams Params,
        TFunction<void(int OutputVal)> AsyncCallback
    )
    {
        if (!IsInRenderingThread()) {
            DispatchGameThread(Params, AsyncCallback);
            return;
        }

        DispatchRenderThread(GetImmediateCommandList_ForRenderCommand(), Params, AsyncCallback);
    }
};
DECLARE_DYNAMIC_MULTICAST_DELEGATE_OneParam(FOnMySimpleComputeShaderLibrary_AsyncExecutionCompleted, const int, Value);
/**
 * Blueprint async action that runs MySimpleComputeShader on two integers and
 * broadcasts the result through the Completed delegate.
 */
UCLASS() // Change the _API to match your project
class CS_TEST_API UMySimpleComputeShaderLibrary_AsyncExecution : public UBlueprintAsyncActionBase
{
    GENERATED_BODY()

public:
    /**
     * Starts the dispatch. This does not block: the result arrives later through
     * the callback, which DispatchRenderThread schedules on the game thread.
     */
    virtual void Activate() override {
        // Create a dispatch parameters struct and fill the input array with our args
        FMySimpleComputeShaderDispatchParams Params(1, 1, 1);
        Params.Input[0] = Arg1;
        Params.Input[1] = Arg2;

        // Hold the action weakly: the callback fires at an unknown later time, so a raw
        // `this` could be stale by then. If the object is gone the result is dropped.
        TWeakObjectPtr<UMySimpleComputeShaderLibrary_AsyncExecution> WeakThis(this);

        FMySimpleComputeShaderInterface::Dispatch(Params, [WeakThis](int OutputVal) {
            if (UMySimpleComputeShaderLibrary_AsyncExecution* Self = WeakThis.Get()) {
                Self->Completed.Broadcast(OutputVal);
                // Counterpart of RegisterWithGameInstance() in the factory below; without
                // it the action stays registered after it has finished.
                Self->SetReadyToDestroy();
            }
        });
    }

    /**
     * Factory for the async node: creates the action, stores both operands and
     * registers it with the game instance of WorldContextObject.
     */
    UFUNCTION(BlueprintCallable, meta = (BlueprintInternalUseOnly = "true", Category = "ComputeShader", WorldContext = "WorldContextObject"))
    static UMySimpleComputeShaderLibrary_AsyncExecution* ExecuteBaseComputeShader(UObject* WorldContextObject, int Arg1, int Arg2) {
        UMySimpleComputeShaderLibrary_AsyncExecution* Action = NewObject<UMySimpleComputeShaderLibrary_AsyncExecution>();
        Action->Arg1 = Arg1;
        Action->Arg2 = Arg2;
        Action->RegisterWithGameInstance(WorldContextObject);

        return Action;
    }

    // Broadcast with the shader's output value once the readback has completed.
    UPROPERTY(BlueprintAssignable)
    FOnMySimpleComputeShaderLibrary_AsyncExecutionCompleted Completed;

    // Operands copied into Params.Input by Activate(); zero until the factory sets them.
    int Arg1 = 0;
    int Arg2 = 0;
};
6. 着色器实现
MySimpleComputeShader.usf
#include "/Engine/Public/Platform.ush"
Buffer<int> Input;
RWBuffer<int> Output;
// Compute entry point: multiplies the first two elements of Input and stores the
// product in Output[0]. Neither DispatchThreadId nor GroupIndex is used, so every
// thread performs the same write.
[numthreads(THREADS_X, THREADS_Y, THREADS_Z)]
void MySimpleComputeShader(
    uint3 DispatchThreadId : SV_DispatchThreadID,
    uint GroupIndex : SV_GroupIndex)
{
    const int Lhs = Input[0];
    const int Rhs = Input[1];

    // Outputs one number
    Output[0] = Lhs * Rhs;
}
7. 分配 work groups
关于整个解释
[numthreads(THREADS_X, THREADS_Y, THREADS_Z)]
是在HLSL中分配计算空间的语法