diff --git a/Common/GPU/D3D11/thin3d_d3d11.cpp b/Common/GPU/D3D11/thin3d_d3d11.cpp index 0014b5bb052d..b9f829e6c71a 100644 --- a/Common/GPU/D3D11/thin3d_d3d11.cpp +++ b/Common/GPU/D3D11/thin3d_d3d11.cpp @@ -26,6 +26,8 @@ namespace Draw { +static constexpr int MAX_BOUND_TEXTURES = 8; + // A problem is that we can't get the D3Dcompiler.dll without using a later SDK than 7.1, which was the last that // supported XP. A possible solution might be here: // https://tedwvc.wordpress.com/2014/01/01/how-to-target-xp-with-vc2012-or-vc2013-and-continue-to-use-the-windows-8-x-sdk/ @@ -1162,6 +1164,7 @@ void D3D11DrawContext::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t } void D3D11DrawContext::BindVertexBuffers(int start, int count, Buffer **buffers, int *offsets) { + _assert_(start + count <= ARRAY_SIZE(nextVertexBuffers_)); // Lazy application for (int i = 0; i < count; i++) { D3D11Buffer *buf = (D3D11Buffer *)buffers[i]; @@ -1328,7 +1331,8 @@ Framebuffer *D3D11DrawContext::CreateFramebuffer(const FramebufferDesc &desc) { void D3D11DrawContext::BindTextures(int start, int count, Texture **textures) { // Collect the resource views from the textures. - ID3D11ShaderResourceView *views[8]; + ID3D11ShaderResourceView *views[MAX_BOUND_TEXTURES]; + _assert_(start + count <= ARRAY_SIZE(views)); for (int i = 0; i < count; i++) { D3D11Texture *tex = (D3D11Texture *)textures[i]; views[i] = tex ? 
tex->view : nullptr; @@ -1337,7 +1341,8 @@ void D3D11DrawContext::BindTextures(int start, int count, Texture **textures) { } void D3D11DrawContext::BindSamplerStates(int start, int count, SamplerState **states) { - ID3D11SamplerState *samplers[8]; + ID3D11SamplerState *samplers[MAX_BOUND_TEXTURES]; + _assert_(start + count <= ARRAY_SIZE(samplers)); for (int i = 0; i < count; i++) { D3D11SamplerState *samp = (D3D11SamplerState *)states[i]; samplers[i] = samp->ss; @@ -1613,6 +1618,7 @@ void D3D11DrawContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const Ren } void D3D11DrawContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { + _assert_(binding < MAX_BOUND_TEXTURES); D3D11Framebuffer *fb = (D3D11Framebuffer *)fbo; switch (channelBit) { case FBChannel::FB_COLOR_BIT: diff --git a/Common/GPU/D3D9/thin3d_d3d9.cpp b/Common/GPU/D3D9/thin3d_d3d9.cpp index 79a0bb0dcaaa..39a8b9d5b704 100644 --- a/Common/GPU/D3D9/thin3d_d3d9.cpp +++ b/Common/GPU/D3D9/thin3d_d3d9.cpp @@ -28,6 +28,8 @@ namespace Draw { +static constexpr int MAX_BOUND_TEXTURES = 8; + // Could be declared as u8 static const D3DCMPFUNC compareToD3D9[] = { D3DCMP_NEVER, @@ -525,12 +527,14 @@ class D3D9Context : public DrawContext { void BindTextures(int start, int count, Texture **textures) override; void BindSamplerStates(int start, int count, SamplerState **states) override { + _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = 0; i < count; ++i) { D3D9SamplerState *s = static_cast(states[i]); s->Apply(device_, start + i); } } void BindVertexBuffers(int start, int count, Buffer **buffers, int *offsets) override { + _assert_(start + count <= ARRAY_SIZE(curVBuffers_)); for (int i = 0; i < count; i++) { curVBuffers_[i + start] = (D3D9Buffer *)buffers[i]; curVBufferOffsets_[i + start] = offsets ? 
offsets[i] : 0; @@ -785,6 +789,7 @@ Texture *D3D9Context::CreateTexture(const TextureDesc &desc) { } void D3D9Context::BindTextures(int start, int count, Texture **textures) { + _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = start; i < start + count; i++) { D3D9Texture *tex = static_cast(textures[i - start]); if (tex) { @@ -1182,6 +1187,7 @@ uintptr_t D3D9Context::GetFramebufferAPITexture(Framebuffer *fbo, int channelBit } void D3D9Context::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int color) { + _assert_(binding < MAX_BOUND_TEXTURES); D3D9Framebuffer *fb = (D3D9Framebuffer *)fbo; switch (channelBit) { case FB_DEPTH_BIT: diff --git a/Common/GPU/OpenGL/GLQueueRunner.cpp b/Common/GPU/OpenGL/GLQueueRunner.cpp index 3f3dec4c4d8b..2fe56bfac61c 100644 --- a/Common/GPU/OpenGL/GLQueueRunner.cpp +++ b/Common/GPU/OpenGL/GLQueueRunner.cpp @@ -17,7 +17,7 @@ #include "GLRenderManager.h" #include "DataFormatGL.h" -#define TEXCACHE_NAME_CACHE_SIZE 16 +static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16; #if PPSSPP_PLATFORM(IOS) extern void bindDefaultFBO(); @@ -800,7 +800,7 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last GLuint blendEqColor = (GLuint)-1; GLuint blendEqAlpha = (GLuint)-1; - GLRTexture *curTex[8]{}; + GLRTexture *curTex[MAX_GL_TEXTURE_SLOTS]{}; CHECK_GL_ERROR_IF_DEBUG(); auto &commands = step.commands; diff --git a/Common/GPU/OpenGL/GLRenderManager.cpp b/Common/GPU/OpenGL/GLRenderManager.cpp index 6229aa676a43..1475d8d5ff4a 100644 --- a/Common/GPU/OpenGL/GLRenderManager.cpp +++ b/Common/GPU/OpenGL/GLRenderManager.cpp @@ -357,6 +357,7 @@ void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRende void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit, int attachment) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + _dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS); GLRRenderData data{ 
GLRRenderCommand::BIND_FB_TEXTURE }; data.bind_fb_texture.slot = binding; data.bind_fb_texture.framebuffer = fb; diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 69cd0e9d37a0..9edd5af35cf8 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -21,6 +21,8 @@ namespace Draw { class DrawContext; } +constexpr int MAX_GL_TEXTURE_SLOTS = 8; + class GLRTexture { public: GLRTexture(int width, int height, int numMips); @@ -571,8 +573,8 @@ class GLRenderManager { void BindTexture(int slot, GLRTexture *tex) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); GLRRenderData data{ GLRRenderCommand::BINDTEXTURE }; - _dbg_assert_(slot < 16); data.texture.slot = slot; data.texture.texture = tex; curRenderStep_->commands.push_back(data); @@ -824,6 +826,7 @@ class GLRenderManager { // Modifies the current texture as per GL specs, not global state. void SetTextureSampler(int slot, GLenum wrapS, GLenum wrapT, GLenum magFilter, GLenum minFilter, float anisotropy) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); GLRRenderData data{ GLRRenderCommand::TEXTURESAMPLER }; data.textureSampler.slot = slot; data.textureSampler.wrapS = wrapS; @@ -836,6 +839,7 @@ class GLRenderManager { void SetTextureLod(int slot, float minLod, float maxLod, float lodBias) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER); + _dbg_assert_(slot < MAX_GL_TEXTURE_SLOTS); GLRRenderData data{ GLRRenderCommand::TEXTURELOD}; data.textureLod.slot = slot; data.textureLod.minLod = minLod; diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 454cde20f502..161c04eecfaf 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -299,7 +299,7 @@ class OpenGLPipeline : public Pipeline { // TODO: Optimize 
by getting the locations first and putting in a custom struct UniformBufferDesc dynamicUniforms; - GLint samplerLocs_[8]{}; + GLint samplerLocs_[MAX_TEXTURE_SLOTS]{}; std::vector dynamicUniformLocs_; GLRProgram *program_ = nullptr; @@ -368,9 +368,7 @@ class OpenGLContext : public DrawContext { void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) override; void BindSamplerStates(int start, int count, SamplerState **states) override { - if (start + count > MAX_TEXTURE_SLOTS) { - return; - } + _assert_(start + count <= MAX_TEXTURE_SLOTS); for (int i = 0; i < count; i++) { int index = i + start; boundSamplers_[index] = static_cast(states[i]); @@ -402,6 +400,7 @@ class OpenGLContext : public DrawContext { void BindTextures(int start, int count, Texture **textures) override; void BindPipeline(Pipeline *pipeline) override; void BindVertexBuffers(int start, int count, Buffer **buffers, int *offsets) override { + _assert_(start + count <= ARRAY_SIZE(curVBuffers_)); for (int i = 0; i < count; i++) { curVBuffers_[i + start] = (OpenGLBuffer *)buffers[i]; curVBufferOffsets_[i + start] = offsets ? 
offsets[i] : 0; @@ -1070,9 +1069,7 @@ Pipeline *OpenGLContext::CreateGraphicsPipeline(const PipelineDesc &desc) { } void OpenGLContext::BindTextures(int start, int count, Texture **textures) { - if (start + count > MAX_TEXTURE_SLOTS) { - return; - } + _assert_(start + count <= MAX_TEXTURE_SLOTS); for (int i = start; i < start + count; i++) { OpenGLTexture *glTex = static_cast(textures[i - start]); if (!glTex) { @@ -1151,12 +1148,14 @@ bool OpenGLPipeline::LinkShaders() { std::vector queries; queries.push_back({ &samplerLocs_[0], "sampler0" }); queries.push_back({ &samplerLocs_[1], "sampler1" }); + queries.push_back({ &samplerLocs_[2], "sampler2" }); + _assert_(queries.size() >= MAX_TEXTURE_SLOTS); for (size_t i = 0; i < dynamicUniforms.uniforms.size(); ++i) { queries.push_back({ &dynamicUniformLocs_[i], dynamicUniforms.uniforms[i].name }); } std::vector initialize; - initialize.push_back({ &samplerLocs_[0], 0, 0 }); - initialize.push_back({ &samplerLocs_[1], 0, 1 }); + for (int i = 0; i < MAX_TEXTURE_SLOTS; ++i) + initialize.push_back({ &samplerLocs_[i], 0, i }); program_ = render_->CreateProgram(linkShaders, semantics, queries, initialize, false); return true; } @@ -1354,6 +1353,7 @@ bool OpenGLContext::BlitFramebuffer(Framebuffer *fbsrc, int srcX1, int srcY1, in void OpenGLContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int color) { OpenGLFramebuffer *fb = (OpenGLFramebuffer *)fbo; + _assert_(binding < MAX_TEXTURE_SLOTS); GLuint aspect = 0; if (channelBit & FB_COLOR_BIT) { diff --git a/Common/GPU/ShaderTranslation.cpp b/Common/GPU/ShaderTranslation.cpp index 5d92d2fd7d19..34de21345134 100644 --- a/Common/GPU/ShaderTranslation.cpp +++ b/Common/GPU/ShaderTranslation.cpp @@ -85,6 +85,7 @@ cbuffer data : register(b0) { float2 u_texelDelta; float2 u_pixelDelta; float4 u_time; + float4 u_timeDelta; float4 u_setting; float u_video; }; @@ -101,6 +102,7 @@ layout (std140, set = 0, binding = 0) uniform Data { vec2 u_texelDelta; vec2 
u_pixelDelta; vec4 u_time; + vec4 u_timeDelta; vec4 u_setting; float u_video; }; @@ -111,8 +113,9 @@ float4 gl_HalfPixel : register(c0); float2 u_texelDelta : register(c1); float2 u_pixelDelta : register(c2); float4 u_time : register(c3); -float4 u_setting : register(c4); -float u_video : register(c5); +float4 u_timeDelta : register(c4); +float4 u_setting : register(c5); +float u_video : register(c6); )"; // SPIRV-Cross' HLSL output has some deficiencies we need to work around. @@ -135,12 +138,9 @@ std::string Postprocess(std::string code, ShaderLanguage lang, ShaderStage stage std::string line; std::stringstream instream(code); while (std::getline(instream, line)) { - if (line == "uniform sampler2D sampler0;" && lang == HLSL_D3D9) { - out << "sampler2D sampler0 : register(s0);\n"; - continue; - } - if (line == "uniform sampler2D sampler1;" && lang == HLSL_D3D9) { - out << "sampler2D sampler1 : register(s1);\n"; + int num; + if (lang == HLSL_D3D9 && sscanf(line.c_str(), "uniform sampler2D sampler%d;", &num) == 1) { + out << "sampler2D sampler" << num << " : register(s" << num << ");\n"; continue; } if (line.find("uniform float") != std::string::npos) { @@ -184,7 +184,9 @@ bool ConvertToVulkanGLSL(std::string *dest, TranslatedShaderMetadata *destMetada if (line.find("uniform bool") != std::string::npos) { continue; } else if (line.find("uniform sampler2D") == 0) { - if (line.find("sampler0") != line.npos) + if (sscanf(line.c_str(), "uniform sampler2D sampler%d", &num) == 1) + line = StringFromFormat("layout(set = 0, binding = %d) ", num + 1) + line; + else if (line.find("sampler0") != line.npos) line = "layout(set = 0, binding = 1) " + line; else line = "layout(set = 0, binding = 2) " + line; @@ -299,8 +301,11 @@ bool TranslateShader(std::string *dest, ShaderLanguage destLang, const ShaderLan int i = 0; for (auto &resource : resources.sampled_images) { - // int location = hlsl.get_decoration(resource.id, spv::DecorationLocation); - hlsl.set_decoration(resource.id, 
spv::DecorationLocation, i); + const std::string &name = hlsl.get_name(resource.id); + int num; + if (sscanf(name.c_str(), "sampler%d", &num) != 1) + num = i; + hlsl.set_decoration(resource.id, spv::DecorationBinding, num); i++; } spirv_cross::CompilerHLSL::Options options{}; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index 054096f37766..f68132f5e4ed 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -295,7 +295,7 @@ class VKBuffer; class VKSamplerState; enum { - MAX_BOUND_TEXTURES = 2 + MAX_BOUND_TEXTURES = MAX_TEXTURE_SLOTS, }; struct DescriptorSetKey { @@ -416,6 +416,7 @@ class VKContext : public DrawContext { // TODO: Make VKBuffers proper buffers, and do a proper binding model. This is just silly. void BindVertexBuffers(int start, int count, Buffer **buffers, int *offsets) override { + _assert_(start + count <= ARRAY_SIZE(curVBuffers_)); for (int i = 0; i < count; i++) { curVBuffers_[i + start] = (VKBuffer *)buffers[i]; curVBufferOffsets_[i + start] = offsets ? offsets[i] : 0; @@ -689,6 +690,7 @@ RasterState *VKContext::CreateRasterState(const RasterStateDesc &desc) { } void VKContext::BindSamplerStates(int start, int count, SamplerState **state) { + _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = start; i < start + count; i++) { boundSamplers_[i] = (VKSamplerState *)state[i - start]; } @@ -1271,6 +1273,7 @@ void VKContext::UpdateBuffer(Buffer *buffer, const uint8_t *data, size_t offset, } void VKContext::BindTextures(int start, int count, Texture **textures) { + _assert_(start + count <= MAX_BOUND_TEXTURES); for (int i = start; i < start + count; i++) { boundTextures_[i] = static_cast(textures[i - start]); boundImageView_[i] = boundTextures_[i] ? 
boundTextures_[i]->GetImageView() : GetNullTexture()->GetImageView(); @@ -1544,6 +1547,7 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) { VKFramebuffer *fb = (VKFramebuffer *)fbo; + _assert_(binding < MAX_BOUND_TEXTURES); // TODO: There are cases where this is okay, actually. But requires layout transitions and stuff - // we're not ready for this. diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index 9ee285c80452..853ed27542b4 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -278,7 +278,7 @@ enum class Event { PRESENTED, }; -constexpr uint32_t MAX_TEXTURE_SLOTS = 2; +constexpr uint32_t MAX_TEXTURE_SLOTS = 3; struct FramebufferDesc { int width; diff --git a/Core/Font/PGF.cpp b/Core/Font/PGF.cpp index c1a880257179..20315f4c016d 100644 --- a/Core/Font/PGF.cpp +++ b/Core/Font/PGF.cpp @@ -230,14 +230,14 @@ bool PGF::ReadPtr(const u8 *ptr, size_t dataSize) { const u8 *uptr = (const u8 *)wptr; - if (uptr >= startPtr + dataSize) { - return false; - } - int shadowCharMapSize = ((header.shadowMapLength * header.shadowMapBpe + 31) & ~31) / 8; const u8 *shadowCharMap = uptr; uptr += shadowCharMapSize; + if (uptr < startPtr || uptr >= startPtr + dataSize) { + return false; + } + const u16_le *sptr = (const u16_le *)uptr; if (header.revision == 3) { charmapCompressionTable1[0].resize(rev3extra.compCharMapLength1); @@ -257,10 +257,6 @@ bool PGF::ReadPtr(const u8 *ptr, size_t dataSize) { uptr = (const u8 *)sptr; - if (uptr >= startPtr + dataSize) { - return false; - } - int charMapSize = ((header.charMapLength * header.charMapBpe + 31) & ~31) / 8; const u8 *charMap = uptr; uptr += charMapSize; @@ -269,6 +265,10 @@ bool PGF::ReadPtr(const u8 *ptr, size_t dataSize) { const u8 *charPointerTable = uptr; uptr += charPointerSize; + if (uptr < startPtr || uptr >= startPtr + dataSize) { + return false; + } + // PGF Fontdata. 
u32 fontDataOffset = (u32)(uptr - startPtr); diff --git a/GPU/Common/PostShader.cpp b/GPU/Common/PostShader.cpp index c14754ff321d..96e4405b36f6 100644 --- a/GPU/Common/PostShader.cpp +++ b/GPU/Common/PostShader.cpp @@ -127,6 +127,7 @@ void LoadPostShaderInfo(const std::vector &directories) { section.Get("Upscaling", &info.isUpscalingFilter, false); section.Get("SSAA", &info.SSAAFilterLevel, 0); section.Get("60fps", &info.requires60fps, false); + section.Get("UsePreviousFrame", &info.usePreviousFrame, false); if (info.parent == "Off") info.parent = ""; diff --git a/GPU/Common/PostShader.h b/GPU/Common/PostShader.h index 76c2e8ce6fea..5dfad5c03f87 100644 --- a/GPU/Common/PostShader.h +++ b/GPU/Common/PostShader.h @@ -43,6 +43,8 @@ struct ShaderInfo { int SSAAFilterLevel; // Force constant/max refresh for animated filters bool requires60fps; + // Takes previous frame as input (for blending effects.) + bool usePreviousFrame; struct Setting { std::string name; diff --git a/GPU/Common/PresentationCommon.cpp b/GPU/Common/PresentationCommon.cpp index 59590e8401be..b6e9ecbdfc0e 100644 --- a/GPU/Common/PresentationCommon.cpp +++ b/GPU/Common/PresentationCommon.cpp @@ -185,6 +185,10 @@ void PresentationCommon::CalculatePostShaderUniforms(int bufferWidth, int buffer uniforms->pixelDelta[0] = u_pixel_delta; uniforms->pixelDelta[1] = v_pixel_delta; memcpy(uniforms->time, time, 4 * sizeof(float)); + uniforms->timeDelta[0] = time[0] - previousUniforms_.time[0]; + uniforms->timeDelta[1] = (time[2] - previousUniforms_.time[2]) * (1.0f / 60.0f); + uniforms->timeDelta[2] = time[2] - previousUniforms_.time[2]; + uniforms->timeDelta[3] = time[3] != previousUniforms_.time[3] ? 1.0f : 0.0f; uniforms->video = hasVideo_ ? 1.0f : 0.0f; // The shader translator tacks this onto our shaders, if we don't set it they render garbage. 
@@ -222,12 +226,35 @@ bool PresentationCommon::UpdatePostShader() { if (shaderInfo.empty()) return false; + bool usePreviousFrame = false; + bool usePreviousAtOutputResolution = false; for (int i = 0; i < shaderInfo.size(); ++i) { const ShaderInfo *next = i + 1 < shaderInfo.size() ? shaderInfo[i + 1] : nullptr; if (!BuildPostShader(shaderInfo[i], next)) { DestroyPostShader(); return false; } + if (shaderInfo[i]->usePreviousFrame) { + usePreviousFrame = true; + usePreviousAtOutputResolution = shaderInfo[i]->outputResolution; + } + } + + if (usePreviousFrame) { + int w = usePreviousAtOutputResolution ? pixelWidth_ : renderWidth_; + int h = usePreviousAtOutputResolution ? pixelHeight_ : renderHeight_; + + static constexpr int FRAMES = 2; + previousFramebuffers_.resize(FRAMES); + previousIndex_ = 0; + + for (int i = 0; i < FRAMES; ++i) { + previousFramebuffers_[i] = draw_->CreateFramebuffer({ w, h, 1, 1, false, "inter_presentation" }); + if (!previousFramebuffers_[i]) { + DestroyPostShader(); + return false; + } + } } usePostShader_ = true; @@ -263,8 +290,9 @@ bool PresentationCommon::BuildPostShader(const ShaderInfo *shaderInfo, const Sha { "u_texelDelta", 1, 1, UniformType::FLOAT2, offsetof(PostShaderUniforms, texelDelta) }, { "u_pixelDelta", 2, 2, UniformType::FLOAT2, offsetof(PostShaderUniforms, pixelDelta) }, { "u_time", 3, 3, UniformType::FLOAT4, offsetof(PostShaderUniforms, time) }, - { "u_setting", 4, 4, UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) }, - { "u_video", 5, 5, UniformType::FLOAT1, offsetof(PostShaderUniforms, video) }, + { "u_timeDelta", 4, 4, UniformType::FLOAT4, offsetof(PostShaderUniforms, timeDelta) }, + { "u_setting", 5, 5, UniformType::FLOAT4, offsetof(PostShaderUniforms, setting) }, + { "u_video", 6, 6, UniformType::FLOAT1, offsetof(PostShaderUniforms, video) }, } }; Draw::Pipeline *pipeline = CreatePipeline({ vs, fs }, true, &postShaderDesc); @@ -467,6 +495,7 @@ void PresentationCommon::DestroyPostShader() { 
DoReleaseVector(postShaderModules_); DoReleaseVector(postShaderPipelines_); DoReleaseVector(postShaderFramebuffers_); + DoReleaseVector(previousFramebuffers_); postShaderInfo_.clear(); postShaderFBOUsage_.clear(); } @@ -531,7 +560,7 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u bool useNearest = flags & OutputFlags::NEAREST; const bool usePostShader = usePostShader_ && !(flags & OutputFlags::RB_SWIZZLE); const bool isFinalAtOutputResolution = usePostShader && postShaderFramebuffers_.size() < postShaderPipelines_.size(); - bool usePostShaderOutput = false; + Draw::Framebuffer *postShaderOutput = nullptr; int lastWidth = srcWidth_; int lastHeight = srcHeight_; @@ -615,6 +644,47 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u } } + // Grab the previous framebuffer early so we can change previousIndex_ when we want. + Draw::Framebuffer *previousFramebuffer = previousFramebuffers_.empty() ? nullptr : previousFramebuffers_[previousIndex_]; + + PostShaderUniforms uniforms{}; /* Zero-initialized: copied into previousUniforms_ at the end of this function, and would otherwise be read uninitialized when no post-shader pass runs. */ + const auto performShaderPass = [&](const ShaderInfo *shaderInfo, Draw::Framebuffer *postShaderFramebuffer, Draw::Pipeline *postShaderPipeline) { + if (postShaderOutput) { + draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT, 0); + } else { + BindSource(0); + } + BindSource(1); + if (shaderInfo->usePreviousFrame) + draw_->BindFramebufferAsTexture(previousFramebuffer, 2, Draw::FB_COLOR_BIT, 0); + + int nextWidth, nextHeight; + draw_->GetFramebufferDimensions(postShaderFramebuffer, &nextWidth, &nextHeight); + Draw::Viewport viewport{ 0, 0, (float)nextWidth, (float)nextHeight, 0.0f, 1.0f }; + draw_->SetViewports(1, &viewport); + draw_->SetScissorRect(0, 0, nextWidth, nextHeight); + + CalculatePostShaderUniforms(lastWidth, lastHeight, nextWidth, nextHeight, shaderInfo, &uniforms); + + draw_->BindPipeline(postShaderPipeline); + draw_->UpdateDynamicUniformBuffer(&uniforms, sizeof(uniforms)); + + Draw::SamplerState 
*sampler = useNearest || shaderInfo->isUpscalingFilter ? samplerNearest_ : samplerLinear_; + draw_->BindSamplerStates(0, 1, &sampler); + draw_->BindSamplerStates(1, 1, &sampler); + if (shaderInfo->usePreviousFrame) + draw_->BindSamplerStates(2, 1, &sampler); + + draw_->BindVertexBuffers(0, 1, &vdata_, &postVertsOffset); + draw_->BindIndexBuffer(idata_, 0); + draw_->DrawIndexed(6, 0); + draw_->BindIndexBuffer(nullptr, 0); + + postShaderOutput = postShaderFramebuffer; + lastWidth = nextWidth; + lastHeight = nextHeight; + }; + if (usePostShader) { bool flipped = flags & OutputFlags::POSITION_FLIPPED; float post_v0 = !flipped ? 1.0f : 0.0f; @@ -629,40 +699,17 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u Draw::Pipeline *postShaderPipeline = postShaderPipelines_[i]; const ShaderInfo *shaderInfo = &postShaderInfo_[i]; Draw::Framebuffer *postShaderFramebuffer = postShaderFramebuffers_[i]; - - draw_->BindFramebufferAsRenderTarget(postShaderFramebuffer, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "PostShader"); - - if (usePostShaderOutput) { - draw_->BindFramebufferAsTexture(postShaderFramebuffers_[i - 1], 0, Draw::FB_COLOR_BIT, 0); - } else { - BindSource(0); + if (!isFinalAtOutputResolution && i == postShaderFramebuffers_.size() - 1 && !previousFramebuffers_.empty()) { + // This is the last pass and we're going direct to the backbuffer after this. + // Redirect output to a separate framebuffer to keep the previous frame. 
+ previousIndex_++; + if (previousIndex_ >= previousFramebuffers_.size()) + previousIndex_ = 0; + postShaderFramebuffer = previousFramebuffers_[previousIndex_]; } - BindSource(1); - - int nextWidth, nextHeight; - draw_->GetFramebufferDimensions(postShaderFramebuffer, &nextWidth, &nextHeight); - Draw::Viewport viewport{ 0, 0, (float)nextWidth, (float)nextHeight, 0.0f, 1.0f }; - draw_->SetViewports(1, &viewport); - draw_->SetScissorRect(0, 0, nextWidth, nextHeight); - - PostShaderUniforms uniforms; - CalculatePostShaderUniforms(lastWidth, lastHeight, nextWidth, nextHeight, shaderInfo, &uniforms); - - draw_->BindPipeline(postShaderPipeline); - draw_->UpdateDynamicUniformBuffer(&uniforms, sizeof(uniforms)); - - Draw::SamplerState *sampler = useNearest || shaderInfo->isUpscalingFilter ? samplerNearest_ : samplerLinear_; - draw_->BindSamplerStates(0, 1, &sampler); - draw_->BindSamplerStates(1, 1, &sampler); - draw_->BindVertexBuffers(0, 1, &vdata_, &postVertsOffset); - draw_->BindIndexBuffer(idata_, 0); - draw_->DrawIndexed(6, 0); - draw_->BindIndexBuffer(nullptr, 0); - - usePostShaderOutput = true; - lastWidth = nextWidth; - lastHeight = nextHeight; + draw_->BindFramebufferAsRenderTarget(postShaderFramebuffer, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "PostShader"); + performShaderPass(shaderInfo, postShaderFramebuffer, postShaderPipeline); } if (isFinalAtOutputResolution && postShaderInfo_.back().isUpscalingFilter) @@ -671,8 +718,23 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u draw_->UpdateBuffer(vdata_, (const uint8_t *)verts, 0, postVertsOffset, Draw::UPDATE_DISCARD); } + // If we need to save the previous frame, we have to save any final pass in a framebuffer. + if (isFinalAtOutputResolution && !previousFramebuffers_.empty()) { + Draw::Pipeline *postShaderPipeline = postShaderPipelines_.back(); + const ShaderInfo *shaderInfo = &postShaderInfo_.back(); + + // Pick the next to render to. 
+ previousIndex_++; + if (previousIndex_ >= previousFramebuffers_.size()) + previousIndex_ = 0; + Draw::Framebuffer *postShaderFramebuffer = previousFramebuffers_[previousIndex_]; + + draw_->BindFramebufferAsRenderTarget(postShaderFramebuffer, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "InterFrameBlit"); + performShaderPass(shaderInfo, postShaderFramebuffer, postShaderPipeline); + } + Draw::Pipeline *pipeline = flags & OutputFlags::RB_SWIZZLE ? texColorRBSwizzle_ : texColor_; - if (isFinalAtOutputResolution) { + if (isFinalAtOutputResolution && previousFramebuffers_.empty()) { pipeline = postShaderPipelines_.back(); } @@ -681,15 +743,14 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u draw_->BindPipeline(pipeline); - if (usePostShaderOutput) { - draw_->BindFramebufferAsTexture(postShaderFramebuffers_.back(), 0, Draw::FB_COLOR_BIT, 0); + if (postShaderOutput) { + draw_->BindFramebufferAsTexture(postShaderOutput, 0, Draw::FB_COLOR_BIT, 0); } else { BindSource(0); } BindSource(1); - if (isFinalAtOutputResolution) { - PostShaderUniforms uniforms; + if (isFinalAtOutputResolution && previousFramebuffers_.empty()) { CalculatePostShaderUniforms(lastWidth, lastHeight, (int)rc.w, (int)rc.h, &postShaderInfo_.back(), &uniforms); draw_->UpdateDynamicUniformBuffer(&uniforms, sizeof(uniforms)); } else { @@ -732,6 +793,8 @@ void PresentationCommon::CopyToOutput(OutputFlags flags, int uvRotation, float u // Unbinds all textures and samplers too, needed since sometimes a MakePixelTexture is deleted etc. 
draw_->BindPipeline(nullptr); + + previousUniforms_ = uniforms; } void PresentationCommon::CalculateRenderResolution(int *width, int *height, int *scaleFactor, bool *upscaling, bool *ssaa) { diff --git a/GPU/Common/PresentationCommon.h b/GPU/Common/PresentationCommon.h index c3f56699f983..63d8e4a2dcde 100644 --- a/GPU/Common/PresentationCommon.h +++ b/GPU/Common/PresentationCommon.h @@ -31,6 +31,7 @@ struct CardboardSettings { struct PostShaderUniforms { float texelDelta[2]; float pixelDelta[2]; float time[4]; + float timeDelta[4]; float setting[4]; float video; float pad[3]; // Used on Direct3D9. @@ -131,6 +132,9 @@ class PresentationCommon { std::vector postShaderPipelines_; std::vector postShaderFramebuffers_; std::vector postShaderInfo_; + std::vector previousFramebuffers_; + int previousIndex_ = 0; + PostShaderUniforms previousUniforms_{}; Draw::Texture *srcTexture_ = nullptr; Draw::Framebuffer *srcFramebuffer_ = nullptr;