Skip to content
Closed
17 changes: 15 additions & 2 deletions pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ void GSDrawScanlineCodeGenerator::modulate16(const XYm& a, const Operand& f, u8
}
}

/// Emits a truncating (non-rounding) fixed-point multiply of the 16-bit lanes of `a` by `f`.
/// The result replaces `a`; no rounding term is added before the high half is taken.
void GSDrawScanlineCodeGenerator::modulate16_noround(const XYm& a, const Operand& f)
{
// Non-rounding equivalent of pmulhrsw: ((a << 1) * f) >> 16 = (a * f) >> 15
// pmulhw keeps only the high 16 bits of each signed product, so `a` is doubled first
// to make the net shift 15. The doubling is a 16-bit lane shift, so the identity
// holds only while (a << 1) still fits in a signed 16-bit lane.
psllw(a, 1);
pmulhw(a, f);
}

void GSDrawScanlineCodeGenerator::lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift)
{
psubw(a, b);
Expand Down Expand Up @@ -2395,10 +2402,16 @@ void GSDrawScanlineCodeGenerator::Fog()
movdqa(xym1, _ga);

pbroadcastdLocal(tmp, _rip_global(frb));
lerp16(_rb, tmp, f, 0);
// Use non-rounding interpolation for fog (PS2 hardware doesn't round)
psubw(_rb, tmp);
modulate16_noround(_rb, f);
paddw(_rb, tmp);

pbroadcastdLocal(tmp, _rip_global(fga));
lerp16(_ga, tmp, f, 0);
// Use non-rounding interpolation for fog (PS2 hardware doesn't round)
psubw(_ga, tmp);
modulate16_noround(_ga, f);
paddw(_ga, tmp);

mix16(_ga, xym1, xym0);
}
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.all.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class GSDrawScanlineCodeGenerator : public GSNewCodeGenerator
/// Broadcast a 32-bit GPR to a vector register
void broadcastGPRToVec(const XYm& vec, const Xbyak::Reg32& gpr);
void modulate16(const XYm& a, const Xbyak::Operand& f, u8 shift);
/// Truncating 16-bit fixed-point multiply: a = ((a << 1) * f) >> 16 per lane, without rounding
void modulate16_noround(const XYm& a, const Xbyak::Operand& f);
void lerp16(const XYm& a, const XYm& b, const XYm& f, u8 shift);
void lerp16_4(const XYm& a, const XYm& b, const XYm& f);
void mix16(const XYm& a, const XYm& b, const XYm& temp);
Expand Down
20 changes: 17 additions & 3 deletions pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1659,9 +1659,16 @@ void GSDrawScanlineCodeGenerator::Fog()
armAsm->Dup(_vscratch2.V4S(), _global_fga);
armAsm->Mov(v1, v6);

lerp16(v5, _vscratch, _temp_f, 0);

lerp16(v6, _vscratch2, _temp_f, 0);
// Use non-rounding interpolation for fog (PS2 hardware doesn't round)
armAsm->Sub(v5.V8H(), v5.V8H(), _vscratch.V8H());
modulate16_noround(v5, _temp_f);
armAsm->Add(v5.V8H(), v5.V8H(), _vscratch.V8H());

// Use non-rounding interpolation for fog (PS2 hardware doesn't round)
armAsm->Sub(v6.V8H(), v6.V8H(), _vscratch2.V8H());
modulate16_noround(v6, _temp_f);
armAsm->Add(v6.V8H(), v6.V8H(), _vscratch2.V8H());

mix16(v6, v1, v0);
}

Expand Down Expand Up @@ -2362,6 +2369,13 @@ void GSDrawScanlineCodeGenerator::modulate16(const VRegister& d, const VRegister
armAsm->Sshr(a.V8H(), a.V8H(), 1);
}

/// Emits a truncating (non-rounding) Q15 multiply of the eight signed 16-bit lanes of `a` by `f`.
/// The result, (a * f) >> 15 per lane, overwrites `a`.
void GSDrawScanlineCodeGenerator::modulate16_noround(const VRegister& a, const VRegister& f)
{
	// SQDMULH doubles the product before taking its high half:
	//   (2 * a * f) >> 16 == (a * f) >> 15
	// so it is already the non-rounding counterpart of SQRDMULH and needs no pre-shift.
	// Shifting `a` left by one first would yield (a * f) >> 14, i.e. twice the value
	// produced by the x86 psllw + pmulhw sequence. (modulate16() finishes with an extra
	// Sshr by 1, presumably to cancel its own pre-shift; this helper has no such step.)
	// Skipping the shift also avoids losing the top bit of `a` for large magnitudes.
	// The only saturating case is a == f == -32768.
	armAsm->Sqdmulh(a.V8H(), a.V8H(), f.V8H());
}

void GSDrawScanlineCodeGenerator::lerp16(const VRegister& a, const VRegister& b, const VRegister& f, u8 shift)
{
armAsm->Sub(a.V8H(), a.V8H(), b.V8H());
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/Renderers/SW/GSDrawScanlineCodeGenerator.arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class GSDrawScanlineCodeGenerator

void modulate16(const vixl::aarch64::VRegister& d, const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& f, u8 shift);
void modulate16(const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& f, u8 shift);
/// Multiplies the signed 16-bit lanes of `a` by `f` in place using a non-rounding high-half multiply (SQDMULH)
void modulate16_noround(const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& f);
void lerp16(const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& b, const vixl::aarch64::VRegister& f, u8 shift);
void lerp16_4(const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& b, const vixl::aarch64::VRegister& f);
void mix16(const vixl::aarch64::VRegister& a, const vixl::aarch64::VRegister& b, const vixl::aarch64::VRegister& temp);
Expand Down