Skip to content

DX12: Align copies b/w textures and buffers when D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported is false #7721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ Naga now requires that no type be larger than 1 GB. This limit may be lowered in

### Bug Fixes

#### DX12

- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).

#### Vulkan

Fix `STATUS_HEAP_CORRUPTION` crash when concurrently calling `create_sampler`. By @atlv24 in [#8043](https://github.com/gfx-rs/wgpu/pull/8043).
Expand Down
12 changes: 1 addition & 11 deletions tests/tests/wgpu-gpu/regression/issue_6827.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
.expect_fail(FailureCase::backend_adapter(
wgpu::Backends::METAL,
"Apple Paravirtual device", // CI on M1
))
.expect_fail(
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
FailureCase::backend(wgpu::Backends::DX12)
.flaky()
.validation_error(
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
)
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
),
)),
)
.run_async(|ctx| async move { run_test(ctx, true).await });

Expand Down
16 changes: 16 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,21 @@ impl super::Adapter {
.is_ok()
};

let unrestricted_buffer_texture_copy_pitch_supported = {
let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default();
unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13,
<*mut _>::cast(&mut features13),
size_of_val(&features13) as u32,
)
}
.is_ok()
&& features13
.UnrestrictedBufferTextureCopyPitchSupported
.as_bool()
};

let mut max_sampler_descriptor_heap_size =
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
{
Expand Down Expand Up @@ -302,6 +317,7 @@ impl super::Adapter {
suballocation_supported: !info.name.contains("Iris(R) Xe"),
shader_model,
max_sampler_descriptor_heap_size,
unrestricted_buffer_texture_copy_pitch_supported,
};

// Theoretically vram limited, but in practice 2^20 is the limit
Expand Down
163 changes: 152 additions & 11 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
dxgi::{name::ObjectExt, result::HResult as _},
},
dx12::borrow_interface_temporarily,
AccelerationStructureEntries,
AccelerationStructureEntries, CommandEncoder as _,
};

fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
Expand Down Expand Up @@ -312,6 +312,78 @@ impl super::CommandEncoder {
}
}
}

unsafe fn buf_tex_intermediate<T>(
&mut self,
region: crate::BufferTextureCopy,
tex_fmt: wgt::TextureFormat,
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
) -> Option<(T, super::Buffer)> {
let size = {
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
tex_fmt,
region.texture_base.aspect.map(),
&region.size.into(),
);
copy_info.unwrap().bytes_in_copy
};

let size = wgt::BufferSize::new(size)?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can unwrap here, since it would simplify the code and 0 sized copies shouldn't appear at the hal level.


let buffer = {
let (resource, allocation) =
super::suballocation::DeviceAllocationContext::from(&*self)
.create_buffer(&crate::BufferDescriptor {
label: None,
size: size.get(),
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
memory_flags: crate::MemoryFlags::empty(),
})
.expect(concat!(
"internal error: ",
"failed to allocate intermediate buffer ",
"for offset alignment"
));
super::Buffer {
resource,
size: size.get(),
allocation,
}
};

let mut region = region;
region.buffer_layout.offset = 0;

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::empty(),
to: wgt::BufferUses::COPY_DST,
},
}]
.into_iter(),
)
};

let t = copy_op(self, &buffer, size, region);

unsafe {
self.transition_buffers(
[crate::BufferBarrier {
buffer: &buffer,
usage: crate::StateTransition {
from: wgt::BufferUses::COPY_DST,
to: wgt::BufferUses::COPY_SRC,
},
}]
.into_iter(),
)
};

Some((t, buffer))
}
}

impl crate::CommandEncoder for super::CommandEncoder {
Expand Down Expand Up @@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
Ok(super::CommandBuffer { raw })
}
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
self.intermediate_copy_bufs.clear();
for cmd_buf in command_buffers {
self.free_lists.push(cmd_buf.raw);
}
Expand Down Expand Up @@ -612,30 +685,61 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for naive_copy_region in regions {
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
let (final_copy_region, src) = if is_offset_aligned {
(naive_copy_region, src)
} else {
let Some((intermediate_to_dst_region, intermediate_buf)) = (unsafe {
let src_offset = naive_copy_region.buffer_layout.offset;
self.buf_tex_intermediate(
naive_copy_region,
dst.format,
|this, buf, size, intermediate_to_dst_region| {
let layout = crate::BufferCopy {
src_offset,
dst_offset: 0,
size,
};
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
intermediate_to_dst_region
},
)
}) else {
continue;
};
self.intermediate_copy_bufs.push(intermediate_buf);
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
(intermediate_to_dst_region, intermediate_buf)
};

let list = self.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
PlacedFootprint: r.to_subresource_footprint(dst.format),
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
},
};
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
SubresourceIndex: dst
.calc_subresource_for_copy(&final_copy_region.texture_base),
},
};

let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
unsafe {
list.CopyTextureRegion(
&dst_location,
r.texture_base.origin.x,
r.texture_base.origin.y,
r.texture_base.origin.z,
final_copy_region.texture_base.origin.x,
final_copy_region.texture_base.origin.y,
final_copy_region.texture_base.origin.z,
&src_location,
Some(&src_box),
)
Expand All @@ -652,8 +756,12 @@ impl crate::CommandEncoder for super::CommandEncoder {
) where
T: Iterator<Item = crate::BufferTextureCopy>,
{
let list = self.list.as_ref().unwrap();
for r in regions {
let copy_aligned = |this: &mut Self,
src: &super::Texture,
dst: &super::Buffer,
r: crate::BufferTextureCopy| {
let list = this.list.as_ref().unwrap();

let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
Expand All @@ -673,6 +781,39 @@ impl crate::CommandEncoder for super::CommandEncoder {
unsafe {
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box))
};
};

let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();

for r in regions {
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
if is_offset_aligned {
copy_aligned(self, src, dst, r)
} else {
let orig_offset = r.buffer_layout.offset;
let Some((intermediate_to_dst_region, src)) = (unsafe {
self.buf_tex_intermediate(
r,
src.format,
|this, buf, size, intermediate_region| {
copy_aligned(this, src, buf, intermediate_region);
crate::BufferCopy {
src_offset: orig_offset,
dst_offset: 0,
Comment on lines +801 to +802
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These seem swapped.

size,
}
},
)
}) else {
continue;
};

unsafe {
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
}

self.intermediate_copy_bufs.push(src);
};
}
}

Expand Down
1 change: 1 addition & 0 deletions wgpu-hal/src/dx12/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,7 @@ impl crate::Device for super::Device {
mem_allocator: self.mem_allocator.clone(),
rtv_pool: Arc::clone(&self.rtv_pool),
temp_rtv_handles: Vec::new(),
intermediate_copy_bufs: Vec::new(),
null_rtv_handle: self.null_rtv_handle,
list: None,
free_lists: Vec::new(),
Expand Down
19 changes: 18 additions & 1 deletion wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,11 @@ use windows::{
core::{Free, Interface},
Win32::{
Foundation,
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
Graphics::{
Direct3D,
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
DirectComposition, Dxgi,
},
System::Threading,
},
};
Expand Down Expand Up @@ -581,6 +585,17 @@ struct PrivateCapabilities {
suballocation_supported: bool,
shader_model: naga::back::hlsl::ShaderModel,
max_sampler_descriptor_heap_size: u32,
unrestricted_buffer_texture_copy_pitch_supported: bool,
}

impl PrivateCapabilities {
fn texture_data_placement_alignment(&self) -> u64 {
if self.unrestricted_buffer_texture_copy_pitch_supported {
4
} else {
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
}
}
}

#[derive(Default)]
Expand Down Expand Up @@ -816,6 +831,8 @@ pub struct CommandEncoder {
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
temp_rtv_handles: Vec<descriptor::Handle>,

intermediate_copy_bufs: Vec<Buffer>,

null_rtv_handle: descriptor::Handle,
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,
Expand Down
30 changes: 30 additions & 0 deletions wgpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2430,6 +2430,36 @@ pub struct CopyExtent {
pub depth: u32,
}

impl From<wgt::Extent3d> for CopyExtent {
fn from(value: wgt::Extent3d) -> Self {
let wgt::Extent3d {
width,
height,
depth_or_array_layers,
} = value;
Self {
width,
height,
depth: depth_or_array_layers,
}
}
}

impl From<CopyExtent> for wgt::Extent3d {
fn from(value: CopyExtent) -> Self {
let CopyExtent {
width,
height,
depth,
} = value;
Self {
width,
height,
depth_or_array_layers: depth,
}
}
}

#[derive(Clone, Debug)]
pub struct TextureCopy {
pub src_base: TextureCopyBase,
Expand Down
Loading