diff --git a/Cargo.lock b/Cargo.lock index a6b99aad..84a292e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -749,9 +749,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" [[package]] name = "heapless" @@ -828,6 +828,15 @@ dependencies = [ "kernel_guard", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + [[package]] name = "lazyinit" version = "0.2.1" @@ -982,7 +991,7 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "page_table_entry" version = "0.5.3" -source = "git+https://github.com/Mivik/page_table_multiarch.git?rev=19ededd#19ededdb806ab3b22efb4880661790524fa421a5" +source = "git+https://github.com/OrangeQi-CQ/page_table_multiarch?rev=8bbd18a#8bbd18a15770c74734d2b2f69b69243ee53d6739" dependencies = [ "aarch64-cpu", "bitflags 2.9.0", @@ -993,7 +1002,7 @@ dependencies = [ [[package]] name = "page_table_multiarch" version = "0.5.3" -source = "git+https://github.com/Mivik/page_table_multiarch.git?rev=19ededd#19ededdb806ab3b22efb4880661790524fa421a5" +source = "git+https://github.com/OrangeQi-CQ/page_table_multiarch?rev=8bbd18a#8bbd18a15770c74734d2b2f69b69243ee53d6739" dependencies = [ "log", "memory_addr", @@ -1232,6 +1241,7 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" name = "starry" version = "0.1.0" dependencies = [ + "axalloc", "axerrno", "axfeat", "axfs", @@ -1254,6 +1264,7 @@ dependencies = [ name = "starry-api" version = "0.1.0" dependencies = [ + "axalloc", "axconfig", "axerrno", "axfeat", @@ -1272,6 +1283,7 @@ dependencies = [ "cfg-if", "ctor_bare", "flatten_objects", + "lazy_static", "linkme", "linux-raw-sys", "memory_addr", diff --git a/Cargo.toml b/Cargo.toml index 9d11edb5..3953c61a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ axtask = { git = "https://github.com/oscomp/arceos.git" } axprocess = { git = "https://github.com/Starry-OS/axprocess.git" } axsignal = { git = "https://github.com/Starry-OS/axsignal.git", rev = "b5b6089" } +axalloc = { git = "https://github.com/oscomp/arceos.git" } axerrno = "0.1" bitflags = "2.6" @@ -51,6 +52,7 @@ linux-raw-sys = { version = "0.9.3", default-features = false, features = [ ] } memory_addr = "0.3" spin = "0.9" +lazy_static = { version = "1.5", features = ["spin_no_std"] } starry-core = { path = "./core" } starry-api = { path = "./api" } @@ -78,6 +80,7 @@ axtask.workspace = true axprocess.workspace = true axsignal.workspace = true +axalloc.workspace = true axerrno.workspace = true linkme.workspace = true @@ -90,5 +93,5 @@ shlex = { version = "1.3.0", default-features = false } syscalls = { git = "https://github.com/jasonwhite/syscalls.git", rev = "92624de", default-features = false } [patch.crates-io] -page_table_multiarch = { git = "https://github.com/Mivik/page_table_multiarch.git", rev = "19ededd" } -page_table_entry = { git = "https://github.com/Mivik/page_table_multiarch.git", rev = "19ededd" } +page_table_multiarch = { git = "https://github.com/OrangeQi-CQ/page_table_multiarch", rev = "8bbd18a"} +page_table_entry = { git = "https://github.com/OrangeQi-CQ/page_table_multiarch", rev = "8bbd18a"} 
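Note: the new `lazy_static` dependency is pulled in with the `spin_no_std` feature so it works in this no_std kernel. A minimal sketch of the usage pattern the PR relies on (it mirrors the GLOBAL_PAGE_POOL declaration added later in api/src/file/page_cache.rs; the sketch itself is not part of the diff):

    // Assumes the axalloc/axsync types that page_cache.rs uses.
    use alloc::{sync::Arc, vec::Vec};
    use axalloc::GlobalPage;
    use axsync::Mutex;
    use lazy_static::lazy_static;

    lazy_static! {
        // Initialized lazily on first access; `spin_no_std` backs the one-time
        // initialization with a spin lock instead of std primitives.
        static ref GLOBAL_PAGE_POOL: Arc<Mutex<Vec<GlobalPage>>> =
            Arc::new(Mutex::new(Vec::new()));
    }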
diff --git a/api/Cargo.toml b/api/Cargo.toml index 1065ed0a..058fa9e2 100644 --- a/api/Cargo.toml +++ b/api/Cargo.toml @@ -18,6 +18,7 @@ axnet.workspace = true axns.workspace = true axsync.workspace = true axtask.workspace = true +axalloc.workspace = true axprocess.workspace = true axsignal.workspace = true @@ -36,6 +37,7 @@ axio = "0.1.1" ctor_bare = "0.2.1" flatten_objects = "0.2.3" num_enum = { version = "0.7", default-features = false } +lazy_static = { version = "1.5", features = ["spin_no_std"] } [target.'cfg(target_arch = "x86_64")'.dependencies] x86 = "0.52" diff --git a/api/src/file/fs.rs b/api/src/file/fs.rs index d4f4d9dc..19eda1c4 100644 --- a/api/src/file/fs.rs +++ b/api/src/file/fs.rs @@ -1,60 +1,258 @@ -use core::{any::Any, ffi::c_int}; +use core::{ + any::Any, + ffi::c_int, + sync::atomic::{AtomicUsize, Ordering}, +}; -use alloc::{string::String, sync::Arc}; +use alloc::{ + string::String, + sync::{Arc, Weak}, +}; use axerrno::{LinuxError, LinuxResult}; use axfs::fops::DirEntry; -use axio::PollState; +use axio::{PollState, SeekFrom}; use axsync::{Mutex, MutexGuard}; use linux_raw_sys::general::S_IFDIR; -use super::{FileLike, Kstat, get_file_like}; +use super::{ + FileLike, Kstat, get_file_like, + page_cache::{PageCache, page_cache_manager}, +}; -/// File wrapper for `axfs::fops::File`. -pub struct File { - inner: Mutex, - path: String, +/// File wrapper for `axfs::fops::File` with two variants. +pub enum File { + Direct { + path: String, + inner: Arc>, + }, + Cached { + path: String, + size: AtomicUsize, + offset: AtomicUsize, + cache: Weak, + }, } impl File { - pub fn new(inner: axfs::fops::File, path: String) -> Self { - Self { - inner: Mutex::new(inner), + pub fn new_file_direct(path: String, inner: Arc>) -> Self { + let size = { + let inner = inner.clone(); + let inner = inner.lock(); + let metadata = inner.get_attr().unwrap(); + metadata.size() as usize + }; + File::Direct { + path, + inner, + } + } + + pub fn new_file_cached(path: String, cache: Weak) -> Self { + let size = { + let cache = cache.upgrade().unwrap(); + cache.file_size() + }; + File::Cached { path, + size: AtomicUsize::new(size), + offset: AtomicUsize::new(0), + cache, + } + } + + pub fn size(&self) -> usize { + match self { + File::Direct { inner, .. } => { + let inner = inner.lock(); + inner.get_attr().unwrap().size() as usize + }, + File::Cached { size, .. } => { + size.load(Ordering::Relaxed) + } + } + } + + pub fn offset(&self) -> usize { + match self { + File::Direct { .. } => { + panic!("Please use seek for File::Direc"); + }, + File::Cached { offset, .. } => { + offset.load(Ordering::Relaxed) + } } } /// Get the path of the file. - pub fn path(&self) -> &str { - &self.path + pub fn path(&self) -> &String { + match self { + File::Direct { path, .. } | File::Cached { path, .. } => path, + } } - /// Get the inner node of the file. - pub fn inner(&self) -> MutexGuard { - self.inner.lock() + /// Get the cache of the file. Only available in File::Cached. + pub fn cache(&self) -> Arc { + match self { + File::Direct { .. } => { + panic!("File::Direct doesn't have cache."); + } + File::Cached { cache, .. } => cache.upgrade().unwrap(), + } + } + + /// Get the inner node of the file. Only available in File::Direct. + fn inner(&self) -> MutexGuard { + match self { + File::Direct { inner, .. } => inner.lock(), + File::Cached { .. } => { + panic!("File::Cached doesn't have inner."); + } + } + } + + pub fn seek(&self, pos: SeekFrom) -> LinuxResult { + match self { + File::Direct { .. 
} => { + let mut inner = self.inner(); + match inner.seek(pos) { + Ok(pos) => Ok(pos as isize), + Err(e) => { + error!("Seek failed: {}", e); + Err(LinuxError::EINVAL) + } + } + } + File::Cached { offset, .. } => { + let size = self.size() as u64; + let offset_val = offset.load(Ordering::Relaxed) as u64; + let new_offset = match pos { + SeekFrom::Start(pos) => Some(pos), + SeekFrom::Current(off) => offset_val.checked_add_signed(off), + SeekFrom::End(off) => size.checked_add_signed(off), + } + .unwrap_or(0); + offset.store(new_offset as usize, Ordering::SeqCst); + Ok(new_offset as isize) + } + } + } + + pub fn fsync(&self) -> LinuxResult { + match self { + File::Direct { .. } => Ok(0), + File::Cached { .. } => { + let cache = self.cache(); + Ok(cache.sync()?) + } + } + } + + pub fn read_at(&self, buf: &mut [u8], offset: usize) -> LinuxResult { + match self { + File::Direct { .. } => { + let inner = self.inner(); + Ok(inner.read_at(offset as u64, buf)?) + } + File::Cached { .. } => { + let cache = self.cache(); + Ok(cache.read_at(offset, buf)) + } + } + } + + pub fn write_at(&self, buf: &[u8], offset: usize) -> LinuxResult { + match self { + File::Direct { .. } => { + let inner = self.inner(); + Ok(inner.write_at(offset as u64, buf)?) + } + File::Cached { .. } => { + let cache = self.cache(); + Ok(cache.write_at(offset, buf)) + } + } + } + + pub fn truncate(&self, offset: usize) -> LinuxResult { + match self { + File::Direct { .. } => { + let inner = self.inner(); + inner.truncate(offset as u64)?; + Ok(offset) + } + File::Cached { .. } => { + let cache = self.cache(); + Ok(cache.truncate(offset) as usize) + } + } + } +} + +impl Drop for File { + fn drop(&mut self) { + debug!("Starry-api drop file {}", self.path()); + if let File::Cached { .. } = self { + let cache_manager = page_cache_manager(); + cache_manager.close_page_cache(self.path()); + } } } impl FileLike for File { fn read(&self, buf: &mut [u8]) -> LinuxResult { - Ok(self.inner().read(buf)?) + match self { + File::Direct { .. } => { + let mut inner = self.inner(); + Ok(inner.read(buf)?) + } + File::Cached { offset, .. } => { + let cache = self.cache(); + let offset_val = offset.load(Ordering::SeqCst); + let len = cache.read_at(offset_val, buf); + offset.fetch_add(len, Ordering::SeqCst); + Ok(len) + } + } } fn write(&self, buf: &[u8]) -> LinuxResult { - Ok(self.inner().write(buf)?) + match self { + File::Direct { .. } => { + let mut inner = self.inner(); + Ok(inner.write(buf)?) + } + File::Cached { offset, size, .. } => { + let cache = self.cache(); + let offset_val = offset.load(Ordering::SeqCst); + let len = cache.write_at(offset_val, buf); + offset.fetch_add(len, Ordering::SeqCst); + size.fetch_max(offset_val + len, Ordering::SeqCst); + Ok(len) + } + } } fn stat(&self) -> LinuxResult { - let metadata = self.inner().get_attr()?; - let ty = metadata.file_type() as u8; - let perm = metadata.perm().bits() as u32; - - Ok(Kstat { - mode: ((ty as u32) << 12) | perm, - size: metadata.size(), - blocks: metadata.blocks(), - blksize: 512, - ..Default::default() - }) + match self { + File::Direct { .. } => { + let inner = self.inner(); + let metadata = inner.get_attr()?; + let ty = metadata.file_type() as u8; + let perm = metadata.perm().bits() as u32; + let size = self.size(); + Ok(Kstat { + mode: ((ty as u32) << 12) | perm, + size: size as u64, + blocks: metadata.blocks(), + blksize: 512, + ..Default::default() + }) + } + File::Cached { .. 
} => {
+                let cache = self.cache();
+                cache.stat()
+            }
+        }
     }
 
     fn into_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
diff --git a/api/src/file/mod.rs b/api/src/file/mod.rs
index 8a19843d..2dcc46c7 100644
--- a/api/src/file/mod.rs
+++ b/api/src/file/mod.rs
@@ -1,5 +1,6 @@
 mod fs;
 mod net;
+mod page_cache;
 mod pipe;
 mod stdio;
@@ -16,6 +17,7 @@ use spin::RwLock;
 pub use self::{
     fs::{Directory, File},
     net::Socket,
+    page_cache::page_cache_manager,
     pipe::Pipe,
 };
diff --git a/api/src/file/page_cache.rs b/api/src/file/page_cache.rs
new file mode 100644
index 00000000..d794e5ee
--- /dev/null
+++ b/api/src/file/page_cache.rs
@@ -0,0 +1,881 @@
+use super::Kstat;
+use crate::file::{File, FileLike};
+use alloc::{
+    collections::{btree_map::BTreeMap, btree_set::BTreeSet, linked_list::LinkedList},
+    string::String,
+    sync::{Arc, Weak},
+    vec,
+    vec::Vec,
+};
+use axalloc::GlobalPage;
+use axconfig::plat::PHYS_VIRT_OFFSET;
+use axerrno::{LinuxError, LinuxResult};
+use axfs::fops::OpenOptions;
+use axhal::{arch::flush_tlb, paging::MappingFlags};
+use axio::SeekFrom;
+use axprocess::Pid;
+use axsync::Mutex;
+use axtask::{TaskExtRef, current};
+use core::sync::atomic::{AtomicUsize, Ordering};
+use core::{isize, mem::ManuallyDrop, ops::DerefMut, sync::atomic::AtomicIsize};
+use lazy_static::lazy_static;
+use memory_addr::{PAGE_SIZE_4K, PhysAddr, VirtAddr};
+use spin::{RwLockWriteGuard, rwlock::RwLock};
+use starry_core::task::{ProcessData, get_process};
+
+// GlobalAllocator is not very fast (allocation appears to be O(n)?), so avoid
+// allocating and freeing GlobalPages as much as possible.
+lazy_static! {
+    static ref GLOBAL_PAGE_POOL: Arc<Mutex<Vec<GlobalPage>>> = Arc::new(Mutex::new(Vec::new()));
+}
+
+/// A file page that loads the file content automatically when it is created.
+/// Not safe for concurrent use; the caller must wrap it in a read-write lock.
+///
+/// Dirty-page tracking:
+/// - Pages dirtied by sys_write and friends: these go through Page::write, which sets the
+///   Page's `dirty` field directly.
+/// - Pages dirtied by memory accesses after mmap: plain memory writes do not go through
+///   Page::write, so the dirty bit of the corresponding page-table entry must be queried.
+///   After mmap, touching a virtual page triggers a page fault; Page::map_virt_page then
+///   establishes the page-table mapping and records the (pid, virtual page) pair in
+///   `virt_pages`. When checking the dirty bit, the page tables of all related processes
+///   are inspected.
+struct Page {
+    inner: ManuallyDrop<GlobalPage>,
+    file: Weak<Mutex<axfs::fops::File>>,
+    page_id: usize,
+    dirty: bool,
+    virt_pages: BTreeSet<(Pid, VirtAddr)>,
+}
+
+impl Page {
+    /// Construct a Page.
+    ///
+    /// - Prefer taking a GlobalPage from GLOBAL_PAGE_POOL as `inner`,
+    ///   to avoid GLOBAL_ALLOCATOR allocations and frees as much as possible.
+    fn new() -> Self {
+        let mut inner_page = {
+            let mut global_page_pool = GLOBAL_PAGE_POOL.lock();
+            global_page_pool
+                .pop()
+                .unwrap_or_else(|| GlobalPage::alloc().expect("GlobalPage alloc failed"))
+        };
+        debug!(
+            "New file page: paddr {:#x}",
+            inner_page.start_paddr(axhal::mem::virt_to_phys)
+        );
+        assert!(
+            // Make sure this page lies in the linear mapping region.
+            inner_page.start_vaddr().as_usize() >= PHYS_VIRT_OFFSET,
+            "Converted address is invalid, check if the virtual address is in kernel space"
+        );
+        inner_page.zero();
+
+        Self {
+            inner: ManuallyDrop::new(inner_page),
+            file: Weak::new(),
+            page_id: 0,
+            dirty: false,
+            virt_pages: BTreeSet::new(),
+        }
+    }
+
+    /// Initialize the `file` and `page_id` fields, load the file content, and clear the dirty flag.
+    ///
+    /// Note:
+    /// - Only Page::load performs a file `read` at the axfs layer.
+    fn load(&mut self, file: Weak<Mutex<axfs::fops::File>>, page_id: usize) {
+        debug!("ready to load page {}", self.page_id);
+        self.file = file.clone();
+        self.page_id = page_id;
+        let file_arc = file.upgrade().unwrap();
+        let mut file = file_arc.lock();
+        let offset = (self.page_id * PAGE_SIZE_4K) as u64;
+        file.seek(SeekFrom::Start(offset)).unwrap();
+
+        let buf = self.inner.as_slice_mut();
+        let mut bytes_read = 0;
+        loop {
+            match file.read(&mut buf[bytes_read..]) {
+                Ok(0) => break,
+                Ok(n) => bytes_read += n,
+                _ => break,
+            }
+        }
+        debug!(
+            "Load page {} done, length = {}, {:?}..",
+            self.page_id,
+            bytes_read,
+            &buf[0..5]
+        );
+        self.set_clean();
+    }
+
+    /// Flush the content of this Page back to the file, clear the dirty flag, and return the
+    /// number of bytes written back for this page.
+    ///
+    /// Notes:
+    /// 1. Only Page::flush performs a file `write` at the axfs layer.
+    /// 2. The whole 4K page is written to disk; when file attributes are synced, the upper
+    ///    layer calls truncate to correct the file size.
+    fn flush(&mut self) -> LinuxResult<isize> {
+        let file = self.file.upgrade();
+        if file.is_none() {
+            return Ok(0);
+        }
+        if !self.is_dirty() {
+            return Ok(0);
+        }
+
+        let file_arc = file.unwrap();
+        let mut file = file_arc.lock();
+        let offset = (self.page_id * PAGE_SIZE_4K) as u64;
+
+        // On fatfs, file.seek(SeekFrom::Start(offset as u64)) is extremely slow, apparently an
+        // O(n) chained lookup. ext4 supports O(1) random seeks, so performance is normal there.
+        file.seek(SeekFrom::Start(offset))?;
+        let buf = self.inner.as_slice();
+        let mut bytes_write = 0;
+        loop {
+            match file.write_at(offset + bytes_write, &buf[bytes_write as usize..]) {
+                Ok(0) => break,
+                Ok(n) => bytes_write += n as u64,
+                Err(e) => {
+                    warn!("Write failed at offset {}: {:?}", offset + bytes_write, e);
+                    return Err(e.into());
+                }
+            }
+        }
+
+        debug!(
+            "Flush page {} done, length = {}, {:?}..",
+            self.page_id,
+            bytes_write,
+            &buf[0..5]
+        );
+        self.set_clean();
+        Ok(bytes_write as isize)
+    }
+
+    /// Read data from the cached page in memory into `buf`; returns the number of bytes read.
+    fn read(&self, offset: usize, buf: &mut [u8]) -> LinuxResult<isize> {
+        let start = offset;
+        let end = start + buf.len();
+        if end > PAGE_SIZE_4K {
+            return Err(LinuxError::EINVAL);
+        }
+        assert!(start <= end && end <= PAGE_SIZE_4K);
+        let slice = &self.inner.as_slice()[start..end];
+        buf.copy_from_slice(slice);
+        Ok((end - start) as isize)
+    }
+
+    /// Write the data in `buf` into the cached page in memory; returns the number of bytes written.
+    ///
+    /// The `direct` parameter controls whether to sync to the file immediately; it is mainly
+    /// used by PageCache::fill_gap.
+    fn write(&mut self, offset: usize, buf: &[u8], direct: bool) -> LinuxResult<isize> {
+        debug!(
+            "write page {}, len {}, direct {}",
+            self.page_id,
+            buf.len(),
+            direct
+        );
+        let start = offset;
+        let end = start + buf.len();
+        if end > PAGE_SIZE_4K {
+            return Err(LinuxError::EINVAL);
+        }
+        assert!(start < end && end <= PAGE_SIZE_4K);
+        let slice = &mut self.inner.as_slice_mut()[start..end];
+        slice.copy_from_slice(buf);
+        self.dirty = true;
+        assert!(self.is_dirty());
+        if direct {
+            self.flush()?;
+        }
+        Ok((end - start) as isize)
+    }
+
+    /// Query the actual starting physical address of this page.
+    fn phys_addr(&self) -> PhysAddr {
+        let vaddr = self.inner.start_vaddr();
+        assert!(
+            // Make sure this page lies in the linear mapping region.
+            vaddr.as_usize() >= PHYS_VIRT_OFFSET,
+            "Converted address is invalid, check if the virtual address is in kernel space"
+        );
+        axhal::mem::virt_to_phys(vaddr)
+    }
+
+    /// Walk all page tables that map this physical page and check whether the page is dirty.
+    fn is_dirty(&self) -> bool {
+        if self.dirty {
+            return true;
+        }
+
+        let virt_pages: Vec<_> = self.virt_pages.iter().copied().collect();
+        for (pid, vaddr) in virt_pages {
+            match get_process(pid) {
+                Ok(process) => {
+                    let process_data = process.data::<ProcessData>().unwrap();
+                    let mmap_manager = process_data.vma_mnager();
+
+                    // The process may have munmap'ed the mapping without this Page being
+                    // updated in time.
+                    if mmap_manager.query(vaddr).is_some() {
+                        let mut aspace = process_data.aspace.lock();
+                        if aspace.check_page_dirty(vaddr) {
+                            debug!("check dirty {}, {:#x}", self.page_id, vaddr);
+                            return true;
+                        }
+                    }
+                }
+                _ => continue,
+            }
+        }
+        return false;
+    }
+
+    /// Walk all page tables that map this physical page and clear the dirty bits.
+    fn set_clean(&mut self) {
+        self.dirty = false;
+
+        let virt_pages: Vec<_> = self.virt_pages.iter().copied().collect();
+        for (pid, vaddr) in virt_pages {
+            match get_process(pid) {
+                Ok(process) => {
+                    let process_data = process.data::<ProcessData>().unwrap();
+                    let mmap_manager = process_data.vma_mnager();
+
+                    // The process may have munmap'ed the mapping without this Page being
+                    // updated in time.
+                    if mmap_manager.query(vaddr).is_some() {
+                        debug!("page {} set clean vaddr = {:#x}", self.page_id, vaddr);
+                        let mut aspace = process_data.aspace.lock();
+                        aspace.set_page_dirty(vaddr, false);
+                    } else {
+                        self.virt_pages.remove(&(pid, vaddr));
+                    }
+                }
+                _ => continue,
+            }
+        }
+    }
+
+    /// Map one virtual page of the current process to this Page. Concretely: first try to
+    /// establish the page-table mapping; on success, update `virt_pages`.
+    ///
+    /// **Note: this modifies page tables directly!**
+    fn map_virt_page(&mut self, aligned_virt_page: VirtAddr, access_flags: MappingFlags) -> bool {
+        let curr = current();
+        let pid = curr.task_ext().thread.process().pid();
+        assert!(aligned_virt_page.as_usize() % PAGE_SIZE_4K == 0);
+
+        let mut aspace = curr.task_ext().process_data().aspace.lock();
+        let paddr = self.inner.start_paddr(axhal::mem::virt_to_phys);
+
+        debug!(
+            "Try force map page {}, {:#x} => {:#x}, {}",
+            self.page_id,
+            aligned_virt_page,
+            paddr,
+            current().id_name()
+        );
+
+        // flush first and then set_clean, because merely modifying the page table also causes
+        // the page to be marked dirty.
+        match self.flush() {
+            Ok(_) => (),
+            _ => return false,
+        }
+        if aspace.force_map_page(aligned_virt_page, paddr, access_flags) {
+            self.virt_pages.insert((pid, aligned_virt_page));
+            drop(aspace);
+            self.set_clean();
+            return true;
+        }
+        return false;
+    }
+
+    /// What it does:
+    /// - Remove the mapping from one virtual page of the current process to this Page,
+    ///   modifying the page table.
+    ///
+    /// Notes:
+    /// - The page table is modified here; from the aspace's point of view this page always
+    ///   remains an unallocated, unpopulated page.
+    /// - The page-table changes for dropping a Page are done inside `drop`.
+    fn unmap_virt_page(&mut self, aligned_virt_page: VirtAddr) {
+        let curr = current();
+        let pid = curr.task_ext().thread.process().pid();
+        assert!(aligned_virt_page.as_usize() % PAGE_SIZE_4K == 0);
+
+        // In theory this should never happen.
+        if !self.virt_pages.remove(&(pid, aligned_virt_page)) {
+            return;
+        }
+
+        debug!(
+            "unmap virt page {} ({}, {:#x})",
+            self.page_id, pid, aligned_virt_page
+        );
+
+        let mut aspace = curr.task_ext().process_data().aspace.lock();
+        aspace.force_unmap_page(aligned_virt_page);
+        flush_tlb(Some(aligned_virt_page));
+    }
+}
+
+impl Drop for Page {
+    fn drop(&mut self) {
+        // Write back to the file.
+        self.flush().unwrap_or_else(|_| {
+            error!("Failed to Drop page {}", self.page_id);
+            0
+        });
+
+        // Remove every mapping to this physical page.
+        // Note: this modifies page tables directly!
+        let virt_pages: Vec<_> = self.virt_pages.iter().copied().collect();
+        for (pid, vaddr) in virt_pages {
+            match get_process(pid) {
+                Ok(process) => {
+                    let process_data = process.data::<ProcessData>().unwrap();
+                    let mmap_manager = process_data.vma_mnager();
+
+                    if let Some(_) = mmap_manager.query(vaddr) {
+                        let mut aspace = process_data.aspace.lock();
+                        debug!("force unmap {:#x}", vaddr);
+                        aspace.force_unmap_page(vaddr);
+                    }
+                }
+                _ => continue,
+            }
+        }
+
+        let mut global_page_pool = GLOBAL_PAGE_POOL.lock();
+        let inner = unsafe { ManuallyDrop::take(&mut self.inner) };
+        global_page_pool.push(inner);
+    }
+}
+
+type PageKey = isize;
+type PagePtr = Arc<RwLock<Page>>;
+
+/// A concurrency-safe pool of file pages.
+struct PagePool {
+    // The only data structure that owns Pages; removing a PageKey from `pages` drops that Page.
+    pages: RwLock<BTreeMap<PageKey, PagePtr>>,
+    // Maximum number of Pages allowed to be held at the same time.
+    max_size: AtomicUsize,
+    // Used to allocate page_keys.
+    page_key_cnt: AtomicIsize,
+    // Pages that have not been modified.
+    clean_list: Mutex<LinkedList<PageKey>>,
+    // Pages that have been modified.
+    dirty_list: Mutex<LinkedList<PageKey>>,
+}
+
+impl PagePool {
+    fn new(max_size: usize) -> Self {
+        Self {
+            pages: RwLock::new(BTreeMap::new()),
+            max_size: AtomicUsize::new(max_size),
+            page_key_cnt: AtomicIsize::new(0),
+            clean_list: Mutex::new(LinkedList::new()),
+            dirty_list: Mutex::new(LinkedList::new()),
+        }
+    }
+
+    /// Adjust the size of the PagePool.
+    /// Currently unused; kept as an extension point, e.g. for dynamic resizing based on
+    /// memory usage.
+    fn resize(&self, size: usize) {
+        self.max_size.store(size, Ordering::SeqCst);
+        let mut pages = self.pages.write();
+        while pages.len() >= self.max_size.load(Ordering::SeqCst) {
+            self.auto_drop_page(&mut pages);
+        }
+        let mut global_page_pool = GLOBAL_PAGE_POOL.lock();
+        while pages.len() + global_page_pool.len() > size {
+            global_page_pool.pop();
+        }
+    }
+
+    /// What it does:
+    /// - Try to get the page's Arc pointer by `page_key`;
+    /// - If the lookup hits, the returned PageKey is exactly the `page_key` passed in;
+    /// - If the lookup misses, the page has already been dropped, so a new page is allocated;
+    ///   the returned PageKey != the `page_key` passed in, and the caller must then
+    ///   initialize this page.
+    fn acquire_page(&self, page_key: PageKey) -> (PagePtr, PageKey) {
+        let pages = self.pages.read();
+        if let Some(page) = pages.get(&page_key) {
+            return (page.clone(), page_key);
+        }
+        drop(pages);
+
+        let mut pages = self.pages.write();
+        while pages.len() >= self.max_size.load(Ordering::SeqCst) {
+            self.auto_drop_page(&mut pages);
+        }
+        let page = Arc::new(RwLock::new(Page::new()));
+        let key = self.page_key_cnt.fetch_add(1, Ordering::SeqCst);
+        pages.insert(key.clone(), page.clone());
+        let mut clean_list = self.clean_list.lock();
+        clean_list.push_back(key);
+        (page, key)
+    }
+
+    /// Check by `page_key` whether the page is still in the PagePool.
+    fn check_page_exist(&self, page_key: &PageKey) -> bool {
+        let pages = self.pages.read();
+        pages.contains_key(page_key)
+    }
+
+    /// Automatically pick a Page and drop it.
+    fn auto_drop_page(&self, pages: &mut RwLockWriteGuard<BTreeMap<PageKey, Arc<RwLock<Page>>>>) {
+        // Pick the page to be discarded.
+        let key = {
+            let mut clean_list = self.clean_list.lock();
+            let mut dirty_list = self.dirty_list.lock();
+
+            let mut drop_key: Option<PageKey> = None;
+            while !clean_list.is_empty() {
+                let key = clean_list.pop_front().unwrap();
+                let _page = pages.get(&key);
+                if _page.is_none() {
+                    continue;
+                }
+                let arc_page = _page.unwrap();
+                let page = arc_page.read();
+                if page.is_dirty() {
+                    dirty_list.push_back(key);
+                } else {
+                    drop_key = Some(key);
+                    break;
+                }
+            }
+            if drop_key.is_some() {
+                drop_key.unwrap()
+            } else {
+                dirty_list.pop_front().unwrap() as isize
+            }
+        };
+
+        pages.remove(&key);
+        debug!("Auto drop page: {:#x}", key);
+    }
+
+    /// Flush a page back to the file and drop it.
+    /// 1. Used for externally requested page replacement; thanks to Rust's RAII, this
+    ///    automatically runs Page's `drop`, which writes the dirty page back.
+    /// 2. The clean/dirty page lists are not modified here.
+    fn drop_page(&self, page_key: PageKey) {
+        let mut pages = self.pages.write();
+        pages.remove(&page_key);
+    }
+}
+
+/// The global PagePool; this could be extended to bind PagePools to PageCaches more flexibly.
+const DEFAULT_PAGE_POOL_SIZE: usize = 10;
+lazy_static! {
+    static ref PAGE_POOL: Arc<PagePool> = Arc::new(PagePool::new(DEFAULT_PAGE_POOL_SIZE));
+}
+
+/// Maintains the page cache of one file and takes over the file's basic operations.
+pub struct PageCache {
+    // Path of the backing file.
+    path: String,
+    // Pointer to the axfs-level File.
+    file: Arc<Mutex<axfs::fops::File>>,
+    // File size.
+    size: AtomicUsize,
+    // Largest page id in use.
+    max_page_id: AtomicIsize,
+    // Largest page id reserved by fill_gap.
+    reserved_page_id: AtomicIsize,
+    // The PagePool in use.
+    page_pool: Arc<PagePool>,
+    // Cache of page id -> PageKey.
+    pages: RwLock<BTreeMap<usize, PageKey>>,
+    // dirty_pages only tracks pages dirtied by write; it cannot track pages dirtied through
+    // address mappings after mmap.
+    dirty_pages: RwLock<BTreeSet<usize>>,
+}
+
+impl PageCache {
+    pub fn new(path: String) -> Self {
+        warn!("Create Pagecache {}", path);
+        let opts = OpenOptions::new().set_read(true).set_write(true);
+        let file = axfs::fops::File::open(path.as_str(), &opts).unwrap();
+        let file = Arc::new(Mutex::new(file));
+        let size = {
+            let file = file.lock();
+            file.get_attr().unwrap().size()
+        };
+
+        let max_page_id = {
+            if size == 0 {
+                -1 as isize
+            } else {
+                (size as isize - 1) / PAGE_SIZE_4K as isize
+            }
+        };
+
+        Self {
+            path: path,
+            file: file.clone(),
+            size: AtomicUsize::new(size as usize),
+            max_page_id: AtomicIsize::new(max_page_id),
+            reserved_page_id: AtomicIsize::new(max_page_id),
+            page_pool: PAGE_POOL.clone(),
+            pages: RwLock::new(BTreeMap::new()),
+            dirty_pages: RwLock::new(BTreeSet::new()),
+        }
+    }
+
+    /// Map `aligned_vaddr` of the current process to page `page_id` of the file.
+    ///
+    /// Note: modifies page tables directly!
+    pub fn map_virt_page(
+        &self,
+        page_id: usize,
+        aligned_vaddr: VirtAddr,
+        access_flags: MappingFlags,
+    ) -> bool {
+        let rwlock = self.acquire_page(page_id);
+        let mut page = rwlock.write();
+        page.map_virt_page(aligned_vaddr, access_flags)
+    }
+
+    /// Remove the mapping from `aligned_vaddr` of the current process to page `page_id` of the file.
+    pub fn unmap_virt_page(&self, page_id: usize, aligned_vaddr: VirtAddr) {
+        self._with_valid_page(page_id, |page| {
+            page.unmap_virt_page(aligned_vaddr);
+        });
+    }
+
+    /// Sync cached page `page_id` to the file.
+    pub fn flush_page(&self, page_id: usize) -> LinuxResult<isize> {
+        self._with_valid_page(page_id, |page| page.flush());
+        Ok(0)
+    }
+
+    /// Concurrency-safe read at the given position; returns the number of bytes read.
+    pub fn read_at(&self, offset: usize, buf: &mut [u8]) -> usize {
+        let file_len = self.size.load(Ordering::SeqCst);
+        let read_len = buf.len().min(file_len - offset);
+
+        if offset >= file_len || read_len == 0 {
+            return 0;
+        }
+        let mut ret = 0 as usize;
+        while ret < read_len {
+            let current_pos = offset + ret;
+            let page_id = current_pos / PAGE_SIZE_4K;
+            let page_offset_start = current_pos % PAGE_SIZE_4K;
+            // Space left in the current page.
+            let bytes_left_in_page = PAGE_SIZE_4K - page_offset_start;
+            // Maximum length readable this round (bounded by the page end and the total read length).
+            let cur_len = bytes_left_in_page.min(read_len - ret);
+            let slice = &mut buf[ret..ret + cur_len];
+            self.read_slice_from_page(page_id, page_offset_start, slice)
+                .expect("PageCache read failed");
+            ret += cur_len;
+        }
+        ret
+    }
+
+    /// Concurrency-safe write at the given position; updates the file size and returns the
+    /// number of bytes written.
+    pub fn write_at(&self, offset: usize, buf: &[u8]) -> usize {
+        // Fill the gap with zeros.
+        self.fill_gap(offset);
+
+        let mut ret = 0;
+        let write_len = buf.len();
+
+        while ret < write_len {
+            let current_pos = offset + ret;
+            let page_id = current_pos / PAGE_SIZE_4K;
+            let page_offset_start = current_pos % PAGE_SIZE_4K;
+            // Space left in the current page.
+            let bytes_left_in_page = PAGE_SIZE_4K - page_offset_start;
+            // Maximum length writable this round.
+            let cur_len = bytes_left_in_page.min(write_len - ret);
+
+            // Write the data from buf into the page cache; the page is marked dirty automatically.
+            let slice = &buf[ret..ret + cur_len];
+            let len = self
+                .write_slice_into_page(page_id, page_offset_start, slice, false)
+                .expect("PageCache write_at failed");
+            ret += len as usize;
+            self.size.fetch_max(offset + ret, Ordering::SeqCst);
+        }
+        ret
+    }
+
+    pub fn truncate(&self, offset: usize) -> usize {
+        if self.size.fetch_min(offset, Ordering::SeqCst) == offset {
+            return offset;
+        }
+        self.fill_gap(offset);
+        return offset;
+    }
+
+    /// Get the file attributes. Only `size` is maintained by the page cache; everything else
+    /// is read from the axfs layer.
+    pub fn stat(&self) -> LinuxResult<Kstat> {
+        let file = self.file.lock();
+        let metadata = file.get_attr()?;
+        let ty = metadata.file_type() as u8;
+        let perm = metadata.perm().bits() as u32;
+
+        return Ok(Kstat {
+            mode: ((ty as u32) << 12) | perm,
+            size: self.size.load(Ordering::SeqCst) as u64,
+            blocks: metadata.blocks(),
+            blksize: 512,
+            ..Default::default()
+        });
+    }
+
+    /// Get the file size.
+    pub fn file_size(&self) -> usize {
+        self.size.load(Ordering::SeqCst)
+    }
+
+    /// Sync modified content to the file.
+    ///
+    /// Note: file modifications made through memory reads/writes after mmap do not go through
+    /// the page cache, so they leave no record in dirty_pages. However, munmap walks all pages
+    /// and flushes them, which guarantees the data reaches the file.
+    pub fn sync(&self) -> LinuxResult<isize> {
+        let page_ids: Vec<usize> = {
+            let dirty_pages_lock = self.dirty_pages.read();
+            dirty_pages_lock.iter().copied().collect()
+        };
+
+        for page_id in page_ids {
+            self.flush_page(page_id)
+                .expect("PageCache::sync failed to flush page");
+        }
+
+        let mut dirty_pages = self.dirty_pages.write();
+        dirty_pages.clear();
+        drop(dirty_pages);
+
+        self.update_metadata()?;
+        Ok(0)
+    }
+
+    /// Get the Arc pointer of the Page for `page_id`.
+    /// If it is not in the PagePool right now, create a new Page and initialize it.
+    fn acquire_page(&self, page_id: usize) -> PagePtr {
+        let mut pages = self.pages.write();
+        let orig_key = pages.get(&page_id).cloned().unwrap_or(-1);
+
+        let (page, new_key) = self.page_pool.acquire_page(orig_key);
+        if orig_key != new_key {
+            let mut page = page.write();
+            page.load(Arc::downgrade(&self.file), page_id);
+            pages.remove(&page_id);
+            pages.insert(page_id, new_key);
+        }
+        page
+    }
+
+    /// Check whether the page numbered `page_id` exists in the PagePool; if it does, run the
+    /// operation `f` on it.
+    fn _with_valid_page<F, T>(&self, page_id: usize, f: F) -> bool
+    where
+        F: FnOnce(&mut Page) -> T,
+    {
+        {
+            let pages = self.pages.read();
+            if !pages.contains_key(&page_id) {
+                return false;
+            }
+            let page_key = pages.get(&page_id).unwrap();
+            if !self.page_pool.check_page_exist(page_key) {
+                return false;
+            }
+        }
+
+        let rwlock = self.acquire_page(page_id);
+        let mut page = rwlock.write();
+        f(page.deref_mut());
+        true
+    }
+
+    /// Read the content of page `page_id`, starting at `page_start`, into `buf`.
+    fn read_slice_from_page(
+        &self,
+        page_id: usize,
+        page_start: usize,
+        buf: &mut [u8],
+    ) -> LinuxResult<isize> {
+        let rwlock = self.acquire_page(page_id);
+        let page = rwlock.read();
+        page.read(page_start, buf)
+    }
+
+    /// Write the content of `buf` into page `page_id`, starting at `page_start`.
+    ///
+    /// The `fill_gap` parameter means this write only extends the file size and writes all
+    /// zeros. It must be flushed back to the file immediately (direct), and if the page is
+    /// already dirty (written by another process) the write is skipped.
+    fn write_slice_into_page(
+        &self,
+        page_id: usize,
+        page_start: usize,
+        buf: &[u8],
+        fill_gap: bool,
+    ) -> LinuxResult<isize> {
+        {
+            let mut dirty_pages = self.dirty_pages.write();
+            dirty_pages.insert(page_id);
+        }
+
+        let rwlock = self.acquire_page(page_id);
+        let mut page = rwlock.write();
+
+        // Avoid this situation: the file starts empty, process A wants to write page 8 and
+        // process B wants to write page 4, and page 4 gets cleared by A after B has already
+        // written to it.
+        if fill_gap && self.size.load(Ordering::SeqCst) > page_id * PAGE_SIZE_4K {
+            return Ok(0);
+        }
+
+        page.write(page_start, buf, fill_gap)
+    }
+
+    /// Handle newly created pages: initialize them to all zeros.
+    fn fill_gap(&self, offset: usize) {
+        let orig_size = self.size.load(Ordering::SeqCst);
+        if offset <= orig_size {
+            return;
+        }
+
+        let buf = vec![0 as u8; PAGE_SIZE_4K];
+        let start_offset = orig_size;
+        let start_page_id = start_offset / PAGE_SIZE_4K;
+        let end_page_id = offset / PAGE_SIZE_4K;
+
+        for page_id in start_page_id..end_page_id {
+            self.write_slice_into_page(page_id, 0, &buf, true)
+                .expect("Failed to write gap");
+            self.size
+                .fetch_max(offset.min((page_id + 1) * PAGE_SIZE_4K), Ordering::SeqCst);
+        }
+    }
+
+    /// Mainly updates the file size here; could be extended later to sync more information,
+    /// such as modification times.
+    fn update_metadata(&self) -> LinuxResult<isize> {
+        let file = self.file.lock();
+
+        // Update the file size.
+        let size = self.size.load(Ordering::SeqCst);
+        if size != file.get_attr()?.size() as usize {
+            assert!(size <= file.get_attr()?.size() as usize);
+            file.truncate(size as u64).unwrap_or_else(|e| {
+                error!("Failed to truncate file to size {}: {}", size, e);
+            });
+        }
+        info!("Update File Metadata size {}", file.get_attr()?.size());
+        Ok(0)
+    }
+
+    /// Remove all pages of this file from the PagePool and sync the metadata.
+    fn clear(&self) -> LinuxResult<isize> {
+        let page_keys: Vec<PageKey> = {
+            let pages = self.pages.read();
+            pages.values().copied().collect()
+        };
+        for key in page_keys {
+            self.page_pool.drop_page(key);
+        }
+        self.update_metadata()?;
+        let mut pages = self.pages.write();
+        pages.clear();
+        Ok(0)
+    }
+}
+
+impl Drop for PageCache {
+    fn drop(&mut self) {
+        warn!("Drop page cache {}", self.path);
+        self.clear().expect("PageCache::drop failed to clear");
+    }
+}
+
+/// Only the PageCacheManager holds the Arc of a PageCache long-term; everywhere else may hold
+/// it only temporarily. Once the Arc is removed from the manager, the PageCache is destructed
+/// via RAII.
+pub struct PageCacheManager {
+    path_cache: Mutex<BTreeMap<String, Arc<PageCache>>>,
+    path_cnt: Mutex<BTreeMap<String, i32>>,
+}
+
+impl PageCacheManager {
+    const fn new() -> Self {
+        Self {
+            path_cache: Mutex::new(BTreeMap::new()),
+            path_cnt: Mutex::new(BTreeMap::new()),
+        }
+    }
+
+    // Called by File's constructor: get the page cache pointer and bump the open count of `path`.
+    pub fn open_page_cache(&self, path: &String) -> Weak<PageCache> {
+        let mut path_cnt = self.path_cnt.lock();
+        let mut path_cache = self.path_cache.lock();
+
+        if let Some(cache) = path_cache.get(path) {
+            let cnt = path_cnt.get_mut(path).unwrap();
+            *cnt += 1;
+            return Arc::downgrade(cache);
+        }
+
+        info!("Create a new page cache for {}", path);
+        assert!(!path_cnt.contains_key(path));
+        path_cnt.insert(path.clone(), 1);
+        let cache = Arc::new(PageCache::new(path.clone()));
+        path_cache.insert(path.clone(), cache.clone());
+        assert!(path_cache.contains_key(path));
+        return Arc::downgrade(&cache);
+    }
+
+    /// Called by File's destructor: decrement the reference count of `path`.
+    pub fn close_page_cache(&self, path: &String) {
+        let mut path_cnt = self.path_cnt.lock();
+        let mut path_cache = self.path_cache.lock();
+        let cnt = path_cnt.get_mut(path).unwrap();
+        *cnt -= 1;
+
+        if *cnt == 0 as i32 {
+            info!("Close page cache for {}", path);
+            path_cache.remove(path).unwrap();
+            path_cnt.remove(path).unwrap();
+        }
+    }
+
+    pub fn populate(&self, fd: i32, offset: usize, length: usize) {
+        let cache = self.fd_cache(fd);
+        let start_id = offset / PAGE_SIZE_4K;
+        let end_id = (offset + length) / PAGE_SIZE_4K;
+        for id in start_id..end_id {
+            let _ = cache.acquire_page(id);
+        }
+    }
+
+    pub fn munmap(&self, fd: i32, offset: usize, length: usize, vaddr: usize) {
+        let cache = self.fd_cache(fd);
+        let start_id = offset / PAGE_SIZE_4K;
+        let end_id = (offset + length) / PAGE_SIZE_4K;
+        for id in start_id..end_id {
+            let vaddr = VirtAddr::from(vaddr + (id - start_id) * PAGE_SIZE_4K);
+            cache.unmap_virt_page(id, vaddr);
+        }
+    }
+
+    pub fn msync(&self, fd: i32, offset: usize, length: usize) {
+        let cache = self.fd_cache(fd);
+        let start_id = offset / PAGE_SIZE_4K;
+        let end_id = (offset + length) / PAGE_SIZE_4K;
+        for id in start_id..end_id {
+            cache.flush_page(id).expect("msync flush page failed");
+        }
+    }
+
+    // Mainly used by mmap-related functions, which need to find the page cache for a given fd.
+    pub fn fd_cache(&self, fd: i32) -> Arc<PageCache> {
+        let file = File::from_fd(fd).unwrap();
+        file.cache()
+    }
+}
+
+lazy_static! {
+    static ref PAGE_CACHE_MANAGER: Arc<PageCacheManager> = Arc::new(PageCacheManager::new());
+}
+
+pub fn page_cache_manager() -> Arc<PageCacheManager> {
+    PAGE_CACHE_MANAGER.clone()
+}
diff --git a/api/src/imp/fs/fd_ops.rs b/api/src/imp/fs/fd_ops.rs
index 0d5fa34b..eb9b0851 100644
--- a/api/src/imp/fs/fd_ops.rs
+++ b/api/src/imp/fs/fd_ops.rs
@@ -4,15 +4,20 @@ use core::{
 };
 
 use alloc::string::ToString;
+use alloc::sync::{Arc, Weak};
 use axerrno::{AxError, LinuxError, LinuxResult};
 use axfs::fops::OpenOptions;
+use axsync::Mutex;
 use linux_raw_sys::general::{
-    __kernel_mode_t, AT_FDCWD, F_DUPFD, F_DUPFD_CLOEXEC, F_SETFL, O_APPEND, O_CREAT, O_DIRECTORY,
-    O_NONBLOCK, O_PATH, O_RDONLY, O_TRUNC, O_WRONLY,
+    __kernel_mode_t, AT_FDCWD, F_DUPFD, F_DUPFD_CLOEXEC, F_SETFL, O_APPEND, O_CREAT, O_DIRECT,
+    O_DIRECTORY, O_NONBLOCK, O_PATH, O_RDONLY, O_TRUNC, O_WRONLY,
 };
 
 use crate::{
-    file::{Directory, FD_TABLE, File, FileLike, add_file_like, close_file_like, get_file_like},
+    file::{
+        Directory, FD_TABLE, File, FileLike, add_file_like, close_file_like, get_file_like,
+        page_cache_manager,
+    },
     path::handle_file_path,
     ptr::UserConstPtr,
 };
@@ -47,6 +52,9 @@ fn flags_to_options(flags: c_int, _mode: __kernel_mode_t) -> OpenOptions {
     if flags & O_DIRECTORY != 0 {
         options.directory(true);
     }
+    if flags & O_DIRECT != 0 {
+        options.direct(true);
+    }
     options
 }
@@ -75,14 +83,31 @@ pub fn sys_openat(
     if !opts.has_directory() {
         match dir.as_ref().map_or_else(
-            || axfs::fops::File::open(path, &opts),
-            |dir| dir.inner().open_file_at(path, &opts),
+            || axfs::fops::File::open(real_path.as_str(), &opts),
+            |dir| dir.inner().open_file_at(real_path.as_str(), &opts),
         ) {
-            Err(AxError::IsADirectory) => {}
-            r => {
-                let fd = File::new(r?, real_path.to_string()).add_to_fd_table()?;
+            Ok(axfile) => {
+                let path = real_path.to_string();
+                let fd = if opts.has_direct() {
+                    // Bypass the page cache.
+                    let file = Arc::new(Mutex::new(axfile));
+                    File::new_file_direct(path, file).add_to_fd_table()?
+                } else {
+                    // Go through the page cache.
+                    drop(axfile);
+                    let cache = {
+                        let manager = page_cache_manager();
+                        manager.open_page_cache(&path)
+                    };
+                    File::new_file_cached(path, cache).add_to_fd_table()?
+                };
                 return Ok(fd as _);
             }
+            Err(AxError::IsADirectory) => {}
+            Err(e) => {
+                error!("sys_open at {} failed: {}", path, e);
+                return Err(LinuxError::EINVAL);
+            }
         }
     }
diff --git a/api/src/imp/fs/io.rs b/api/src/imp/fs/io.rs
index fcbc2f1d..3f5e01b4 100644
--- a/api/src/imp/fs/io.rs
+++ b/api/src/imp/fs/io.rs
@@ -23,6 +23,18 @@ pub fn sys_read(fd: i32, buf: UserPtr<u8>, len: usize) -> LinuxResult<isize> {
     Ok(get_file_like(fd)?.read(buf)? as _)
 }
 
+pub fn sys_pread(fd: c_int, buf: UserPtr<u8>, len: usize, offset: usize) -> LinuxResult<isize> {
+    let buf = buf.get_as_mut_slice(len)?;
+    debug!(
+        "sys_pread <= fd: {}, buf: {:p}, len: {}",
+        fd,
+        buf.as_ptr(),
+        buf.len()
+    );
+    let file = File::from_fd(fd)?;
+    Ok(file.read_at(buf, offset)? as _)
as _) +} + pub fn sys_readv(fd: i32, iov: UserPtr, iocnt: usize) -> LinuxResult { if !(0..=1024).contains(&iocnt) { return Err(LinuxError::EINVAL); @@ -68,6 +80,23 @@ pub fn sys_write(fd: i32, buf: UserConstPtr, len: usize) -> LinuxResult, + len: usize, + offset: usize, +) -> LinuxResult { + let buf = buf.get_as_slice(len)?; + debug!( + "sys_pwrite <= fd: {}, buf: {:p}, len: {}", + fd, + buf.as_ptr(), + buf.len() + ); + let file = File::from_fd(fd)?; + Ok(file.write_at(buf, offset)? as _) +} + pub fn sys_writev(fd: i32, iov: UserConstPtr, iocnt: usize) -> LinuxResult { if !(0..=1024).contains(&iocnt) { return Err(LinuxError::EINVAL); @@ -99,6 +128,11 @@ pub fn sys_writev(fd: i32, iov: UserConstPtr, iocnt: usize) -> LinuxResul Ok(ret) } +pub fn sys_ftruncate(fd: c_int, offset: usize) -> LinuxResult { + let file = File::from_fd(fd)?; + Ok(file.truncate(offset)? as _) +} + pub fn sys_lseek(fd: c_int, offset: __kernel_off_t, whence: c_int) -> LinuxResult { debug!("sys_lseek <= {} {} {}", fd, offset, whence); let pos = match whence { @@ -107,6 +141,10 @@ pub fn sys_lseek(fd: c_int, offset: __kernel_off_t, whence: c_int) -> LinuxResul 2 => SeekFrom::End(offset as _), _ => return Err(LinuxError::EINVAL), }; - let off = File::from_fd(fd)?.inner().seek(pos)?; - Ok(off as _) + File::from_fd(fd)?.seek(pos) +} + +pub fn sys_fsync(fd: c_int) -> LinuxResult { + let file = File::from_fd(fd)?; + Ok(file.fsync()? as _) } diff --git a/api/src/imp/fs/stat.rs b/api/src/imp/fs/stat.rs index 31d08190..ef39bd15 100644 --- a/api/src/imp/fs/stat.rs +++ b/api/src/imp/fs/stat.rs @@ -1,7 +1,9 @@ use core::ffi::{c_char, c_int}; +use alloc::sync::Arc; use axerrno::{AxError, LinuxError, LinuxResult}; use axfs::fops::OpenOptions; +use axsync::Mutex; use linux_raw_sys::general::{AT_EMPTY_PATH, stat, statx}; use crate::{ @@ -11,9 +13,13 @@ use crate::{ }; fn stat_at_path(path: &str) -> LinuxResult { - let opts = OpenOptions::new().set_read(true); + let opts = OpenOptions::new().set_read(true).set_direct(true); match axfs::fops::File::open(path, &opts) { - Ok(file) => File::new(file, path.into()).stat(), + Ok(file) => File::new_file_direct( + path.into(), + Arc::new(Mutex::new(file)), + ) + .stat(), Err(AxError::IsADirectory) => { let dir = axfs::fops::Directory::open_dir(path, &opts)?; Directory::new(dir, path.into()).stat() diff --git a/api/src/imp/mm/mmap.rs b/api/src/imp/mm/mmap.rs index a33b0fc7..5442f17f 100644 --- a/api/src/imp/mm/mmap.rs +++ b/api/src/imp/mm/mmap.rs @@ -3,9 +3,13 @@ use axerrno::{LinuxError, LinuxResult}; use axhal::paging::{MappingFlags, PageSize}; use axtask::{TaskExtRef, current}; use linux_raw_sys::general::*; -use memory_addr::{MemoryAddr, VirtAddr, VirtAddrRange, align_up_4k}; +use linux_raw_sys::general::{ + MAP_ANONYMOUS, MAP_FIXED, MAP_NORESERVE, MAP_POPULATE, MAP_PRIVATE, MAP_SHARED, MAP_STACK, + PROT_EXEC, PROT_GROWSDOWN, PROT_GROWSUP, PROT_READ, PROT_WRITE, +}; +use memory_addr::{MemoryAddr, PAGE_SIZE_4K, VirtAddr, VirtAddrRange, align_up_4k}; -use crate::file::{File, FileLike}; +use crate::file::{File, FileLike, page_cache_manager}; bitflags::bitflags! { /// `PROT_*` flags for use with [`sys_mmap`]. @@ -60,6 +64,8 @@ bitflags::bitflags! { const NORESERVE = MAP_NORESERVE; /// Allocation is for a stack. const STACK = MAP_STACK; + /// Allocation s populate + const POPULATE = MAP_POPULATE; /// Huge page const HUGE = MAP_HUGETLB; /// Huge page 1g size @@ -67,6 +73,27 @@ bitflags::bitflags! { } } +/// ### 需要在维护的信息: +/// 1. 
+/// 1. The `aspace` in `process_data`, i.e. the AddrSpace at the axmm layer. It tracks the
+///    allocated virtual address ranges and allocates new ones.
+///    Insertion happens in sys_mmap, removal in sys_munmap.
+/// 2. The `vma_mnager` in `process_data`. Its main job is to find the matching VMA information
+///    on a page fault, including fd/start_addr/shared and so on. To keep the underlying
+///    Unikernel simple, this bookkeeping is kept in the Starry layer.
+///    Insertion happens in sys_mmap, removal in sys_munmap.
+/// 3. The `virt_pages` in `Page`. It implements the reverse mapping of a physical page, used to
+///    check whether a page is dirty and to tear down all related page-table mappings when the
+///    page is replaced.
+///    Insertion is done by lazy_map_file on a page fault, removal in sys_munmap.
+///
+/// ### Page-table modifications:
+/// 1. Page-table removal: always done in AddrSpace::unmap, which unmaps the whole VMA.
+/// 2. For non-populate file mappings, the mapping is created by lazy_map_file on a page fault.
+/// 3. For non-populate anonymous mappings, the mapping is created by AddrSpace::handle_page_fault
+///    on a page fault.
+/// 4. For populate mappings, the mapping is created directly by AddrSpace::map_alloc in sys_mmap.
+///
+/// ### By anonymous/file and private/shared, there are four main kinds of mmap:
+/// 1. Anonymous private: essentially malloc.
+/// 2. Anonymous shared: like private shared memory, only shareable between parent and child
+///    processes. TODO: not implemented yet.
+/// 3. File private: only loads the file content into memory; modifications are not written back
+///    to the file.
+/// 4. File shared: modifications to the file are synchronized, and multiple processes may read
+///    and write the file concurrently. Underneath, virtual pages of different processes are
+///    mapped to the same page-cache physical page.
 pub fn sys_mmap(
     addr: usize,
     length: usize,
@@ -79,10 +106,18 @@ pub fn sys_mmap(
     let process_data = curr.task_ext().process_data();
     let mut aspace = process_data.aspace.lock();
     let permission_flags = MmapProt::from_bits_truncate(prot);
-    // TODO: check illegal flags for mmap
-    // An example is the flags contained none of MAP_PRIVATE, MAP_SHARED, or MAP_SHARED_VALIDATE.
+
     let map_flags = MmapFlags::from_bits_truncate(flags);
-    if map_flags.contains(MmapFlags::PRIVATE | MmapFlags::SHARED) {
+    if !(map_flags.contains(MmapFlags::PRIVATE | MmapFlags::SHARED)) {
+        error!("MAP FAILED: flags must contain one of SHARED or PRIVATE");
+        return Err(LinuxError::EINVAL);
+    }
+
+    // TODO: MAP_SHARED_VALIDATE is not handled.
+
+    let offset = offset as usize;
+    if offset % PAGE_SIZE_4K != 0 {
+        error!("MAP_FAILED: offset must be aligned to 4K");
+        return Err(LinuxError::EINVAL);
+    }
@@ -99,14 +134,21 @@ pub fn sys_mmap(
         PageSize::Size4K
     };
 
+    let anonymous = map_flags.contains(MmapFlags::ANONYMOUS) || fd == -1;
+    let private = map_flags.contains(MmapFlags::PRIVATE);
+    let fd = { if anonymous { -1 } else { fd } };
+    let populate = map_flags.contains(MmapFlags::POPULATE);
+
+    if anonymous && fd != -1 {
+        error!("Anonymous mmap must have fd == -1!");
+        return Err(LinuxError::EINVAL);
+    }
+
     let start = addr.align_down(page_size);
     let end = (addr + length).align_up(page_size);
     let aligned_length = end - start;
-    debug!(
-        "start: {:x?}, end: {:x?}, aligned_length: {:x?}",
-        start, end, aligned_length
-    );
+    // Allocate the virtual address range.
     let start_addr = if map_flags.contains(MmapFlags::FIXED) {
         if start == 0 {
             return Err(LinuxError::EINVAL);
@@ -130,40 +172,97 @@ pub fn sys_mmap(
         ))
         .ok_or(LinuxError::ENOMEM)?
     };
+    info!(
+        "mmap: start_addr = {:#x}, length = {:#x}, fd = {}, offset = {:#x}",
+        start_addr, aligned_length, fd, offset
+    );
 
-    let populate = if fd == -1 {
-        false
+    // Record the VMA in vma_mnager and aspace; the page-table mapping is created later, when a
+    // page fault is triggered on access.
+    let curr = current();
+    let manager = curr.task_ext().process_data().vma_mnager();
+    manager.add_area(start_addr, length, fd, offset, !private)?;
+
+    if anonymous {
+        // Anonymous mapping.
+        aspace.map_alloc(
+            start_addr,
+            aligned_length,
+            permission_flags.into(),
+            populate,
+            page_size,
+        )?;
     } else {
-        !map_flags.contains(MmapFlags::ANONYMOUS)
-    };
+        // Currently only private file mappings with populate support huge pages, because they
+        // do not need the page cache.
+        if populate && private {
+            aspace.map_alloc(
+                start_addr,
+                aligned_length,
+                permission_flags.into(),
+                false,
+                page_size,
+            )?;
+        } else {
+            aspace.map_alloc(
+                start_addr,
+                aligned_length,
+                permission_flags.into(),
+                false,
+                PageSize::Size4K,
+            )?;
+        }
 
-    aspace.map_alloc(
-        start_addr,
-        aligned_length,
-        permission_flags.into(),
-        populate,
-        page_size,
-    )?;
-
-    if populate {
-        let file = File::from_fd(fd)?;
-        let file = file.inner();
-        let file_size = file.get_attr()?.size() as usize;
-        if offset < 0 || offset as usize >= file_size {
-            return Err(LinuxError::EINVAL);
+        if populate {
+            if private {
+                // populate for a private file mapping: load the file content directly into the aspace.
+                let file = File::from_fd(fd)?;
+                let file_size = file.size() as usize;
+                if offset as usize >= file_size {
+                    return Err(LinuxError::EINVAL);
+                }
+                let offset = offset as usize;
+                let length = core::cmp::min(length, file_size - offset);
+                let mut buf = vec![0u8; length];
+                file.read_at(&mut buf, offset)?;
+                aspace.write(start_addr, page_size, &buf)?;
+            } else {
+                // populate for a shared file mapping: let the page cache do the loading.
+                let cache_manager = page_cache_manager();
+                cache_manager.populate(fd, offset, length);
+            }
         }
-        let offset = offset as usize;
-        let length = core::cmp::min(length, file_size - offset);
-        let mut buf = vec![0u8; length];
-        file.read_at(offset as u64, &mut buf)?;
-        aspace.write(start_addr, page_size, &buf)?;
     }
-    Ok(start_addr.as_usize() as _)
+
+    return Ok(start_addr.as_usize() as _);
 }
 
 pub fn sys_munmap(addr: usize, length: usize) -> LinuxResult<isize> {
+    // Sync the file.
+    sys_msync(addr, length, 0)?;
+
+    // Remove the VMA from vma_mnager.
     let curr = current();
     let process_data = curr.task_ext().process_data();
+    let mmap_manager = process_data.vma_mnager();
+    let area = mmap_manager.query(VirtAddr::from(addr));
+    if area.is_none() {
+        error!("Invalid munmap area!");
+        return Err(LinuxError::EINVAL);
+    }
+    let area = area.unwrap();
+    if area.length != length {
+        error!("Invalid munmap length!");
+        return Err(LinuxError::EINVAL);
+    }
+    mmap_manager.remove_area(VirtAddr::from(addr))?;
+
+    // For file mmaps, drop the reverse mappings in the cached pages and update the page tables.
+    if area.fd != -1 {
+        assert!(addr % PAGE_SIZE_4K == 0);
+        let cache_manager = page_cache_manager();
+        cache_manager.munmap(area.fd, area.offset, area.length, addr);
+    }
+
+    // Remove the VMA from the aspace; for anonymous mappings this updates the page tables.
     let mut aspace = process_data.aspace.lock();
     let length = align_up_4k(length);
     let start_addr = VirtAddr::from(addr);
@@ -190,3 +289,80 @@ pub fn sys_mprotect(addr: usize, length: usize, prot: u32) -> LinuxResult<isize> {
     Ok(0)
 }
+
+pub fn sys_msync(addr: usize, length: usize, _flags: isize) -> LinuxResult<isize> {
+    // TODO: implement flags
+    if addr % PAGE_SIZE_4K != 0 {
+        error!("msync addr must be aligned to 4K!");
+        return Err(LinuxError::EINVAL);
+    }
+
+    let curr = current();
+    let mmap_manager = curr.task_ext().process_data().vma_mnager();
+    let area = mmap_manager.query(VirtAddr::from(addr));
+    if area.is_none() {
+        error!("Invalid msync area");
+        return Err(LinuxError::EINVAL);
+    }
+    let area = area.unwrap();
+    if area.length < length {
+        error!("Invalid msync length");
+        return Err(LinuxError::EINVAL);
+    }
+    if area.fd == -1 {
+        return Ok(0);
+    }
+
+    let cache_manager = page_cache_manager();
+    cache_manager.msync(area.fd, area.offset, length);
+    Ok(0)
+}
+
+/// On a page fault, decide whether it was caused by a file-backed mmap and, if so, establish
+/// the mapping from the virtual page to the file's cached page.
+pub fn lazy_map_file(vaddr: VirtAddr, access_flags: MappingFlags) -> bool {
+    let curr = current();
+    let mmap_manager = curr.task_ext().process_data().vma_mnager();
+    let area = mmap_manager.query(vaddr);
+
+    // The page fault was not caused by mmap; the only cost is a single query of mmap_manager.
+    if area.is_none() {
+        return false;
+    }
+    // Anonymous mapping; it should be handled by AddrSpace::handle_page_fault.
+    let area = area.unwrap();
+    if area.fd == -1 {
+        return false;
+    }
+
+    let offset = vaddr.as_usize() - area.start.as_usize();
+    let page_id = offset / PAGE_SIZE_4K;
+    let aligned_vaddr = VirtAddr::from(memory_addr::align_down_4k(vaddr.as_usize()));
+
+    if !area.shared {
+        let file = match File::from_fd(area.fd) {
+            Ok(f) => f,
+            _ => return false,
+        };
+        let file_size = file.size();
+        let length = core::cmp::min(PAGE_SIZE_4K, file_size - offset);
+        let mut buf = vec![0u8; length];
+        match file.read_at(&mut buf, offset) {
+            Ok(_) => (),
+            _ => return false,
+        }
+
+        let curr = current();
+        let process_data = curr.task_ext().process_data();
+        let aspace = process_data.aspace.lock();
+        match aspace.write(aligned_vaddr, PageSize::Size4K, &buf) {
+            Ok(_) => (),
+            _ => return false,
+        }
+        return true;
+    }
+
+    let page_cache_manager = page_cache_manager();
+    let cache = page_cache_manager.fd_cache(area.fd);
+    // This modifies the page tables directly and records the virt_page info in the Page.
+    return cache.map_virt_page(page_id, aligned_vaddr, access_flags);
+}
diff --git a/apps/libc/c/concurrent_io/concurrent_io.c b/apps/libc/c/concurrent_io/concurrent_io.c
new file mode 100644
index 00000000..f6757469
--- /dev/null
+++ b/apps/libc/c/concurrent_io/concurrent_io.c
@@ -0,0 +1,159 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define FILENAME "/tmp/mmap_shared_file.bin"
+const int FILE_SIZE = (4096 * 200); // file size
+#define WRITER_COUNT 2              // number of writer processes
+#define READER_COUNT 4              // number of reader processes
+const int BUFFER_LEN = 4096;
+const int BUFFER_CNT = FILE_SIZE / BUFFER_LEN;
+
+char func(int i, int j) {
+    return (i ^ j + 998244353) % 63;
+}
+
+// Writer process body.
+void writer_process(int writer_id) {
+    assert(writer_id < WRITER_COUNT);
+    int fd = open(FILENAME, O_RDWR | O_CREAT, 0666);
+    if (fd == -1) {
+        perror("writer open failed");
+        exit(EXIT_FAILURE);
+    }
+
+    // Extend the file to its full size.
+    ftruncate(fd, FILE_SIZE);
+    char buffer[BUFFER_LEN];
+
+    // Write the file content.
+    for (int i = writer_id; i < BUFFER_CNT; i += WRITER_COUNT) {
+        for (int j = 0; j < BUFFER_LEN; j++) {
+            buffer[j] = func(i, j);
+        }
+        pwrite(fd, buffer, BUFFER_LEN, i * BUFFER_LEN);
+    }
+
+    close(fd);
+    printf("Writer %d finished writing\n", writer_id);
+}
+
+// Reader process body.
+void reader_process(int reader_id) {
+    printf("enter reader process %d\n", reader_id);
+
+    int fd = open(FILENAME, O_RDONLY);
+    if (fd < 0) {
+        printf("reader open failed, fd = %d\n", fd);
+        exit(EXIT_FAILURE);
+    }
+
+    printf("Reader %d open file success, fd: %d\n", reader_id, fd);
+
+    // Get the file size.
+    struct stat st;
+    if (fstat(fd, &st) == -1) {
+        perror("fstat failed");
+        close(fd);
+        exit(EXIT_FAILURE);
+    }
+    printf("file size: %ld\n", st.st_size);
+    if (st.st_size != FILE_SIZE) {
+        fprintf(stderr, "Reader %d: File size too small (%ld < %d)\n",
+                reader_id, st.st_size, FILE_SIZE);
+        close(fd);
exit(EXIT_FAILURE); + } + + printf("Reader %d check filesize success\n", reader_id); + + char buffer[BUFFER_LEN]; + + // 读取文件内容 + for (int round = 0; round < 100; round++) { + // printf("Reader %d start round %d\n", reader_id, round); + for (int i = reader_id; i < BUFFER_CNT; i += WRITER_COUNT) { + pread(fd, buffer, BUFFER_LEN, i * BUFFER_LEN); + for (int j = 0; j < BUFFER_LEN; j++) { + if (buffer[j] != func(i, j)) { + printf("位置 (%d, %d) 读写不一致:期待 %d, 实际 %d\n", i, j, func(i, j), buffer[j]); + } + } + } + } + + close(fd); + exit(EXIT_SUCCESS); +} + +int main() { + pid_t writer_pids[WRITER_COUNT], reader_pids[READER_COUNT]; + int status; + + printf("Starting pread/pwrite consistency test\n"); + printf("File: %s, Size: %d bytes\n", FILENAME, FILE_SIZE); + printf("Writer: %d, Readers: %d\n\n", WRITER_COUNT, READER_COUNT); + + // 创建写进程 + for (int i = 0; i < WRITER_COUNT; i++) { + writer_pids[i] = fork(); + if (writer_pids[i] == 0) { + writer_process(i); + exit(EXIT_SUCCESS); + } else if (writer_pids[i] < 0) { + perror("fork for reader failed"); + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < WRITER_COUNT; i++) { + waitpid(writer_pids[i], &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { + fprintf(stderr, "Writer process failed\n"); + } else { + printf("Writer process completed successfully\n"); + } + } + + // 创建读进程 + for (int i = 0; i < READER_COUNT; i++) { + reader_pids[i] = fork(); + if (reader_pids[i] == 0) { + reader_process(i); + exit(EXIT_SUCCESS); + } else if (reader_pids[i] < 0) { + perror("fork for reader failed"); + exit(EXIT_FAILURE); + } + } + + // 等待所有读进程完成 + int reader_failures = 0; + for (int i = 0; i < READER_COUNT; i++) { + waitpid(reader_pids[i], &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { + reader_failures++; + } + } + + // 清理 + unlink(FILENAME); + + printf("\nTest completed. 
\n"); + if (reader_failures == 0) { + printf("SUCCESS: 所有 ReaderProcess 验证成功\n"); + exit(EXIT_SUCCESS); + } else { + printf("FAILURE: %d/%d readers 发现错误\n", + reader_failures, READER_COUNT); + exit(EXIT_FAILURE); + } +} \ No newline at end of file diff --git a/apps/libc/c/concurrent_mmap/concurrent_mmap.c b/apps/libc/c/concurrent_mmap/concurrent_mmap.c new file mode 100644 index 00000000..4f54485e --- /dev/null +++ b/apps/libc/c/concurrent_mmap/concurrent_mmap.c @@ -0,0 +1,169 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FILENAME "/tmp/mmap_shared_file.bin" +const int FILE_SIZE = (4096 * 20); // 文件大小 +#define WRITER_COUNT 2 // 写进程数量 +#define READER_COUNT 4 // 读进程数量 + +char func(unsigned i) { + return i % 14 + 'a'; +} + +// 写进程函数 +void writer_process(int writer_id) { + assert(writer_id < WRITER_COUNT); + int fd = open(FILENAME, O_RDWR | O_CREAT, 0666); + if (fd == -1) { + perror("writer open failed"); + exit(EXIT_FAILURE); + } + + // 扩展文件大小 + ftruncate(fd, FILE_SIZE); + + // 映射整个文件 + char *map = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + perror("writer mmap failed"); + close(fd); + exit(EXIT_FAILURE); + } + + // 写入数据块 + for (unsigned i = writer_id; i < FILE_SIZE; i += WRITER_COUNT) { + map[i] = func(i); + } + munmap(map, FILE_SIZE); + + close(fd); + printf("Writer %d 写入结束\n", writer_id); + // exit(EXIT_SUCCESS); +} + +// 读进程函数 +void reader_process(int reader_id) { + printf("enter reader process %d\n", reader_id); + + int fd = open(FILENAME, O_RDONLY); + if (fd < 0) { + printf("reader open failed, fd = %d\n", fd); + exit(EXIT_FAILURE); + } + + printf("Reader %d open file success, fd: %d\n", reader_id), fd; + + // 获取文件大小 + struct stat st; + if (fstat(fd, &st) == -1) { + perror("fstat failed"); + close(fd); + exit(EXIT_FAILURE); + } + + if (st.st_size != FILE_SIZE) { + fprintf(stderr, "Reader %d: File size too small (%ld < %d)\n", + reader_id, st.st_size, FILE_SIZE); + close(fd); + exit(EXIT_FAILURE); + } + + printf("Reader %d check filesize success\n", reader_id); + + // 只读映射 + char *map = mmap(NULL, FILE_SIZE, PROT_READ, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + perror("reader mmap failed"); + close(fd); + exit(EXIT_FAILURE); + } + + printf("Reader %d started verification\n", reader_id); + + for (int round = 0; round < 10; round++) { + for (unsigned i = 0; i < FILE_SIZE; i++) { + if (map[i] != func(i)) { + printf("位置 %d 读写不一致:期待 %d, 实际 %d\n", i, func(i), map[i]); + } + } + } + + printf("Reader %d: 读写验证成功!\n", reader_id); + + munmap(map, FILE_SIZE); + close(fd); + exit(EXIT_SUCCESS); +} + +int main() { + pid_t writer_pids[WRITER_COUNT], reader_pids[READER_COUNT]; + int status; + + printf("Starting MMAP read/write consistency test\n"); + printf("File: %s, Size: %d bytes\n", FILENAME, FILE_SIZE); + printf("Writer: %d, Readers: %d\n\n", WRITER_COUNT, READER_COUNT); + + // 创建写进程 + for (int i = 0; i < WRITER_COUNT; i++) { + writer_pids[i] = fork(); + if (writer_pids[i] == 0) { + writer_process(i); + exit(EXIT_SUCCESS); + } else if (writer_pids[i] < 0) { + perror("fork for reader failed"); + exit(EXIT_FAILURE); + } + } + + for (int i = 0; i < WRITER_COUNT; i++) { + waitpid(writer_pids[i], &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { + fprintf(stderr, "Writer process failed\n"); + } else { + printf("Writer process completed successfully\n"); + } + } + + // 创建读进程 + for (int i = 0; i < READER_COUNT; i++) { + reader_pids[i] = 
fork(); + if (reader_pids[i] == 0) { + reader_process(i); + exit(EXIT_SUCCESS); + } else if (reader_pids[i] < 0) { + perror("fork for reader failed"); + exit(EXIT_FAILURE); + } + } + + // 等待所有读进程完成 + int reader_failures = 0; + for (int i = 0; i < READER_COUNT; i++) { + waitpid(reader_pids[i], &status, 0); + if (!WIFEXITED(status) || WEXITSTATUS(status) != EXIT_SUCCESS) { + reader_failures++; + } + } + + // 清理 + unlink(FILENAME); + + printf("\nTest completed. \n"); + if (reader_failures == 0) { + printf("SUCCESS: 所有 ReaderProcess 验证成功\n"); + exit(EXIT_SUCCESS); + } else { + printf("FAILURE: %d/%d readers 发现错误\n", + reader_failures, READER_COUNT); + exit(EXIT_FAILURE); + } +} \ No newline at end of file diff --git a/apps/libc/c/page_cache/page_cache.c b/apps/libc/c/page_cache/page_cache.c new file mode 100644 index 00000000..6a5bef31 --- /dev/null +++ b/apps/libc/c/page_cache/page_cache.c @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define FILENAME "testfile.bin" + +const int PAGE_SIZE = 4096; + +// 获取当前时间(秒精度浮点数) +double get_time() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + tv.tv_usec * 1e-6; +} + +void test(int use_page_cache, size_t file_size, int round) { + double start, end; + double total_size = file_size * round; + + // 确保缓冲区大小对齐 + // // 写入的内容 + // char *a = (char *)malloc((file_size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE); + // // 读取的内容 + // char *b = (char *)malloc((file_size + PAGE_SIZE - 1) / PAGE_SIZE * PAGE_SIZE); + char *a = malloc(file_size); + char *b = malloc(file_size); + + for (int i = 0; i < file_size; i++) { + a[i] = rand() % 15; + } + + // 打开文件 + int flag = O_WRONLY|O_CREAT|O_TRUNC; + if (use_page_cache == 0) { + flag |= O_DIRECT; + } + int fd = open(FILENAME, flag, 0644); + if (fd < 0) { + perror("文件创建失败"); + exit(EXIT_FAILURE); + } + + // 重复 n 轮写入数据 + start = get_time(); + for (int i = 0; i < round; i++) { + lseek(fd, 0, SEEK_SET); + size_t cur = 0; + while (cur < file_size) { + ssize_t add = write(fd, a + cur, file_size - cur); // 从 a+cur 写入 + if (add <= 0) { // 修正5:处理错误 + perror("写入失败"); + close(fd); + exit(EXIT_FAILURE); + } + cur += add; + } + } + end = get_time(); + printf("[写入] 大小: %.2f MB, 耗时: %.2f s, 速度: %.2f MB/s\n", + total_size / (1024.0 * 1024), end - start, total_size / (end - start) / (1024 * 1024)); + + // 关闭文件前获取文件大小,此时应该从 page cache 中读取 + struct stat file_stat; + off_t stat_size; + stat(FILENAME, &file_stat); + stat_size = file_stat.st_size; + if (stat_size != file_size && use_page_cache == 1) { + printf("(close 前) 文件大小: %ld, 理应 %ld\n", stat_size, file_size); + close(fd); + exit(EXIT_FAILURE); + } + + // 关闭文件 + close(fd); + + // 关闭文件后获取文件大小,此时应该从磁盘读取 + stat(FILENAME, &file_stat); + stat_size = file_stat.st_size; + if (stat_size != file_size) { + printf("(close 后) 文件大小: %ld, 理应 %ld\n", stat_size, file_size); + exit(EXIT_FAILURE); + } + + // 重新打开文件 + flag = O_RDONLY; + if (use_page_cache == 0) { + flag |= O_DIRECT; + } + if ((fd = open(FILENAME, flag)) < 0) { + perror("文件打开失败"); + exit(EXIT_FAILURE); + } + + // 重复 n 轮读取数据 + start = get_time(); + for (int i = 0; i < round; i++) { + lseek(fd, 0, SEEK_SET); + ssize_t cur = 0; + while (cur < file_size) { + ssize_t add = read(fd, b + cur, file_size - cur); // 读到 b+cur + if (add <= 0) { // 修正5:处理错误 + perror("读取失败"); + close(fd); + exit(EXIT_FAILURE); + } + cur += add; + } + } + close(fd); + end = get_time(); + printf("[读取] 大小: %.2f MB, 耗时: %.2f s, 速度: %.2f MB/s\n", + total_size / (1024.0 * 1024), end - start, total_size / 
+
+    // verify that the data read back matches what was written
+    for (int i = 0; i < file_size; i++) {
+        // printf(" a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]);
+        if (a[i] != b[i]) {
+            printf("data mismatch, a[%d] = %d, b[%d] = %d\n", i, a[i], i, b[i]);
+            exit(EXIT_FAILURE);
+        }
+    }
+    printf("read and written data are identical\n");
+
+    // cleanup
+    unlink(FILENAME);
+    free(a);
+    free(b);
+}
+
+int main() {
+    printf("With page cache:\n");
+    test(1, 4096 * 200, 1);
+
+    // printf("Without page cache (direct I/O):\n");
+    // test(0, 513, 1);
+
+    printf("SUCCESS PAGE CACHE TEST!\n");
+    return 0;
+}
\ No newline at end of file
diff --git a/apps/libc/testcase_list b/apps/libc/testcase_list
index 3f68ac85..7d902b2f 100644
--- a/apps/libc/testcase_list
+++ b/apps/libc/testcase_list
@@ -1,4 +1,5 @@
 helloworld_c
 sleep_c
 signal_c
-mmap_c
+concurrent_mmap_c
+concurrent_io_c
\ No newline at end of file
diff --git a/core/src/mm.rs b/core/src/mm.rs
index a99cc40f..e0259476 100644
--- a/core/src/mm.rs
+++ b/core/src/mm.rs
@@ -1,14 +1,16 @@
 //! User address space management.
 
+use axerrno::{LinuxError, LinuxResult};
 use core::ffi::CStr;
 
-use alloc::{borrow::ToOwned, string::String, vec, vec::Vec};
+use alloc::{borrow::ToOwned, collections::btree_map::BTreeMap, string::String, vec, vec::Vec};
 use axerrno::{AxError, AxResult};
 use axhal::{
     mem::virt_to_phys,
     paging::{MappingFlags, PageSize},
 };
 use axmm::{AddrSpace, kernel_aspace};
+use axsync::Mutex;
 use kernel_elf_parser::{AuxvEntry, ELFParser, app_stack_region};
 use memory_addr::{MemoryAddr, PAGE_SIZE_4K, VirtAddr};
 use xmas_elf::{ElfFile, program::SegmentData};
@@ -150,6 +152,7 @@ pub fn load_user_app(
             || interp_path == "/lib64/ld-linux-loongarch-lp64d.so.1"
             || interp_path == "/lib64/ld-linux-x86-64.so.2"
             || interp_path == "/lib/ld-linux-aarch64.so.1"
+            || interp_path == "/lib/ld-musl-riscv64.so.1"
         {
             // TODO: Use soft link
             interp_path = String::from("/musl/lib/libc.so");
@@ -218,3 +221,81 @@ pub fn access_user_memory<R>(f: impl FnOnce() -> R) -> R {
 pub fn is_accessing_user_memory() -> bool {
     ACCESSING_USER_MEM.read_current()
 }
+
+/// Similar to Linux's VMA (virtual memory area) struct
+#[derive(Clone)]
+pub struct VirtMemArea {
+    // start address of the VMA
+    pub start: VirtAddr,
+    // length of the VMA in bytes
+    pub length: usize,
+    // fd == -1 means an anonymous mapping
+    pub fd: i32,
+    // file offset corresponding to the start address of the VMA
+    pub offset: usize,
+    // whether the mapping is shared
+    pub shared: bool,
+}
+
+/// Purpose: find the Virtual Memory Area that a given virtual address belongs to.
+///
+/// Main use cases:
+/// 1. handling page faults caused by mmap lazy allocation,
+/// 2. reverse mapping during page replacement, i.e. looking up the Virtual Memory Area for a virtual address
+pub struct ProcessVMAManager {
+    // these ranges are guaranteed to be disjoint
+    areas: Mutex<BTreeMap<VirtAddr, VirtMemArea>>,
+}
+
+impl ProcessVMAManager {
+    /// Create a new ProcessVMAManager when a process is created
+    pub fn new() -> Self {
+        Self {
+            areas: Mutex::new(BTreeMap::new()),
+        }
+    }
+
+    /// Add a VMA
+    pub fn add_area(
+        &self,
+        start: VirtAddr,
+        length: usize,
+        fd: i32,
+        offset: usize,
+        shared: bool,
+    ) -> LinuxResult<usize> {
+        let mut areas = self.areas.lock();
+        if length == 0 || start.as_usize() % PAGE_SIZE_4K != 0 || offset % PAGE_SIZE_4K != 0 {
+            return Err(LinuxError::EINVAL);
+        }
+        areas.insert(
+            start,
+            VirtMemArea {
+                start,
+                length,
+                fd,
+                offset,
+                shared,
+            },
+        );
+        Ok(0)
+    }
+
+    /// Remove a VMA
+    pub fn remove_area(&self, start: VirtAddr) -> LinuxResult<usize> {
+        let mut areas = self.areas.lock();
+        areas.remove(&start).unwrap();
+        Ok(0)
+    }
+
+    /// Query the VMA that contains the given virtual address
+    pub fn query(&self, vaddr: VirtAddr) -> Option<VirtMemArea> {
+        let areas = self.areas.lock();
+        if let Some((&start, area)) = areas.range(..=vaddr).next_back() {
+            if vaddr >= start && vaddr.as_usize() < start.as_usize() + area.length {
+                return Some(area.clone());
+            }
+        }
+        None
+    }
+}
diff --git a/core/src/task.rs b/core/src/task.rs
index 33bcb0e0..11b4577b 100644
--- a/core/src/task.rs
+++ b/core/src/task.rs
@@ -30,7 +30,7 @@ use memory_addr::VirtAddrRange;
 use spin::{Once, RwLock};
 use weak_map::WeakMap;
 
-use crate::{futex::FutexTable, time::TimeStat};
+use crate::{futex::FutexTable, mm::ProcessVMAManager, time::TimeStat};
 
 /// Create a new user task.
 pub fn new_user_task(
@@ -210,6 +210,9 @@ pub struct ProcessData {
 
     /// The futex table.
     pub futex_table: FutexTable,
+
+    /// The process mmap manager
+    vma_mnager: Arc<ProcessVMAManager>,
 }
 
 impl ProcessData {
@@ -236,6 +239,7 @@
             )),
 
             futex_table: FutexTable::new(),
+            vma_mnager: Arc::new(ProcessVMAManager::new()),
         }
     }
 
@@ -264,6 +268,10 @@ impl ProcessData {
     pub fn is_clone_child(&self) -> bool {
         self.exit_signal != Some(Signo::SIGCHLD)
     }
+
+    pub fn vma_mnager(&self) -> Arc<ProcessVMAManager> {
+        self.vma_mnager.clone()
+    }
 }
 
 impl Drop for ProcessData {
diff --git a/scripts/config.toml.temp b/scripts/config.toml.temp
index 5676d053..bd9f00a8 100644
--- a/scripts/config.toml.temp
+++ b/scripts/config.toml.temp
@@ -11,3 +11,4 @@ axns = { path = "%AX_ROOT%/modules/axns" }
 axruntime = { path = "%AX_ROOT%/modules/axruntime" }
 axsync = { path = "%AX_ROOT%/modules/axsync" }
 axtask = { path = "%AX_ROOT%/modules/axtask" }
+axalloc = { path = "%AX_ROOT%/modules/axalloc" }
\ No newline at end of file
diff --git a/src/mm.rs b/src/mm.rs
index 6496d5d3..df484105 100644
--- a/src/mm.rs
+++ b/src/mm.rs
@@ -5,34 +5,35 @@ use axhal::{
 };
 use axtask::{TaskExtRef, current};
 use linux_raw_sys::general::SIGSEGV;
-use starry_api::do_exit;
+use starry_api::{do_exit, lazy_map_file};
 use starry_core::mm::is_accessing_user_memory;
 
 #[register_trap_handler(PAGE_FAULT)]
 fn handle_page_fault(vaddr: VirtAddr, access_flags: MappingFlags, is_user: bool) -> bool {
     warn!(
-        "Page fault at {:#x}, access_flags: {:#x?}",
-        vaddr, access_flags
+        "Page fault at {:#x}, {}, access_flags: {:#x?}, is_user: {}",
+        vaddr,
+        current().id_name(),
+        access_flags,
+        is_user
     );
     if !is_user && !is_accessing_user_memory() {
         return false;
     }
-
+    if is_user && lazy_map_file(vaddr, access_flags) {
+        return true;
+    }
     let curr = current();
-    if !curr
-        .task_ext()
-        .process_data()
-        .aspace
-        .lock()
-        .handle_page_fault(vaddr, access_flags)
-    {
-        warn!(
-            "{} ({:?}): segmentation fault at {:#x}, exit!",
-            curr.id_name(),
-            curr.task_ext().thread,
-            vaddr
-        );
-        do_exit(SIGSEGV as _, true);
+    let mut aspace = curr.task_ext().process_data().aspace.lock();
+    if aspace.handle_page_fault(vaddr, access_flags) {
+        return true;
     }
-    true
+
+    warn!(
+        "{} ({:?}): segmentation fault at {:#x}, exit!",
+        curr.id_name(),
+        curr.task_ext().thread,
+        vaddr
+    );
+    do_exit(SIGSEGV as _, true);
 }
diff --git a/src/syscall.rs b/src/syscall.rs
index 6fe820ee..eab2268c 100644
--- a/src/syscall.rs
+++ b/src/syscall.rs
@@ -3,6 +3,7 @@ use axhal::{
     arch::TrapFrame,
     trap::{SYSCALL, register_trap_handler},
 };
+use axtask::current;
 use starry_api::*;
 use starry_core::task::{time_stat_from_kernel_to_user, time_stat_from_user_to_kernel};
 use syscalls::Sysno;
@@ -10,7 +11,7 @@ use syscalls::Sysno;
 #[register_trap_handler(SYSCALL)]
 fn handle_syscall(tf: &mut TrapFrame, syscall_num: usize) -> isize {
     let sysno = Sysno::from(syscall_num as u32);
-    info!("Syscall {}", sysno);
+    info!("Syscall {}, in {}", sysno, current().id_name());
     time_stat_from_user_to_kernel();
     let result = match sysno {
         // fs ctl
@@ -54,6 +55,20 @@ fn handle_syscall(tf: &mut TrapFrame, syscall_num: usize) -> isize {
         Sysno::write => sys_write(tf.arg0() as _, tf.arg1().into(), tf.arg2() as _),
         Sysno::writev => sys_writev(tf.arg0() as _, tf.arg1().into(), tf.arg2() as _),
         Sysno::lseek => sys_lseek(tf.arg0() as _, tf.arg1() as _, tf.arg2() as _),
+        Sysno::fsync => sys_fsync(tf.arg0() as _),
+        Sysno::pread64 => sys_pread(
+            tf.arg0() as _,
+            tf.arg1().into(),
+            tf.arg2() as _,
+            tf.arg3() as _,
+        ),
+        Sysno::pwrite64 => sys_pwrite(
+            tf.arg0() as _,
+            tf.arg1().into(),
+            tf.arg2() as _,
+            tf.arg3() as _,
+        ),
+        Sysno::ftruncate => sys_ftruncate(tf.arg0() as _, tf.arg1() as _),
 
         // fs mount
         Sysno::mount => sys_mount(
@@ -110,6 +125,7 @@
         ),
         Sysno::munmap => sys_munmap(tf.arg0(), tf.arg1() as _),
         Sysno::mprotect => sys_mprotect(tf.arg0(), tf.arg1() as _, tf.arg2() as _),
+        Sysno::msync => sys_msync(tf.arg0(), tf.arg1() as _, tf.arg2() as _),
 
         // task info
         Sysno::getpid => sys_getpid(),