Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books exported-no

In this mode, all notes under the source (the first argument) are considered part of the vault so any references to these files will remain intact, even if they're not part of the exported notes.

#### Recursive export

If you would like to use `--start-at` but also export notes that are linked from the notes under that subdirectory, you can use the `--link-depth` argument:

```sh
obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books --link-depth 1 exported-notes
```

This will export all notes under `my-obsidian-vault/Books` as well as any notes that are linked from those notes. The level of recursion can be controlled by changing the value of `--link-depth`.

## Character encodings

At present, UTF-8 character encoding is assumed for all note text as well as filenames.
Expand Down
10 changes: 10 additions & 0 deletions docs/usage-basic.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books exported-no

In this mode, all notes under the source (the first argument) are considered part of the vault so any references to these files will remain intact, even if they're not part of the exported notes.

#### Recursive export

If you would like to use `--start-at` but also export notes that are linked from the notes under that subdirectory, you can use the `--link-depth` argument:

```sh
obsidian-export my-obsidian-vault --start-at my-obsidian-vault/Books --link-depth 1 exported-notes
```

This will export all notes under `my-obsidian-vault/Books` as well as any notes that are linked from those notes. The level of recursion can be controlled by changing the value of `--link-depth`.

## Character encodings

At present, UTF-8 character encoding is assumed for all note text as well as filenames.
Expand Down
89 changes: 58 additions & 31 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use eyre::{eyre, Result};
use gumdrop::Options;
use obsidian_export::{postprocessors::*, ExportError};
use obsidian_export::{Exporter, FrontmatterStrategy, WalkOptions};
use std::sync::Arc;
use std::{env, path::PathBuf};

const VERSION: &str = env!("CARGO_PKG_VERSION");
Expand All @@ -23,6 +24,13 @@ struct Opts {
#[options(no_short, help = "Only export notes under this sub-path")]
start_at: Option<PathBuf>,

#[options(
no_short,
help = "Maximum depth of links to follow when using --start-at. Does nothing if --start-at is not specified",
default = "0"
)]
link_depth: usize,

#[options(
help = "Frontmatter strategy (one of: always, never, auto)",
no_short,
Expand Down Expand Up @@ -91,7 +99,7 @@ fn main() {
..Default::default()
};

let mut exporter = Exporter::new(root, destination);
let mut exporter = Exporter::new(root.clone(), destination.clone());
exporter.frontmatter_strategy(args.frontmatter_strategy);
exporter.process_embeds_recursively(!args.no_recursive_embeds);
exporter.walk_options(walk_options);
Expand All @@ -102,38 +110,57 @@ fn main() {

let tags_postprocessor = filter_by_tags(args.skip_tags, args.only_tags);
exporter.add_postprocessor(&tags_postprocessor);

let recursive_resolver: RecursiveResolver;
let shared_state: Arc<SharedResolverState> = SharedResolverState::new(args.link_depth);
let mut dont_recurse = true;
let callback;
if let Some(path) = args.start_at {
exporter.start_at(path);
exporter.start_at(path.clone());
if args.link_depth > 0 {
dont_recurse = false;
recursive_resolver =
RecursiveResolver::new(root.clone(), path, destination, shared_state.clone());
callback = |ctx: &mut obsidian_export::Context,
events: &mut Vec<pulldown_cmark::Event<'_>>| {
recursive_resolver.postprocess(ctx, events)
};
exporter.add_postprocessor(&callback);
}
}

if let Err(err) = exporter.run() {
match err {
ExportError::FileExportError {
ref path,
ref source,
} => match &**source {
// An arguably better way of enhancing error reports would be to construct a custom
// `eyre::EyreHandler`, but that would require a fair amount of boilerplate and
// reimplementation of basic reporting.
ExportError::RecursionLimitExceeded { file_tree } => {
eprintln!(
"Error: {:?}",
eyre!(
"'{}' exceeds the maximum nesting limit of embeds",
path.display()
)
);
eprintln!("\nFile tree:");
for (idx, path) in file_tree.iter().enumerate() {
eprintln!(" {}-> {}", " ".repeat(idx), path.display());
loop {
if let Err(err) = exporter.run() {
match err {
ExportError::FileExportError {
ref path,
ref source,
} => match &**source {
// An arguably better way of enhancing error reports would be to construct a custom
// `eyre::EyreHandler`, but that would require a fair amount of boilerplate and
// reimplementation of basic reporting.
ExportError::RecursionLimitExceeded { file_tree } => {
eprintln!(
"Error: {:?}",
eyre!(
"'{}' exceeds the maximum nesting limit of embeds",
path.display()
)
);
eprintln!("\nFile tree:");
for (idx, path) in file_tree.iter().enumerate() {
eprintln!(" {}-> {}", " ".repeat(idx), path.display());
}
eprintln!("\nHint: Ensure notes are non-recursive, or specify --no-recursive-embeds to break cycles")
}
eprintln!("\nHint: Ensure notes are non-recursive, or specify --no-recursive-embeds to break cycles")
}
_ => eprintln!("Error: {:?}", eyre!(err)),
},
_ => eprintln!("Error: {:?}", eyre!(err)),
},
_ => eprintln!("Error: {:?}", eyre!(err)),
};
std::process::exit(1);
};
};
std::process::exit(1);
}
if dont_recurse || shared_state.update_and_check_should_continue() {
break;
} else if shared_state.get_current_depth() == 1 {
exporter.start_at(root.clone());
}
}
}
231 changes: 229 additions & 2 deletions src/postprocessors.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
//! A collection of officially maintained [postprocessors][crate::Postprocessor].

use super::{Context, MarkdownEvents, PostprocessorResult};
use pulldown_cmark::Event;
use std::{
collections::BTreeSet,
path::{Path, PathBuf},
sync::{Arc, Mutex, RwLock},
};

use super::{Context, MarkdownEvents, PostprocessorResult, PERCENTENCODE_CHARS};
use percent_encoding::{percent_decode_str, utf8_percent_encode};
use pulldown_cmark::{CowStr, Event, Tag};
use rayon::iter::{ParallelDrainRange, ParallelExtend};
use serde_yaml::Value;

/// This postprocessor converts all soft line breaks to hard line breaks. Enabling this mimics
Expand Down Expand Up @@ -50,6 +58,225 @@ fn filter_by_tags_(
PostprocessorResult::Continue
}
}
/// State shared between the `--link-depth` export loop in `main` and the
/// `RecursiveResolver` postprocessor across export iterations.
#[derive(Debug)]
pub struct SharedResolverState {
    /// Maximum link depth to follow (the value of `--link-depth`).
    depth: usize,
    /// The 0-based iteration the export is currently on.
    current_depth: RwLock<usize>,
    /// Files queued to be exported in the current iteration.
    files_to_parse: RwLock<BTreeSet<PathBuf>>,
    /// Link targets discovered during the current iteration; drained into
    /// `files_to_parse` when advancing to the next iteration.
    linked_files: Mutex<Vec<PathBuf>>,
    /// Files already handled in earlier iterations, used to avoid
    /// re-queueing the same note twice.
    parsed_files: RwLock<BTreeSet<PathBuf>>,
}

impl SharedResolverState {
    /// Creates the shared state wrapped in an [`Arc`] so it can be shared
    /// between the export loop and the postprocessor callback.
    pub fn new(depth: usize) -> Arc<SharedResolverState> {
        Arc::new(SharedResolverState {
            depth,
            current_depth: RwLock::new(0),
            files_to_parse: RwLock::new(BTreeSet::new()),
            linked_files: Mutex::new(Vec::new()),
            parsed_files: RwLock::new(BTreeSet::new()),
        })
    }
    /// Advances to the next iteration and reports whether the export loop
    /// should STOP.
    ///
    /// Returns `true` when recursion is finished — either the maximum depth
    /// has been reached or no new linked files were discovered. Returns
    /// `false` when another export iteration is needed.
    ///
    /// NOTE(review): despite the name, `true` means "stop": the caller in
    /// `main` breaks out of its loop on a `true` result.
    pub fn update_and_check_should_continue(&self) -> bool {
        let mut current_depth = self.current_depth.write().unwrap();

        if *current_depth < self.depth {
            *current_depth += 1;

            // Everything queued for the iteration that just finished has now
            // been handled; fold it into the parsed set.
            let parsed_files = &mut *self.parsed_files.write().unwrap();

            let files_to_parse = &mut *self.files_to_parse.write().unwrap();
            parsed_files.append(files_to_parse);
            // Newly discovered link targets become the work queue for the
            // next iteration.
            files_to_parse.par_extend(self.linked_files.lock().unwrap().par_drain(..));

            if !files_to_parse.is_empty() {
                return false;
            }
        }
        true
    }
    /// Returns the 0-based iteration the export is currently on.
    pub fn get_current_depth(&self) -> usize {
        *self.current_depth.read().unwrap()
    }
}

/// Stores the state for recursively including linked files when using the
/// `--start-at` option with a `--link-depth` greater than 0.
///
/// Paths are canonicalized because canonicalization is used to resolve
/// relative paths that point outside of `start_at`.
pub struct RecursiveResolver {
    /// The canonicalized root of the vault.
    root: PathBuf,
    /// The canonicalized path to start the export at.
    start_at: PathBuf,
    /// The export destination directory.
    destination: PathBuf,
    /// State shared with the caller; used to tell the caller when to stop
    /// recursing.
    shared_state: Arc<SharedResolverState>,
}

impl RecursiveResolver {
    /// Creates a new resolver. `root` and `start_at` are canonicalized so
    /// they can be compared against canonicalized link targets later.
    ///
    /// # Panics
    /// Panics if `root` or `start_at` cannot be canonicalized (e.g. the path
    /// does not exist).
    pub fn new(
        root: PathBuf,
        start_at: PathBuf,
        destination: PathBuf,
        shared_state: Arc<SharedResolverState>,
    ) -> RecursiveResolver {
        RecursiveResolver {
            root: root.canonicalize().unwrap(),
            start_at: start_at.canonicalize().unwrap(),
            destination,
            // `shared_state` is owned here, so move it instead of bumping the
            // Arc refcount with a redundant clone.
            shared_state,
        }
    }

    /// Replaces `start_at` for subsequent iterations. Note that, unlike
    /// [`RecursiveResolver::new`], the path is stored as-is (not
    /// canonicalized).
    pub fn start_at(&mut self, start_at: PathBuf) {
        self.start_at = start_at;
    }

    /// Postprocess function for recursively resolving links to files outside
    /// of `start_at`.
    ///
    /// On the first iteration, links to files outside of `start_at` are
    /// rewritten so they point into the root of the destination. On any other
    /// iteration, links to files inside `start_at` are rewritten to strip the
    /// difference between `root` and `start_at`, and notes that are not
    /// queued for this iteration are skipped entirely.
    pub fn postprocess(
        &self,
        context: &mut Context,
        events: &mut MarkdownEvents,
    ) -> PostprocessorResult {
        if *self.shared_state.current_depth.read().unwrap() == 0 {
            self.first_run(context, events)
        } else {
            // `files_to_parse` contains only files that have not been parsed
            // in a previous iteration; everything else is skipped.
            let is_queued = self
                .shared_state
                .files_to_parse
                .read()
                .unwrap()
                .contains(&context.current_file().canonicalize().unwrap());
            if is_queued {
                self.other_runs(context, events)
            } else {
                PostprocessorResult::StopAndSkipNote
            }
        }
    }

    /// First iteration: rewrites links pointing outside of `start_at` and
    /// records their targets so they can be exported in the next iteration.
    fn first_run(
        &self,
        _context: &mut Context,
        events: &mut MarkdownEvents,
    ) -> PostprocessorResult {
        for event in events.iter_mut() {
            if let Event::End(Tag::Link(_, url, _)) = event {
                // External links are left untouched.
                if url.starts_with("https://") || url.starts_with("http://") {
                    continue;
                }

                let vault_path: PathBuf = get_vault_path(url, self.start_at.as_path());

                // The target may still be within start_at, in which case the
                // exporter already handles it.
                if vault_path.starts_with(&self.start_at) {
                    continue;
                }

                if vault_path.exists() {
                    // Rewrite the link relative to the vault root ...
                    let vaultless_path = vault_path.strip_prefix(self.root.as_path()).unwrap();
                    set_url(url, vaultless_path.to_path_buf());

                    // ... and queue the target for export in the next pass.
                    self.shared_state
                        .linked_files
                        .lock()
                        .unwrap()
                        .push(vault_path);
                }
            }
        }
        PostprocessorResult::Continue
    }

    /// Later iterations: rewrites links back into `start_at` so they point at
    /// the destination, and queues links to other not-yet-parsed files for
    /// the next iteration (unless this is the last one).
    fn other_runs(
        &self,
        _context: &mut Context,
        events: &mut MarkdownEvents,
    ) -> PostprocessorResult {
        for event in events.iter_mut() {
            if let Event::End(Tag::Link(_, url, _)) = event {
                // External links are left untouched.
                if url.starts_with("https://") || url.starts_with("http://") {
                    continue;
                }
                let vault_path = get_vault_path(url, self.root.as_path());

                if vault_path.exists() {
                    // If it's within start_at, strip the difference between
                    // root and start_at so the link points into destination.
                    if vault_path.starts_with(&self.start_at) {
                        let link_destination = self
                            .destination
                            .join(vault_path.strip_prefix(&self.start_at).unwrap());
                        set_url(url, link_destination);
                        // No need to queue: it was parsed in the first
                        // iteration.
                        continue;
                    }
                    // Only queue if there will be another iteration ...
                    if *self.shared_state.current_depth.read().unwrap() < self.shared_state.depth {
                        // ... and only if it hasn't been parsed in a previous
                        // iteration.
                        if !self
                            .shared_state
                            .parsed_files
                            .read()
                            .unwrap()
                            .contains(&vault_path)
                        {
                            self.shared_state
                                .linked_files
                                .lock()
                                .unwrap()
                                .push(vault_path);
                        }
                    }
                }
            }
        }
        PostprocessorResult::Continue
    }
}
/// Resolves a (possibly percent-encoded) link `url` against `root` and
/// returns the canonicalized absolute path of the link target.
///
/// The URL is only read, so a shared borrow suffices (the original `&mut`
/// was never used for mutation).
///
/// # Panics
/// Panics if the decoded URL is not valid UTF-8, or if the joined path
/// cannot be canonicalized.
///
/// NOTE(review): `canonicalize` fails for nonexistent paths, so the unwrap
/// here panics before the callers' subsequent `vault_path.exists()` checks
/// can ever see a missing file — consider returning `Option<PathBuf>`.
fn get_vault_path(url: &CowStr<'_>, root: &Path) -> PathBuf {
    // Obsidian links may be percent-encoded; decode before treating the URL
    // as a filesystem path.
    let path_stub = PathBuf::from(
        percent_decode_str(url.as_ref())
            .decode_utf8()
            .unwrap()
            .as_ref(),
    );
    root.join(path_stub).canonicalize().unwrap()
}
/// Overwrites `url` in place with the percent-encoded form of
/// `link_destination`.
fn set_url(url: &mut CowStr<'_>, link_destination: PathBuf) {
    // `to_string_lossy` already yields a `&str`-derefable Cow; no
    // intermediate `format!` needed.
    *url = CowStr::from(
        utf8_percent_encode(&link_destination.to_string_lossy(), PERCENTENCODE_CHARS).to_string(),
    );
}

#[test]
fn test_filter_tags() {
Expand Down
Loading