From d6275f0cc4ddf73e9b1d077b50a676876a883900 Mon Sep 17 00:00:00 2001 From: Kohei Watanabe Date: Thu, 9 Jan 2025 23:17:30 +0900 Subject: [PATCH] create project --- .gitignore | 1 + Cargo.lock | 311 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 9 ++ LICENSE | 9 ++ README.md | 37 +++++++ src/lib.rs | 268 ++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 36 ++++++ 7 files changed, 671 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 src/lib.rs create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..c1360b3 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,311 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 
+dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "clap" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9560b07a799281c7e0958b9296854d6fafd4c5f31444a7e5bb1ad6dde5ccf1bd" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874e0dd3eb68bf99058751ac9712f622e61e6f393a94f7128fa26e3f02f5c7cd" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "lazy-regex" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60c7310b93682b36b98fa7ea4de998d3463ccbebd94d935d6b48ba5b6ffa7126" +dependencies = [ + "lazy-regex-proc_macros", + "once_cell", + "regex", +] + +[[package]] +name = "lazy-regex-proc_macros" +version = "3.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba01db5ef81e17eb10a5e0f2109d1b3a3e29bac3070fdbd7d156bf7dbd206a1" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "syn", +] + +[[package]] +name = "logseq-export" +version = "0.1.0" +dependencies = [ + "clap", + "lazy-regex", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "proc-macro2" +version = "1.0.92" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" 
+version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = 
"windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8ae7f50 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +edition = "2021" +name = "logseq-export" +version = "0.1.0" +license = "MIT" + +[dependencies] +clap = {version = "4.5.23", features = ["derive"]} +lazy-regex = "3.4.1" diff --git a/LICENSE b/LICENSE 
new file mode 100644 index 0000000..6bb6783 --- /dev/null +++ b/LICENSE @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2024 Kohei Watanabe + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2af847d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,37 @@
+# logseq-export
+
+Logseq Graph を Obsidian Vault に変換するツール
+
+## 使用方法
+
+### 引数
+
+- `-i, --input`: 入力ディレクトリ
+- `-o, --output`: 出力ディレクトリ
+
+### 実行例
+
+```
+$ cargo install --git https://git.fogtype.com/nebel/logseq-export.git
+$ logseq-export --input path/to/logseq --output path/to/obsidian
+```
+
+## 機能
+
+- インデントの調整
+- 特定の行(`collapsed::`, `id::`, `title::`)の無視
+- トップレベルの箇条書き記号の削除
+- `{{video URL}}` の変換
+- 画像サイズ指定の削除(例: `![alt](url){:height 100, :width 200}` -> `![alt](url)`)
+
+## 開発方法
+
+```
+$ git clone https://git.fogtype.com/nebel/logseq-export.git
+$ cd logseq-export
+$ cargo build
+```
+
+## ライセンス
+
+MIT License
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..f2a5bd4
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,322 @@
+use lazy_regex::{lazy_regex, regex, Lazy, Regex};
+use std::fmt::Write as FmtWrite;
+use std::fs::{self, File};
+use std::io::{self, Write};
+use std::path::Path;
+
+/// Matches a list item: optional indentation, `-`, then a space or end of line.
+static LIST_LINE_PATTERN: Lazy<Regex> = lazy_regex!(r"^\s*-(?: |$)");
+/// Matches a line that opens or closes a fenced code block (optionally bulleted).
+static CODE_BLOCK_LINE_PATTERN: Lazy<Regex> = lazy_regex!(r"^\s*(?:- )?```");
+/// Matches the Logseq property lines that are dropped from the output.
+static IGNORE_LINE_PATTERN: Lazy<Regex> = lazy_regex!(r"^\s*(?:collapsed|id|title)::");
+
+/// Rewrites image links whose target starts with `../` so that it starts with `./`.
+///
+/// Image links with any other target are emitted unchanged.
+fn replace_assets_path_prefix(line: &str) -> String {
+    regex!(r"!\[(.*?)\]\((.+?)\)")
+        .replace_all(line, |caps: &regex::Captures| {
+            let alt = &caps[1];
+            let src = &caps[2];
+            if src.starts_with("../") {
+                // Dropping the first `.` turns `../assets/x` into `./assets/x`.
+                format!("![{}]({})", alt, &src[1..])
+            } else {
+                // Re-emit only the matched link. Returning the whole line here
+                // would duplicate the text surrounding the link.
+                caps[0].to_string()
+            }
+        })
+        .to_string()
+}
+
+#[test]
+fn test_replace_assets_path_prefix() {
+    assert_eq!(
+        replace_assets_path_prefix("![image](../assets/image.png)"),
+        "![image](./assets/image.png)"
+    );
+    assert_eq!(
+        replace_assets_path_prefix("![image](assets/image.png)"),
+        "![image](assets/image.png)"
+    );
+    assert_eq!(
+        replace_assets_path_prefix("see ![image](assets/image.png) here"),
+        "see ![image](assets/image.png) here"
+    );
+}
+
+/// Converts a Logseq task marker (`TODO`, `DOING`, `DONE`) found at the start of
+/// the line or after a `- ` bullet into a Markdown checkbox; only `DONE` is checked.
+fn replace_task_list(line: &str) -> String {
+    regex!(r"(?:^|- )(TODO|DOING|DONE)\b")
+        .replace(line, |caps: &regex::Captures| {
+            let mark = if &caps[1] == "DONE" { "x" } else { " " };
+            format!("- [{}]", mark)
+        })
+        .to_string()
+}
+
+#[test]
+fn test_replace_task_list() {
+    assert_eq!(replace_task_list("- TODO item"), "- [ ] item");
+    assert_eq!(replace_task_list("  - DONE item"), "  - [x] item");
+    assert_eq!(replace_task_list("TODO item"), "- [ ] item");
+}
+
+/// Converts Logseq `{{video URL}}` macros into Markdown embeds (`![](URL)`).
+fn replace_video(line: &str) -> String {
+    regex!(r"\{\{video (.+?)\}\}")
+        .replace_all(line, "![]($1)")
+        .to_string()
+}
+
+#[test]
+fn test_replace_video() {
+    assert_eq!(
+        replace_video("{{video https://www.youtube.com/watch?v=123}}"),
+        "![](https://www.youtube.com/watch?v=123)"
+    );
+}
+
+/// Strips a `{:height N, :width M}` size attribute that follows an image link.
+fn remove_image_size(line: &str) -> String {
+    regex!(r"!\[(.*?)\][(](.+?)[)]\{:height\s*\d+,\s*:width\s*\d+\}")
+        .replace_all(line, "![$1]($2)")
+        .to_string()
+}
+
+#[test]
+fn test_remove_image_size() {
+    assert_eq!(
+        remove_image_size("![image](https://example.com/image.png){:height 100, :width 200}"),
+        "![image](https://example.com/image.png)"
+    );
+}
+
+/// Removes the leading bullet (indentation, `-`, and one space) from a list line.
+fn remove_bullet(line: &str) -> String {
+    LIST_LINE_PATTERN.replace(line, "").to_string()
+}
+
+#[test]
+fn test_remove_bullet() {
+    assert_eq!(remove_bullet("- item"), "item");
+}
+
+/// Removes up to `indent * 2` leading whitespace characters from `line`.
+fn dedent(line: &str, indent: usize) -> String {
+    // Same result as stripping the regex `^\s{0,indent*2}`, but without
+    // compiling a new `Regex` for every processed line.
+    let cut = line
+        .char_indices()
+        .take(indent * 2)
+        .take_while(|&(_, c)| c.is_whitespace())
+        .last()
+        .map_or(0, |(i, c)| i + c.len_utf8());
+    line[cut..].to_string()
+}
+
+#[test]
+fn test_dedent() {
+    assert_eq!(dedent("   line", 1), " line");
+    assert_eq!(dedent("  line", 1), "line");
+    assert_eq!(dedent(" line", 1), "line");
+    assert_eq!(dedent("line", 3), "line");
+}
+
+/// Converts one Logseq line into its Obsidian form.
+///
+/// `indent` is the nesting level computed by the caller and `prev_indent` is the
+/// level of the previously emitted line. Lines inside a code block are only
+/// un-bulleted and dedented; every other line is re-indented one level shallower
+/// and then passed through the inline rewrites.
+fn process_line(
+    line: &str,
+    is_list: bool,
+    is_code_block: bool,
+    indent: usize,
+    prev_indent: usize,
+) -> String {
+    let mut line = remove_bullet(line);
+
+    if !is_list {
+        line = dedent(&line, indent);
+    }
+
+    if is_code_block {
+        return dedent(&line, 1);
+    }
+
+    if indent == 0 {
+        // Returning to the top level starts a new paragraph, except for quotes.
+        if prev_indent > 0 && !line.starts_with('>') {
+            line = format!("\n{}", line);
+        }
+    } else if is_list {
+        line = format!("{}- {}", "  ".repeat(indent - 1), line);
+    } else {
+        line = format!("{}{}", "  ".repeat(indent - 1), line);
+    }
+
+    line = remove_image_size(&line);
+    line = replace_assets_path_prefix(&line);
+    line = replace_task_list(&line);
+    line = replace_video(&line);
+
+    line
+}
+
+/// Converts the contents of one Logseq page to Obsidian-flavoured Markdown.
+fn process_input(input: &str) -> io::Result<String> {
+    let mut buffer = String::new();
+    let mut prev_indent = 0;
+    let mut code_block = false;
+
+    for line in input.lines() {
+        if IGNORE_LINE_PATTERN.is_match(line) {
+            continue;
+        }
+
+        let is_list = LIST_LINE_PATTERN.is_match(line);
+
+        let indent = {
+            // A tab counts as one two-space indentation step.
+            let i = line
+                .replace('\t', "  ")
+                .chars()
+                .take_while(|&c| c == ' ')
+                .count()
+                / 2;
+
+            if code_block {
+                // Code keeps the indentation level of the line that opened the block.
+                prev_indent
+            } else if !is_list && i > 0 {
+                // Continuation text sits one step deeper than the bullet it belongs to.
+                i - 1
+            } else {
+                i
+            }
+        };
+
+        let start_or_end_code_block = CODE_BLOCK_LINE_PATTERN.is_match(line);
+        let is_code_block = code_block || start_or_end_code_block;
+        let line = process_line(line, is_list, is_code_block, indent, prev_indent);
+
+        // Writing into a `String` cannot fail.
+        writeln!(buffer, "{}", line).unwrap();
+
+        prev_indent = indent;
+
+        if code_block && start_or_end_code_block {
+            code_block = false;
+        } else {
+            code_block = is_code_block;
+        }
+    }
+    Ok(buffer)
+}
+
+#[test]
+fn test_process_input() {
+    let input = r#"
+- line1
+  - line1.1
+    line1.2
+- ```code block
+  code line
+  ```
+- line2
+  - line2.1
+    - line2.1.1
+      - ```code block
+        code line
+          nested code line
+        ```
+"#;
+
+    let expected = r#"
+line1
+- line1.1
+  line1.2
+```code block
+code line
+```
+line2
+- line2.1
+  - line2.1.1
+```code block
+code line
+  nested code line
+```
+"#;
+
+    let output = process_input(input).unwrap();
+    assert_eq!(output, expected);
+}
+
+/// Copies every regular file directly inside `input_dir` into `output_dir`.
+///
+/// Entries that are not regular files are skipped.
+///
+/// # Errors
+///
+/// Returns the first I/O error hit while listing the directory or copying.
+pub fn copy_files(input_dir: &Path, output_dir: &Path) -> io::Result<()> {
+    for entry in input_dir.read_dir()? {
+        let entry = entry?;
+        let path = entry.path();
+
+        if !path.is_file() {
+            continue;
+        }
+
+        fs::copy(&path, output_dir.join(entry.file_name()))?;
+    }
+    Ok(())
+}
+
+/// Converts every regular file directly inside `input_dir` and writes the result
+/// to a file of the same name in `output_dir`.
+///
+/// When `replace_underscores` is true, `_` in the file name becomes `-`.
+///
+/// # Errors
+///
+/// Returns the first I/O error hit, or `InvalidData` if a file name that has to
+/// be rewritten is not valid UTF-8.
+pub fn process_files(
+    input_dir: &Path,
+    output_dir: &Path,
+    replace_underscores: bool,
+) -> io::Result<()> {
+    for entry in input_dir.read_dir()? {
+        let entry = entry?;
+        let path = entry.path();
+
+        if !path.is_file() {
+            continue;
+        }
+
+        let file_name = entry.file_name();
+        let output_path = if replace_underscores {
+            // Renaming needs text; report a bad name instead of panicking on it.
+            let name = file_name.to_str().ok_or_else(|| {
+                io::Error::new(
+                    io::ErrorKind::InvalidData,
+                    format!("file name is not valid UTF-8: {}", path.display()),
+                )
+            })?;
+            output_dir.join(name.replace('_', "-"))
+        } else {
+            output_dir.join(&file_name)
+        };
+
+        let input = fs::read_to_string(&path)?;
+        let mut output_file = File::create(output_path)?;
+        output_file.write_all(process_input(&input)?.as_bytes())?;
+    }
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..da83799
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,47 @@
+use clap::Parser;
+use logseq_export::{copy_files, process_files};
+use std::io;
+use std::path::Path;
+
+// Command-line arguments: the Logseq graph to read and the Obsidian vault to write.
+#[derive(Parser, Debug)]
+#[command(version)]
+struct Args {
+    #[arg(short, long)]
+    input: String,
+
+    #[arg(short, long)]
+    output: String,
+}
+
+fn main() -> io::Result<()> {
+    let args = Args::parse();
+    let input_path = Path::new(&args.input);
+    let output_path = Path::new(&args.output);
+
+    // `try_exists` reports a missing path as `Ok(false)`, so the flag itself has
+    // to be checked; `?` alone only surfaces I/O failures.
+    if !input_path.join("logseq").try_exists()? {
+        return Err(io::Error::new(
+            io::ErrorKind::NotFound,
+            format!(
+                "{} is not a Logseq graph: no `logseq` directory found",
+                input_path.display()
+            ),
+        ));
+    }
+
+    let input_assets = input_path.join("assets");
+    let output_assets = output_path.join("assets");
+
+    std::fs::create_dir_all(&output_assets)?;
+    copy_files(&input_assets, &output_assets)?;
+
+    let input_journals = input_path.join("journals");
+    let input_pages = input_path.join("pages");
+
+    process_files(&input_journals, output_path, true)?;
+    process_files(&input_pages, output_path, false)?;
+
+    Ok(())
+}