From 70879ef9699bcfff2cf0bb31d4e79a8589348f3c Mon Sep 17 00:00:00 2001 From: Kohei Watanabe Date: Sat, 11 Jan 2025 00:32:43 +0900 Subject: [PATCH] add pages metadata processing --- Cargo.lock | 350 +++++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 7 +- src/lib.rs | 129 +++++++++++++++++++ src/main.rs | 7 +- 4 files changed, 489 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1360b3..adff9cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,6 +11,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.18" @@ -60,6 +75,58 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bigdecimal" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6773ddc0eafc0e509fb60e48dff7f450f8e674a0686ae8605e8d9901bd5eefa" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "cc" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a012a0df96dd6d06ba9a1b29d6402d1a5d77c6befd2566afdc26e10603dc93d7" +dependencies = [ + "shlex", +] + +[[package]] +name = "cfg-if" 
+version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets", +] + [[package]] name = "clap" version = "4.5.24" @@ -106,18 +173,117 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "edn-format" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22313f5794126f6d36a04f468ab167514161d9e1915152392e2e6a6ca2adece4" +dependencies = [ + "bigdecimal", + "chrono", + "internship", + "itertools", + "num-bigint", + "ordered-float", + "thiserror", + "uuid", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "heck" 
version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "internship" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75b80c06d9124692b2927086ed75c8721d4061f9c159d9675d3f6d63729b597" +dependencies = [ + "serde", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "js-sys" +version = "0.3.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + [[package]] name = "lazy-regex" version = "3.4.1" @@ -141,11 +307,26 @@ dependencies = [ "syn", ] +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "log" 
+version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "logseq-export" -version = "0.1.0" +version = "0.2.0" dependencies = [ + "chrono", "clap", + "convert_case", + "edn-format", "lazy-regex", ] @@ -155,12 +336,49 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -208,6 +426,32 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "serde" +version = "1.0.217" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.217" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "strsim" version = "0.11.1" @@ -225,18 +469,122 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + [[package]] name = 
"windows-sys"
 version = "0.59.0"
diff --git a/Cargo.toml b/Cargo.toml
index 8ae7f50..9d0a36f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,12 @@
 [package]
 edition = "2021"
-name = "logseq-export"
-version = "0.1.0"
 license = "MIT"
+name = "logseq-export"
+version = "0.2.0"
 
 [dependencies]
+chrono = "0.4.39"
 clap = {version = "4.5.23", features = ["derive"]}
+convert_case = "0.6.0"
+edn-format = "3.3.0"
 lazy-regex = "3.4.1"
diff --git a/src/lib.rs b/src/lib.rs
index f2a5bd4..ce6b62f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,8 +1,12 @@
+use chrono::{DateTime, SecondsFormat};
+use convert_case::{Case, Casing};
+use edn_format::Value;
 use lazy_regex::{lazy_regex, regex, Lazy, Regex};
 use std::fmt::Write as FmtWrite;
 use std::fs::{self, File};
 use std::io::{self, Write};
 use std::path::Path;
+use std::str;
 
 static LIST_LINE_PATTERN: Lazy<Regex> = lazy_regex!(r"^\s*-(?: |$)");
 static CODE_BLOCK_LINE_PATTERN: Lazy<Regex> = lazy_regex!(r"^\s*(?:- )?```");
@@ -266,3 +270,156 @@ pub fn process_files(
     }
     Ok(())
 }
+
+/// Converts an ECMAScript timestamp (milliseconds since the Unix epoch) into an
+/// RFC 3339 UTC string with millisecond precision, e.g. `2025-01-10T13:25:58.154Z`.
+///
+/// Returns `None` when the timestamp is outside the range chrono can represent.
+fn ecma_timestamp_to_iso(timestamp: i64) -> Option<String> {
+    DateTime::from_timestamp_millis(timestamp)
+        .map(|date_time| date_time.to_rfc3339_opts(SecondsFormat::Millis, true))
+}
+
+#[test]
+fn test_ecma_timestamp_to_iso() {
+    assert_eq!(
+        ecma_timestamp_to_iso(1736515558154).as_deref(),
+        Some("2025-01-10T13:25:58.154Z")
+    );
+    assert_eq!(ecma_timestamp_to_iso(i64::MAX), None);
+}
+
+/// Ensures the Markdown file at `path` has a `created:` front matter entry.
+///
+/// * File without a front matter block, or with an unterminated block that has
+///   no `created:` line: a new block holding only `created` is prepended to the
+///   untouched content.
+/// * Terminated block without a `created:` line: the entry is inserted right
+///   before the closing `---`.
+/// * Block that already has a `created:` line: the existing value is kept.
+///
+/// In the last two cases the file is rewritten line by line, so it ends up with
+/// `\n` line endings and a trailing newline. A missing file is reported on
+/// stderr and skipped; any other I/O error is returned.
+fn add_front_matter(path: &Path, created: &str) -> io::Result<()> {
+    // Read first and inspect the error instead of probing with `exists()`: that
+    // avoids a check-then-use race and keeps real I/O failures visible.
+    let content = match fs::read_to_string(path) {
+        Ok(content) => content,
+        Err(err) if err.kind() == io::ErrorKind::NotFound => {
+            eprintln!("File not found: created: {} {:?}", created, path);
+            return Ok(());
+        }
+        Err(err) => return Err(err),
+    };
+
+    let mut lines = content.lines();
+    let mut updated = String::with_capacity(content.len() + created.len() + 32);
+    // Set once `updated` is known to contain a `created:` entry.
+    let mut has_created = false;
+
+    if lines.next() == Some("---") {
+        updated.push_str("---\n");
+        // `by_ref` leaves the iterator usable for copying the body afterwards.
+        for line in lines.by_ref() {
+            if line == "---" {
+                if !has_created {
+                    updated.push_str("created: ");
+                    updated.push_str(created);
+                    updated.push('\n');
+                }
+                updated.push_str("---\n");
+                has_created = true;
+                break;
+            }
+            if line.starts_with("created:") {
+                has_created = true;
+            }
+            updated.push_str(line);
+            updated.push('\n');
+        }
+    }
+
+    if has_created {
+        for line in lines {
+            updated.push_str(line);
+            updated.push('\n');
+        }
+    } else {
+        // No usable front matter: prepend a fresh block to the original content.
+        updated = format!("---\ncreated: {}\n---\n{}", created, content);
+    }
+
+    fs::write(path, updated)
+}
+
+/// Builds the `InvalidData` error reported for malformed page metadata.
+fn invalid_metadata(message: impl Into<String>) -> io::Error {
+    io::Error::new(io::ErrorKind::InvalidData, message.into())
+}
+
+/// Adds `created` front matter to exported pages using page metadata in EDN form.
+///
+/// `metadata` is expected to be a vector of maps whose keys include the keywords
+/// `name` (a string) and `created-at` (an integer, milliseconds since the Unix
+/// epoch). A non-vector document and non-map items are ignored. Pages whose name
+/// starts with `YYYY-MM-DD` are skipped; for every other page `<name>.md` is
+/// looked up in `output_dir` as stored, upper-cased and in Title Case.
+///
+/// # Errors
+///
+/// Returns an [`io::ErrorKind::InvalidData`] error when the EDN cannot be parsed
+/// or a map lacks a valid `name` / `created-at`, and propagates I/O errors from
+/// updating the page files.
+pub fn process_metadata(metadata: &str, output_dir: &Path) -> io::Result<()> {
+    let edn = edn_format::parse_str(metadata)
+        .map_err(|err| invalid_metadata(format!("Failed to EDN parse metadata: {:?}", err)))?;
+
+    let Value::Vector(items) = edn else {
+        return Ok(());
+    };
+
+    for item in items {
+        let Value::Map(entry) = item else {
+            continue;
+        };
+
+        let mut name = None;
+        let mut created = None;
+        for (key, value) in entry {
+            if let Value::Keyword(keyword) = key {
+                match keyword.name() {
+                    "name" => name = Some(value),
+                    "created-at" => created = Some(value),
+                    _ => {}
+                }
+            }
+        }
+
+        let name = match name {
+            Some(Value::String(name)) => name,
+            Some(_) => return Err(invalid_metadata("name is not String")),
+            None => return Err(invalid_metadata("name not found")),
+        };
+        let created = match created {
+            Some(Value::Integer(millis)) => millis,
+            Some(_) => return Err(invalid_metadata("created-at is not Integer")),
+            None => return Err(invalid_metadata("created-at not found")),
+        };
+        let created = ecma_timestamp_to_iso(created)
+            .ok_or_else(|| invalid_metadata(format!("created-at is out of range: {}", created)))?;
+
+        if regex!(r"^\d{4}-\d{2}-\d{2}").is_match(&name) {
+            continue;
+        }
+
+        // The same page is probed under three spellings of its name.
+        let upper = name.to_ascii_uppercase();
+        let title = name.to_case(Case::Title);
+        for file_stem in [name, upper, title] {
+            add_front_matter(&output_dir.join(format!("{}.md", file_stem)), &created)?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index da83799..651fe32 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,7 @@
 use clap::Parser;
+use logseq_export::process_metadata;
 use logseq_export::{copy_files, process_files};
+use std::fs;
 use std::io;
 use std::path::Path;
 
@@ -23,7 +25,7 @@ fn main() -> io::Result<()> {
     let input_assets = input_path.join("assets");
     let output_assets = output_path.join("assets");
 
-    std::fs::create_dir_all(&output_assets)?;
+    fs::create_dir_all(&output_assets)?;
     copy_files(&input_assets, &output_assets)?;
 
     let input_journals = input_path.join("journals");
@@ -32,5 +34,8 @@ fn main() -> io::Result<()> {
     process_files(&input_journals, &output_path, true)?;
     process_files(&input_pages, &output_path, false)?;
 
+    let metadata = fs::read_to_string(input_path.join("logseq/pages-metadata.edn"))?;
+    process_metadata(&metadata, &output_path)?;
+
     Ok(())
 }