2019-07-28 01:52:06 +09:00
|
|
|
const kuromoji = require("kuromoji");
|
|
|
|
|
|
|
|
// Directory handed to kuromoji.builder() as `dicPath`. It is derived from the
// resolved entry file of the installed "kuromoji" package by walking two
// levels up and into "dict" (presumably the dictionary folder shipped with
// the package — verify against the installed package layout).
const kuromojiEntryFile = require.resolve("kuromoji");
const dicPath = require("path").resolve(kuromojiEntryFile, "../../dict");
|
|
|
|
|
|
|
|
/**
 * Builds a kuromoji tokenizer using the dictionary at `dicPath` and hands
 * back its `tokenize` method, already bound to the tokenizer instance.
 *
 * @returns {Promise<Function>} Resolves with the bound `tokenize` function;
 *   rejects with the error reported by the builder callback.
 */
const useTokenize = () =>
  new Promise((resolve, reject) => {
    // Keep the builder call inside the executor so a synchronous throw is
    // surfaced as a rejection rather than escaping to the caller.
    kuromoji.builder({ dicPath }).build((err, tokenizer) => {
      if (err) {
        reject(err);
        return;
      }
      // Bind so callers can invoke the function without the tokenizer object.
      resolve(tokenizer.tokenize.bind(tokenizer));
    });
  });
|
|
|
|
|
|
|
|
// Ordered table of the token properties exported by toCSV: each key is the
// property read from a token, each value is the Japanese column heading
// written to the CSV header. Iteration order defines the column order.
const features = new Map(
  Object.entries({
    surface_form: "表層形",
    pos: "品詞",
    pos_detail_1: "品詞細分類1",
    pos_detail_2: "品詞細分類2",
    pos_detail_3: "品詞細分類3",
    conjugated_type: "活用型",
    conjugated_form: "活用形",
    basic_form: "基本形",
    reading: "読み",
    pronunciation: "発音",
  })
);
|
|
|
|
|
2020-04-09 00:16:46 +09:00
|
|
|
/**
 * Serialises tokens into CSV text: one header row built from the labels in
 * `features`, followed by one row per token with the matching properties.
 * Rows are separated by "\n"; there is no trailing newline.
 *
 * Fields containing a comma, a double quote or a line break are wrapped in
 * double quotes with embedded quotes doubled (RFC 4180), so that e.g. a
 * punctuation token whose surface form is "," does not shift the columns.
 * Missing (null/undefined) properties are written as empty fields.
 *
 * @param {Array<Object>} tokens - Token objects keyed by the names in `features`.
 * @returns {string} The CSV document.
 */
const toCSV = (tokens) => {
  const escapeField = (value) => {
    // Array.prototype.join renders null/undefined as "", keep that behaviour.
    const text = value == null ? "" : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  const toRow = (fields) => fields.map(escapeField).join(",");

  // Column order is the iteration order of `features`.
  const keys = [...features.keys()];

  return [
    toRow([...features.values()]),
    ...tokens.map((token) => toRow(keys.map((key) => token[key]))),
  ].join("\n");
};
|
|
|
|
|
|
|
|
// Public API of this module: the tokenizer factory and the CSV serialiser.
module.exports = { useTokenize, toCSV };
|