1
0
Fork 0
mirror of https://github.com/kou029w/daraz-san.git synced 2025-01-19 00:18:01 +00:00
daraz-san/scripts/util/morpheme.js
Kohei Watanabe 5152efc6ff scripts を移行
主要な変更点
- CoffeeScript をやめる
- cron.coffee は機能していないため削除
- ruby.coffee は機能していないため削除
- www.coffee は #general でしか使われていないため削除
- *便器*はリンク切れしていたため削除
- *鳥取*は栃木県ではないため削除

Co-authored-by: SAKAGUCHI Takashi <takashi.sakaguchi@ummm.info>
Co-authored-by: Masayuki Higashino <mh61503891@users.noreply.github.com>
Co-authored-by: Kazuki Shigemichi <shigemichik@gmail.com>
2019-07-28 01:52:06 +09:00

38 lines
940 B
JavaScript

const kuromoji = require("kuromoji");
// Absolute path of the dictionary directory handed to kuromoji's builder.
// It is derived from wherever Node resolves the "kuromoji" module, so it
// follows the installed package instead of a hard-coded node_modules path.
// NOTE(review): assumes the resolved entry file sits one directory below the
// package root, next to a sibling "dict" directory — confirm on upgrade.
const dicPath = (() => {
  const path = require("path");
  const entryFile = require.resolve("kuromoji");
  return path.resolve(entryFile, "..", "..", "dict");
})();
/**
 * Build a kuromoji tokenizer using the dictionary at `dicPath` and hand back
 * its `tokenize` method, already bound to the tokenizer instance so it can be
 * called as a plain function.
 *
 * A new builder is created and built on every call.
 *
 * @returns {Promise<Function>} Resolves with the bound `tokenize` function;
 *   rejects with the error reported by the build callback.
 */
const useTokenize = () =>
  new Promise((resolve, reject) => {
    const builder = kuromoji.builder({ dicPath });
    return builder.build((buildError, tokenizer) => {
      if (buildError) {
        return reject(buildError);
      }
      // Bind so the returned function keeps working when detached from
      // the tokenizer object.
      return resolve(tokenizer.tokenize.bind(tokenizer));
    });
  });
// Token property name -> Japanese column header, in output column order.
// toCSV reads the keys off each token and prints the values as the header row.
const features = new Map(
  Object.entries({
    surface_form: "表層形", // surface form
    pos: "品詞", // part of speech
    pos_detail_1: "品詞細分類1", // part-of-speech subcategory 1
    pos_detail_2: "品詞細分類2", // part-of-speech subcategory 2
    pos_detail_3: "品詞細分類3", // part-of-speech subcategory 3
    conjugated_type: "活用型", // conjugation type
    conjugated_form: "活用形", // conjugation form
    basic_form: "基本形", // base form
    reading: "読み", // reading
    pronunciation: "発音" // pronunciation
  })
);
/**
 * Render tokens as CSV text: one header row built from the labels in
 * `features`, followed by one row per token with the token's properties in
 * the same column order. Rows are separated by "\n"; there is no trailing
 * newline.
 *
 * Fields are quoted per RFC 4180: a value containing a comma, a double quote,
 * or a line break is wrapped in double quotes with embedded quotes doubled.
 * Without this, a token such as the punctuation mark "," would shift every
 * following column. Values without such characters are emitted unchanged.
 *
 * @param {Array<Object>} tokens Token objects whose properties are looked up
 *   by the keys of `features`.
 * @returns {string} The CSV document.
 */
const toCSV = tokens => {
  const escapeField = value => {
    // Missing properties become empty fields, as Array.prototype.join does.
    const text = value == null ? "" : String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };
  const toRow = fields => fields.map(escapeField).join(",");
  // Hoisted so the key list is not rebuilt for every token.
  const keys = [...features.keys()];
  return [
    toRow([...features.values()]),
    ...tokens.map(token => toRow(keys.map(key => token[key])))
  ].join("\n");
};
// Public API of this module: the tokenizer factory and the CSV formatter.
module.exports = { useTokenize, toCSV };