From 588b84ecd082680ddc6ffb93c783758f72fc4dd6 Mon Sep 17 00:00:00 2001 From: hax0r31337 <65506006+hax0r31337@users.noreply.github.com> Date: Fri, 5 Sep 2025 15:30:39 +0800 Subject: [PATCH] feat(synced-lyrics): thai romanization (#3618) Co-authored-by: Angelos Bouklis Co-authored-by: JellyBrick --- package.json | 2 + pnpm-lock.yaml | 14 +++- src/plugins/synced-lyrics/renderer/utils.tsx | 75 +++++++++++++------- 3 files changed, 62 insertions(+), 29 deletions(-) diff --git a/package.json b/package.json index 5c1383ae..9bbd3a79 100644 --- a/package.json +++ b/package.json @@ -62,6 +62,8 @@ "neverBuiltDependencies": [] }, "dependencies": { + "@dehoist/romanize-thai": "1.0.0", + "@electron-toolkit/tsconfig": "1.0.1", "@electron/remote": "2.1.3", "@ffmpeg.wasm/core-mt": "0.12.0", "@ffmpeg.wasm/main": "0.12.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0535a11d..8804bf68 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -33,6 +33,12 @@ importers: .: dependencies: + '@dehoist/romanize-thai': + specifier: 1.0.0 + version: 1.0.0 + '@electron-toolkit/tsconfig': + specifier: 1.0.1 + version: 1.0.1(@types/node@24.3.0) '@electron/remote': specifier: 2.1.3 version: 2.1.3(electron@38.0.0) @@ -241,9 +247,6 @@ importers: specifier: 4.1.5 version: 4.1.5 devDependencies: - '@electron-toolkit/tsconfig': - specifier: 1.0.1 - version: 1.0.1(@types/node@24.3.0) '@eslint/js': specifier: 9.34.0 version: 9.34.0 @@ -473,6 +476,9 @@ packages: '@bufbuild/protobuf@2.6.3': resolution: {integrity: sha512-w/gJKME9mYN7ZoUAmSMAWXk4hkVpxRKvEJCb3dV5g9wwWdxTJJ0ayOJAVcNxtdqaxDyFuC0uz4RSGVacJ030PQ==} + '@dehoist/romanize-thai@1.0.0': + resolution: {integrity: sha512-6SqD4vyZ48otnypLXMh901CeQetoP5ptYOaIr58N6zDqjjoN0bHszMb5d/6AXJJQf8kIvbmSWBeuDrbAWLajPQ==} + '@develar/schema-utils@2.6.5': resolution: {integrity: sha512-0cp4PsWQ/9avqTVMCtZ+GirikIA36ikvjtHweU4/j8yLtgObI0+JUPhYFScgwlteveGB1rt3Cm8UhN04XayDig==} engines: {node: '>= 8.9.0'} @@ -4909,6 +4915,8 @@ snapshots: '@bufbuild/protobuf@2.6.3': 
{} + '@dehoist/romanize-thai@1.0.0': {} + '@develar/schema-utils@2.6.5': dependencies: ajv: 6.12.6 diff --git a/src/plugins/synced-lyrics/renderer/utils.tsx b/src/plugins/synced-lyrics/renderer/utils.tsx index c0f3f8c8..1c6a410b 100644 --- a/src/plugins/synced-lyrics/renderer/utils.tsx +++ b/src/plugins/synced-lyrics/renderer/utils.tsx @@ -1,15 +1,11 @@ import { render } from 'solid-js/web'; - import KuromojiAnalyzer from 'kuroshiro-analyzer-kuromoji'; import Kuroshiro from 'kuroshiro'; - import { romanize as esHangulRomanize } from 'es-hangul'; import hanja from 'hanja'; - -import pinyin from 'tiny-pinyin'; - +import * as pinyin from 'tiny-pinyin'; +import { romanize as romanizeThaiFrag } from '@dehoist/romanize-thai'; import { lazy } from 'lazy-var'; - import { detect } from 'tinyld'; import { waitForElement } from '@/utils/wait-for-element'; @@ -155,26 +151,9 @@ const hasKorean = (lines: string[]) => const hasChinese = (lines: string[]) => lines.some((line) => /[\u4E00-\u9FFF]+/.test(line)); -export const romanize = async (line: string) => { - const lang = detect(line); - - const handlers: Record<string, (line: string) => Promise<string> | string> = { - ja: romanizeJapanese, - ko: romanizeHangul, - zh: romanizeChinese, - }; - - const NO_OP = (l: string) => l; - const handler = handlers[lang] ?? NO_OP; - - line = await handler(line); - - if (hasJapanese([line])) line = await romanizeJapanese(line); - if (hasKorean([line])) line = romanizeHangul(line); - if (hasChinese([line])) line = romanizeChinese(line); - - return line; -}; +// https://en.wikipedia.org/wiki/Thai_(Unicode_block) +const hasThai = (lines: string[]) => + lines.some((line) => /[\u0E00-\u0E7F]+/.test(line)); export const romanizeJapanese = async (line: string) => (await kuroshiro.get()).convert(line, { @@ -190,3 +169,47 @@ export const romanizeChinese = (line: string) => { pinyin.convertToPinyin(match, ' ', true), ); }; + +const thaiSegmenter = Intl.Segmenter.supportedLocalesOf('th').includes('th') + ? 
new Intl.Segmenter('th', { granularity: 'word' }) + : null; + +export const romanizeThai = (line: string) => { + if (!thaiSegmenter) return romanizeThaiFrag(line); + + const segments = Array.from(thaiSegmenter.segment(line)); + const latin = segments + .map((segment) => + segment.isWordLike + ? romanizeThaiFrag(segment.segment) + : segment.segment.trim(), + ) + .join(' ') + .trim(); + + return latin; +}; + +const handlers: Record<string, (line: string) => Promise<string> | string> = { + ja: romanizeJapanese, + ko: romanizeHangul, + zh: romanizeChinese, + th: romanizeThai, +}; + +export const romanize = async (line: string) => { + const lang = detect(line); + + const handler = handlers[lang]; + if (handler) { + return handler(line); + } + + // fallback + if (hasJapanese([line])) line = await romanizeJapanese(line); + if (hasKorean([line])) line = romanizeHangul(line); + if (hasChinese([line])) line = romanizeChinese(line); + if (hasThai([line])) line = romanizeThai(line); + + return line; +};