mirror of
https://github.com/th-ch/youtube-music.git
synced 2026-01-10 10:11:46 +00:00
feat(synced-lyrics): thai romanization (#3618)
Co-authored-by: Angelos Bouklis <me@arjix.dev> Co-authored-by: JellyBrick <shlee1503@naver.com>
This commit is contained in:
@ -62,6 +62,8 @@
|
||||
"neverBuiltDependencies": []
|
||||
},
|
||||
"dependencies": {
|
||||
"@dehoist/romanize-thai": "1.0.0",
|
||||
"@electron-toolkit/tsconfig": "1.0.1",
|
||||
"@electron/remote": "2.1.3",
|
||||
"@ffmpeg.wasm/core-mt": "0.12.0",
|
||||
"@ffmpeg.wasm/main": "0.12.0",
|
||||
|
||||
14
pnpm-lock.yaml
generated
14
pnpm-lock.yaml
generated
@ -33,6 +33,12 @@ importers:
|
||||
|
||||
.:
|
||||
dependencies:
|
||||
'@dehoist/romanize-thai':
|
||||
specifier: 1.0.0
|
||||
version: 1.0.0
|
||||
'@electron-toolkit/tsconfig':
|
||||
specifier: 1.0.1
|
||||
version: 1.0.1(@types/node@24.3.0)
|
||||
'@electron/remote':
|
||||
specifier: 2.1.3
|
||||
version: 2.1.3(electron@38.0.0)
|
||||
@ -241,9 +247,6 @@ importers:
|
||||
specifier: 4.1.5
|
||||
version: 4.1.5
|
||||
devDependencies:
|
||||
'@electron-toolkit/tsconfig':
|
||||
specifier: 1.0.1
|
||||
version: 1.0.1(@types/node@24.3.0)
|
||||
'@eslint/js':
|
||||
specifier: 9.34.0
|
||||
version: 9.34.0
|
||||
@ -473,6 +476,9 @@ packages:
|
||||
'@bufbuild/protobuf@2.6.3':
|
||||
resolution: {integrity: sha512-w/gJKME9mYN7ZoUAmSMAWXk4hkVpxRKvEJCb3dV5g9wwWdxTJJ0ayOJAVcNxtdqaxDyFuC0uz4RSGVacJ030PQ==}
|
||||
|
||||
'@dehoist/romanize-thai@1.0.0':
|
||||
resolution: {integrity: sha512-6SqD4vyZ48otnypLXMh901CeQetoP5ptYOaIr58N6zDqjjoN0bHszMb5d/6AXJJQf8kIvbmSWBeuDrbAWLajPQ==}
|
||||
|
||||
'@develar/schema-utils@2.6.5':
|
||||
resolution: {integrity: sha512-0cp4PsWQ/9avqTVMCtZ+GirikIA36ikvjtHweU4/j8yLtgObI0+JUPhYFScgwlteveGB1rt3Cm8UhN04XayDig==}
|
||||
engines: {node: '>= 8.9.0'}
|
||||
@ -4909,6 +4915,8 @@ snapshots:
|
||||
|
||||
'@bufbuild/protobuf@2.6.3': {}
|
||||
|
||||
'@dehoist/romanize-thai@1.0.0': {}
|
||||
|
||||
'@develar/schema-utils@2.6.5':
|
||||
dependencies:
|
||||
ajv: 6.12.6
|
||||
|
||||
@ -1,15 +1,11 @@
|
||||
import { render } from 'solid-js/web';
|
||||
|
||||
import KuromojiAnalyzer from 'kuroshiro-analyzer-kuromoji';
|
||||
import Kuroshiro from 'kuroshiro';
|
||||
|
||||
import { romanize as esHangulRomanize } from 'es-hangul';
|
||||
import hanja from 'hanja';
|
||||
|
||||
import pinyin from 'tiny-pinyin';
|
||||
|
||||
import * as pinyin from 'tiny-pinyin';
|
||||
import { romanize as romanizeThaiFrag } from '@dehoist/romanize-thai';
|
||||
import { lazy } from 'lazy-var';
|
||||
|
||||
import { detect } from 'tinyld';
|
||||
|
||||
import { waitForElement } from '@/utils/wait-for-element';
|
||||
@ -155,26 +151,9 @@ const hasKorean = (lines: string[]) =>
|
||||
/**
 * Reports whether at least one of the given lines contains a character in
 * the U+4E00–U+9FFF range.
 *
 * @param lines Lines of text to inspect.
 * @returns `true` if any line matches, `false` otherwise (including for an
 *   empty array).
 */
const hasChinese = (lines: string[]) => {
  const cjkRun = /[\u4E00-\u9FFF]+/;
  return lines.some((text) => cjkRun.test(text));
};
|
||||
|
||||
/**
 * Romanizes a single line of lyrics.
 *
 * The language reported by `detect` selects a dedicated converter (Japanese,
 * Korean or Chinese); lines in any other language pass through that step
 * unchanged. Afterwards the script checks run in a fixed order (Japanese,
 * Korean, Chinese) so that anything still written in one of those scripts is
 * converted as well.
 *
 * @param line The line to romanize.
 * @returns A promise resolving to the romanized line.
 */
export const romanize = async (line: string) => {
  const byLanguage: Record<
    string,
    (line: string) => Promise<string> | string
  > = {
    ja: romanizeJapanese,
    ko: romanizeHangul,
    zh: romanizeChinese,
  };

  // First pass: converter chosen by the detected language, if there is one.
  const convert = byLanguage[detect(line)];
  let text = await (convert ? convert(line) : line);

  // Second pass: script-based checks on whatever the first pass produced.
  if (hasJapanese([text])) {
    text = await romanizeJapanese(text);
  }
  if (hasKorean([text])) {
    text = romanizeHangul(text);
  }
  if (hasChinese([text])) {
    text = romanizeChinese(text);
  }

  return text;
};
|
||||
// https://en.wikipedia.org/wiki/Thai_(Unicode_block)
/**
 * Reports whether at least one of the given lines contains a character from
 * the Thai Unicode block (U+0E00–U+0E7F).
 *
 * @param lines Lines of text to inspect.
 * @returns `true` if any line matches, `false` otherwise (including for an
 *   empty array).
 */
const hasThai = (lines: string[]) => {
  const thaiRun = /[\u0E00-\u0E7F]+/;
  return lines.some((text) => thaiRun.test(text));
};
|
||||
|
||||
export const romanizeJapanese = async (line: string) =>
|
||||
(await kuroshiro.get()).convert(line, {
|
||||
@ -190,3 +169,47 @@ export const romanizeChinese = (line: string) => {
|
||||
pinyin.convertToPinyin(match, ' ', true),
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Word-granularity `Intl.Segmenter` for the `th` locale, or `null` when the
 * runtime does not list `th` among its supported segmenter locales.
 */
const thaiSegmenter = (() => {
  const supported = Intl.Segmenter.supportedLocalesOf('th');
  if (!supported.includes('th')) return null;

  return new Intl.Segmenter('th', { granularity: 'word' });
})();
|
||||
|
||||
/**
 * Romanizes a line of Thai text.
 *
 * When a Thai word segmenter is available, the line is split into segments
 * and each word-like segment is romanized on its own; other segments
 * (whitespace, punctuation, ...) are kept as-is after trimming. The pieces
 * are then joined with single spaces. Without a segmenter the whole line is
 * handed to the romanizer in one call.
 *
 * @param line The line to romanize.
 * @returns The romanized line, with no leading or trailing whitespace when
 *   the segmenter path is used.
 */
export const romanizeThai = (line: string) => {
  if (!thaiSegmenter) return romanizeThaiFrag(line);

  const pieces: string[] = [];
  for (const { segment, isWordLike } of thaiSegmenter.segment(line)) {
    const piece = isWordLike ? romanizeThaiFrag(segment) : segment.trim();

    // Whitespace-only segments trim down to '' — skip them, otherwise the
    // join below would emit doubled spaces between already-separated words.
    if (piece) pieces.push(piece);
  }

  return pieces.join(' ').trim();
};
|
||||
|
||||
/** A romanizer for one language; may be synchronous or asynchronous. */
type RomanizeHandler = (line: string) => Promise<string> | string;

/**
 * Romanizers keyed by language code, looked up with the value returned by
 * `detect`. Languages without an entry have no dedicated romanizer.
 */
const handlers: Record<string, RomanizeHandler> = {
  ja: romanizeJapanese,
  ko: romanizeHangul,
  zh: romanizeChinese,
  th: romanizeThai,
};
|
||||
|
||||
/**
 * Romanizes a single line of lyrics.
 *
 * If the language reported by `detect` has an entry in `handlers`, that
 * romanizer alone produces the result. Otherwise the line is checked for
 * Japanese, Korean, Chinese and Thai characters, in that order, and each
 * matching romanizer is applied to the running result.
 *
 * @param line The line to romanize.
 * @returns A promise resolving to the romanized line; the input is returned
 *   unchanged when no romanizer applies.
 */
export const romanize = async (line: string) => {
  const dedicated = handlers[detect(line)];
  if (dedicated) return dedicated(line);

  // fallback: no dedicated handler for the detected language, so go by the
  // scripts actually present in the text.
  let text = line;
  if (hasJapanese([text])) {
    text = await romanizeJapanese(text);
  }
  if (hasKorean([text])) {
    text = romanizeHangul(text);
  }
  if (hasChinese([text])) {
    text = romanizeChinese(text);
  }
  if (hasThai([text])) {
    text = romanizeThai(text);
  }

  return text;
};
|
||||
|
||||
Reference in New Issue
Block a user