mirror of
https://github.com/th-ch/youtube-music.git
synced 2026-01-11 10:31:47 +00:00
feat(synced-lyrics): thai romanization (#3618)
Co-authored-by: Angelos Bouklis <me@arjix.dev> Co-authored-by: JellyBrick <shlee1503@naver.com>
This commit is contained in:
@ -62,6 +62,8 @@
|
|||||||
"neverBuiltDependencies": []
|
"neverBuiltDependencies": []
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
"@dehoist/romanize-thai": "1.0.0",
|
||||||
|
"@electron-toolkit/tsconfig": "1.0.1",
|
||||||
"@electron/remote": "2.1.3",
|
"@electron/remote": "2.1.3",
|
||||||
"@ffmpeg.wasm/core-mt": "0.12.0",
|
"@ffmpeg.wasm/core-mt": "0.12.0",
|
||||||
"@ffmpeg.wasm/main": "0.12.0",
|
"@ffmpeg.wasm/main": "0.12.0",
|
||||||
|
|||||||
14
pnpm-lock.yaml
generated
14
pnpm-lock.yaml
generated
@ -33,6 +33,12 @@ importers:
|
|||||||
|
|
||||||
.:
|
.:
|
||||||
dependencies:
|
dependencies:
|
||||||
|
'@dehoist/romanize-thai':
|
||||||
|
specifier: 1.0.0
|
||||||
|
version: 1.0.0
|
||||||
|
'@electron-toolkit/tsconfig':
|
||||||
|
specifier: 1.0.1
|
||||||
|
version: 1.0.1(@types/node@24.3.0)
|
||||||
'@electron/remote':
|
'@electron/remote':
|
||||||
specifier: 2.1.3
|
specifier: 2.1.3
|
||||||
version: 2.1.3(electron@38.0.0)
|
version: 2.1.3(electron@38.0.0)
|
||||||
@ -241,9 +247,6 @@ importers:
|
|||||||
specifier: 4.1.5
|
specifier: 4.1.5
|
||||||
version: 4.1.5
|
version: 4.1.5
|
||||||
devDependencies:
|
devDependencies:
|
||||||
'@electron-toolkit/tsconfig':
|
|
||||||
specifier: 1.0.1
|
|
||||||
version: 1.0.1(@types/node@24.3.0)
|
|
||||||
'@eslint/js':
|
'@eslint/js':
|
||||||
specifier: 9.34.0
|
specifier: 9.34.0
|
||||||
version: 9.34.0
|
version: 9.34.0
|
||||||
@ -473,6 +476,9 @@ packages:
|
|||||||
'@bufbuild/protobuf@2.6.3':
|
'@bufbuild/protobuf@2.6.3':
|
||||||
resolution: {integrity: sha512-w/gJKME9mYN7ZoUAmSMAWXk4hkVpxRKvEJCb3dV5g9wwWdxTJJ0ayOJAVcNxtdqaxDyFuC0uz4RSGVacJ030PQ==}
|
resolution: {integrity: sha512-w/gJKME9mYN7ZoUAmSMAWXk4hkVpxRKvEJCb3dV5g9wwWdxTJJ0ayOJAVcNxtdqaxDyFuC0uz4RSGVacJ030PQ==}
|
||||||
|
|
||||||
|
'@dehoist/romanize-thai@1.0.0':
|
||||||
|
resolution: {integrity: sha512-6SqD4vyZ48otnypLXMh901CeQetoP5ptYOaIr58N6zDqjjoN0bHszMb5d/6AXJJQf8kIvbmSWBeuDrbAWLajPQ==}
|
||||||
|
|
||||||
'@develar/schema-utils@2.6.5':
|
'@develar/schema-utils@2.6.5':
|
||||||
resolution: {integrity: sha512-0cp4PsWQ/9avqTVMCtZ+GirikIA36ikvjtHweU4/j8yLtgObI0+JUPhYFScgwlteveGB1rt3Cm8UhN04XayDig==}
|
resolution: {integrity: sha512-0cp4PsWQ/9avqTVMCtZ+GirikIA36ikvjtHweU4/j8yLtgObI0+JUPhYFScgwlteveGB1rt3Cm8UhN04XayDig==}
|
||||||
engines: {node: '>= 8.9.0'}
|
engines: {node: '>= 8.9.0'}
|
||||||
@ -4909,6 +4915,8 @@ snapshots:
|
|||||||
|
|
||||||
'@bufbuild/protobuf@2.6.3': {}
|
'@bufbuild/protobuf@2.6.3': {}
|
||||||
|
|
||||||
|
'@dehoist/romanize-thai@1.0.0': {}
|
||||||
|
|
||||||
'@develar/schema-utils@2.6.5':
|
'@develar/schema-utils@2.6.5':
|
||||||
dependencies:
|
dependencies:
|
||||||
ajv: 6.12.6
|
ajv: 6.12.6
|
||||||
|
|||||||
@ -1,15 +1,11 @@
|
|||||||
import { render } from 'solid-js/web';
|
import { render } from 'solid-js/web';
|
||||||
|
|
||||||
import KuromojiAnalyzer from 'kuroshiro-analyzer-kuromoji';
|
import KuromojiAnalyzer from 'kuroshiro-analyzer-kuromoji';
|
||||||
import Kuroshiro from 'kuroshiro';
|
import Kuroshiro from 'kuroshiro';
|
||||||
|
|
||||||
import { romanize as esHangulRomanize } from 'es-hangul';
|
import { romanize as esHangulRomanize } from 'es-hangul';
|
||||||
import hanja from 'hanja';
|
import hanja from 'hanja';
|
||||||
|
import * as pinyin from 'tiny-pinyin';
|
||||||
import pinyin from 'tiny-pinyin';
|
import { romanize as romanizeThaiFrag } from '@dehoist/romanize-thai';
|
||||||
|
|
||||||
import { lazy } from 'lazy-var';
|
import { lazy } from 'lazy-var';
|
||||||
|
|
||||||
import { detect } from 'tinyld';
|
import { detect } from 'tinyld';
|
||||||
|
|
||||||
import { waitForElement } from '@/utils/wait-for-element';
|
import { waitForElement } from '@/utils/wait-for-element';
|
||||||
@ -155,26 +151,9 @@ const hasKorean = (lines: string[]) =>
|
|||||||
/**
 * Reports whether any of the given lines contains at least one character in
 * the range U+4E00–U+9FFF.
 *
 * @param lines Lines of text to inspect.
 * @returns `true` as soon as one line matches, `false` otherwise (including
 *          for an empty array).
 */
const hasChinese = (lines: string[]) => {
  for (const text of lines) {
    if (/[\u4E00-\u9FFF]+/.test(text)) return true;
  }
  return false;
};
|
||||||
|
|
||||||
// https://en.wikipedia.org/wiki/Thai_(Unicode_block)
/**
 * Reports whether any of the given lines contains at least one character in
 * the range U+0E00–U+0E7F.
 *
 * @param lines Lines of text to inspect.
 * @returns `true` if at least one line matches, `false` otherwise (including
 *          for an empty array).
 */
const hasThai = (lines: string[]) =>
  lines.some((line) => /[\u0E00-\u0E7F]+/.test(line));
|
|
||||||
|
|
||||||
export const romanizeJapanese = async (line: string) =>
|
export const romanizeJapanese = async (line: string) =>
|
||||||
(await kuroshiro.get()).convert(line, {
|
(await kuroshiro.get()).convert(line, {
|
||||||
@ -190,3 +169,47 @@ export const romanizeChinese = (line: string) => {
|
|||||||
pinyin.convertToPinyin(match, ' ', true),
|
pinyin.convertToPinyin(match, ' ', true),
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Word-granularity `Intl.Segmenter` for the `th` locale, or `null` when the
 * runtime does not list `th` among its supported segmenter locales.
 */
const thaiSegmenter = (() => {
  const thaiSupported = Intl.Segmenter.supportedLocalesOf('th').includes('th');
  if (!thaiSupported) return null;

  return new Intl.Segmenter('th', { granularity: 'word' });
})();
|
||||||
|
|
||||||
|
/**
 * Romanizes one line of Thai text.
 *
 * When `thaiSegmenter` is available the line is split into segments:
 * word-like segments are passed to `romanizeThaiFrag`, every other segment is
 * kept verbatim (trimmed). The resulting pieces are joined with single spaces.
 * When no segmenter is available the whole line is handed to
 * `romanizeThaiFrag` unchanged.
 *
 * @param line The text to romanize.
 * @returns The romanized line, without leading or trailing whitespace on the
 *          segmented path.
 */
export const romanizeThai = (line: string) => {
  if (!thaiSegmenter) return romanizeThaiFrag(line);

  return (
    Array.from(thaiSegmenter.segment(line), (segment) =>
      segment.isWordLike
        ? romanizeThaiFrag(segment.segment)
        : segment.segment.trim(),
    )
      // Whitespace-only segments trim down to '' — joining them would leave
      // runs of consecutive spaces in the output, so drop them first.
      .filter((piece) => piece.length > 0)
      .join(' ')
      .trim()
  );
};
|
||||||
|
|
||||||
|
/** A romanizer takes one line and returns its romanized form, possibly asynchronously. */
type RomanizeHandler = (line: string) => Promise<string> | string;

/**
 * Romanizers keyed by the language code that `romanize` obtains from
 * `detect`. Codes without an entry here fall through to the script-based
 * checks in `romanize`.
 */
const handlers: Record<string, RomanizeHandler> = {
  ja: romanizeJapanese,
  ko: romanizeHangul,
  zh: romanizeChinese,
  th: romanizeThai,
};
|
||||||
|
|
||||||
|
/**
 * Romanizes a single line.
 *
 * The language reported by `detect` is looked up in `handlers`; when a
 * handler exists its result is returned directly. Otherwise the line is run
 * through the script checks in a fixed order (Japanese, Korean, Chinese,
 * Thai), each romanizer being applied only if its check matches the text as
 * transformed so far.
 *
 * @param line The text to romanize.
 * @returns A promise resolving to the romanized text, or to the input
 *          unchanged when nothing matched.
 */
export const romanize = async (line: string) => {
  const dedicated = handlers[detect(line)];
  if (dedicated) return dedicated(line);

  // fallback
  const scriptSteps: Array<
    [(lines: string[]) => boolean, (text: string) => Promise<string> | string]
  > = [
    [hasJapanese, romanizeJapanese],
    [hasKorean, romanizeHangul],
    [hasChinese, romanizeChinese],
    [hasThai, romanizeThai],
  ];

  let text = line;
  for (const [matches, convert] of scriptSteps) {
    if (matches([text])) text = await convert(text);
  }

  return text;
};
|
||||||
|
|||||||
Reference in New Issue
Block a user