From ed7ef30aaa153ef886004378390faf81e2244a33 Mon Sep 17 00:00:00 2001 From: Angelos Bouklis Date: Mon, 26 Jan 2026 11:41:53 +0200 Subject: [PATCH] feat(synced-lyrics): Improve LRC parsing (#4269) --- src/plugins/synced-lyrics/parsers/lrc.test.ts | 144 ++++++++++++++++++ src/plugins/synced-lyrics/parsers/lrc.ts | 82 +++++++--- 2 files changed, 202 insertions(+), 24 deletions(-) create mode 100644 src/plugins/synced-lyrics/parsers/lrc.test.ts diff --git a/src/plugins/synced-lyrics/parsers/lrc.test.ts b/src/plugins/synced-lyrics/parsers/lrc.test.ts new file mode 100644 index 00000000..d08a2d62 --- /dev/null +++ b/src/plugins/synced-lyrics/parsers/lrc.test.ts @@ -0,0 +1,144 @@ +import { test, expect } from '@playwright/test'; + +import { LRC } from './lrc'; + +test('empty string', () => { + const lrc = LRC.parse(''); + expect(lrc).toStrictEqual({ lines: [], tags: [] }); +}); + +test('chorus', () => { + const lrc = LRC.parse(`\ +[00:12.00]Line 1 lyrics +[00:17.20]Line 2 lyrics +[00:21.10][00:45.10]Repeating lyrics (e.g. chorus) +[mm:ss.xx]Last lyrics line\ +`); + + expect(lrc).toStrictEqual({ + lines: [ + { duration: 12000, text: '', words: [], time: '00:00:00', timeInMs: 0 }, + { + duration: 5020, + text: 'Line 1 lyrics', + words: [], + time: '00:12:00', + timeInMs: 12000, + }, + { + duration: 3990, + text: 'Line 2 lyrics', + words: [], + time: '00:17:20', + timeInMs: 17020, + }, + { + duration: 24000, + text: 'Repeating lyrics (e.g. chorus)', + words: [], + time: '00:21:10', + timeInMs: 21010, + }, + { + duration: Infinity, + text: 'Repeating lyrics (e.g. chorus)', + words: [], + time: '00:45:10', + timeInMs: 45010, + }, + ], + tags: [], + }); +}); + +test('attributes', () => { + const lrc = LRC.parse( + `[ar:Chubby Checker oppure Beatles, The] +[al:Hits Of The 60's - Vol. 
2 – Oldies] +[ti:Let's Twist Again] +[au:Written by Kal Mann / Dave Appell, 1961] +[length: 2:23] + +[00:12.00]Naku Penda Piya-Naku Taka Piya-Mpenziwe +[00:15.30]Some more lyrics ...`, + ); + + expect(lrc).toStrictEqual({ + lines: [ + { duration: 12000, text: '', words: [], time: '00:00:00', timeInMs: 0 }, + { + duration: 3030, + text: 'Naku Penda Piya-Naku Taka Piya-Mpenziwe', + words: [], + time: '00:12:00', + timeInMs: 12000, + }, + { + duration: Infinity, + text: 'Some more lyrics ...', + words: [], + time: '00:15:30', + timeInMs: 15030, + }, + ], + tags: [ + { tag: 'ar', value: 'Chubby Checker oppure Beatles, The' }, + { tag: 'al', value: "Hits Of The 60's - Vol. 2 – Oldies" }, + { tag: 'ti', value: "Let's Twist Again" }, + { tag: 'au', value: 'Written by Kal Mann / Dave Appell, 1961' }, + { tag: 'length', value: '2:23' }, + ], + }); +}); + +test('karaoke', () => { + const lrc = LRC.parse( + '[00:00.00] <00:00.04> When <00:00.16> the <00:00.82> truth <00:01.29> is <00:01.63> found <00:03.09> to <00:03.37> be <00:05.92> lies', + ); + + expect(lrc).toStrictEqual({ + lines: [ + { + duration: Infinity, + text: 'When the truth is found to be lies', + time: '00:00:00', + timeInMs: 0, + words: [ + { + timeInMs: 4, + word: 'When', + }, + { + timeInMs: 16, + word: 'the', + }, + { + timeInMs: 82, + word: 'truth', + }, + { + timeInMs: 1029, + word: 'is', + }, + { + timeInMs: 1063, + word: 'found', + }, + { + timeInMs: 3009, + word: 'to', + }, + { + timeInMs: 3037, + word: 'be', + }, + { + timeInMs: 5092, + word: 'lies', + }, + ], + }, + ], + tags: [], + }); +}); diff --git a/src/plugins/synced-lyrics/parsers/lrc.ts b/src/plugins/synced-lyrics/parsers/lrc.ts index 355c0a4d..bcc770fd 100644 --- a/src/plugins/synced-lyrics/parsers/lrc.ts +++ b/src/plugins/synced-lyrics/parsers/lrc.ts @@ -8,6 +8,7 @@ interface LRCLine { timeInMs: number; duration: number; text: string; + words: { timeInMs: number; word: string }[]; } interface LRC { @@ -17,7 +18,10 @@ interface LRC { const 
tagRegex = /^\[(?<tag>\w+):\s*(?<value>.+?)\s*\]$/; // prettier-ignore -const lyricRegex = /^\[(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)\](?<text>.+)$/; +const timestampRegex = /^\[(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)\]/m; + +// prettier-ignore +const wordRegex = /<(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)> *(?<word>\w+)/g; export const LRC = { parse: (text: string): LRC => { @@ -27,13 +31,29 @@ export const LRC = { }; let offset = 0; - let previousLine: LRCLine | null = null; - for (const line of text.split('\n')) { - if (!line.trim().startsWith('[')) continue; + for (let line of text.split('\n')) { + line = line.trim(); + if (!line.startsWith('[')) continue; - const lyric = line.match(lyricRegex)?.groups; - if (!lyric) { + const timestamps = []; + let match: Record<string, string> | undefined; + while ((match = line.match(timestampRegex)?.groups)) { + const { minutes, seconds, milliseconds } = match; + const timeInMs = + parseInt(minutes) * 60 * 1000 + + parseInt(seconds) * 1000 + + parseInt(milliseconds); + + timestamps.push({ + time: `${minutes}:${seconds}:${milliseconds}`, + timeInMs, + }); + + line = line.replace(timestampRegex, ''); + } + + if (!timestamps.length) { const tag = line.match(tagRegex)?.groups; if (tag) { if (tag.tag === 'offset') { @@ -49,38 +69,52 @@ export const LRC = { continue; } - const { minutes, seconds, milliseconds, text } = lyric; - const timeInMs = - parseInt(minutes) * 60 * 1000 + - parseInt(seconds) * 1000 + - parseInt(milliseconds); + let text = line.trim(); + const words = Array.from(text.matchAll(wordRegex), ({ groups }) => { + const { minutes, seconds, milliseconds, word } = groups!; + const timeInMs = + parseInt(minutes) * 60 * 1000 + + parseInt(seconds) * 1000 + + parseInt(milliseconds); - const currentLine: LRCLine = { - time: `${minutes}:${seconds}:${milliseconds}`, - timeInMs, - text: text.trim(), - duration: Infinity, - }; + return { timeInMs, word }; + }); - if (previousLine) { - previousLine.duration = timeInMs - previousLine.timeInMs; + if (words.length) { + text = words.map(({ word }) => word).join(' '); } - 
previousLine = currentLine; - lrc.lines.push(currentLine); + for (const { time, timeInMs } of timestamps) { + lrc.lines.push({ + time, + timeInMs, + text, + words, + duration: Infinity, + }); + } } - for (const line of lrc.lines) { - line.timeInMs += offset; + lrc.lines.sort(({ timeInMs: timeA }, { timeInMs: timeB }) => timeA - timeB); + for (let i = 0; i < lrc.lines.length; i++) { + const current = lrc.lines[i]; + const next = lrc.lines[i + 1]; + + current.timeInMs += offset; + + if (next) { + current.duration = next.timeInMs - current.timeInMs; + } } const first = lrc.lines.at(0); if (first && first.timeInMs > 300) { lrc.lines.unshift({ - time: '0:0:0', + time: '00:00:00', timeInMs: 0, duration: first.timeInMs, text: '', + words: [], }); }