feat(synced-lyrics): Improve LRC parsing (#4269)

This commit is contained in:
Angelos Bouklis
2026-01-26 11:41:53 +02:00
committed by GitHub
parent 5bbf7f964c
commit ed7ef30aaa
2 changed files with 202 additions and 24 deletions

View File

@ -8,6 +8,7 @@ interface LRCLine {
timeInMs: number;
duration: number;
text: string;
words: { timeInMs: number; word: string }[];
}
interface LRC {
@ -17,7 +18,10 @@ interface LRC {
/*
 * Patterns used to tokenize LRC text.
 *
 * - tagRegex:       a whole line that is a single `[tag: value]` pair; the
 *                   value is captured without surrounding whitespace.
 * - lyricRegex:     a whole line made of one `[m:s.f]` timestamp followed by
 *                   at least one character of text.
 * - timestampRegex: one `[m:s.f]` timestamp anchored at the start of a line
 *                   (`m` flag), so it can be matched and stripped repeatedly
 *                   when a line carries several timestamps.
 * - wordRegex:      an inline `<m:s.f>` word timestamp, optional spaces, and
 *                   the word that follows it (global, for use with matchAll).
 */
const tagRegex = /^\[(?<tag>\w+):\s*(?<value>.+?)\s*\]$/;
// prettier-ignore
const lyricRegex = /^\[(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)\](?<text>.+)$/;
const timestampRegex = /^\[(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)\]/m;
// The word is captured as "everything up to the next whitespace or `<`"
// rather than `\w+`: `\w` is ASCII-only and excludes apostrophes/hyphens, so
// words such as "don't" or "café" would otherwise be truncated.
// prettier-ignore
const wordRegex = /<(?<minutes>\d+):(?<seconds>\d+)\.(?<milliseconds>\d+)> *(?<word>[^\s<]+)/g;
export const LRC = {
parse: (text: string): LRC => {
@ -27,13 +31,29 @@ export const LRC = {
};
let offset = 0;
let previousLine: LRCLine | null = null;
for (const line of text.split('\n')) {
if (!line.trim().startsWith('[')) continue;
for (let line of text.split('\n')) {
line = line.trim();
if (!line.startsWith('[')) continue;
const lyric = line.match(lyricRegex)?.groups;
if (!lyric) {
const timestamps = [];
let match: Record<string, string> | undefined;
while ((match = line.match(timestampRegex)?.groups)) {
const { minutes, seconds, milliseconds } = match;
const timeInMs =
parseInt(minutes) * 60 * 1000 +
parseInt(seconds) * 1000 +
parseInt(milliseconds);
timestamps.push({
time: `${minutes}:${seconds}:${milliseconds}`,
timeInMs,
});
line = line.replace(timestampRegex, '');
}
if (!timestamps.length) {
const tag = line.match(tagRegex)?.groups;
if (tag) {
if (tag.tag === 'offset') {
@ -49,38 +69,52 @@ export const LRC = {
continue;
}
const { minutes, seconds, milliseconds, text } = lyric;
const timeInMs =
parseInt(minutes) * 60 * 1000 +
parseInt(seconds) * 1000 +
parseInt(milliseconds);
let text = line.trim();
const words = Array.from(text.matchAll(wordRegex), ({ groups }) => {
const { minutes, seconds, milliseconds, word } = groups!;
const timeInMs =
parseInt(minutes) * 60 * 1000 +
parseInt(seconds) * 1000 +
parseInt(milliseconds);
const currentLine: LRCLine = {
time: `${minutes}:${seconds}:${milliseconds}`,
timeInMs,
text: text.trim(),
duration: Infinity,
};
return { timeInMs, word };
});
if (previousLine) {
previousLine.duration = timeInMs - previousLine.timeInMs;
if (words.length) {
text = words.map(({ word }) => word).join(' ');
}
previousLine = currentLine;
lrc.lines.push(currentLine);
for (const { time, timeInMs } of timestamps) {
lrc.lines.push({
time,
timeInMs,
text,
words,
duration: Infinity,
});
}
}
for (const line of lrc.lines) {
line.timeInMs += offset;
lrc.lines.sort(({ timeInMs: timeA }, { timeInMs: timeB }) => timeA - timeB);
for (let i = 0; i < lrc.lines.length; i++) {
const current = lrc.lines[i];
const next = lrc.lines[i + 1];
current.timeInMs += offset;
if (next) {
current.duration = next.timeInMs - current.timeInMs;
}
}
const first = lrc.lines.at(0);
if (first && first.timeInMs > 300) {
lrc.lines.unshift({
time: '0:0:0',
time: '00:00:00',
timeInMs: 0,
duration: first.timeInMs,
text: '',
words: [],
});
}