feat(synced-lyrics): multiple lyric sources (#2383)

Co-authored-by: JellyBrick <shlee1503@naver.com>
This commit is contained in:
Angelos Bouklis
2024-12-25 00:44:29 +02:00
committed by GitHub
parent 5c9ded8779
commit 533b96d1f6
28 changed files with 1527 additions and 447 deletions

View File

@ -0,0 +1,137 @@
import { jaroWinkler } from '@skyra/jaro-winkler';
import { config } from '../renderer/renderer';
import { LRC } from '../parsers/lrc';
import type { LyricProvider, LyricResult, SearchSongInfo } from '../types';
/**
 * Lyric provider backed by the LRCLib search API (`/api/search`).
 *
 * Candidates are filtered by artist-name similarity and then ranked by how
 * close their duration is to the duration of the requested song.
 */
export class LRCLib implements LyricProvider {
  name = 'LRCLib';
  baseUrl = 'https://lrclib.net';

  /**
   * Searches LRCLib for lyrics matching the given song.
   *
   * @returns The closest match, or `null` when nothing matches, the best
   *   match is more than 15 duration units away from `songDuration`, the
   *   track is flagged instrumental, or it carries no lyrics at all.
   * @throws Error when the HTTP request fails or the body is not an array.
   */
  async search({
    title,
    artist,
    album,
    songDuration,
  }: SearchSongInfo): Promise<LyricResult | null> {
    const query = new URLSearchParams({
      artist_name: artist,
      track_name: title,
    });
    // Only narrow by album when one is actually known. Stringifying a
    // missing album would send `album_name=null` / `album_name=undefined`.
    if (album) {
      query.set('album_name', album);
    }

    let data = await this.fetchResults(query);
    if (data.length === 0) {
      if (!config()?.showLyricsEvenIfInexact) {
        return null;
      }
      // Fall back to a loose free-text search on the title alone.
      data = await this.fetchResults(new URLSearchParams({ q: title }));
    }

    // Loop-invariant: the requested artists, split on `&` / `,` and normalised.
    const wantedArtists = artist
      .split(/[&,]/g)
      .map((name) => name.trim().toLowerCase());

    const filteredResults = [];
    for (const item of data) {
      const itemArtists = item.artistName
        .split(/[&,]/g)
        .map((name) => name.trim().toLowerCase());

      // Best similarity over every (wanted, candidate) pair, in both argument
      // orders. Stays -Infinity when there are no pairs.
      let ratio = -Infinity;
      for (const wanted of wantedArtists) {
        for (const candidate of itemArtists) {
          ratio = Math.max(
            ratio,
            jaroWinkler(wanted, candidate),
            jaroWinkler(candidate, wanted),
          );
        }
      }
      if (ratio <= 0.9) continue;
      filteredResults.push(item);
    }

    // Closest duration first.
    filteredResults.sort(
      (a, b) =>
        Math.abs(a.duration - songDuration) -
        Math.abs(b.duration - songDuration),
    );

    const closestResult = filteredResults[0];
    if (!closestResult) {
      return null;
    }
    if (Math.abs(closestResult.duration - songDuration) > 15) {
      return null;
    }
    if (closestResult.instrumental) {
      return null;
    }

    const raw = closestResult.syncedLyrics;
    const plain = closestResult.plainLyrics;
    if (!raw && !plain) {
      return null;
    }

    return {
      title: closestResult.trackName,
      // Trim so "A & B" yields ["A", "B"] rather than ["A ", " B"].
      artists: closestResult.artistName
        .split(/[&,]/g)
        .map((name) => name.trim()),
      lines: raw
        ? LRC.parse(raw).lines.map((l) => ({
            ...l,
            status: 'upcoming' as const,
          }))
        : undefined,
      lyrics: plain,
    };
  }

  /** Runs one search request and validates that the body is an array. */
  private async fetchResults(
    query: URLSearchParams,
  ): Promise<LRCLIBSearchResponse> {
    const response = await fetch(
      `${this.baseUrl}/api/search?${query.toString()}`,
    );
    if (!response.ok) {
      throw new Error(`bad HTTPStatus(${response.statusText})`);
    }
    const data = (await response.json()) as LRCLIBSearchResponse;
    if (!Array.isArray(data)) {
      throw new Error(`Expected an array, instead got ${typeof data}`);
    }
    return data;
  }
}
/**
 * Shape the LRCLib `/api/search` JSON body is cast to: an array of track
 * records. The cast is not validated field-by-field at runtime.
 */
type LRCLIBSearchResponse = {
id: number;
name: string;
trackName: string;
// May list several artists separated by `&` or `,`; the provider splits on both.
artistName: string;
albumName: string;
// Compared against the requested song's duration (presumably seconds — confirm).
duration: number;
instrumental: boolean;
// The provider treats a falsy value in either lyrics field as "not available".
plainLyrics: string;
syncedLyrics: string;
}[];

View File

@ -0,0 +1,132 @@
import type { LyricProvider, LyricResult, SearchSongInfo } from '../types';
// Captures the single-quoted string passed to JSON.parse in a
// `__PRELOADED_STATE__ = JSON.parse('...');` assignment.
const preloadedStateRegex = /__PRELOADED_STATE__ = JSON\.parse\('(.*?)'\);/;
// Captures the text between `body":{"html":"` and `","children"` inside the
// preloaded state; the provider treats this capture as the lyrics HTML.
const preloadHtmlRegex = /body":{"html":"(.*?)","children"/;
/**
 * Lyric provider that scrapes plain (unsynced) lyrics from genius.com.
 *
 * It queries the site's song search endpoint, picks the best hit and extracts
 * the lyrics HTML embedded in the page's `__PRELOADED_STATE__` script.
 */
export class LyricsGenius implements LyricProvider {
  public name = 'Genius';
  public baseUrl = 'https://genius.com';
  private domParser = new DOMParser();

  /**
   * Searches Genius for `artist title` and returns the lyrics of the best hit.
   *
   * @returns The lyrics, or `null` when the search request fails, yields no
   *   hits, or the lyrics body is just "instrumental".
   * @throws Error when the song page cannot be fetched or the lyrics cannot
   *   be extracted from it.
   */
  // prettier-ignore
  async search({ title, artist }: SearchSongInfo): Promise<LyricResult | null> {
    const query = new URLSearchParams({
      q: `${artist} ${title}`,
      page: '1',
      per_page: '10',
    });

    const response = await fetch(`${this.baseUrl}/api/search/song?${query}`);
    if (!response.ok) {
      return null;
    }

    const data = (await response.json()) as LyricsGeniusSearch;
    // The response shape is not validated, so a missing section or hit list
    // is treated as "no result" rather than a TypeError.
    const hits = data?.response?.sections?.[0]?.hits;
    if (!Array.isArray(hits)) {
      return null;
    }

    // One point for an exact title match, one for the primary artist
    // containing the requested artist. Highest score first.
    const score = (result: Result) =>
      (result.title === title ? 1 : 0) +
      (result.primary_artist.name.includes(artist) ? 1 : 0);
    hits.sort((a, b) => score(b.result) - score(a.result));

    const closestHit = hits.at(0);
    if (!closestHit) {
      return null;
    }

    const { result: { path } } = closestHit;
    const page = await fetch(`${this.baseUrl}${path}`);
    if (!page.ok) {
      throw new Error(`bad HTTPStatus(${page.statusText})`);
    }
    const html = await page.text();
    const doc = this.domParser.parseFromString(html, 'text/html');

    const preloadedStateScript = Array.from(doc.querySelectorAll('script')).find(
      (script) => script.textContent?.includes('window.__PRELOADED_STATE__'),
    );

    // Optional chaining on the script itself: when no script matches, the
    // descriptive error below is raised instead of a TypeError.
    const preloadedState = preloadedStateScript?.textContent
      ?.match(preloadedStateRegex)?.[1]
      ?.replace(/\\"/g, '"');

    // Undo the escaping of the embedded string before parsing it as HTML.
    const lyricsHtml = preloadedState?.match(preloadHtmlRegex)?.[1]
      ?.replace(/\\\//g, '/')
      ?.replace(/\\\\/g, '\\')
      ?.replace(/\\n/g, '\n')
      ?.replace(/\\'/g, "'")
      ?.replace(/\\"/g, '"');

    if (!lyricsHtml) throw new Error('Failed to extract lyrics from preloaded state.');

    const lyricsDoc = this.domParser.parseFromString(lyricsHtml, 'text/html');
    const lyrics = lyricsDoc.body.innerText;

    // Pages whose whole body is "instrumental" / "[instrumental]" carry no lyrics.
    if (lyrics.trim().toLowerCase().replace(/[[\]]/g, '') === 'instrumental') return null;

    return {
      title: closestHit.result.title,
      artists: closestHit.result.primary_artists.map(({ name }) => name),
      lyrics,
    };
  }
}
/** Top-level shape the Genius song-search JSON body is cast to. */
interface LyricsGeniusSearch {
response: Response;
}
// NOTE: this module-local `Response` shares its name with the DOM `Response` type.
interface Response {
// The provider only reads the first section.
sections: Section[];
}
/** A group of search hits. */
interface Section {
hits: {
highlights: unknown[];
index: string;
type: string;
result: Result;
}[];
}
/** A single song hit. The provider reads `title`, `path`, `primary_artist` and `primary_artists`. */
interface Result {
api_path: string;
artist_names: string;
full_title: string;
id: number;
instrumental: boolean;
// Appended to the provider's base URL to fetch the song page.
path: string;
release_date_components: ReleaseDateComponents;
title: string;
title_with_featured: string;
updated_by_human_at: number;
url: string;
featured_artists: Artist[];
primary_artist: Artist;
primary_artists: Artist[];
}
/** Artist record attached to a hit. */
interface Artist {
api_path: string;
id: number;
image_url: string;
name: string;
slug: string;
url: string;
}
/** Release date split into numeric components. */
interface ReleaseDateComponents {
year: number;
month: number;
day: number;
}

View File

@ -0,0 +1,110 @@
import { jaroWinkler } from '@skyra/jaro-winkler';
import { LRC } from '../parsers/lrc';
import type { LyricProvider, LyricResult, SearchSongInfo } from '../types';
/**
 * Cleans a scraped name: drops `[...]` and `(...)` segments, removes one
 * leading and one trailing `-` / `•` separator, and strips a trailing " by".
 */
const removeNoise = (text: string) => {
  const withoutBrackets = text.replace(/\[.*?\]/g, '');
  const withoutParens = withoutBrackets.replace(/\(.*?\)/g, '');
  // Separators are only removed at the very edges of the trimmed text.
  const withoutEdgeSeparators = withoutParens
    .trim()
    .replace(/(^[-•])|([-•]$)/g, '');
  return withoutEdgeSeparators.trim().replace(/\s+by$/, '');
};
/**
 * Lyric provider that scrapes synced (LRC) lyrics from megalobiz.com.
 */
export class Megalobiz implements LyricProvider {
  public name = 'Megalobiz';
  public baseUrl = 'https://www.megalobiz.com';
  private domParser = new DOMParser();

  /**
   * Searches Megalobiz for `artist title`, keeps the entries whose cleaned
   * title is similar to `title`, and returns the one closest in duration.
   *
   * @returns Parsed synced lyrics, or `null` when no entry matches or the
   *   closest one is more than 15 seconds away from `songDuration`.
   * @throws Error when a request fails (the search request is aborted after
   *   5 seconds) or the lyrics element is missing from the page.
   */
  // prettier-ignore
  async search({ title, artist, songDuration }: SearchSongInfo): Promise<LyricResult | null> {
    const query = new URLSearchParams({
      qry: `${artist} ${title}`,
    });

    const response = await fetch(`${this.baseUrl}/search/all?${query}`, {
      signal: AbortSignal.timeout(5_000),
    });
    if (!response.ok) {
      throw new Error(`bad HTTPStatus(${response.statusText})`);
    }

    const data = await response.text();
    const searchDoc = this.domParser.parseFromString(data, 'text/html');

    const anchors = Array.from(
      searchDoc.querySelectorAll<HTMLAnchorElement>('a.entity_name[href^="/lrc/maker/"][name][title]'),
    );

    const searchResults: MegalobizSearchResult[] = [];
    for (const anchor of anchors) {
      // The duration is read from a `[mm:ss.xx]` stamp in the title attribute.
      // An entry without one is skipped rather than aborting the whole search.
      const stamp = anchor
        .getAttribute('title')
        ?.match(/\[(?<minutes>\d+):(?<seconds>\d+)\.(?<millis>\d+)\]/)?.groups;
      if (!stamp) continue;
      const { minutes, seconds, millis } = stamp;

      let name = anchor.getAttribute('name') ?? '';

      // Artists come from a "feat." segment, an "Artists - Title" prefix, or
      // a "Title by Artists" suffix. Empty candidates are dropped.
      const artists = [
        removeNoise(name.match(/\(?[Ff]eat\. (.+)\)?/)?.[1] ?? ''),
        ...(removeNoise(name).match(/(?<artists>.*?) [-•] (?<title>.*)/)?.groups?.artists?.split(/[&,]/)?.map(removeNoise) ?? []),
        ...(removeNoise(name).match(/(?<title>.*) by (?<artists>.*)/)?.groups?.artists?.split(/[&,]/)?.map(removeNoise) ?? []),
      ].filter(Boolean);

      // Whatever is left once the artist names are removed is the title.
      for (const artistName of artists) {
        name = name.replace(artistName, '');
        name = removeNoise(name);
      }

      if (jaroWinkler(title, name) < 0.8) continue;

      searchResults.push({
        title: name,
        artists,
        href: anchor.getAttribute('href') ?? '',
        // The digits after the dot are a decimal fraction of a second
        // (".50" is half a second), whatever their count.
        duration: parseInt(minutes, 10) * 60 + parseFloat(`${seconds}.${millis}`),
      });
    }

    // Closest duration first.
    searchResults.sort(
      (a, b) =>
        Math.abs(a.duration - songDuration) -
        Math.abs(b.duration - songDuration),
    );

    const closestResult = searchResults[0];
    if (!closestResult) return null;
    if (Math.abs(closestResult.duration - songDuration) > 15) {
      return null;
    }

    const page = await fetch(`${this.baseUrl}${closestResult.href}`);
    if (!page.ok) {
      throw new Error(`bad HTTPStatus(${page.statusText})`);
    }
    const html = await page.text();
    const lyricsDoc = this.domParser.parseFromString(html, 'text/html');
    const raw = lyricsDoc.querySelector('span[id^="lrc_"][id$="_lyrics"]')?.textContent;
    if (!raw) throw new Error('Failed to extract lyrics from page.');

    const lyrics = LRC.parse(raw);

    return {
      title: closestResult.title,
      artists: closestResult.artists,
      lines: lyrics.lines.map((l) => ({ ...l, status: 'upcoming' as const })),
    };
  }
}
/** One candidate extracted from a Megalobiz search results page. */
interface MegalobizSearchResult {
// Entry name with the artist names and noise removed.
title: string;
artists: string[];
// Site-relative path of the lyrics page; appended to the provider's base URL.
href: string;
// Seconds, derived from the `[mm:ss.xx]` stamp in the anchor's title attribute.
duration: number;
}

View File

@ -0,0 +1,10 @@
import type { LyricProvider, LyricResult, SearchSongInfo } from '../types';
/**
 * Placeholder provider for MusixMatch; searching is not implemented yet.
 */
export class MusixMatch implements LyricProvider {
  name = 'MusixMatch';
  baseUrl = 'https://www.musixmatch.com/';

  /**
   * Always rejects with "Not implemented".
   *
   * The failure is delivered as a rejected promise rather than a synchronous
   * throw, so callers that chain `.then(...).catch(...)` on the result see it
   * in their `.catch` handler.
   */
  search(_: SearchSongInfo): Promise<LyricResult | null> {
    return Promise.reject(new Error('Not implemented'));
  }
}

View File

@ -0,0 +1,201 @@
import type { LyricProvider, LyricResult, SearchSongInfo } from '../types';
// Headers sent with every Youtubei POST request (JSON request and response).
const headers = {
'Accept': 'application/json',
'Content-Type': 'application/json',
};
// Client identification placed in the request body's `context`.
// NOTE(review): the meaning of clientName '26' / this version is not visible here — confirm.
const client = {
clientName: '26',
clientVersion: '7.01.05',
};
/**
 * Lyric provider that reads lyrics from YouTube Music's own Youtubei
 * endpoints (`next` to find the lyrics tab, `browse` to fetch its contents).
 */
export class YTMusic implements LyricProvider {
  public name = 'YTMusic';
  public baseUrl = 'https://music.youtube.com/';

  /**
   * Fetches the lyrics attached to `videoId`.
   *
   * @returns Synced lines when timed data is present, otherwise plain
   *   lyrics; `null` when the video has no lyrics tab or the text is the
   *   "Lyrics not available" placeholder.
   */
  // prettier-ignore
  public async search(
    { videoId, title, artist }: SearchSongInfo,
  ): Promise<LyricResult | null> {
    const data = await this.fetchNext(videoId);

    const { tabs } =
      data?.contents?.singleColumnMusicWatchNextResultsRenderer?.tabbedRenderer
        ?.watchNextTabbedResultsRenderer ?? {};
    if (!Array.isArray(tabs)) return null;

    const lyricsTab = tabs.find((it) => {
      const pageType = it?.tabRenderer?.endpoint?.browseEndpoint
        ?.browseEndpointContextSupportedConfigs
        ?.browseEndpointContextMusicConfig?.pageType;
      return pageType === 'MUSIC_PAGE_TYPE_TRACK_LYRICS';
    });
    if (!lyricsTab) return null;

    const { browseId } = lyricsTab?.tabRenderer?.endpoint?.browseEndpoint ?? {};
    if (!browseId) return null;

    const { contents } = await this.fetchBrowse(browseId);
    if (!contents) return null;

    /*
      NOTE: Due to the nature of Youtubei, the json responses are not consistent,
      this means we have to check for multiple possible paths to get the lyrics.
    */
    const syncedLines = contents?.elementRenderer?.newElement?.type
      ?.componentType?.model?.timedLyricsModel?.lyricsData?.timedLyricsData;

    // Timed lines are only usable when they carry a cue range.
    const synced = syncedLines?.length && syncedLines[0]?.cueRange
      ? syncedLines.map((it) => {
          const startMs = parseInt(it.cueRange.startTimeMilliseconds, 10);
          const endMs = parseInt(it.cueRange.endTimeMilliseconds, 10);
          const text = it.lyricLine.trim();
          return {
            time: this.millisToTime(startMs),
            timeInMs: startMs,
            duration: endMs - startMs,
            // A lone note symbol marks an instrumental gap; show it as empty.
            text: text === '♪' ? '' : text,
            status: 'upcoming' as const,
          };
        })
      : undefined;

    // Plain-text fallbacks, tried in order: untimed lyric lines, the message
    // renderer, then the description shelf.
    let plain: string | undefined;
    if (!synced) {
      if (syncedLines?.length) {
        plain = syncedLines.map((it) => it.lyricLine).join('\n');
      } else if (contents?.messageRenderer) {
        plain = contents?.messageRenderer?.text?.runs
          ?.map((it) => it.text)
          .join('\n');
      } else {
        plain = contents?.sectionListRenderer?.contents?.[0]
          ?.musicDescriptionShelfRenderer?.description?.runs
          ?.map((it) => it.text)
          ?.join('\n');
      }
    }

    if (plain === 'Lyrics not available') {
      return null;
    }

    // Prepend an empty line at 0 when the first lyric starts after 300 ms.
    if (synced?.length && synced[0].timeInMs > 300) {
      synced.unshift({
        duration: 0,
        text: '',
        time: '00:00.00',
        timeInMs: 0,
        status: 'upcoming' as const,
      });
    }

    return {
      title,
      artists: [artist],
      lyrics: plain,
      lines: synced,
    };
  }

  /**
   * Formats a millisecond offset as `mm:ss.cc` (minutes, seconds, hundredths).
   * The hundredths are truncated, so 1234 ms gives `00:01.23`, not `00:01.23.4`.
   */
  private millisToTime(millis: number) {
    const minutes = Math.floor(millis / 60000);
    const seconds = Math.floor((millis - minutes * 60 * 1000) / 1000);
    const hundredths = Math.floor(
      (millis - minutes * 60 * 1000 - seconds * 1000) / 10,
    );

    const pad = (value: number) => value.toString().padStart(2, '0');
    return `${pad(minutes)}:${pad(seconds)}.${pad(hundredths)}`;
  }

  private ENDPOINT = 'https://youtubei.googleapis.com/youtubei/v1/';
  // RATE LIMITED (2 req per sec)
  private PROXIED_ENDPOINT = 'https://ytmbrowseproxy.zvz.be/';

  /** POSTs to the `next` endpoint. The JSON body is cast, not validated. */
  private fetchNext(videoId: string) {
    return fetch(this.ENDPOINT + 'next?prettyPrint=false', {
      headers,
      method: 'POST',
      body: JSON.stringify({
        videoId,
        context: { client },
      }),
    }).then((res) => res.json()) as Promise<NextData>;
  }

  /** POSTs to the proxied `browse` endpoint. The JSON body is cast, not validated. */
  private fetchBrowse(browseId: string) {
    return fetch(this.PROXIED_ENDPOINT + 'browse?prettyPrint=false', {
      headers,
      method: 'POST',
      body: JSON.stringify({
        browseId,
        context: { client },
      }),
    }).then((res) => res.json()) as Promise<BrowseData>;
  }
}
/**
 * Part of the `next` response the provider reads: the list of watch-page
 * tabs. Fields are declared as required, but the provider reads them with
 * optional chaining.
 */
interface NextData {
contents: {
singleColumnMusicWatchNextResultsRenderer: {
tabbedRenderer: {
watchNextTabbedResultsRenderer: {
tabs: {
tabRenderer: {
endpoint: {
browseEndpoint: {
// Passed to the `browse` request to load the tab's contents.
browseId: string;
browseEndpointContextSupportedConfigs: {
browseEndpointContextMusicConfig: {
// The lyrics tab is the one with 'MUSIC_PAGE_TYPE_TRACK_LYRICS'.
pageType: string;
};
};
};
};
};
}[];
};
};
};
};
}
/**
 * Part of the `browse` response the provider reads. It checks three
 * alternative locations for lyrics: timed data, a message renderer, and a
 * description shelf.
 */
interface BrowseData {
contents: {
elementRenderer: {
newElement: {
type: {
componentType: {
model: {
timedLyricsModel: {
lyricsData: {
timedLyricsData: SyncedLyricLine[];
};
};
};
};
};
};
};
messageRenderer: {
text: PlainLyricsTextRenderer;
};
sectionListRenderer: {
contents: {
musicDescriptionShelfRenderer: {
description: PlainLyricsTextRenderer;
};
}[];
};
};
}
/** One timed lyric line. */
interface SyncedLyricLine {
lyricLine: string;
cueRange: CueRange;
}
/** Start and end of a line in milliseconds, as strings; the provider parses them with parseInt. */
interface CueRange {
startTimeMilliseconds: string;
endTimeMilliseconds: string;
}
/** Text made of runs; the provider joins the run texts with newlines. */
interface PlainLyricsTextRenderer {
runs: {
text: string;
}[];
}

View File

@ -0,0 +1,189 @@
import { createStore } from 'solid-js/store';
import { createMemo } from 'solid-js';
import { SongInfo } from '@/providers/song-info';
import { LRCLib } from './LRCLib';
import { LyricsGenius } from './LyricsGenius';
import { YTMusic } from './YTMusic';
import { getSongInfo } from '@/providers/song-info-front';
import type { LyricProvider, LyricResult } from '../types';
// Registry of enabled lyric providers, one shared instance each. The first
// key is used as the store's initially selected provider.
export const providers = {
YTMusic: new YTMusic(),
LRCLib: new LRCLib(),
LyricsGenius: new LyricsGenius(),
// MusixMatch: new MusixMatch(),
// Megalobiz: new Megalobiz(), // Disabled because it is too unstable and slow
} as const;
// Union of the registry's keys.
export type ProviderName = keyof typeof providers;
// Registry keys as an array.
export const providerNames = Object.keys(providers) as ProviderName[];
/** Search status of a single provider for a song. */
export type ProviderState = {
state: 'fetching' | 'done' | 'error';
// Search result; null while fetching, on error, or when the provider found nothing.
data: LyricResult | null;
// Set when the provider's search failed.
error: Error | null;
};
/** Shape of the reactive lyrics store. */
type LyricsStore = {
// Currently selected provider.
provider: ProviderName;
// State of the selected provider (`lyrics[provider]`).
current: ProviderState;
// Per-provider state.
lyrics: Record<ProviderName, ProviderState>;
};
/**
 * Builds a fresh per-provider state map in which every registered provider
 * starts out as 'fetching' with no data and no error.
 */
const initialData = () => {
  const lyrics = {} as LyricsStore['lyrics'];
  for (const name of providerNames) {
    lyrics[name] = { state: 'fetching', data: null, error: null };
  }
  return lyrics;
};
// Store holding the selected provider and every provider's search state.
// It starts on the first registered provider with all providers 'fetching'.
export const [lyricsStore, setLyricsStore] = createStore<LyricsStore>({
provider: providerNames[0],
lyrics: initialData(),
// Getter: `current` is derived from `lyrics` and `provider` rather than stored.
get current(): ProviderState {
return this.lyrics[this.provider];
},
});
/** Memoized state of the currently selected provider. */
export const currentLyrics = createMemo(
  () => lyricsStore.lyrics[lyricsStore.provider],
);
// Cache key: the song's video id.
type VideoId = string;
type SearchCacheData = Record<ProviderName, ProviderState>;
/** Cached search for one song. */
interface SearchCache {
// 'loading' until every provider's search has settled.
state: 'loading' | 'done';
// Per-provider results, filled in as each provider settles.
data: SearchCacheData;
}
// TODO: Maybe use localStorage for the cache.
// In-memory cache of searches by video id; nothing in this file removes entries.
const searchCache = new Map<VideoId, SearchCache>();
/**
 * Loads lyrics for `info` from every registered provider and mirrors the
 * progress into the lyrics store.
 *
 * Each song is searched at most once. Later calls for the same video id reuse
 * the cached entry, even while its search is still running. The store is only
 * written while `info` is still the song reported by `getSongInfo()`, so
 * results arriving late for an earlier song never overwrite the current one.
 */
export const fetchLyrics = (info: SongInfo) => {
  const isCurrentSong = () => getSongInfo().videoId === info.videoId;

  // Replaces the whole per-provider map in the store with a copy of `data`.
  const publishSnapshot = (data: SearchCacheData) => {
    if (!isCurrentSong()) return;
    setLyricsStore('lyrics', () => {
      // weird bug with solid-js
      return JSON.parse(JSON.stringify(data)) as SearchCacheData;
    });
  };

  const cached = searchCache.get(info.videoId);
  if (cached) {
    // If the search is still running, show what has arrived so far. The
    // in-flight handlers below push each remaining provider's result into the
    // store as it settles, so no polling is needed.
    publishSnapshot(cached.data);
    return;
  }

  const cache: SearchCache = {
    state: 'loading',
    data: initialData(),
  };
  searchCache.set(info.videoId, cache);
  publishSnapshot(cache.data);

  const tasks: Promise<void>[] = [];

  // prettier-ignore
  for (
    const [providerName, provider] of Object.entries(providers) as [
      ProviderName,
      LyricProvider,
    ][]
  ) {
    const pCache = cache.data[providerName];

    tasks.push(
      // Start inside a promise chain so that a provider throwing synchronously
      // is recorded as that provider's error instead of aborting this loop.
      Promise.resolve()
        .then(() => provider.search(info))
        .then((res) => {
          pCache.state = 'done';
          pCache.data = res;

          if (isCurrentSong()) {
            setLyricsStore('lyrics', (old) => {
              return {
                ...old,
                [providerName]: {
                  state: 'done',
                  data: res ? { ...res } : null,
                  error: null,
                },
              };
            });
          }
        })
        .catch((error: Error) => {
          pCache.state = 'error';
          pCache.error = error;

          if (isCurrentSong()) {
            setLyricsStore('lyrics', (old) => {
              return {
                ...old,
                [providerName]: { state: 'error', error, data: null },
              };
            });
          }
        }),
    );
  }

  // Every task has its own catch handler, so this cannot reject.
  void Promise.allSettled(tasks).then(() => {
    cache.state = 'done';
    searchCache.set(info.videoId, cache);
  });
};
/**
 * Re-runs the search of a single provider for `info`.
 *
 * The provider's entry is reset to 'fetching' and then replaced by the new
 * outcome. Each transition is recorded in the search cache when the song has
 * an entry there, so returning to the song later shows the retried result.
 * The store is only written while `info` is still the song reported by
 * `getSongInfo()`, so a retry finishing after a song change cannot overwrite
 * the new song's lyrics.
 */
export const retrySearch = (provider: ProviderName, info: SongInfo) => {
  const publish = (next: ProviderState) => {
    const cached = searchCache.get(info.videoId);
    if (cached) {
      cached.data[provider] = next;
    }

    if (getSongInfo().videoId !== info.videoId) return;
    setLyricsStore('lyrics', (old) => {
      return {
        ...old,
        // The store gets its own copies so it does not share objects with the cache.
        [provider]: {
          ...next,
          data: next.data ? { ...next.data } : null,
        },
      };
    });
  };

  publish({ state: 'fetching', data: null, error: null });

  // Start inside a promise chain so a synchronous throw from the provider is
  // reported as an 'error' state as well.
  void Promise.resolve()
    .then(() => providers[provider].search(info))
    .then((res) => {
      publish({ state: 'done', data: res, error: null });
    })
    .catch((error: Error) => {
      publish({ state: 'error', data: null, error });
    });
};