Improve scraping opinionation

(Thanks, The Quick Brown Fox - WANDERLUST)
This commit is contained in:
MeowcaTheoRange 2024-02-22 05:29:31 -06:00
parent b3bfe116c2
commit 1dca0db9bb
2 changed files with 41 additions and 14 deletions

View file

@ -2,13 +2,14 @@ require('dotenv').config();
const { Readable, PassThrough } = require("stream"); const { Readable, PassThrough } = require("stream");
const { finished } = require("stream/promises"); const { finished } = require("stream/promises");
const { searchMusics } = require('fix-esm').require("node-youtube-music"); const { searchMusics } = require('fix-esm').require("node-youtube-music");
const { GetListByKeyword } = require("youtube-search-api"); const { GetListByKeyword, GetVideoDetails } = require("youtube-search-api");
const skewered = require("skewered"); const skewered = require("skewered");
const { createClient } = require("fix-esm").require("webdav"); const { createClient } = require("fix-esm").require("webdav");
const path = require("path"); const path = require("path");
const { readFileSync, writeFileSync } = require("fs"); const { readFileSync, writeFileSync } = require("fs");
const io = require('socket.io-client'); const io = require('socket.io-client');
const NodeID3 = require('node-id3'); const NodeID3 = require('node-id3');
const xpath = require("xpath-html");
function pathGenerator({ url, name, channelName }) { function pathGenerator({ url, name, channelName }) {
return `${url}-${skewered(`${name} - ${channelName}`)}.mp3`; return `${url}-${skewered(`${name} - ${channelName}`)}.mp3`;
@ -26,6 +27,23 @@ function writeLastSong({ name, channelName }) {
return null; return null;
} }
/**
 * Looks for a YouTube video linked from a track's Last.fm page.
 *
 * Downloads `trackData.url`, searches the markup for an anchor whose class
 * contains `image-overlay-playlink-link`, and reads its `data-youtube-id`.
 * When an id is found, the details returned by `GetVideoDetails` are merged
 * with that id (the scraped id wins over any `id` key in the details).
 *
 * @param {{ url: string }} trackData - Track record; only `url` is read here.
 * @returns {Promise<object|undefined>} The video details plus `id`, or
 *   `undefined` when the page could not be loaded or exposes no video id.
 *   Network errors from `fetch` and failures of `GetVideoDetails` still
 *   propagate to the caller.
 */
async function scrapeLastFMWebsiteForVideo(trackData) {
  const response = await fetch(trackData.url);
  // An error page will not carry the play link, so don't bother parsing it.
  if (!response.ok) return undefined;
  const lastFMSite = await response.text();

  let attr;
  try {
    // classes: image-overlay-playlink-link
    // attrs: data-youtube-id
    const node = xpath
      .fromPageSource(lastFMSite)
      .findElement("//a[contains(@class, 'image-overlay-playlink-link')]/@data-youtube-id");
    // Tracks without a linked video have no such node; guard before
    // dereferencing so the null check below is actually reachable.
    attr = node?.getAttribute("data-youtube-id");
  } catch (err) {
    // The scraped page is third-party markup. A failed parse or lookup means
    // "no video found here", not a fatal error for the caller.
    console.warn(`Could not scrape a YouTube id from ${trackData.url}:`, err);
    return undefined;
  }
  if (attr == null || attr === "") return undefined;

  const youtubeVideo = await GetVideoDetails(attr);
  return {
    ...youtubeVideo,
    id: attr,
  };
}
async function getVideo() { async function getVideo() {
const trackData = await fetch( const trackData = await fetch(
`https://${process.env.LASTFM_INSTANCE}/2.0/?method=user.getrecenttracks&user=${process.env.LASTFM_USERNAME}&api_key=${process.env.LASTFM_API_KEY}&format=json&limit=1&extended=1` `https://${process.env.LASTFM_INSTANCE}/2.0/?method=user.getrecenttracks&user=${process.env.LASTFM_USERNAME}&api_key=${process.env.LASTFM_API_KEY}&format=json&limit=1&extended=1`
@ -65,10 +83,14 @@ async function getVideo() {
const musicList = await searchMusics(`${trackData.artist.name} - ${trackData.name}`); const musicList = await searchMusics(`${trackData.artist.name} - ${trackData.name}`);
const youtubeMusicVideo = musicList.find((song) => { const youtubeMusicVideo = musicList.find((song) => {
return skewered(song.title).includes(skewered(trackData.name)); return skewered(song.title).includes(skewered(trackData.name))
&& song.artists.some(artist => skewered(trackData.artist.name).includes(artist.name));
}); });
if (youtubeMusicVideo == null) { if (youtubeMusicVideo == null) {
const lastFMVideo = await scrapeLastFMWebsiteForVideo(trackData);
if (lastFMVideo == null) {
const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [ const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [
{type: "video"} {type: "video"}
]); ]);
@ -80,8 +102,12 @@ async function getVideo() {
name: musicVideo.title, name: musicVideo.title,
channelName: musicVideo.channelTitle channelName: musicVideo.channelTitle
}; };
} } else selectedVideo = {
else selectedVideo = { url: lastFMVideo.id,
name: lastFMVideo.title,
channelName: lastFMVideo.channel
};
} else selectedVideo = {
url: youtubeMusicVideo.youtubeId, url: youtubeMusicVideo.youtubeId,
name: youtubeMusicVideo.title, name: youtubeMusicVideo.title,
channelName: youtubeMusicVideo.artists[0].name channelName: youtubeMusicVideo.artists[0].name

View file

@ -20,6 +20,7 @@
"socket.io": "^4.7.4", "socket.io": "^4.7.4",
"socket.io-client": "^4.7.4", "socket.io-client": "^4.7.4",
"webdav": "^5.3.2", "webdav": "^5.3.2",
"xpath-html": "^1.0.3",
"youtube-search-api": "^1.2.1" "youtube-search-api": "^1.2.1"
} }
} }