Improve scraping opinionation

(Thanks, The Quick Brown Fox - WANDERLUST)
This commit is contained in:
MeowcaTheoRange 2024-02-22 05:29:31 -06:00
parent b3bfe116c2
commit 1dca0db9bb
2 changed files with 41 additions and 14 deletions

View file

@ -2,13 +2,14 @@ require('dotenv').config();
const { Readable, PassThrough } = require("stream");
const { finished } = require("stream/promises");
const { searchMusics } = require('fix-esm').require("node-youtube-music");
const { GetListByKeyword } = require("youtube-search-api");
const { GetListByKeyword, GetVideoDetails } = require("youtube-search-api");
const skewered = require("skewered");
const { createClient } = require("fix-esm").require("webdav");
const path = require("path");
const { readFileSync, writeFileSync } = require("fs");
const io = require('socket.io-client');
const NodeID3 = require('node-id3');
const xpath = require("xpath-html");
function pathGenerator({ url, name, channelName }) {
return `${url}-${skewered(`${name} - ${channelName}`)}.mp3`;
@ -26,6 +27,23 @@ function writeLastSong({ name, channelName }) {
return null;
}
/**
 * Look up the YouTube video that a Last.fm track page links to.
 *
 * Downloads the page at `trackData.url`, reads the `data-youtube-id`
 * attribute of the play link (an `<a>` whose class contains
 * `image-overlay-playlink-link`), and fetches that video's details through
 * `GetVideoDetails`.
 *
 * @param {object} trackData - Track record; only its `url` property is read
 *   here and it is passed straight to `fetch`.
 * @returns {Promise<object | undefined>} The object returned by
 *   `GetVideoDetails` with an `id` property set to the scraped YouTube ID, or
 *   `undefined` when no ID could be read from the page.
 */
async function scrapeLastFMWebsiteForVideo(trackData) {
  const response = await fetch(trackData.url);
  const pageSource = await response.text();

  const playLink = xpath
    .fromPageSource(pageSource)
    .findElement("//a[contains(@class, 'image-overlay-playlink-link')]/@data-youtube-id");

  // Guard the element itself, not just the attribute value: dereferencing a
  // missing match would throw before the "not found" return could run.
  // NOTE(review): assumes findElement yields a nullish value when nothing
  // matches -- confirm against the xpath-html version in use.
  const youtubeId = playLink?.getAttribute("data-youtube-id");
  if (youtubeId == null) return undefined;

  const youtubeVideo = await GetVideoDetails(youtubeId);

  // `id` is written last so the scraped ID wins over any `id` in the details.
  return {
    ...youtubeVideo,
    id: youtubeId,
  };
}
async function getVideo() {
const trackData = await fetch(
`https://${process.env.LASTFM_INSTANCE}/2.0/?method=user.getrecenttracks&user=${process.env.LASTFM_USERNAME}&api_key=${process.env.LASTFM_API_KEY}&format=json&limit=1&extended=1`
@ -65,23 +83,31 @@ async function getVideo() {
const musicList = await searchMusics(`${trackData.artist.name} - ${trackData.name}`);
const youtubeMusicVideo = musicList.find((song) => {
return skewered(song.title).includes(skewered(trackData.name));
return skewered(song.title).includes(skewered(trackData.name))
&& song.artists.some(artist => skewered(trackData.artist.name).includes(artist.name));
});
if (youtubeMusicVideo == null) {
const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [
{type: "video"}
]);
const musicVideo = videoList.items[0];
const lastFMVideo = await scrapeLastFMWebsiteForVideo(trackData);
selectedVideo = {
url: musicVideo.id,
name: musicVideo.title,
channelName: musicVideo.channelTitle
if (lastFMVideo == null) {
const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [
{type: "video"}
]);
const musicVideo = videoList.items[0];
selectedVideo = {
url: musicVideo.id,
name: musicVideo.title,
channelName: musicVideo.channelTitle
};
} else selectedVideo = {
url: lastFMVideo.id,
name: lastFMVideo.title,
channelName: lastFMVideo.channel
};
}
else selectedVideo = {
} else selectedVideo = {
url: youtubeMusicVideo.youtubeId,
name: youtubeMusicVideo.title,
channelName: youtubeMusicVideo.artists[0].name

View file

@ -20,6 +20,7 @@
"socket.io": "^4.7.4",
"socket.io-client": "^4.7.4",
"webdav": "^5.3.2",
"xpath-html": "^1.0.3",
"youtube-search-api": "^1.2.1"
}
}
}