From 1dca0db9bba7e6c5ef1ceab60b45f3ef3c731837 Mon Sep 17 00:00:00 2001 From: MeowcaTheoRange Date: Thu, 22 Feb 2024 05:29:31 -0600 Subject: [PATCH] Improve scraping opinionation (Thanks, The Quick Brown Fox - WANDERLUST) --- index.js | 52 +++++++++++++++++++++++++++++++++++++++------------- package.json | 3 ++- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/index.js b/index.js index a3cdd42..50c57a2 100644 --- a/index.js +++ b/index.js @@ -2,13 +2,14 @@ require('dotenv').config(); const { Readable, PassThrough } = require("stream"); const { finished } = require("stream/promises"); const { searchMusics } = require('fix-esm').require("node-youtube-music"); -const { GetListByKeyword } = require("youtube-search-api"); +const { GetListByKeyword, GetVideoDetails } = require("youtube-search-api"); const skewered = require("skewered"); const { createClient } = require("fix-esm").require("webdav"); const path = require("path"); const { readFileSync, writeFileSync } = require("fs"); const io = require('socket.io-client'); const NodeID3 = require('node-id3'); +const xpath = require("xpath-html"); function pathGenerator({ url, name, channelName }) { return `${url}-${skewered(`${name} - ${channelName}`)}.mp3`; @@ -26,6 +27,23 @@ function writeLastSong({ name, channelName }) { return null; } +async function scrapeLastFMWebsiteForVideo(trackData) { + const lastFMSite = await fetch(trackData.url).then(x => x.text()); + // classes: image-overlay-playlink-link + // attrs: data-youtube-id + + const attr = xpath.fromPageSource(lastFMSite).findElement("//a[contains(@class, 'image-overlay-playlink-link')]/@data-youtube-id").getAttribute("data-youtube-id"); + + if (attr == null) return; + + const youtubeVideo = await GetVideoDetails(attr); + + return { + ...youtubeVideo, + id: attr + }; +} + async function getVideo() { const trackData = await fetch( `https://${process.env.LASTFM_INSTANCE}/2.0/?method=user.getrecenttracks&user=${process.env.LASTFM_USERNAME}&api_key=${process.env.LASTFM_API_KEY}&format=json&limit=1&extended=1` @@ -65,23 +83,31 @@ async function getVideo() { const musicList = await searchMusics(`${trackData.artist.name} - ${trackData.name}`); const youtubeMusicVideo = musicList.find((song) => { - return skewered(song.title).includes(skewered(trackData.name)); + return skewered(song.title).includes(skewered(trackData.name)) + && song.artists.some(artist => skewered(trackData.artist.name).includes(artist.name)); }); if (youtubeMusicVideo == null) { - const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [ - {type: "video"} - ]); - - const musicVideo = videoList.items[0]; + const lastFMVideo = await scrapeLastFMWebsiteForVideo(trackData); - selectedVideo = { - url: musicVideo.id, - name: musicVideo.title, - channelName: musicVideo.channelTitle + if (lastFMVideo == null) { + const videoList = await GetListByKeyword(`${trackData.artist.name} - ${trackData.name}`, false, 1, [ + {type: "video"} + ]); + + const musicVideo = videoList.items[0]; + + selectedVideo = { + url: musicVideo.id, + name: musicVideo.title, + channelName: musicVideo.channelTitle + }; + } else selectedVideo = { + url: lastFMVideo.id, + name: lastFMVideo.title, + channelName: lastFMVideo.channel }; - } - else selectedVideo = { + } else selectedVideo = { url: youtubeMusicVideo.youtubeId, name: youtubeMusicVideo.title, channelName: youtubeMusicVideo.artists[0].name diff --git a/package.json b/package.json index 46e6002..2c57305 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "socket.io": "^4.7.4", "socket.io-client": "^4.7.4", "webdav": "^5.3.2", + "xpath-html": "^1.0.3", "youtube-search-api": "^1.2.1" } -} \ No newline at end of file +}