본문 바로가기

개발 관련 기타/웹

nodejs) puppeteer 사용해서 특정 사이트에서 file download - 개발 기록 2

const puppeteer = require('puppeteer');

class ItemDownloader {

    async downloadImage(link) {
        const links = await this._extractFileLinksFromLink(link);
        const paths = await this._downloadFiles(links);

        return paths;
    }

    async _extractFileLinksFromLink(link) {

        const browser = await puppeteer.launch();
        const page = await browser.newPage();
        await page.goto(link);

        const buttonSelector = 'div#download_btn';
        await page.waitForSelector(buttonSelector);
        await page.click(buttonSelector);

        const listSelector = 'div#download_list';
        await page.waitForSelector(listSelector);

        const links = await page.evaluate(() => {

            const downloadLinks = document.querySelectorAll('div#download_list a');

            return Array.from(downloadLinks).map(anchor => anchor.href);
        });

        console.log("links: ", links);

        await browser.close();

        return links;
    }

    async _downloadFiles(links) {
        const axios = require('axios');
        const fs = require('fs');
        const path = require('path');

        const downloadFiles = async (links) => {
            const downloadPromises = links.map(async (url, index) => {
                const response = await axios({
                    url,
                    method: 'GET',
                    responseType: 'arraybuffer'
                });
                const urlPath = new URL(url).pathname;
                const fileName = path.basename(urlPath);
                const filePath = path.resolve(__dirname, '../uploads/', fileName);
                fs.writeFileSync(filePath, response.data);
                return filePath;
            });
            return Promise.all(downloadPromises);
        };

        const downloadedFilePaths = await downloadFiles(links);        
        return downloadedFilePaths;
    }
}

// Expose the ItemDownloader class as this CommonJS module's export.
module.exports = ItemDownloader;