본문 바로가기

nodejs, react) 외국인 대상 서비스 만들기 - 16일차 - 2024.06.25

 

crawling comments

 

    async _downloadComments(page) {
        await page.waitForSelector('div#comment.icomment');
        await page.evaluate(() => {
            if (!!document.querySelector('div.moretext')) {
                document.querySelector('div.moretext').click();
            }
        });
        await page.evaluate(() => {
            if (!!document.querySelector('div.cmt_memo p')) {
                document.querySelector('div.cmt_memo p').click();
            }
        });
        await page.waitForSelector('div.cmt.r1');
        const content = await page.evaluate(() => {
            return Array.from(document.querySelectorAll('div#comment.icomment')).map(el => el.outerHTML).join('');
        });
        return this._parseComments(content);
    }

    async _parseComments(content) {

        const $ = cheerio.load(content);
        const comments = [];

        const parseComment = (element, parentId = null) => {
            const $element = $(element);
            const id = $element.attr('w_idx');
            const text = $element.find('.content').html();
            const author = $element.find('.nick').text().trim();
            const likes = parseInt($element.find('.up u').text().trim()) || 0;

            const commentData = {
                id,
                text,
                author,
                likes,
                parentId
            };
            comments.push(commentData);

            return id;
        };

        $('div.set').each((index, setElement) => {
            const $set = $(setElement);
            var id = null;
            $set.find('div.cmt.r0').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
            $set.find('div.cmt.r1').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
            $set.find('div.cmt.r2').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
            $set.find('div.cmt.r3').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
            $set.find('div.cmt.r4').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
            $set.find('div.cmt.r5').each((index, childElement) => {
                id = parseComment(childElement, id);
            });
        });

        return comments;
    }

      // Tag each parsed comment with the id of the item it belongs to, then
      // persist it. Records are created one at a time, in list order.
      for (const record of comments) {
        Object.assign(record, { itemId: item.id });
        await Comment.create(record);
      }