ユーザ用ツール

サイト用ツール


nodejs:scraping

スクレイピング

一つ上へ

必要なパッケージをインストールする

npm install cheerio axios

// index.js

const axios = require('axios');
const cheerio = require('cheerio');

// Slack Incoming Webhooks URL. Declared for later use; nothing in this
// script posts to it yet.
const slackWebhookUrl = '';

// URL of the page to scrape. This script used to pass an undeclared `url`
// to axios.get(), which threw a ReferenceError before any request was made.
// Set this to the target page before running.
const url = '';

// Fetch the HTML with Axios.
axios.get(url)
  .then((response) => {
    // Parse the fetched HTML with Cheerio.
    const $ = cheerio.load(response.data);

    // The entries are the <li> elements inside .uniMainList, which itself
    // lives inside the .modWhiteBox01 container.
    const liElements = $('.modWhiteBox01').find('.uniMainList').find('li');

    const items = [];

    liElements.each((index, element) => {
      // Drop tabs, split on newlines and keep only the non-blank lines.
      const lines = $(element)
        .text()
        .replace(/\t/g, '')
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line !== '');

      // The first three non-blank lines are read as date / tag / title.
      // Skip entries that have fewer, instead of throwing on an undefined
      // line and losing every other entry to the catch handler.
      if (lines.length < 3) {
        return;
      }

      const [date, tag, title] = lines;

      // Only maintenance and outage announcements are reported.
      if (tag.includes('メンテナンス') || tag.includes('障害')) {
        items.push({ date, tag, title });
      }
    });

    console.log(items);
  })
  .catch((error) => {
    console.error('エラー:', error);
  });

以下を実行する

npm run start

進行中のコード(csv-parser と csv-writer を使うため、事前に npm install csv-parser csv-writer も実行する)

// index.js
 
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const csv = require('csv-parser');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
 
 
// URLs to scrape, one entry per product page.
// Example: 'https://www.amazon.co.jp/dp/B0B3LQH6CR'
const urls = [
  // URLs
];

// Result rows; each row is a { url, price, stock } object matching the CSV
// columns declared below.
const data = [];

// HTTP headers for the Axios requests.
const headers = {
  //'Accept-Language': 'en-US;q=0.9,en;q=0.8',
  'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
};

// Writer for output.csv (UTF-8) with the columns URL / Price / Stock.
const csvWriter = createCsvWriter({
  path: 'output.csv',
  encoding: 'utf8',
  header: [
    { id: 'url', title: 'URL' },
    { id: 'price', title: 'Price' },
    { id: 'stock', title: 'Stock' },
  ],
});
 
/**
 * Parses a numeric string as a base-10 integer price.
 *
 * @param {string} text - Text expected to start with the price digits.
 * @returns {number|string} The parsed integer, or '' when no number could be
 *   read (so that NaN never reaches the CSV).
 */
function parsePriceOrEmpty(text) {
  const value = Number.parseInt(text, 10);
  return Number.isNaN(value) ? '' : value;
}

/**
 * Scrapes price and stock status for every entry in `urls` and writes the
 * collected rows to the CSV file configured on `csvWriter`.
 *
 * URLs are fetched one at a time. Which selectors are used depends on the
 * site detected from the URL; a URL from an unsupported site, or a request
 * that fails, still produces a row with whatever values were collected so
 * far (empty strings by default). Rows are appended to the module-level
 * `data` array, so calling this more than once accumulates rows.
 *
 * @returns {Promise<void>} Resolves after the CSV has been written. Rejects
 *   if writing the CSV fails; per-URL errors are logged and not rethrown.
 */
async function fetchData() {
  for (const url of urls) {
    let price = '';
    let stock = '';
    try {
      const response = await axios.get(url, { headers, timeout: 20000 });

      const $ = cheerio.load(response.data);

      if (url.includes('www.amazon.co.jp')) {
        const priceElement = $('#centerCol')
          .find('#corePriceDisplay_desktop_feature_div')
          .find('span.a-price-whole');
        price = priceElement.length > 0 ? priceElement.html().trim() : '';

        // In stock only when the button input's value is "add to cart".
        // attr() yields undefined when no input matched, which also maps
        // to "out of stock".
        const stockInputElement = $('div.a-button-stack').find('input');
        stock = stockInputElement.attr('value') === 'カートに入れる' ? 'あり' : 'なし';
      } else if (url.includes('shopdisney.disney.co.jp')) {
        // Price
        price = $('body > div.page > main > div:nth-child(9) > div.product-detail.product-detail__section-bound.product-wrapper.rating--unavailable > div.product-detail__inner-container > div:nth-child(1)')
          .find('div.price').find('span.value').attr('content');

        // Stock: the add-to-cart button label must match exactly.
        const addToCartBtn = $('body > div.page > main > div:nth-child(9) > div.product-detail.product-detail__section-bound.product-wrapper.rating--unavailable > div.product-detail__inner-container > div.col-12.col-sm-12.col-md-6.col-lg-4.col-xl-3.product-detail__content-summary.product-detail__content-summary--details.product-detail__content-summary--name > div > div.col-12.prices-add-to-cart-actions > div > div:nth-child(1) > button')
          .find('span.btn').children('span').text().trim();
        stock = addToCartBtn === 'カートに入れる' ? 'あり' : 'なし';
      } else if (url.includes('item.rakuten.co.jp')) {
        price = $('#rakutenLimitedId_cart').find('#priceCalculationConfig').attr('data-price');

        // In stock only when the add-to-cart button exists and carries no
        // `disabled` attribute. The previous check compared
        // prop('disabled') with undefined; Cheerio returns a boolean for
        // boolean attributes on a matched element, so that comparison was
        // true whenever the button existed and false only when it was
        // missing, which inverted the result.
        const addToCartBtn = $('#AddToCartPurchaseButtonFixed').find('button[aria-label="カートに追加"]');
        const isAvailable = addToCartBtn.length > 0 && addToCartBtn.attr('disabled') === undefined;
        stock = isAvailable ? 'あり' : 'なし';
      } else if (url.includes('www.suruga-ya.jp/product/detail')) {
        const priceGroupEl = $('body > div.dialog-off-canvas-main-canvas > div.container_suru.padB40 > div:nth-child(9) > div.col-8.padL32 > div.d-flex.justify-content-start > div.w-70.pr-5 > div.price_group.mb-3');
        const priceWithoutComma = priceGroupEl.find('span.text-price-detail.price-buy').text().replace(/,/g, '');
        price = parsePriceOrEmpty(priceWithoutComma);
        // NOTE(review): stock is reported as available unconditionally for
        // this site; no element is inspected.
        stock = 'あり';
      } else if (url.includes('ec.treasure-f.com/item')) {
        const itemElRight = $('body > main > div.main > div.clearfix > div.item-detail-area-right');
        const itemPriceArea = itemElRight.find('.item-detail-area-right-price');
        const priceDigits = itemPriceArea.children('p').find('.disp-tax-in').text().replace(/[^\d]/g, '');
        price = parsePriceOrEmpty(priceDigits);

        // In stock when the cart button is present in the order form.
        const addToCartBtnEl = itemElRight.find('#item-submitform').find('#item-cartbutton');
        stock = addToCartBtnEl.length > 0 ? 'あり' : 'なし';
      } else if (url.includes('item.fril.jp')) {
        // Assign to the outer `price`. This branch used to declare its own
        // `const price`, so the scraped value was discarded and the row was
        // always written with an empty price.
        price = $('body > div.drawer-overlay > div.container.new-rakuma > div > div.col-lg-12.col-md-12.col-sm-12.col-xs-12 > div.row > div > article > div > div.col-lg-5.col-md-5.col-sm-12.col-xs-12.right-section > section > div.item-info__header > div:nth-child(2) > div > p > span.item__price')
          .text().replace(/[^\d]/g, '');

        // In stock when the fixed purchase button is present.
        const sellBtnEl = $('body > div.drawer-overlay > div.container.new-rakuma > div > div.col-lg-12.col-md-12.col-sm-12.col-xs-12 > div.row > div > article > div > div.col-lg-5.col-md-5.col-sm-12.col-xs-12.right-section > section > div:nth-child(4) > div > div.btn-buy-fixed.clearfix > p.sell-btn-fixed');
        stock = sellBtnEl.length > 0 ? 'あり' : 'なし';
      }
      // Any other site: price and stock keep their empty defaults.

      data.push({ url, price, stock });
      console.log(data);

    } catch (error) {
      // Keep going with the next URL; record whatever was collected so the
      // failed URL still appears in the CSV.
      console.error('エラー:', error);
      data.push({ url, price, stock });
    }
  }

  // Write the CSV once every URL has been processed.
  await csvWriter.writeRecords(data);
  console.log('CSVにデータが書き込まれました。');
}
 
// Run the scraper. fetchData() logs per-URL request errors itself, but the
// final CSV write can still reject, so report that failure and set a
// non-zero exit code instead of leaving an unhandled promise rejection.
fetchData().catch((error) => {
  console.error('エラー:', error);
  process.exitCode = 1;
});
nodejs/scraping.txt · 最終更新: 2023/12/18 20:48 by mikoto