====== スクレイピング ======
[[:nodejs|一つ上へ]]
必要なパッケージをインストールする
npm install cheerio axios
===== 例 =====
// index.js
const axios = require('axios');
const cheerio = require('cheerio');

// Slack Incoming Webhooks URL. Not referenced anywhere in this example yet.
const slackWebhookUrl = '';
// URL of the page to scrape. Must be filled in before running; without this
// declaration the axios.get(url) call below throws a ReferenceError.
const url = '';

// Fetch the HTML with Axios.
axios.get(url)
  .then(response => {
    // Parse the fetched HTML with Cheerio.
    const $ = cheerio.load(response.data);
    // Narrow the selection step by step: .modWhiteBox01 -> .uniMainList -> li
    const liElements = $('.modWhiteBox01').find('.uniMainList').find('li');
    const items = [];
    liElements.each((index, element) => {
      // Drop tabs, split on newlines and keep only the non-blank, trimmed lines.
      const lines = $(element).text().replace(/\t/g, '').split('\n')
        .map(line => line.trim())
        .filter(line => line !== '');
      // The first three lines are read as date, tag and title. Skip entries that
      // have fewer lines so one malformed <li> does not abort the whole run.
      if (lines.length < 3) {
        return;
      }
      const [date, tag, title] = lines;
      // Keep only maintenance ('メンテナンス') and outage ('障害') entries.
      if (tag.includes('メンテナンス') || tag.includes('障害')) {
        items.push({ date, tag, title });
      }
    });
    console.log(items);
  })
  .catch(error => {
    console.error('エラー:', error);
  });
以下を実行する(事前に package.json の scripts に "start": "node index.js" を登録しておく。未登録の場合は node index.js で直接実行できる)
npm run start
==== 進行中のコード ====
このコードでは csv-parser と csv-writer も使用するため、追加でインストールしておく(npm install csv-parser csv-writer)
// index.js
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
const csv = require('csv-parser');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
// Request headers passed to Axios; only a desktop Chrome User-Agent is set.
// An 'Accept-Language': 'en-US;q=0.9,en;q=0.8' entry was tried here and is currently disabled.
const headers = {
  'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
};

// Pages to scrape, one URL string per entry
// (for example 'https://www.amazon.co.jp/dp/B0B3LQH6CR').
const urls = [
  // URLs
];

// Rows accumulated while scraping; each row is an object with url, price and stock.
const data = [];

// CSV writer for output.csv (UTF-8) with one column per row property.
const csvWriter = createCsvWriter({
  path: 'output.csv',
  encoding: 'utf8',
  header: [
    ['url', 'URL'],
    ['price', 'Price'],
    ['stock', 'Stock'],
  ].map(([id, title]) => ({ id, title })),
});
/**
 * Parses a base-10 integer from text.
 *
 * @param {string} text - Text expected to start with the digits of a price.
 * @returns {number|string} The parsed integer, or '' when no integer could be
 *   parsed (so that NaN never ends up in a CSV row).
 */
function toIntegerOrEmpty(text) {
  const value = parseInt(text, 10);
  return Number.isNaN(value) ? '' : value;
}

/**
 * Scrapes price and stock status for every entry in `urls` and writes all rows
 * to the CSV file through `csvWriter`.
 *
 * Uses the module-level `urls`, `headers`, `data` and `csvWriter`. Pages are
 * fetched one at a time with a 20 second timeout. The site-specific branch is
 * chosen by a substring match on the URL; URLs that match no branch are
 * recorded with empty price and stock. Stock is recorded as 'あり' (available)
 * or 'なし' (unavailable).
 *
 * A failure while fetching or parsing one URL is logged and that URL is still
 * recorded with whatever price/stock had been determined so far.
 *
 * @returns {Promise<void>} Resolves after the CSV has been written; rejects if
 *   `csvWriter.writeRecords` rejects.
 */
async function fetchData() {
  for (const url of urls) {
    let price = '';
    let stock = '';
    try {
      const response = await axios.get(url, { headers, timeout: 20000 });
      const $ = cheerio.load(response.data);
      if (url.includes('www.amazon.co.jp')) {
        const priceElement = $('#centerCol').find('#corePriceDisplay_desktop_feature_div').find('span.a-price-whole');
        if (priceElement.length > 0) {
          price = priceElement.html().trim();
        }
        // In stock only when the button-stack input carries the add-to-cart label.
        // attr() returns undefined for an empty selection, which also maps to 'なし'.
        const stockInputElement = $('div.a-button-stack').find('input');
        stock = stockInputElement.attr('value') === 'カートに入れる' ? 'あり' : 'なし';
      } else if (url.includes('shopdisney.disney.co.jp')) {
        // Price is read from the "content" attribute of the price value span.
        price = $('body > div.page > main > div:nth-child(9) > div.product-detail.product-detail__section-bound.product-wrapper.rating--unavailable > div.product-detail__inner-container > div:nth-child(1)')
          .find('div.price').find('span.value').attr('content');
        // In stock only when the button label is the add-to-cart text.
        const addToCartLabel = $('body > div.page > main > div:nth-child(9) > div.product-detail.product-detail__section-bound.product-wrapper.rating--unavailable > div.product-detail__inner-container > div.col-12.col-sm-12.col-md-6.col-lg-4.col-xl-3.product-detail__content-summary.product-detail__content-summary--details.product-detail__content-summary--name > div > div.col-12.prices-add-to-cart-actions > div > div:nth-child(1) > button')
          .find('span.btn').children('span').text().trim();
        stock = addToCartLabel === 'カートに入れる' ? 'あり' : 'なし';
      } else if (url.includes('item.rakuten.co.jp')) {
        price = $('#rakutenLimitedId_cart').find('#priceCalculationConfig').attr('data-price');
        const addToCartBtn = $('#AddToCartPurchaseButtonFixed').find('button[aria-label="カートに追加"]');
        // cheerio's prop('disabled') returns a boolean for an existing element, so
        // comparing it with undefined marked every existing button as disabled.
        // Check the attribute instead, and treat a missing button as out of stock
        // like the other site branches do.
        const isDisabled = addToCartBtn.attr('disabled') !== undefined;
        stock = addToCartBtn.length > 0 && !isDisabled ? 'あり' : 'なし';
      } else if (url.includes('www.suruga-ya.jp/product/detail')) {
        const priceGroupEl = $('body > div.dialog-off-canvas-main-canvas > div.container_suru.padB40 > div:nth-child(9) > div.col-8.padL32 > div.d-flex.justify-content-start > div.w-70.pr-5 > div.price_group.mb-3');
        const priceWithoutComma = priceGroupEl.find('span.text-price-detail.price-buy').text().replace(/,/g, '');
        price = toIntegerOrEmpty(priceWithoutComma);
        // Stock is not scraped for this site; it is always recorded as available.
        stock = 'あり';
      } else if (url.includes('ec.treasure-f.com/item')) {
        const itemElRight = $('body > main > div.main > div.clearfix > div.item-detail-area-right');
        const itemPriceArea = itemElRight.find('.item-detail-area-right-price');
        const priceDigits = itemPriceArea.children('p').find('.disp-tax-in').text().replace(/[^\d]/g, '');
        price = toIntegerOrEmpty(priceDigits);
        console.log(price);
        // In stock when the cart button exists inside the submit form.
        const addToCartBtnEl = itemElRight.find('#item-submitform').find('#item-cartbutton');
        stock = addToCartBtnEl.length > 0 ? 'あり' : 'なし';
      } else if (url.includes('item.fril.jp')) {
        // Assign to the outer `price`; declaring a new const here shadowed it and
        // left the recorded price empty.
        price = $('body > div.drawer-overlay > div.container.new-rakuma > div > div.col-lg-12.col-md-12.col-sm-12.col-xs-12 > div.row > div > article > div > div.col-lg-5.col-md-5.col-sm-12.col-xs-12.right-section > section > div.item-info__header > div:nth-child(2) > div > p > span.item__price')
          .text().replace(/[^\d]/g, '');
        // In stock when the fixed purchase button is present.
        const sellBtnEl = $('body > div.drawer-overlay > div.container.new-rakuma > div > div.col-lg-12.col-md-12.col-sm-12.col-xs-12 > div.row > div > article > div > div.col-lg-5.col-md-5.col-sm-12.col-xs-12.right-section > section > div:nth-child(4) > div > div.btn-buy-fixed.clearfix > p.sell-btn-fixed');
        stock = sellBtnEl.length > 0 ? 'あり' : 'なし';
      }
      // URLs that match no branch keep the empty price and stock set above.
      data.push({ url, price, stock });
      console.log(data);
    } catch (error) {
      console.error('エラー:', error);
      // Still record the URL so every input has a row in the CSV.
      data.push({ url, price, stock });
    }
  }
  // Write the CSV once, after every URL has been processed.
  await csvWriter.writeRecords(data);
  console.log('CSVにデータが書き込まれました。');
}
// Run the scraper. fetchData awaits the CSV write outside its try/catch, so a
// rejection can escape it; log it and mark the process as failed instead of
// leaving the promise unhandled.
fetchData().catch(error => {
  console.error('エラー:', error);
  process.exitCode = 1;
});