nodejs:scraping
差分
このページの2つのバージョン間の差分を表示します。
両方とも前のリビジョン / 前のリビジョン / 次のリビジョン | 前のリビジョン | ||
nodejs:scraping [2023/10/26 14:18] – mikoto | nodejs:scraping [2023/12/18 20:48] (現在) – mikoto | ||
---|---|---|---|
行 1: | 行 1: | ||
- | スクレイピング | + | ====== |
[[: | [[: | ||
行 6: | 行 6: | ||
<code bash> | <code bash> | ||
npm install cheerio axios | npm install cheerio axios | ||
+ | </ | ||
+ | |||
+ | ===== 例 ===== | ||
+ | <code javascript> | ||
+ | // index.js | ||
+ | |||
+ | const axios = require(' | ||
+ | const cheerio = require(' | ||
+ | |||
+ | // Slack Incoming WebhooksのURLを設定 | ||
+ | const slackWebhookUrl = ''; | ||
+ | |||
+ | // Axiosを使用してHTMLを取得(url はこの例の中で定義されていないため、事前にスクレイピング対象のURLを定義しておくこと) | ||
+ | axios.get(url) | ||
+ | .then(response => { | ||
+ | // 取得したHTMLをCheerioでパース | ||
+ | const $ = cheerio.load(response.data); | ||
+ | |||
+ | // スクレイピング対象の要素を指定してデータを取得 | ||
+ | // .modWhiteBox01 | ||
+ | const targetDiv = $(' | ||
+ | |||
+ | // div要素内の uniMainList をセレクタで指定して取得 | ||
+ | const innerTargetDiv = targetDiv.find(' | ||
+ | |||
+ | // その中の li 要素を取得 | ||
+ | const liElements = innerTargetDiv.find(' | ||
+ | |||
+ | const items = []; | ||
+ | |||
+ | liElements.each((index, | ||
+ | // 改行で分割し、空白でない行のみを取り出す | ||
+ | var lines = $(element).text().replace(/ | ||
+ | | ||
+ | const date = lines[0].trim(); | ||
+ | const tag = lines[1].trim(); | ||
+ | const title = lines[2].trim(); | ||
+ | const today = new Date(); | ||
+ | const yesterday = new Date(); | ||
+ | yesterday.setDate(today.getDate() - 1); | ||
+ | // | ||
+ | const formattedToday = today.toISOString().split(' | ||
+ | if (tag.includes(' | ||
+ | items.push({ | ||
+ | date: date, | ||
+ | tag: tag, | ||
+ | title: title | ||
+ | }); | ||
+ | } | ||
+ | }); | ||
+ | console.log(items); | ||
+ | |||
+ | |||
+ | }) | ||
+ | .catch(error => { | ||
+ | console.error(' | ||
+ | }); | ||
+ | </ | ||
+ | |||
+ | 以下を実行する(package.json の scripts に start が定義されている前提) | ||
+ | < | ||
+ | npm run start | ||
+ | </ | ||
+ | |||
+ | ==== 進行中のコード ==== | ||
+ | <code javascript> | ||
+ | // index.js | ||
+ | |||
+ | const axios = require(' | ||
+ | const cheerio = require(' | ||
+ | const fs = require(' | ||
+ | const csv = require(' | ||
+ | const createCsvWriter = require(' | ||
+ | |||
+ | |||
+ | // スクレイピング対象のURL | ||
+ | //const url = ' | ||
+ | |||
+ | const urls = [ | ||
+ | // URLs | ||
+ | ]; | ||
+ | |||
+ | const data = []; | ||
+ | |||
+ | const csvWriter = createCsvWriter({ | ||
+ | path: ' | ||
+ | encoding: ' | ||
+ | header: [ | ||
+ | { id: ' | ||
+ | { id: ' | ||
+ | { id: ' | ||
+ | ] | ||
+ | }); | ||
+ | |||
+ | // Axios header | ||
+ | const headers = { | ||
+ | //' | ||
+ | ' | ||
+ | } | ||
+ | |||
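+ | // 非同期処理を扱うために async function を使用 | ||
+ | // 注意: 最後のサイト分岐では const price を宣言しており、外側の let price を隠してしまうため、その分岐で取得した価格は data に記録されない(const を外して代入にする必要がある) | ||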
+ | async function fetchData() { | ||
+ | for (const url of urls) { | ||
+ | let price = ''; | ||
+ | let stock = ''; | ||
+ | try { | ||
+ | const response = await axios.get(url, | ||
+ | |||
+ | const $ = cheerio.load(response.data); | ||
+ | |||
+ | if (url.includes(' | ||
+ | const priceElement = $('# | ||
+ | if (priceElement.length > 0) { | ||
+ | price = priceElement.html().trim(); | ||
+ | } else { | ||
+ | price = ''; | ||
+ | } | ||
+ | const stockInputElement = $(' | ||
+ | if (stockInputElement.length > 0 ) { | ||
+ | // Value属性の値を取得 | ||
+ | const valueAttribute = stockInputElement.attr(' | ||
+ | if (valueAttribute === ' | ||
+ | stock = ' | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else if (url.includes(' | ||
+ | // 価格を取得 | ||
+ | price = $(' | ||
+ | .find(' | ||
+ | |||
+ | // 在庫状況を確認 | ||
+ | const addToCartBtn = $(' | ||
+ | .find(' | ||
+ | if (addToCartBtn === ' | ||
+ | stock = ' | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else if (url.includes(' | ||
+ | price = $('# | ||
+ | |||
+ | const addToCartBtn = $('# | ||
+ | const isDisabled = addToCartBtn.prop(' | ||
+ | if (isDisabled) { | ||
+ | stock = ' | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else if (url.includes(' | ||
+ | const priceGroupEl = $(' | ||
+ | const priceWithoutCamma = priceGroupEl.find(' | ||
+ | price = parseInt(priceWithoutCamma, | ||
+ | stock = ' | ||
+ | } | ||
+ | else if (url.includes(' | ||
+ | const itemElRight = $(' | ||
+ | const itempriceArea = itemElRight.find(' | ||
+ | const priceCurrency = itempriceArea.children(' | ||
+ | price = parseInt(priceCurrency, | ||
+ | console.log(price); | ||
+ | |||
+ | const addToCartBtnEl = itemElRight.find('# | ||
+ | if (addToCartBtnEl.length > 0) { | ||
+ | stock = ' | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else if (url.includes(' | ||
+ | const price = $(' | ||
+ | .text().replace(/ | ||
+ | |||
+ | const sellBtnEl = $(' | ||
+ | if (sellBtnEl.length > 0) { | ||
+ | stock = ' | ||
+ | } else { | ||
+ | stock = ' | ||
+ | } | ||
+ | } else { | ||
+ | price = ''; | ||
+ | stock = ''; | ||
+ | } | ||
+ | | ||
+ | data.push({ url, price, stock }); | ||
+ | console.log(data); | ||
+ | |||
+ | } catch (error) { | ||
+ | console.error(' | ||
+ | data.push({ url, price, stock }); | ||
+ | } | ||
+ | } | ||
+ | | ||
+ | // 全ての非同期処理が完了した後にCSVに書き込み | ||
+ | await csvWriter.writeRecords(data); | ||
+ | console.log(' | ||
+ | } | ||
+ | |||
+ | // fetchDataを呼び出し | ||
+ | fetchData(); | ||
</ | </ |
nodejs/scraping.1698297516.txt.gz · 最終更新: 2023/10/26 14:18 by mikoto