nodejs:scraping
差分
このページの2つのバージョン間の差分を表示します。
| 次のリビジョン | 前のリビジョン | ||
| nodejs:scraping [2023/10/26 14:07] – 作成 mikoto | nodejs:scraping [2023/12/18 20:48] (現在) – mikoto | ||
|---|---|---|---|
| 行 1: | 行 1: | ||
| - | スクレイピング | + | ====== スクレイピング ====== |
| + | |||
| + | [[: | ||
| + | |||
| + | 必要なパッケージをインストールする | ||
| + | <code bash> | ||
| + | npm install cheerio axios | ||
| + | </code> | ||
| + | |||
| + | ===== 例 ===== | ||
| + | <code javascript> | ||
| + | // index.js | ||
| + | |||
| + | const axios = require(' | ||
| + | const cheerio = require(' | ||
| + | |||
| + | // Slack Incoming WebhooksのURLを設定 | ||
| + | const slackWebhookUrl = ''; | ||
| + | |||
| + | // Axiosを使用してHTMLを取得 | ||
| + | axios.get(url) | ||
| + | .then(response => { | ||
| + | // 取得したHTMLをCheerioでパース | ||
| + | const $ = cheerio.load(response.data); | ||
| + | |||
| + | // スクレイピング対象の要素を指定してデータを取得 | ||
| + | // .modWhiteBox01 | ||
| + | const targetDiv = $(' | ||
| + | |||
| + | // div要素内の uniMainList セレクタ指定して取得 | ||
| + | const innerTargetDiv = targetDiv.find(' | ||
| + | |||
| + | // その中の li 要素を取得 | ||
| + | const liElements = innerTargetDiv.find(' | ||
| + | |||
| + | const items = []; | ||
| + | |||
| + | liElements.each((index, | ||
| + | // 改行で分割し、空白でない行のみを取り出す | ||
| + | var lines = $(element).text().replace(/ | ||
| + | |||
| + | const date = lines[0].trim(); | ||
| + | const tag = lines[1].trim(); | ||
| + | const title = lines[2].trim(); | ||
| + | const today = new Date(); | ||
| + | const yesterday = new Date(); | ||
| + | yesterday.setDate(today.getDate() - 1); | ||
| + | // | ||
| + | const formattedToday = today.toISOString().split(' | ||
| + | if (tag.includes(' | ||
| + | items.push({ | ||
| + | date: date, | ||
| + | tag: tag, | ||
| + | title: title | ||
| + | }); | ||
| + | } | ||
| + | }); | ||
| + | console.log(items); | ||
| + | |||
| + | |||
| + | }) | ||
| + | .catch(error => { | ||
| + | console.error(' | ||
| + | }); | ||
| + | </code> | ||
| + | |||
| + | 以下を実行する | ||
| + | <code bash> | ||
| + | npm run start | ||
| + | </code> | ||
| + | |||
| + | ==== 進行中のコード ==== | ||
| + | <code javascript> | ||
| + | // index.js | ||
| + | |||
| + | const axios = require(' | ||
| + | const cheerio = require(' | ||
| + | const fs = require(' | ||
| + | const csv = require(' | ||
| + | const createCsvWriter = require(' | ||
| + | |||
| + | |||
| + | // スクレイピング対象のURL | ||
| + | //const url = ' | ||
| + | |||
| + | const urls = [ | ||
| + | // URLs | ||
| + | ]; | ||
| + | |||
| + | const data = []; | ||
| + | |||
| + | const csvWriter = createCsvWriter({ | ||
| + | path: ' | ||
| + | encoding: ' | ||
| + | header: [ | ||
| + | { id: ' | ||
| + | { id: ' | ||
| + | { id: ' | ||
| + | ] | ||
| + | }); | ||
| + | |||
| + | // Axios header | ||
| + | const headers = { | ||
| + | //' | ||
| + | ' | ||
| + | } | ||
| + | |||
| + | // 非同期処理を扱うためにasync functionを使用 | ||
| + | async function fetchData() { | ||
| + | for (const url of urls) { | ||
| + | let price = ''; | ||
| + | let stock = ''; | ||
| + | try { | ||
| + | const response = await axios.get(url, | ||
| + | |||
| + | const $ = cheerio.load(response.data); | ||
| + | |||
| + | if (url.includes(' | ||
| + | const priceElement = $('# | ||
| + | if (priceElement.length > 0) { | ||
| + | price = priceElement.html().trim(); | ||
| + | } else { | ||
| + | price = ''; | ||
| + | } | ||
| + | const stockInputElement = $(' | ||
| + | if (stockInputElement.length > 0 ) { | ||
| + | // Value属性の値を取得 | ||
| + | const valueAttribute = stockInputElement.attr(' | ||
| + | if (valueAttribute === ' | ||
| + | stock = ' | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else if (url.includes(' | ||
| + | // 価格を取得 | ||
| + | price = $(' | ||
| + | .find(' | ||
| + | |||
| + | // 在庫状況を確認 | ||
| + | const addToCartBtn = $(' | ||
| + | .find(' | ||
| + | if (addToCartBtn === ' | ||
| + | stock = ' | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else if (url.includes(' | ||
| + | price = $('# | ||
| + | |||
| + | const addToCartBtn = $('# | ||
| + | const isDisabled = addToCartBtn.prop(' | ||
| + | if (isDisabled) { | ||
| + | stock = ' | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else if (url.includes(' | ||
| + | const priceGroupEl = $(' | ||
| + | const priceWithoutCamma = priceGroupEl.find(' | ||
| + | price = parseInt(priceWithoutCamma, | ||
| + | stock = ' | ||
| + | } | ||
| + | else if (url.includes(' | ||
| + | const itemElRight = $(' | ||
| + | const itempriceArea = itemElRight.find(' | ||
| + | const priceCurrency = itempriceArea.children(' | ||
| + | price = parseInt(priceCurrency, | ||
| + | console.log(price); | ||
| + | |||
| + | const addToCartBtnEl = itemElRight.find('# | ||
| + | if (addToCartBtnEl.length > 0) { | ||
| + | stock = ' | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else if (url.includes(' | ||
| + | const price = $(' | ||
| + | .text().replace(/ | ||
| + | |||
| + | const sellBtnEl = $(' | ||
| + | if (sellBtnEl.length > 0) { | ||
| + | stock = ' | ||
| + | } else { | ||
| + | stock = ' | ||
| + | } | ||
| + | } else { | ||
| + | price = ''; | ||
| + | stock = ''; | ||
| + | } | ||
| + | |||
| + | data.push({ url, price, stock }); | ||
| + | console.log(data); | ||
| + | |||
| + | } catch (error) { | ||
| + | console.error(' | ||
| + | data.push({ url, price, stock }); | ||
| + | } | ||
| + | } | ||
| + | |||
| + | // 全ての非同期処理が完了した後にCSVに書き込み | ||
| + | await csvWriter.writeRecords(data); | ||
| + | console.log(' | ||
| + | } | ||
| + | |||
| + | // fetchDataを呼び出し | ||
| + | fetchData(); | ||
| + | </code> | ||
nodejs/scraping.1698296868.txt.gz · 最終更新: 2023/10/26 14:07 by mikoto