« Back to Index

Web Scraping with NodeJS (copied from http://www.storminthecastle.com/2013/08/25/use-node-js-to-extract-data-from-the-web-for-fun-and-profit/)

View original Gist on GitHub

1. package.json

{
  "name": "WebScraping",
  "main": "scrap.js",
  "dependencies": {
    "cheerio": "~0.13.1"
  }
}

2. scrap.js

var http = require('http');

/**
 * Fetches `url` over HTTP and passes the response body to `callback`.
 *
 * @param {string|URL|Object} url - Request target; handed to `http.get` as-is.
 * @param {function(?string)} callback - Invoked exactly once: with the body
 *     decoded as UTF-8 text on success, or with `null` if the request or the
 *     response stream reports an error.
 */
function download(url, callback) {
    var finished = false;

    // Deliver the outcome once only, even if both an 'error' and an 'end'
    // event end up being emitted for the same request.
    function finish(result) {
        if (finished) {
            return;
        }
        finished = true;
        callback(result);
    }

    http.get(url, function(res) {
        var data = '';

        // Decode at the stream level so a multi-byte character that straddles
        // two chunks is reassembled rather than turned into U+FFFD by
        // stringifying each Buffer separately.
        res.setEncoding('utf8');

        res.on('data', function(chunk) {
            data += chunk;
        });

        res.on('end', function() {
            finish(data);
        });

        // A failure while the body is streaming is reported the same way as a
        // failed request instead of surfacing as an unhandled 'error' event.
        res.on('error', function() {
            finish(null);
        });
    }).on('error', function() {
        finish(null);
    });
}

// Expose download() as this module's sole export so callers can require() it directly.
module.exports = download;

3. download.js

var cheerio  = require('cheerio'); // converts string into dom tree and provides querying (and other) methods
var download = require('./scrap.js');

// Fetch the page and print the text of every <h2> heading found in it.
download('http://www.integralist.co.uk/', function(html) {
    // Any falsy payload (download passes null on failure; an empty body is
    // falsy too) is reported as an error.
    if (!html) {
        console.log('error');
        return;
    }

    var $ = cheerio.load(html);
    var headings = $('h2');

    headings.each(function(position, heading) {
        console.log($(heading).text());
    });
});