summaryrefslogtreecommitdiff
path: root/crawler.js
blob: 6b2d5b6a9918c1f4101041c5e65edb04537e61e5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
const crawlService = require("crawler");
const crypto = require("crypto");
const database = require("./database");

/**
 * Crawler instance that indexes basic metadata for each page it fetches.
 *
 * At most 10 connections are opened at once. For every successful response
 * the callback extracts the page's title, meta description and meta keywords
 * and passes one document to `database.index('crawled', 'site', [...])`.
 * The document id is the base64-encoded SHA-256 hash of the page URL, so the
 * same URL always maps to the same id.
 *
 * Callback parameters:
 *   error - set when the request failed; it is logged and the page is skipped.
 *   res   - the response; `res.$` is used as the document selector and
 *           `res.request.uri.href` as the page URL.
 *   done  - called exactly once per invocation, on every path, including
 *           when extraction or indexing throws.
 */
const crawler = new crawlService({
    maxConnections: 10,
    callback: (error, res, done) => {
        try {
            if (error) {
                console.log(error);
                return;
            }

            const $ = res.$;
            if (typeof $ !== "function") {
                // No selector was attached to this response (presumably a
                // non-HTML body; confirm against the crawler docs), so there
                // is no metadata to extract.
                console.log(`Skipping ${res.request.uri.href}: no parsed document available`);
                return;
            }

            const url = res.request.uri.href;

            // attr() yields undefined when the page has no keywords meta tag;
            // calling split() on that would throw, so fall back to an empty list.
            const rawKeywords = $("meta[name=keywords]").attr("content");
            const keywords = typeof rawKeywords === "string"
                ? rawKeywords
                    .split(",")
                    .map((keyword) => keyword.trim())
                    .filter((keyword) => keyword !== "")
                : [];

            // NOTE(review): if database.index is asynchronous, a rejection is
            // not observed here; confirm against ./database.
            database.index('crawled', 'site', [
                {
                    "id": crypto.createHash('sha256').update(url).digest('base64'),
                    "url": url,
                    "title": $("title").text(),
                    "description": $("meta[name=description]").attr("content"),
                    "keywords": keywords
                }
            ]);
        } catch (indexError) {
            // A single malformed page must not abort the whole crawl; log it
            // in the same way as request errors.
            console.log(indexError);
        } finally {
            // Always signal completion, even after a failure above.
            done();
        }
    }
});

// Queue the starting URL on the crawler.
const seedUrl = 'http://www.amazon.com';
crawler.queue(seedUrl);