blob: 6b2d5b6a9918c1f4101041c5e65edb04537e61e5 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
const crawlService = require("crawler");
const crypto = require("crypto");
const database = require("./database");
/**
 * Crawler instance whose per-page callback extracts basic metadata
 * (URL, title, meta description, meta keywords) and hands it to
 * `database.index`.
 *
 * Callback parameters:
 *   error - set when the request failed; it is logged and the page is skipped.
 *   res   - the response; `res.$` is the selector function used to query the
 *           document and `res.request.uri.href` is the URL that was requested.
 *   done  - must be invoked once the page has been handled; it is called from
 *           a `finally` so it runs even if extraction or indexing throws.
 */
const crawler = new crawlService({
  maxConnections: 10,
  callback: (error, res, done) => {
    try {
      if (error) {
        console.log(error);
        return;
      }

      const $ = res.$;
      if (typeof $ !== "function") {
        // NOTE(review): res.$ appears to be absent when the response was not
        // parsed as a document (e.g. non-HTML content) - nothing to extract.
        return;
      }

      const url = res.request.uri.href;

      // attr() yields undefined when the page has no keywords meta tag;
      // calling split() on that used to throw and skip done().
      const rawKeywords = $("meta[name=keywords]").attr("content");
      const keywords = typeof rawKeywords === "string" ? rawKeywords.split(", ") : [];

      // NOTE(review): 'crawled' and 'site' are presumably the index and type
      // names expected by ./database - confirm against that module.
      database.index('crawled', 'site', [
        {
          // Base64-encoded SHA-256 of the URL serves as the document id.
          "id": crypto.createHash('sha256').update(url).digest('base64'),
          "url": url,
          "title": $("title").text(),
          "description": $("meta[name=description]").attr("content"),
          "keywords": keywords
        }
      ]);
    } finally {
      // Always signal completion, whatever happened above.
      done();
    }
  }
});
// Queue the starting URL on the crawler.
const seedUrl = 'http://www.amazon.com';
crawler.queue(seedUrl);
|