summaryrefslogtreecommitdiff
path: root/crawler.js
diff options
context:
space:
mode:
Diffstat (limited to 'crawler.js')
-rw-r--r--crawler.js26
1 files changed, 26 insertions, 0 deletions
diff --git a/crawler.js b/crawler.js
new file mode 100644
index 0000000..6b2d5b6
--- /dev/null
+++ b/crawler.js
@@ -0,0 +1,26 @@
+const crawlService = require("crawler");
+const crypto = require("crypto");
+const database = require("./database");
+
+const crawler = new crawlService({
+ maxConnections: 10,
+ callback: (error, res, done) => {
+ if (error) {
+ console.log(error);
+ } else {
+ const $ = res.$;
+ database.index('crawled', 'site', [
+ {
+ "id": crypto.createHash('sha256').update(res.request.uri.href).digest('base64'),
+ "url": res.request.uri.href,
+ "title": $("title").text(),
+ "description": $("meta[name=description]").attr("content"),
+ "keywords": $("meta[name=keywords]").attr("content").split(", ")
+ }
+ ]);
+ }
+ done();
+ }
+});
+
+crawler.queue('http://www.amazon.com');