summary | refs | log | tree | commit | diff
path: root/crawler/WebRequest.php
diff options
context:
space:
mode:
Diffstat (limited to 'crawler/WebRequest.php')
-rw-r--r-- crawler/WebRequest.php 29
1 files changed, 29 insertions, 0 deletions
diff --git a/crawler/WebRequest.php b/crawler/WebRequest.php
new file mode 100644
index 0000000..a72efc6
--- /dev/null
+++ b/crawler/WebRequest.php
@@ -0,0 +1,29 @@
+<?php
+/**
+ * User: Marvin Borner
+ * Date: 16/09/2018
+ * Time: 21:53
+ */
+
+class WebRequest
+{
+    /**
+     * Download a URL with cURL, following HTTP redirects.
+     *
+     * The request identifies itself with a Googlebot user agent string, asks
+     * for any content encoding cURL supports (empty CURLOPT_ENCODING) and
+     * gives up after 5 seconds.
+     *
+     * @param string $url Address to fetch.
+     *
+     * @return array Four-element list:
+     *               [0] response body as returned by curl_exec(), or false when the transfer failed,
+     *               [1] HTTP status code of the last response (0 when none was received),
+     *               [2] downloaded size in kilobytes as a string ending in "KB\n",
+     *               [3] final URL after redirects, or $url when cURL reports none.
+     */
+    public function getContent($url)
+    {
+        $curl = curl_init($url);
+        if ($curl === false) {
+            // No handle could be created: report a failed transfer using the
+            // same tuple shape so callers do not need a special case.
+            return [false, 0, "0KB\n", $url];
+        }
+
+        curl_setopt_array($curl, [
+            CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
+            CURLOPT_ENCODING => '',
+            CURLOPT_TIMEOUT => 5,
+            CURLOPT_FOLLOWLOCATION => true,
+            CURLOPT_RETURNTRANSFER => true,
+        ]);
+
+        $content = curl_exec($curl);
+        $responseCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
+        $downloadSize = (curl_getinfo($curl, CURLINFO_SIZE_DOWNLOAD) / 1000) . "KB\n";
+
+        // Response headers are not part of $content (CURLOPT_HEADER is off), so
+        // searching the body for "Location:" cannot detect a redirect and may
+        // pick up unrelated page text. cURL already tracks the URL it ended up
+        // at after following 301/302 responses.
+        $effectiveUrl = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
+        curl_close($curl);
+
+        $updatedUrl = (is_string($effectiveUrl) && $effectiveUrl !== '') ? $effectiveUrl : $url;
+
+        return [$content, $responseCode, $downloadSize, $updatedUrl];
+    }
+} \ No newline at end of file