Authenticating and scraping a web page using PHP and cURL
PHP:
-
<?php

// Logs in to a site with a form POST, then fetches a protected page over the
// same cURL handle so that the cookies received during login accompany the
// second request.
//
// POSTURL and POSTDATA are placeholders that must be defined before this
// script runs: POSTURL is the URL the login form submits to, POSTDATA holds
// the form fields to send (anything CURLOPT_POSTFIELDS accepts).
//
// Results are left in $loggedInOutput and $scrappeddata. Because
// CURLOPT_HEADER is enabled, both contain the response headers followed by
// the body.

$cURL = curl_init();
if ($cURL === false) {
    throw new RuntimeException('Unable to initialise a cURL handle');
}

try {
    ### Let's login
    curl_setopt_array($cURL, [
        CURLOPT_URL            => POSTURL,      // Destination
        CURLOPT_POST           => true,         // Please POST
        CURLOPT_POSTFIELDS     => POSTDATA,     // Data to POST
        CURLOPT_FOLLOWLOCATION => false,        // Do NOT follow redirects
        CURLOPT_HEADER         => true,         // Include response headers in the output
        CURLOPT_RETURNTRANSFER => true,         // Return the output instead of printing it
        CURLOPT_COOKIEJAR      => 'cookie.txt', // Store cookies in cookie.txt
    ]);

    $loggedInOutput = curl_exec($cURL);
    if ($loggedInOutput === false) {
        // Without a successful login there is no session to scrape with.
        throw new RuntimeException('Login request failed: ' . curl_error($cURL));
    }

    ### Now let's scrape the page we need
    // CURLOPT_HEADER and CURLOPT_RETURNTRANSFER stay in effect on the handle,
    // so only the options that differ from the login request are set here.
    curl_setopt_array($cURL, [
        // Switch the handle back to GET. Setting CURLOPT_POSTFIELDS (even to
        // an empty string) implies POST in libcurl, so it must not be used to
        // "clear" the previous POST body.
        CURLOPT_HTTPGET    => true,
        CURLOPT_COOKIEFILE => 'cookie.txt', // Read previously stored cookies
        CURLOPT_URL        => "http://yourdomain.com/realpagetoscrape.aspx",
    ]);

    $scrappeddata = curl_exec($cURL);
    if ($scrappeddata === false) {
        throw new RuntimeException('Scrape request failed: ' . curl_error($cURL));
    }

    ### Now we can do whatever we need with that data

    ### ...
} finally {
    // Always release the handle, including on the error paths above.
    // (A no-op since PHP 8.0, but required on older versions.)
    curl_close($cURL);
}

?>