Crawler

mirocow/Crawler — веб-паук на PHP, аналог Teleport Pro

Использование

<?php
 
// Usage example: crawl a site and save selected pages to disk.
require_once('./Crawler.php');
 
// Directory where matching pages are stored.
$path = './data';

// file_put_contents() does not create missing directories, so make sure it exists.
if (!is_dir($path)) {
  mkdir($path, 0777, true);
}
 
// Second constructor argument is presumably the crawl depth limit — confirm in Crawler.php.
$crawler = new Crawler('http://yandex.ru', 5);

// Regular expressions assigned to the crawler's allow_links property.
$crawler->allow_links = [
  '~.*?rom=morda.*?~',
  '~.*?job_industry.*?~'
];

// Hook that receives the Goutte client; used here to set a custom User-Agent header.
$crawler->onSettings = function(Goutte\Client &$client){
  $client->setHeader('User-Agent', 'android');
};

// Hook that receives a URI and its content.
$crawler->onContent = function($uri, $content) use ($path){
  echo "+ $uri\n";
  // Capture the numeric job_industry id so it can be used as the file name.
  // (Without the capture group $matches[1] is undefined and every page would
  // be written to the same "<path>/.html" file.)
  if (preg_match('~.*?search\?job_industry=(\d+)$~', $uri, $matches) === 1) {
    file_put_contents($path . '/' . $matches[1] . '.html', $content);
  }
};

$crawler->traverse();