core/parcer/getleopak.php

138 lines
4.4 KiB
PHP

<?php
// Pin the host name before the site settings file is loaded.
// NOTE(review): presumably the settings file keys off $_SERVER['SERVER_NAME'] and this
// script is run from the CLI, where that entry is not populated — confirm.
$_SERVER['SERVER_NAME']='tk-ligat.ru';
// Site settings first, then the project's db.php and json.php
// (db.php presumably provides the DB class used below — confirm).
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
/**
 * Case-insensitive "contains" check.
 *
 * @param string $txt Text to search in.
 * @param string $str Fragment to look for.
 * @return int 1 when $str occurs anywhere in $txt (letter case ignored), otherwise 0.
 */
function findtxt($txt, $str){
    // stripos() yields position 0 for a match at the very start, so the result
    // has to be compared against false with a strict operator.
    return (stripos($txt, $str) !== false) ? 1 : 0;
}
/**
 * Strips every occurrence of the plain-http donor origin from a string.
 *
 * Only the exact text "http://market.leopak.ru" is removed; the https form
 * does not contain that substring and is therefore left untouched.
 *
 * @param string $text Link (or any text) to clean.
 * @return string $text with all "http://market.leopak.ru" occurrences removed.
 */
function leopak ($text) {
    return str_replace('http://market.leopak.ru', '', $text);
}
/**
 * Collects the href targets of all <a> tags in an HTML fragment and inserts
 * the ones that are not yet present into the `donorLinks` table.
 *
 * Each href is turned into an absolute donor URL: when it does not already
 * contain "https://market.leopak.ru" that origin is prepended, and afterwards
 * leopak() removes any embedded "http://market.leopak.ru". Links containing
 * "yandex.ru" are never inserted. Every inserted link is echoed.
 *
 * @param string|false|null $html Page markup; non-string or empty input
 *                                (as get_page() returns for a failed or
 *                                skipped download) is ignored.
 * @return void
 */
function add_link($html){
    // Nothing to scan when the download failed or produced no content.
    if (!is_string($html) || $html === '') {
        return;
    }

    // Capture group 1 is the href value, cut at the first quote, space, '>' or '#'.
    $found = preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);
    if (!$found) {
        // 0 matches, or false on a PCRE error: either way there is nothing to store.
        return;
    }

    foreach ($matches[1] as $url) {
        if (findtxt($url, 'https://market.leopak.ru') == 0) {
            $link = 'https://market.leopak.ru' . $url;
        } else {
            $link = $url;
        }
        // Collapses "https://market.leopak.ru" + "http://market.leopak.ru/x" into one origin.
        $link = leopak($link);

        $id = DB::getValue("SELECT `id` FROM `donorLinks` WHERE `link`=?", $link);
        if (!$id && findtxt($link, 'yandex.ru') == 0) {
            DB::add("INSERT INTO `donorLinks` (`link`) VALUES(?)", $link);
            echo "Добавлено " . $link . "\n";
        }
    }
}
/**
 * Downloads a page with cURL and returns its body.
 *
 * The link is always echoed first. Links that contain "ruhttp" (an absolute
 * URL glued onto the donor origin, e.g. "https://market.leopak.ruhttp://...")
 * are not requested at all. For requested links the HTTP status code is
 * echoed, and a cURL error, if any, is echoed together with the link.
 *
 * @param string $link Absolute URL to fetch.
 * @return string|false|null Response body; false when cURL fails;
 *                           null when the link was skipped.
 */
function get_page($link){
    $agent = 'Mozilla/5.0 (compatible; YandexBot/3.0)';
    echo $link . "\n\r";

    // The original guard looked for 'ruhtttp' (three t's) and so never matched a
    // glued URL; the misspelled form is still honoured for backward compatibility.
    if (stripos($link, 'ruhttp') !== false || stripos($link, 'ruhtttp') !== false) {
        return null;
    }

    $ch = curl_init($link);
    if ($ch === false) {
        echo 'Ошибка curl: curl_init ' . $link . " \n";
        return false;
    }

    // NOTE(review): the referer and the fixed session cookie below are hard-coded
    // and look unrelated to market.leopak.ru — confirm they are still wanted.
    curl_setopt($ch, CURLOPT_REFERER, 'https://www.samsonopt.ru');
    curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    // Cookie jar file is named after the md5 of the client address; when
    // REMOTE_ADDR is not set this is the md5 of the empty string.
    $remoteAddr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
    $cookieFile = __DIR__ . '/cookies/' . md5($remoteAddr) . '.txt';
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
    curl_setopt($ch, CURLOPT_COOKIE, "PMBC=96152e8e9a0168a731539c5e52c6b39a; PHPSESSID=jl0i13pn3157qca807jgp0jqa7; ServerName=WoW+Circle+3.3.5a+x5; serverId=1");

    $response_data = curl_exec($ch);
    if (curl_errno($ch) > 0) {
        echo 'Ошибка curl: ' . curl_error($ch) . ' ' . $link . " \n";
    }
    $http_code = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
    echo $http_code . "\n";
    curl_close($ch);

    return $response_data;
}
// ---------------------------------------------------------------------------
// Step 1: download up to 500 randomly chosen links that are flagged `check`=1,
// harvest the anchors of every page and cache the page body in `html`.
// ---------------------------------------------------------------------------
// The column list previously read  html``  (misplaced backticks), which is not valid SQL.
$LinkList = DB::getAll( 'SELECT `id`, `link`, `html` FROM `donorLinks` WHERE `check`=1 AND `link` NOT LIKE "%.jpg" AND `link` NOT LIKE "%yandex.ru%" ORDER BY RAND() LIMIT 500');
if (!is_array($LinkList)) {
    $LinkList = array();
}
foreach ($LinkList as $row) {
    // Stored links may be relative; make them absolute on the donor origin.
    if (findtxt($row['link'], 'https://market.leopak.ru') == 0) {
        $link = 'https://market.leopak.ru' . $row['link'];
    } else {
        $link = $row['link'];
    }

    $page = get_page($link);
    // A failed or skipped download must not wipe a previously cached body.
    if (is_string($page) && $page !== '') {
        add_link($page);
        // Body is stored as base64(gzcompress(page)).
        DB::set("UPDATE `donorLinks` SET `html`=? WHERE `id`=?", array(base64_encode(gzcompress($page, 9)), $row['id']));
    }

    // Pause between requests.
    sleep(3);
}

// ---------------------------------------------------------------------------
// Step 2: re-scan every cached page so that links from earlier runs are
// harvested as well.
// ---------------------------------------------------------------------------
$htmls=DB::getAll("SELECT `html` FROM `donorLinks` WHERE `check`=1 AND `link` NOT LIKE '%.jpg' AND `link` NOT LIKE '%yandex.ru%'");
if (!is_array($htmls)) {
    $htmls = array();
}
foreach ($htmls as $row) {
    // Rows that were never downloaded have nothing to decode.
    if (empty($row['html'])) {
        continue;
    }
    $packed = base64_decode($row['html']);
    if ($packed === false || $packed === '') {
        continue;
    }
    $html = gzuncompress($packed);
    if ($html === false) {
        // Corrupt cache entry; skip it.
        continue;
    }
    add_link($html);
}

// ---------------------------------------------------------------------------
// Step 3: refresh the `check` flags.
// A '?' inside a quoted SQL literal is not a bind marker, so the wildcards are
// passed as part of the bound value instead.
// ---------------------------------------------------------------------------
DB::set("UPDATE `donorLinks` SET `check`=0 WHERE `link` LIKE ?", '%yandex.ru%');
DB::set("UPDATE `donorLinks` SET `check`=0 WHERE `link` LIKE ?", '%.jpg');
DB::set("UPDATE `donorLinks` SET `check`=0 WHERE `link` LIKE ?", '%tel:%');
// NOTE(review): this statement runs last and sets `check`=1 again for .jpg and
// tel: rows that start with the donor origin, undoing the two updates above.
// Order kept as in the original — confirm whether it should run first.
DB::set("UPDATE `donorLinks` SET `check`=1 WHERE `link` LIKE 'https://market.leopak.ru%' AND `link` NOT LIKE '%yandex.ru%'");
?>