core/parcer/desnet_parce.php

119 lines
5.1 KiB
PHP
Raw Normal View History

2023-02-08 16:59:59 +05:00
<?php
// Keep PHP errors and warnings out of the script output.
ini_set( 'display_errors', 0 );
// Force the server name before the includes below are loaded.
// NOTE(review): presumably needed because the included files read SERVER_NAME
// and it is not set when this script is started from the command line — confirm.
$_SERVER['SERVER_NAME']='tk-ligat.ru';
// NOTE(review): judging by the paths, these are the per-site settings followed by
// the DB and JSON helpers; the DB class used further down is assumed to come
// from db.php — confirm.
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
/**
 * Parse an HTML string and return its root element as a nested array.
 *
 * The markup is converted from UTF-8 to the 'HTML-ENTITIES' encoding before it
 * is handed to DOMDocument, and the resulting document element is passed to
 * element_to_obj(). Diagnostics raised while converting or parsing are
 * suppressed with the @ operator.
 *
 * @param string $html HTML markup, treated as UTF-8.
 * @return array Nested array in the layout produced by element_to_obj().
 */
function html_to_obj($html) {
    $document = new DOMDocument();
    // Non-ASCII characters become HTML entities before the parser sees them.
    $encoded = @mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
    @$document->loadHTML($encoded);
    $root = $document->documentElement;
    return element_to_obj($root);
}
/**
 * Recursively convert a DOM node into a nested associative array.
 *
 * Layout of the returned array:
 *  - "tag":      tag name of the element, or null when the node is not an
 *                element (for example a comment) or when null was passed in;
 *  - one entry per attribute, keyed by the attribute name;
 *  - "html":     text of the last direct text-node child (every text node
 *                overwrites the previous value);
 *  - "children": converted non-text child nodes, in document order.
 *
 * Attribute names share the key space with "tag", "html" and "children", so an
 * attribute with one of those names overwrites (or is overwritten by) that entry.
 *
 * Non-element nodes expose no attributes; they are skipped explicitly instead
 * of relying on @, which cannot silence the warning foreach raises for null.
 *
 * @param DOMNode|null $element Node to convert.
 * @return array Nested array described above.
 */
function element_to_obj($element) {
    if (!($element instanceof DOMNode)) {
        // Nothing to walk (e.g. the document had no root element).
        return array("tag" => null);
    }

    $obj = array("tag" => ($element instanceof DOMElement) ? $element->tagName : null);

    if ($element->attributes !== null) {
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
    }

    if ($element->childNodes !== null) {
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                $obj["html"] = $subElement->wholeText;
            } else {
                // Text nodes never land in "children", so child indexes only
                // count elements, comments and other non-text nodes.
                $obj["children"][] = element_to_obj($subElement);
            }
        }
    }

    return $obj;
}
/**
 * Replace every backslash in the given text with a forward slash.
 *
 * @param string $text Input text.
 * @return string Text with "\" replaced by "/".
 */
function leopak($text) {
    return str_replace("\\", "/", $text);
}
/**
 * Case-insensitive search for $str inside $txt.
 *
 * @param string $txt Text to search in.
 * @param string $str Text to look for.
 * @return int 1 when $str occurs in $txt, 0 otherwise.
 */
function findtxt($txt, $str) {
    $position = stripos($txt, $str);
    // A match at offset 0 is still a match, hence the strict comparison.
    return ($position === false) ? 0 : 1;
}
/**
 * Collect the targets of <a href="..."> tags found in $html and store the ones
 * that are not yet present in the `sm2` table.
 *
 * A link that does not contain 'https://www.deznet.ru' gets that prefix added,
 * with backslashes converted to forward slashes by leopak(). A link counts as
 * already stored when an existing `link` value ends with it (LIKE '%<link>').
 * New links are inserted with `status` = 1.
 *
 * The link comes from scraped markup, so it is passed to the database as a
 * bound parameter rather than concatenated into the SQL text.
 *
 * @param string $html HTML markup to scan.
 * @return void
 */
function add_link($html){
    preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);
    // Capture group 1 holds the link target itself.
    if (empty($matches[1])) {
        return;
    }
    foreach ($matches[1] as $link) {
        if (findtxt($link, 'https://www.deznet.ru') == 0) {
            $link = 'https://www.deznet.ru' . leopak($link);
        }
        $id = DB::getValue("SELECT `id` FROM `sm2` WHERE `link` LIKE ? LIMIT 1", array('%' . $link));
        if (!$id) {
            DB::add("INSERT INTO `sm2` (`link`, `status`) VALUES (?, ?)", array($link, 1));
        }
    }
}
/**
 * Build a random string of $number characters.
 *
 * Characters are drawn with rand() from the lowercase Latin letters except
 * "q" and "w", plus the digits 0-9.
 *
 * @param int $number Length of the string to generate.
 * @return string Random string; empty when $number is 0 or negative.
 */
function generate_password($number){
    $alphabet = 'abcdefghijklmnoprstuvxyz1234567890';
    $lastIndex = strlen($alphabet) - 1;
    $result = "";
    for ($produced = 0; $produced < $number; $produced++) {
        $result .= $alphabet[rand(0, $lastIndex)];
    }
    return $result;
}
// Import loop: take every `sm2` row that has stored HTML and is not yet marked
// as added, pull the product fields out of the page and create the product.
$LinkList = DB::getAll( "SELECT * FROM `sm2` WHERE `add` IS NULL AND `html` IS NOT NULL");
if (!is_array($LinkList)) {
    // Nothing to iterate when the query did not return a row list.
    $LinkList = array();
}
foreach ($LinkList as $row) {
    // Stored pages are base64-encoded, zlib-compressed HTML.
    $html = gzuncompress(base64_decode($row['html']));
    if ($html === false || $html === '') {
        // Corrupt or empty payload: there is nothing to parse for this row.
        continue;
    }
    $a = html_to_obj($html);
    unset($html);

    // Fixed positions inside the converted tree. The values come back empty
    // when a page does not have the expected structure.
    $title = $a['children'][1]['children'][9]['children'][5]['children'][0]['children'][1]['children'][0]['children'][0]['children'][0]['children'][8]['children'][1]['children'][0]['html'];
    $txt = $a['children'][1]['children'][9]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][3]['content'] ;
    $code = $a['children'][1]['children'][9]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][4]['content'] ;
    $img = $a['children'][1]['children'][9]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][5]['children'][0]['children'][0]['children'][0]['children'][0]['href'] ;
    $artikul = $a['children'][1]['children'][9]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][5]['children'][1]['children'][0]['children'][0]['children'][1]['children'][0]['children'][1]['html'] ;
    $partner = "deznet";

    echo $title . "\n";
    if (!$title) {
        // No title found: the page is skipped and stays unmarked.
        continue;
    }

    // Check whether this product is already in the database.
    $id = DB::getValue("SELECT `id` FROM `tovar` WHERE `title`=? AND `code`=? AND `partner`=?", array($title, $code, $partner));
    if (!$id) {
        $tovar_id = DB::add( "INSERT INTO `tovar` (`title`, `txt`, `code`, `artikul`, `partner`, `linkimg`, `status`, `category`) VALUES (?, ?, ?, ?, ?, ?, 1, 0)", array( $title, nl2br($txt), $code, $artikul, $partner, $img ) );
        echo $img . "\n";
        if ($img) {
            // The image path comes from scraped markup, so both the URL and the
            // target path are shell-escaped. Pages without an image path are
            // not downloaded (that would save the site's front page as a .jpg).
            $filename = generate_password(30);
            $source = 'https://www.deznet.ru' . $img;
            $target = '/home/cloud/core/img/tk-ligat.ru/tovar/' . $filename . '.jpg';
            exec('wget ' . escapeshellarg($source) . ' -O ' . escapeshellarg($target));
            DB::add("INSERT INTO `tovar_img` (`filename`, `tovar_id`) VALUES (?, ?)", array( $filename, $tovar_id ) );
        }
        // Mark the source row as imported, keyed by its id.
        // NOTE(review): rows whose product already exists are still left
        // unmarked, as before, and will be re-parsed on every run — confirm
        // whether they should be marked too.
        DB::set("UPDATE `sm2` SET `add`=1 WHERE `id`=?", array($row['id']));
    }
    unset($id, $title, $txt, $code, $img, $artikul);
}
//sleep(120);
//exec ('php desnet_parce.php > 2/1.txt')
?>