core/parcer/leopak_parce.php

145 lines
6.7 KiB
PHP
Raw Permalink Normal View History

2023-02-08 16:59:59 +05:00
<?php
// Do not print PHP errors/warnings into the output of this script.
ini_set( 'display_errors', 0 );
// The server name is set by hand before the includes below are loaded
// (presumably the settings file reads it to pick the site - TODO confirm).
$_SERVER['SERVER_NAME']='tk-ligat.ru';
// Site settings, then the helpers this script calls as DB::... and json::...
// (presumably defined in db.php and json.php - not visible from here).
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
/**
 * Parses an HTML string and converts the resulting DOM tree into nested arrays.
 *
 * The markup is treated as UTF-8 and converted to HTML entities before it is
 * handed to DOMDocument. Parser and conversion diagnostics are suppressed.
 *
 * @param string $html Raw HTML markup.
 * @return array Array tree built by element_to_obj() from the document root.
 */
function html_to_obj($html) {
    $document = new DOMDocument();
    // Suppression is kept on purpose: scraped markup is rarely valid HTML.
    $encoded = @mb_convert_encoding(@$html, 'HTML-ENTITIES', 'UTF-8');
    @$document->loadHTML($encoded);
    $root = $document->documentElement;
    return element_to_obj($root);
}
/**
 * Recursively converts a DOM node into a plain nested array.
 *
 * Result layout:
 *   - "tag"      tag name for element nodes, null for anything else;
 *   - one key per attribute, named after the attribute;
 *   - "html"     text of a direct text child (a later text child overwrites
 *                an earlier one);
 *   - "children" list with the converted non-text child nodes, in order.
 *
 * Non-element nodes (comments and the like) and a null root carry no
 * attributes; they are handled explicitly instead of iterating over null,
 * which used to raise warnings that "@" could not silence.
 *
 * @param DOMNode|null $element Node to convert.
 * @return array Array representation of the node.
 */
function element_to_obj($element) {
    $obj = array(
        "tag" => ($element instanceof DOMElement) ? $element->tagName : null,
    );

    // Nothing to walk for a missing root (e.g. an empty document).
    if (!($element instanceof DOMNode)) {
        return $obj;
    }

    // "attributes" is null for every node type except elements.
    if ($element->attributes !== null) {
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
    }

    if ($element->childNodes !== null) {
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                $obj["html"] = $subElement->wholeText;
            } else {
                $obj["children"][] = element_to_obj($subElement);
            }
        }
    }

    return $obj;
}
/**
 * Replaces every backslash in the given text with a forward slash.
 *
 * @param string $text Text (a link in this script) that may contain backslashes.
 * @return string The text with "\" replaced by "/".
 */
function leopak ($text) {
    return str_replace("\\", "/", $text);
}
/**
 * Case-insensitive substring check.
 *
 * @param string $txt Text to search in.
 * @param string $str Text to look for.
 * @return int 1 when $str occurs in $txt, 0 otherwise.
 */
function findtxt($txt, $str){
    return (stripos($txt, $str) === false) ? 0 : 1;
}
/**
 * Extracts every <a href="..."> target from the markup and stores the ones
 * that are not yet present in the `sm2` table.
 *
 * Links that do not contain 'https://www.deznet.ru' get that prefix (with
 * backslashes normalised by leopak()). A link counts as already stored when
 * an existing `link` value ends with it.
 *
 * The link comes from scraped markup, so it is bound as a query parameter
 * instead of being concatenated into the SQL text.
 *
 * @param string $html Markup to scan for links.
 * @return void
 */
function add_link($html){
    preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);
    // Group 1 of the pattern holds the link itself.
    foreach ($matches[1] as $link) {
        if (findtxt($link, 'https://www.deznet.ru') == 0) {
            $link = 'https://www.deznet.ru' . leopak($link);
        }
        // Same "ends with" LIKE match as before, but with the value bound.
        $id = DB::getValue("SELECT `id` FROM `sm2` WHERE `link` LIKE ? LIMIT 1", array('%' . $link));
        if (!$id) {
            DB::add("INSERT INTO `sm2` (`link`, `status`) VALUES (?, ?)", array($link, 1));
        }
    }
}
/**
 * Builds a random string of lowercase latin letters and digits.
 *
 * The alphabet has 34 characters; the letters "q" and "w" are not part of it.
 * Characters are picked with rand(), so the result is not suitable as a secret.
 *
 * @param int $number Length of the string to generate.
 * @return string Random string of $number characters ("" when $number <= 0).
 */
function generate_password($number){
    $alphabet = 'abcdefghijklmnoprstuvxyz1234567890';
    $lastIndex = strlen($alphabet) - 1;
    $result = "";
    for ($n = 0; $n < $number; $n++) {
        $result .= $alphabet[rand(0, $lastIndex)];
    }
    return $result;
}
/**
 * Applies three fixed substitutions to the text, in this order:
 * "," becomes ".", "middle" becomes "big", "xsmall" becomes "big".
 *
 * In this script it is used both on price text and on image URLs.
 *
 * @param string $text Text to normalise.
 * @return string Text with the substitutions applied.
 */
function parce_cena ($text) {
    $substitutions = array(
        ","      => ".",
        "middle" => "big",
        "xsmall" => "big",
    );
    foreach ($substitutions as $search => $replacement) {
        $text = str_replace($search, $replacement, $text);
    }
    return $text;
}
/**
 * Follows a list of child indexes through a tree produced by html_to_obj().
 *
 * @param array|null $node Starting node.
 * @param array      $path Child indexes to follow, outermost first.
 * @return array|null The node at the end of the path, or null when any step is missing.
 */
function leopak_node($node, array $path) {
    foreach ($path as $index) {
        if (!is_array($node) || !isset($node['children'][$index])) {
            return null;
        }
        $node = $node['children'][$index];
    }
    return $node;
}

/**
 * Reads one key ("html", "src", "children", ...) of the node found at $path.
 *
 * @param array|null $node Starting node.
 * @param array      $path Child indexes to follow, outermost first.
 * @param string     $key  Key to read on the target node.
 * @return mixed The stored value, or null when the node or the key is missing.
 */
function leopak_field($node, array $path, $key) {
    $target = leopak_node($node, $path);
    return (is_array($target) && isset($target[$key])) ? $target[$key] : null;
}

// Process every stored page that has HTML and is not yet marked as added.
$LinkList = DB::getAll( "SELECT * FROM `sm` WHERE `add` IS NULL AND `html` IS NOT NULL");
if (!is_array($LinkList)) {
    $LinkList = array();
}

foreach ($LinkList as $row) {
    $a = html_to_obj(base64_decode($row['html']));

    // Every value below lives under the same node of the page; the index
    // paths are positional and tied to the exact layout of the source page.
    $card = leopak_node($a, array(1, 8, 1, 0, 0, 2));

    $title   = leopak_field($card, array(0, 0, 0), 'html');
    $artikul = trim((string) leopak_field($card, array(2, 0, 0, 1, 1), 'html'));
    $txt     = leopak_field($card, array(4, 0, 1, 0, 0), 'html');
    $img     = leopak_field($card, array(3, 0, 0, 2, 0, 0, 0), 'children');
    $cena    = leopak_field($card, array(3, 1, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0), 'html');

    // Price text uses a decimal comma; the stored price is the page price times 1.5.
    // The cast keeps a missing/non-numeric price from raising a TypeError on PHP 8.
    $cena = (float) parce_cena((string) $cena) * 1.5;

    $code = 'none';
    $partner = "leopak";

    // Collect image URLs.
    $s = array();
    if (is_array($img) && $img) {
        foreach (array_values($img) as $k => $slide) {
            // Each entry is read by its own index $k (the original indexed
            // this lookup with the outer row counter by mistake).
            $src = leopak_field($slide, array(0, 0, 0), 'src');
            if (!$src) {
                $src = leopak_field($slide, array(0, 0, 0), 'data-src');
            }
            $s[$k] = $src;
        }
    } else {
        // There is a single image: try to pick it up from its own location.
        $s[0] = leopak_field($card, array(3, 0, 0, 0, 0, 0), 'src');
    }

    if ($title) {
        // Check whether this product is already in the database.
        $tovar_id = DB::getValue("SELECT `id` FROM `tovar` WHERE `title`=? AND `artikul`=? AND `partner`=?", array($title, $artikul, $partner));
        if (!$tovar_id) {
            $tovar_id = DB::add( "INSERT INTO `tovar` (`title`, `txt`, `code`, `artikul`, `partner`, `status`, `category`, `linkimg`) VALUES
(?, ?, ?, ?, ?, ?, ?, ?)", array(
                $title, nl2br((string) $txt), $code, $artikul, $partner, 1, 0, json::to_j($s)) );
        }

        // Download every image and register it for the product.
        foreach ($s as $url) {
            if (!$url) {
                // No URL was found at this position: nothing to download.
                continue;
            }
            $filename = generate_password(30);
            echo $url . "\n";
            // The URL comes from scraped markup, so both arguments are shell-escaped.
            // NOTE(review): escapeshellarg() can drop non-ASCII bytes under a
            // non-UTF-8 locale - confirm image URLs are ASCII/percent-encoded.
            exec("wget " . escapeshellarg(parce_cena($url)) . " -O " . escapeshellarg("/home/cloud/core/img/tk-ligat.ru/tovar/" . $filename . ".png"));
            DB::add("INSERT INTO `tovar_img` (`filename`, `tovar_id`, `tip`) VALUES (?, ?, 'png')", array( $filename, $tovar_id ) );
        }

        // Mark the source row as processed (once per row, not once per image).
        DB::set("UPDATE `sm` SET `add`=1 WHERE `id`=?", $row['id']);

        // Create a 'pereocenka' document and a price-history entry for the product.
        $massiv = array(
            array('tovar_id' => $tovar_id, 'cena' => $cena),
        );
        $json = json::to_j($massiv);
        $insert_id = DB::add( "INSERT INTO `docs` (`t`, `tip`, `json`, `user_id`, `status`, `comment`) VALUES('" . time() . "', 'pereocenka', ?, ?, 1, 'Документ создан из публичной карточки товара')", array($json, 1) );
        if ($insert_id) {
            DB::add( "INSERT INTO `tovar_price_history` (`t`, `tovar_id`, `status`, `cena`, `docs_id`) VALUES(?, ?, ?, ?, ?)", array( time(), $tovar_id, '1', $cena, $insert_id ) );
        }
    }
}
//sleep(120);
//exec ('php desnet_parce.php > 2/1.txt')
// print_r($a['children'][1]['children'][8]['children'][1]['children'][0]['children'][0]['children'][2]['children'][3]['children'][1]['children'][0]['children'][0]['children'][3]['children'][0]['children'][1]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['html'] );
?>