core/parcer/leopak_parce.php

145 lines
6.7 KiB
PHP
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Do not print PHP errors/warnings into the script output.
ini_set( 'display_errors', 0 );
// Set the host name by hand before loading the settings file.
// NOTE(review): presumably the included files read SERVER_NAME, which is not
// populated when the script is started from the command line — confirm.
$_SERVER['SERVER_NAME']='tk-ligat.ru';
// Site settings, followed by the helpers used below as DB::... and json::to_j().
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
/**
 * Parses an HTML string and converts its root element into a nested array.
 *
 * The markup is loaded into a DOMDocument and the document element is handed
 * to element_to_obj(), so the result has exactly the shape that function
 * produces.
 *
 * @param string $html UTF-8 encoded HTML markup.
 * @return array Array tree of the root element. For empty or unparseable
 *               input the document element is null and the result is
 *               whatever element_to_obj() returns for null.
 */
function html_to_obj($html) {
    $html = (string) $html;
    $dom = new DOMDocument();

    // DOMDocument::loadHTML() refuses an empty string (ValueError on PHP 8),
    // so empty input simply leaves the document without a root element.
    if ($html !== '') {
        // Turn every non-ASCII character into a numeric entity so the parser
        // does not have to guess the encoding. This replaces the deprecated
        // mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8') call.
        $encoded = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

        // Collect libxml parse diagnostics internally instead of silencing
        // them with "@", then restore the caller's setting.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($encoded);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    return element_to_obj($dom->documentElement);
}
/**
 * Recursively converts a DOM node into a nested array.
 *
 * Result layout:
 *  - "tag"      tag name for element nodes, null for anything else
 *               (comments, other non-element nodes, or a null input);
 *  - one entry per attribute, keyed by the attribute name (an attribute
 *    called "tag" therefore overwrites the tag entry);
 *  - "html"     text of the LAST direct text-node child, if any;
 *  - "children" list with one converted entry for every non-text child,
 *               in document order.
 *
 * Non-element children still occupy a slot in "children", so positional
 * indices into the tree stay stable.
 *
 * @param DOMNode|null $element Node to convert.
 * @return array
 */
function element_to_obj($element) {
    // Only element nodes carry a tag name; do not probe the property on other nodes.
    $obj = array("tag" => ($element instanceof DOMElement) ? $element->tagName : null);

    if (!($element instanceof DOMNode)) {
        return $obj;
    }

    // "attributes" is null for non-element nodes; iterating it directly
    // raised a warning for every comment node.
    if ($element->attributes !== null) {
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
    }

    if ($element->childNodes !== null) {
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                $obj["html"] = $subElement->wholeText;
            } else {
                $obj["children"][] = element_to_obj($subElement);
            }
        }
    }

    return $obj;
}
/**
 * Normalises path separators: every backslash in $text becomes a forward slash.
 *
 * @param string $text Text (typically a link) that may contain backslashes.
 * @return string The text with "\" replaced by "/".
 */
function leopak ($text) {
    return str_replace("\\", "/", $text);
}
/**
 * Case-insensitive substring test.
 *
 * @param string $txt Text to search in.
 * @param string $str Text to look for.
 * @return int 1 when $str occurs in $txt, 0 otherwise.
 */
function findtxt($txt, $str){
    return (stripos($txt, $str) === false) ? 0 : 1;
}
/**
 * Extracts every <a href="..."> target from $html and stores links that are
 * not yet known in the `sm2` table.
 *
 * Links that do not contain 'https://www.deznet.ru' get that host prepended
 * (with backslashes normalised by leopak()). A link counts as already known
 * when an `sm2`.`link` value ends with it.
 *
 * @param string $html HTML markup to scan.
 * @return void
 */
function add_link($html){
    preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);
    // Group 1 of the pattern holds the link target itself.
    $urls = isset($matches[1]) ? $matches[1] : array();

    foreach ($urls as $link) {
        if (findtxt($link, 'https://www.deznet.ru') == 0) {
            $link = 'https://www.deznet.ru' . leopak($link);
        }

        // The link comes from scraped markup, so it is bound as a parameter
        // instead of being concatenated into the SQL text.
        $id = DB::getValue("SELECT `id` FROM `sm2` WHERE `link` LIKE ? LIMIT 1", array('%' . $link));
        if (!$id) {
            DB::add("INSERT INTO `sm2` (`link`, `status`) VALUES (?, ?)", array($link, 1));
        }
    }
}
/**
 * Builds a random string of $number characters.
 *
 * Characters are drawn with rand() from lowercase Latin letters (the set
 * contains no "q" and no "w") and the digits 0-9.
 *
 * @param int $number Desired length; values of 0 or less give an empty string.
 * @return string
 */
function generate_password($number){
    $alphabet = 'abcdefghijklmnoprstuvxyz1234567890';
    $last = strlen($alphabet) - 1;

    $pass = "";
    $made = 0;
    while ($made < $number) {
        $pass .= $alphabet[rand(0, $last)];
        $made++;
    }
    return $pass;
}
/**
 * Applies a fixed set of text substitutions:
 *  - "," becomes "." (decimal comma to decimal point),
 *  - "middle" and "xsmall" both become "big".
 *
 * @param string $text Input text.
 * @return string Text with the substitutions applied.
 */
function parce_cena ($text) {
    $map = array(
        ","      => ".",
        "middle" => "big",
        "xsmall" => "big",
    );
    return str_replace(array_keys($map), array_values($map), $text);
}
/**
 * Reads one value out of the array tree built by html_to_obj().
 *
 * Starting at $node, follows $path through successive "children" lists and
 * returns the $key entry of the node reached. Returns null (without raising
 * notices) as soon as any step or the final key is missing.
 *
 * @param array|null $node Tree node to start from.
 * @param array      $path Child indices to follow, outermost first.
 * @param string     $key  Entry to read from the final node ("html", "src", ...).
 * @return mixed|null
 */
function leopak_tree_value($node, $path, $key) {
    foreach ($path as $index) {
        if (!is_array($node) || !isset($node['children'][$index])) {
            return null;
        }
        $node = $node['children'][$index];
    }
    return (is_array($node) && isset($node[$key])) ? $node[$key] : null;
}

// Child-index path shared by every field read from the page.
$cardPath = array(1, 8, 1, 0, 0, 2);
$code = 'none';
$partner = "leopak";

// Pages that have been fetched (`html` is set) but not imported yet (`add` is NULL).
$LinkList = DB::getAll( "SELECT * FROM `sm` WHERE `add` IS NULL AND `html` IS NOT NULL");
if (!is_array($LinkList)) {
    $LinkList = array();
}

foreach ($LinkList as $row) {
    // The page markup is stored base64-encoded.
    $a = html_to_obj(base64_decode($row['html']));

    $title   = leopak_tree_value($a, array_merge($cardPath, array(0, 0, 0)), 'html');
    $artikul = trim((string) leopak_tree_value($a, array_merge($cardPath, array(2, 0, 0, 1, 1)), 'html'));
    $txt     = leopak_tree_value($a, array_merge($cardPath, array(4, 0, 1, 0, 0)), 'html');
    $img     = leopak_tree_value($a, array_merge($cardPath, array(3, 0, 0, 2, 0, 0, 0)), 'children');
    $cena    = leopak_tree_value($a, array_merge($cardPath, array(3, 1, 0, 0, 3, 0, 1, 0, 0, 0, 0, 0)), 'html');

    // Decimal comma -> point, then a 1.5 multiplier. The explicit cast keeps a
    // missing or non-numeric price at 0 instead of raising a TypeError on PHP 8.
    $cena = (float) parce_cena((string) $cena) * 1.5;

    // Collect image URLs.
    $s = array();
    if (is_array($img) && $img) {
        foreach ($img as $k => $imgNode) {
            $src = leopak_tree_value($imgNode, array(0, 0, 0), 'src');
            if (!$src) {
                $src = leopak_tree_value($imgNode, array(0, 0, 0), 'data-src');
            }
            $s[$k] = $src;
        }
    } else {
        // There is a single image: try to grab it from its own location.
        $s[0] = leopak_tree_value($a, array_merge($cardPath, array(3, 0, 0, 0, 0, 0)), 'src');
    }

    if ($title) {
        // Check whether this product already exists in the database.
        $tovar_id = DB::getValue("SELECT `id` FROM `tovar` WHERE `title`=? AND `artikul`=? AND `partner`=?", array($title, $artikul, $partner));
        if (!$tovar_id) {
            $tovar_id = DB::add(
                "INSERT INTO `tovar` (`title`, `txt`, `code`, `artikul`, `partner`, `status`, `category`, `linkimg`) VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
                array($title, nl2br((string) $txt), $code, $artikul, $partner, 1, 0, json::to_j($s))
            );
        }

        // Download every image and register it for the product.
        foreach ($s as $url) {
            if (!$url) {
                // Nothing to download; do not record an image that was never fetched.
                continue;
            }
            $filename = generate_password(30);
            echo $url . "\n";
            $target = "/home/cloud/core/img/tk-ligat.ru/tovar/" . $filename . ".png";
            // The URL comes from scraped markup: quote it for the shell.
            exec("wget " . escapeshellarg(parce_cena($url)) . " -O " . escapeshellarg($target));
            DB::add("INSERT INTO `tovar_img` (`filename`, `tovar_id`, `tip`) VALUES (?, ?, 'png')", array( $filename, $tovar_id ) );
        }

        // Mark the source page as imported (once per page, not once per image).
        DB::set("UPDATE `sm` SET `add`=1 WHERE `id`=?", $row['id']);

        // Create a 'pereocenka' document with the price and log it in the
        // price history, using one timestamp for both rows.
        $now = time();
        $json = json::to_j(array(array('tovar_id' => $tovar_id, 'cena' => $cena)));
        $insert_id = DB::add( "INSERT INTO `docs` (`t`, `tip`, `json`, `user_id`, `status`, `comment`) VALUES(?, 'pereocenka', ?, ?, 1, 'Документ создан из публичной карточки товара')", array($now, $json, 1) );
        if ($insert_id) {
            DB::add( "INSERT INTO `tovar_price_history` (`t`, `tovar_id`, `status`, `cena`, `docs_id`) VALUES(?, ?, ?, ?, ?)", array( $now, $tovar_id, '1', $cena, $insert_id ) );
        }
    }

    // Release the parsed page before the next iteration.
    unset($a, $img, $s, $title, $artikul, $txt, $cena, $tovar_id);
}
//sleep(120);
//exec ('php desnet_parce.php > 2/1.txt')
// print_r($a['children'][1]['children'][8]['children'][1]['children'][0]['children'][0]['children'][2]['children'][3]['children'][1]['children'][0]['children'][0]['children'][3]['children'][0]['children'][1]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['html'] );
?>