core/parcer/deznet/desnet_parce.php

242 lines
12 KiB
PHP
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Keep PHP errors and warnings out of the script output.
ini_set('display_errors', 0);
// SERVER_NAME is set by hand; presumably the settings file below keys off it
// and it is not populated outside a web request (TODO confirm).
$_SERVER['SERVER_NAME'] = 'tk-ligat.ru';
// Site settings, then the helpers used below (the DB:: calls presumably come from db.php; verify).
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
/**
 * Parses an HTML string and converts its DOM tree into nested arrays.
 *
 * The markup is treated as UTF-8: it is converted to HTML entities before
 * being passed to DOMDocument::loadHTML(), whose warnings about malformed
 * markup are suppressed.
 *
 * @param string $html Raw HTML markup (UTF-8).
 * @return array Result of element_to_obj() for the document root, or
 *               array("tag" => null) when there is nothing to parse.
 */
function html_to_obj($html)
{
    // DOMDocument::loadHTML() raises an error for an empty source (a ValueError
    // on PHP 8). Return the shape element_to_obj() produces for a missing root
    // instead of aborting the caller's whole run.
    if (!is_scalar($html) || trim((string) $html) === '') {
        return array("tag" => null);
    }

    $dom = new DOMDocument();
    // NOTE(review): the 'HTML-ENTITIES' target encoding is deprecated in recent
    // PHP releases; it is kept so the parsed tree stays exactly as before.
    @$dom->loadHTML(mb_convert_encoding((string) $html, 'HTML-ENTITIES', 'UTF-8'));

    return element_to_obj($dom->documentElement);
}
/**
 * Recursively converts a DOM node into a nested associative array.
 *
 * Keys of the result, in insertion order:
 *  - "tag":      tag name for elements, null for any other node (or for null);
 *  - one key per attribute, named after the attribute and holding its value;
 *  - "html":     text of the last direct text-node child, when there is one;
 *  - "children": converted non-text child nodes, in document order.
 *
 * @param DOMNode|null $element Node to convert.
 * @return array
 */
function element_to_obj($element)
{
    // Only elements carry a tag name; comments, processing instructions etc.
    // are still emitted (as "tag" => null) so child positions stay stable.
    $obj = array("tag" => ($element instanceof DOMElement) ? $element->tagName : null);

    if (!($element instanceof DOMNode)) {
        return $obj;
    }

    // `attributes` is null for non-element nodes; iterating it would warn.
    if ($element->attributes !== null) {
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
    }

    if ($element->childNodes !== null) {
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                // Later text nodes overwrite earlier ones.
                $obj["html"] = $subElement->wholeText;
            } else {
                $obj["children"][] = element_to_obj($subElement);
            }
        }
    }

    return $obj;
}
/**
 * Replaces every backslash in $text with a forward slash.
 *
 * @param string $text
 * @return string
 */
function leopak($text)
{
    return str_replace("\\", "/", $text);
}
/* Case-insensitive search for $str inside $txt: returns 1 when found, 0 otherwise */
function findtxt($txt, $str)
{
    return (stripos($txt, $str) !== false) ? 1 : 0;
}
/**
 * Collects the href of every <a> tag in an HTML fragment and inserts the
 * links that are not yet present in the `sm2` table (with status 1).
 *
 * A target that does not contain "https://www.deznet.ru" is treated as
 * site-relative: its backslashes are turned into slashes and the site origin
 * is prepended.
 *
 * @param string $html HTML fragment to scan.
 * @return void
 */
function add_link($html)
{
    preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);
    $urls = $matches[1]; // capture group 1 holds the link itself

    foreach ($urls as $link) {
        if (findtxt($link, 'https://www.deznet.ru') == 0) {
            $link = 'https://www.deznet.ru' . leopak($link);
        }

        // The href comes from scraped markup, so it is bound as a parameter
        // rather than spliced into the SQL text (a backslash in the link used
        // to break the statement). The leading % keeps the original
        // "stored link ends with this link" match.
        $id = DB::getValue("SELECT `id` FROM `sm2` WHERE `link` LIKE ? LIMIT 1", '%' . $link);
        if (!$id) {
            DB::add("INSERT INTO `sm2` (`link`, `status`) VALUES (?, ?)", array($link, 1));
        }
    }
}
/**
 * Builds a random string of $number characters.
 *
 * Characters are drawn with rand() from lowercase Latin letters (the set
 * omits "q" and "w") and the ten digits.
 *
 * @param int $number Desired length.
 * @return string
 */
function generate_password($number)
{
    $alphabet = 'abcdefghijklmnoprstuvxyz1234567890';
    $lastIndex = strlen($alphabet) - 1;

    $pass = "";
    $produced = 0;
    while ($produced < $number) {
        $pass .= $alphabet[rand(0, $lastIndex)];
        $produced++;
    }

    return $pass;
}
// Disabled variant: load one specific row by id.
//$LinkList = DB::getAll( "SELECT * FROM `sm2` WHERE `id`=8549");
// Work queue: link, id and stored html of every `sm2` row with status 1 whose link does not end in ".jpg".
$LinkList = DB::getAll("SELECT `link`, `id`, `html` FROM `sm2` WHERE `status` =1 AND `link` NOT LIKE '%.jpg' LIMIT 1000000");
// Disabled variant: rows that have html but no `add` flag yet.
//$LinkList = DB::getAll("SELECT `link`, `id`, `html` FROM `sm2` WHERE `add` IS NULL AND`html` IS NOT NULL AND `link` NOT LIKE '%.jpg' LIMIT 10000");
/**
 * Stores product characteristics taken from a converted table of rows.
 *
 * Each row is expected in the element_to_obj() shape, with the name at
 * children[0].children[0].children[0].html and the value at
 * children[1].children[0].html. Every "name - value" pair is echoed.
 * Unknown names are added to `tovar_har_sp`; a value is written to
 * `tovar_har` only if the product does not have that characteristic yet.
 *
 * @param array|null $massiv   Rows of the characteristics table.
 * @param int        $tovar_id Product id the characteristics belong to.
 * @return void
 */
function setHars($massiv, $tovar_id)
{
    // The rows come from a fixed DOM path that may not exist on every page;
    // count()/iteration over a non-array would fail.
    if (!is_array($massiv)) {
        return;
    }

    foreach ($massiv as $row) {
        $h = isset($row['children'][0]['children'][0]['children'][0]['html'])
            ? trim($row['children'][0]['children'][0]['children'][0]['html'])
            : '';
        $v = isset($row['children'][1]['children'][0]['html'])
            ? trim($row['children'][1]['children'][0]['html'])
            : '';
        echo $h . " - " . $v . "\n";

        // A row without a name would create an empty dictionary entry.
        if ($h === '') {
            continue;
        }

        // Look the characteristic up in the dictionary; add it when missing.
        $id_h = DB::getValue("SELECT `id` FROM `tovar_har_sp` WHERE `txt` = ?", $h);
        if (!$id_h) {
            $id_h = DB::add("INSERT INTO `tovar_har_sp` (`txt`) VALUES (?)", $h);
        }

        // Add the value only if the product does not have this characteristic yet.
        $id_h_t = DB::getValue("SELECT `id` FROM `tovar_har` WHERE `tovar_har_sp_id` = ? AND `tovar_id`=?", array($id_h, $tovar_id));
        if (!$id_h_t) {
            DB::add("INSERT INTO `tovar_har` (`txt`, `tovar_id`, `tovar_har_sp_id`) VALUES (?, ?, ?)", array($v, $tovar_id, $id_h));
        }
    }
}
/**
 * Returns the id of the "deznet" product with the given title and article
 * number, inserting a new `tovar` row first when none exists.
 *
 * @param string $title   Product title.
 * @param string $artikul Product article number.
 * @return mixed Id from DB::getValue() for an existing row, otherwise the
 *               result of DB::add() for the new row.
 */
function setTovar($title, $artikul)
{
    // Debug trace of the lookup being performed (output text kept as before).
    echo "SELECT `id` FROM `tovar` WHERE `title`='" . $title . "' AND `artikul`='" . $artikul . "' AND `partner`='deznet'";

    // Title and article number are scraped text: bind them instead of
    // concatenating, otherwise an apostrophe in a title breaks the query.
    $tovar_id = DB::getValue("SELECT `id` FROM `tovar` WHERE `title`=? AND `artikul`=? AND `partner`='deznet'", array($title, $artikul));
    if (!$tovar_id) {
        $tovar_id = DB::add("INSERT INTO `tovar` (`title`, `artikul`, `partner`) VALUES (?, ?, 'deznet')", array($title, $artikul));
    }

    return $tovar_id;
}
/**
 * Saves the product image link: the site origin followed by $img.
 *
 * @param int    $tovarID Id of the `tovar` row to update.
 * @param string $img     Image path as found on the page.
 * @return void
 */
function updateImg($tovarID, $img)
{
    $absoluteLink = 'https://www.deznet.ru' . $img;
    $params = array($absoluteLink, $tovarID);
    DB::set("UPDATE `tovar` SET `linkimg`=? WHERE `id`=?", $params);
}
/**
 * Stores the selling price of a product: the source price multiplied by the
 * markup and rounded to a whole number.
 *
 * @param int   $tovarID Id of the `tovar` row to update.
 * @param mixed $cena    Source price as scraped (numeric string or number).
 * @param float $markup  Multiplier applied before rounding; defaults to the
 *                       1.5 that used to be hard-coded.
 * @return void
 */
function setCena($tovarID, $cena, $markup = 1.5)
{
    $value = is_string($cena) ? trim($cena) : $cena;

    // A missing or non-numeric price used to be stored as 0 (or to raise a
    // TypeError on PHP 8); keep the previously stored price instead.
    if (!is_numeric($value)) {
        return;
    }

    $cena = round($value * $markup);
    DB::set("UPDATE `tovar` SET `cena`=? WHERE `id`=?", array($cena, $tovarID));

    // Disabled earlier logic (not restored here): compare with the latest
    // `tovar_price_history` price and, when it differs, create a `docs` record
    // of type 'pereocenka' plus a new `tovar_price_history` row.
}
// Process the queue: decode each stored page, read product data from fixed
// positions in the converted DOM tree and write it to the database.
for ($i = 0; $i < count($LinkList); $i++) {
    // The stored html is base64-encoded, zlib-compressed markup; either step
    // can fail on an empty or damaged row.
    $packed = base64_decode((string) $LinkList[$i]['html']);
    $html = ($packed === false || $packed === '') ? false : gzuncompress($packed);
    if ($html === false || $html === '') {
        // Nothing to parse: flag the row like a page without product data
        // instead of letting the parser abort the whole run.
        DB::set("UPDATE `sm2` SET `add`=2 WHERE `id`=?", $LinkList[$i]['id']);
        continue;
    }

    $a = html_to_obj($html);

    $title = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][1]['children'][0]['children'][1]['children'][0]['children'][1]['children'][0]['html'];
    $kroshki = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][1]['children'][0]['children'][0]['children'][0]['children'];
    echo "Название - " . $title . "\n";

    // Resolve the category chain from the breadcrumbs (entries from index 4
    // on), starting at category 4433 and creating missing categories.
    $cat_id = 4433;
    $crumbCount = is_array($kroshki) ? count($kroshki) : 0;
    for ($j = 4; $j < $crumbCount; $j++) {
        $b = isset($kroshki[$j]['children'][0]['title']) ? $kroshki[$j]['children'][0]['title'] : null;
        if ($b) {
            $parent_id = $cat_id;
            $cat_id = DB::getValue("SELECT `id` FROM `tovar_category` WHERE `category`=? AND `status`=1 AND `partner`='deznet' AND `title`=? LIMIT 1", array($parent_id, $b));
            if (!$cat_id) {
                $cat_id = DB::add("INSERT INTO `tovar_category` (`title`, `partner`, `status`, `category`) VALUES (?, ?, ?, ?)", array($b, 'deznet', 1, $parent_id));
            }
        }
    }

    $artikul = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][5]['children'][1]['children'][0]['children'][0]['children'][1]['children'][0]['children'][1]['children'][0]['children'][1]['html'];
    $cena = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][1]['children'][0]['content'];
    echo "Артикул - " . $artikul . "\n";
    // Arithmetic on a non-numeric string is an error on PHP 8; show 0 then.
    $cenaPreview = is_numeric(is_string($cena) ? trim($cena) : $cena) ? $cena * 1.5 : 0;
    echo "Цена - " . $cenaPreview . "\n";
    $img = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][5]['children'][0]['children'][0]['children'][0]['children'][1]['children'][0]['children'][0]['href'];
    echo "Картинка - " . $img . "\n";
    $opis = nl2br($a['children'][1]['children'][8]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][2]['children'][0]['children'][3]['content']);
    $hars = $a['children'][1]['children'][8]['children'][5]['children'][0]['children'][3]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][0]['children'][5]['children'][0]['children'][1]['children'][1]['children'][1]['children'][0]['children'][0]['children'][0]['children'];

    unset($tovarID);
    if ($title && $artikul) {
        $tovarID = setTovar($title, $artikul);
        setCena($tovarID, $cena);
        updateImg($tovarID, $img);
        // Use the category resolved for THIS page. The former helper variable
        // was never reset per row, so a page without breadcrumbs inherited
        // the previous product's category.
        DB::set("UPDATE `tovar` SET `txt`=?, `category`=?, `status`=?, `donor`=? WHERE `id`=?", array($opis, $cat_id, 1, $LinkList[$i]['link'], $tovarID));
        // The characteristics table is optional on a page.
        if (is_array($hars)) {
            setHars($hars, $tovarID);
        }
        DB::set("UPDATE `sm2` SET `add`=1 WHERE `id`=?", $LinkList[$i]['id']);
    } else {
        // No title or article number found: mark the row as not importable.
        DB::set("UPDATE `sm2` SET `add`=2 WHERE `id`=?", $LinkList[$i]['id']);
    }

    // A commented-out earlier importer was removed from here: it read the
    // fields from paths under body child 9, inserted products together with
    // a `code` column and downloaded the image with wget into `tovar_img`.
}
//sleep(120);
//exec ('php desnet_parce.php > 2/1.txt')
?>