core/parcer/deznet/desnet_parce.php

242 lines
12 KiB
PHP
Raw Permalink Normal View History

2023-07-12 20:02:20 +05:00
<?php
2023-08-14 09:15:58 +05:00
// Do not print PHP errors/warnings to the output of this script.
ini_set('display_errors', 0);
// Set the host name by hand before loading the includes below.
// NOTE(review): presumably the included settings/DB files read SERVER_NAME,
// which would otherwise not be set for this script — confirm.
$_SERVER['SERVER_NAME'] = 'tk-ligat.ru';
2023-07-12 20:02:20 +05:00
require_once('/home/cloud/core/set/tk-ligat.ru.php');
require_once('/home/cloud/core/api/php/db.php');
require_once('/home/cloud/core/api/php/json.php');
2023-08-14 09:15:58 +05:00
/**
 * Parse an HTML string and convert it into a nested array tree.
 *
 * The markup is treated as UTF-8: every non-ASCII character is turned into a
 * numeric entity before parsing so libxml decodes it correctly regardless of
 * any charset declared inside the document.
 *
 * @param string $html Raw HTML markup.
 * @return array Tree built by element_to_obj() from the document root, or
 *               array('tag' => null) when there is nothing to parse.
 */
function html_to_obj($html)
{
    // Empty / non-string input (e.g. a failed decompression upstream) cannot be
    // parsed: DOMDocument::loadHTML() refuses an empty source on PHP 8.
    if (!is_string($html) || $html === '') {
        return array('tag' => null);
    }

    // Replacement for mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'),
    // whose 'HTML-ENTITIES' pseudo-encoding is deprecated since PHP 8.2.
    $encoded = mb_encode_numericentity($html, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

    // Collect libxml parse warnings internally instead of silencing with "@",
    // then restore whatever error mode the caller had.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($encoded);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if ($dom->documentElement === null) {
        return array('tag' => null);
    }

    return element_to_obj($dom->documentElement);
}
2023-08-14 09:15:58 +05:00
/**
 * Recursively convert a DOM node into a plain nested array.
 *
 * Resulting keys:
 *  - "tag":      the element's tag name (null for non-element nodes such as comments);
 *  - one key per attribute, named after the attribute (an attribute literally
 *    called "tag", "html" or "children" therefore shares the key with the
 *    structural entry of the same name);
 *  - "html":     text of a direct text-node child; when there are several text
 *                children the last one wins;
 *  - "children": list of converted non-text child nodes, in document order.
 *                Comment/CDATA nodes are kept as array('tag' => null) entries,
 *                so positional indexes into this list stay stable.
 *
 * @param DOMNode|null $element Node to convert.
 * @return array
 */
function element_to_obj($element)
{
    if (!($element instanceof DOMNode)) {
        return array('tag' => null);
    }

    $obj = array('tag' => ($element instanceof DOMElement) ? $element->tagName : null);

    // Only element nodes carry an attribute map; it is null for other node types.
    if ($element->attributes !== null) {
        foreach ($element->attributes as $attribute) {
            $obj[$attribute->name] = $attribute->value;
        }
    }

    if ($element->hasChildNodes()) {
        foreach ($element->childNodes as $subElement) {
            if ($subElement->nodeType == XML_TEXT_NODE) {
                $obj['html'] = $subElement->wholeText;
            } else {
                $obj['children'][] = element_to_obj($subElement);
            }
        }
    }

    return $obj;
}
2023-08-14 09:15:58 +05:00
/**
 * Normalise path separators: every backslash becomes a forward slash.
 *
 * @param string $text Text (typically a scraped URL path) to normalise.
 * @return string The text with "\" replaced by "/".
 */
function leopak($text)
{
    return str_replace("\\", "/", $text);
}
/**
 * Case-insensitive substring check.
 *
 * @param string $txt Text to search in.
 * @param string $str Substring to look for.
 * @return int 1 when $str occurs in $txt, 0 otherwise.
 */
function findtxt($txt, $str)
{
    // A match at offset 0 is still a match, hence the strict comparison with false.
    return (stripos($txt, $str) !== false) ? 1 : 0;
}
2023-07-12 20:02:20 +05:00
2023-08-14 09:15:58 +05:00
/**
 * Extract every <a href="..."> target from a page and queue the new ones in `sm2`.
 *
 * Links that do not already contain the site origin get it prepended (with
 * backslashes normalised to forward slashes). A link is inserted with status 1
 * only when no existing `sm2` row has a `link` ending with it.
 *
 * @param string $html Page markup to scan for anchors.
 * @return void
 */
function add_link($html)
{
    preg_match_all("/<[Aa][\s]{1}[^>]*[Hh][Rr][Ee][Ff][^=]*=[ '\"\s]*([^ \"'>\s#]+)[^>]*>/", $html, $matches);

    // Group 1 holds the bare URL of each anchor.
    if (empty($matches[1])) {
        return;
    }

    foreach ($matches[1] as $link) {
        if (findtxt($link, 'https://www.deznet.ru') == 0) {
            $link = 'https://www.deznet.ru' . leopak($link);
        }

        // The link comes from scraped markup: bind it as a parameter instead of
        // concatenating it into the SQL text.
        $id = DB::getValue("SELECT `id` FROM `sm2` WHERE `link` LIKE ? LIMIT 1", '%' . $link);
        if (!$id) {
            DB::add("INSERT INTO `sm2` (`link`, `status`) VALUES (?, ?)", array($link, 1));
        }
    }
}
2023-08-14 09:15:58 +05:00
/**
 * Build a random string of lowercase letters and digits.
 *
 * Uses random_int() (CSPRNG) rather than rand(), since the result is meant to
 * be unguessable.
 *
 * NOTE(review): the alphabet has no "q" and no "w". It is kept exactly as it
 * was so the produced character set does not change — confirm whether the
 * omission is intentional.
 *
 * @param int $number Desired length; values <= 0 yield an empty string.
 * @return string
 */
function generate_password($number)
{
    $alphabet = 'abcdefghijklmnoprstuvxyz1234567890';
    $last = strlen($alphabet) - 1;

    $pass = '';
    for ($i = 0; $i < $number; $i++) {
        $pass .= $alphabet[random_int(0, $last)];
    }

    return $pass;
}
2023-07-12 20:02:20 +05:00
//$LinkList = DB::getAll( "SELECT * FROM `sm2` WHERE `id`=8549");
2023-08-14 09:15:58 +05:00
// Work queue for the import loop below: every `sm2` row with status 1 whose
// link does not end in ".jpg". The `html` column is loaded for all rows at once
// (the LIMIT of 1000000 is effectively "no limit").
$LinkList = DB::getAll("SELECT `link`, `id`, `html` FROM `sm2` WHERE `status` =1 AND `link` NOT LIKE '%.jpg' LIMIT 1000000");
//$LinkList = DB::getAll("SELECT `link`, `id`, `html` FROM `sm2` WHERE `add` IS NULL AND`html` IS NOT NULL AND `link` NOT LIKE '%.jpg' LIMIT 10000");
2023-07-12 20:02:20 +05:00
2023-08-14 09:15:58 +05:00
/**
 * Store the characteristics (name/value pairs) of a product.
 *
 * Each entry of $massiv is one row of the parsed characteristics table: the
 * name is read from children[0] -> children[0] -> children[0] -> "html" and
 * the value from children[1] -> children[0] -> "html". The name is looked up
 * in `tovar_har_sp` (created when missing); the value is attached to the
 * product in `tovar_har` unless the product already has that characteristic.
 *
 * @param array|null $massiv   Parsed table rows; anything that is not an array is ignored.
 * @param int        $tovar_id Id of the product in `tovar`.
 * @return void
 */
function setHars($massiv, $tovar_id)
{
    // The table is located by a fixed position in the page; when the layout
    // differs the caller passes null, which must not abort the whole import.
    if (!is_array($massiv)) {
        return;
    }

    foreach ($massiv as $row) {
        $h = isset($row['children'][0]['children'][0]['children'][0]['html'])
            ? trim($row['children'][0]['children'][0]['children'][0]['html'])
            : '';
        $v = isset($row['children'][1]['children'][0]['html'])
            ? trim($row['children'][1]['children'][0]['html'])
            : '';

        // A row without a name is not a characteristic (header/spacer row or
        // unexpected markup); do not create an empty dictionary entry for it.
        if ($h === '') {
            continue;
        }

        echo $h . " - " . $v . "\n";

        // Find the characteristic in the dictionary; add it when missing.
        $id_h = DB::getValue("SELECT `id` FROM `tovar_har_sp` WHERE `txt` = ?", $h);
        if (!$id_h) {
            $id_h = DB::add("INSERT INTO `tovar_har_sp` (`txt`) VALUES (?)", $h);
        }

        // Attach the value only if the product does not have this characteristic yet.
        $id_h_t = DB::getValue("SELECT `id` FROM `tovar_har` WHERE `tovar_har_sp_id` = ? AND `tovar_id`=?", array($id_h, $tovar_id));
        if (!$id_h_t) {
            DB::add("INSERT INTO `tovar_har` (`txt`, `tovar_id`, `tovar_har_sp_id`) VALUES (?, ?, ?)", array($v, $tovar_id, $id_h));
        }
    }
}
2023-08-14 09:15:58 +05:00
/**
 * Return the id of the Deznet product with the given title and SKU, creating
 * the `tovar` row when it does not exist yet.
 *
 * @param string $title   Product title.
 * @param string $artikul Product SKU (article number).
 * @return mixed Id of the existing row, or whatever DB::add() returns for the new one.
 */
function setTovar($title, $artikul)
{
    // Debug trace of the lookup, values shown inline for readability only.
    // The query that actually runs below is parameterized.
    echo "SELECT `id` FROM `tovar` WHERE `title`='" . $title . "' AND `artikul`='" . $artikul . "' AND `partner`='deznet'";

    // Title and SKU are scraped text: bind them so quotes in a product name can
    // neither break the statement nor inject SQL.
    $tovar_id = DB::getValue(
        "SELECT `id` FROM `tovar` WHERE `title`=? AND `artikul`=? AND `partner`='deznet'",
        array($title, $artikul)
    );
    if (!$tovar_id) {
        $tovar_id = DB::add("INSERT INTO `tovar` (`title`, `artikul`, `partner`) VALUES (?, ?, 'deznet')", array($title, $artikul));
    }

    return $tovar_id;
}
2023-08-14 09:15:58 +05:00
/**
 * Store the product's image URL in `tovar`.`linkimg`.
 *
 * A site-relative path gets the Deznet origin prepended; a value that already
 * is an absolute http(s) URL is stored unchanged. When no image was found the
 * row is left untouched, so the bare origin is never saved as an "image".
 *
 * @param int         $tovarID Id of the product in `tovar`.
 * @param string|null $img     Image href taken from the page.
 * @return void
 */
function updateImg($tovarID, $img)
{
    if (!is_string($img) || $img === '') {
        return;
    }

    if (!preg_match('#^https?://#i', $img)) {
        $img = 'https://www.deznet.ru' . $img;
    }

    DB::set("UPDATE `tovar` SET `linkimg`=? WHERE `id`=?", array($img, $tovarID));
}
2023-08-14 09:15:58 +05:00
/**
 * Store the retail price of a product: the supplier price times a markup,
 * rounded to a whole number.
 *
 * A missing or non-numeric supplier price is ignored: the stored price is left
 * as it is instead of being overwritten with 0 (or raising a TypeError on
 * PHP 8 for a non-numeric string).
 *
 * @param int        $tovarID Id of the product in `tovar`.
 * @param mixed      $cena    Supplier price as scraped (numeric string or number).
 * @param float|int  $markup  Multiplier applied to the supplier price; defaults to 1.5.
 * @return void
 */
function setCena($tovarID, $cena, $markup = 1.5)
{
    if (!is_numeric($cena)) {
        return;
    }

    $cena = round($cena * $markup);

    DB::set("UPDATE `tovar` SET `cena`=? WHERE `id`=?", array($cena, $tovarID));

    // Disabled price-history logic, kept for reference: fetch the product's last
    // price and, when it differs, create a revaluation document plus a history row.
    //$cenaFromBD = DB::getValue("SELECT `cena` FROM `tovar_price_history` WHERE `tovar_id`=? AND `status`=1 ORDER BY `t` DESC LIMIT 1", $tovarID);
    /* echo $cenaFromBD;
    if ($cenaFromBD!=$cena){
        $t[0]['tovar_id']=$tovarID;
        $t[0]['cena']=$cena;
        $c=json::to_j($t);
        $doc_id=DB::add("INSERT INTO `docs` (`t`, `tip`, `json`, `status`, `comment`) VALUES (?, ?, ?, ?, ?)", array(time(), 'pereocenka', $c, 1, 'Из парсера Deznet'));
        DB::add("INSERT INTO `tovar_price_history` (`status`, `tovar_id`, `cena`, `docs_id`, `t`) VALUES (?, ?, ?, ?, ?)", array(1, $tovarID, $cena, $doc_id, time()));
    }*/
}
2023-08-14 09:15:58 +05:00
if (!function_exists('deznet_node')) {
    /**
     * Follow a list of child positions through a tree built by html_to_obj()
     * and return one key of the node reached.
     *
     * @param mixed  $tree Root node (array with an optional "children" list).
     * @param array  $path Child indexes to follow, outermost first.
     * @param string $key  Key to read from the final node ("html", "content", "href", "children", ...).
     * @return mixed The value, or null when any step of the path (or the key) is missing.
     */
    function deznet_node($tree, array $path, $key)
    {
        $node = $tree;
        foreach ($path as $index) {
            if (!is_array($node) || !isset($node['children'][$index])) {
                return null;
            }
            $node = $node['children'][$index];
        }

        return (is_array($node) && isset($node[$key])) ? $node[$key] : null;
    }
}

// Fixed positions inside the Deznet product page, expressed as child indexes
// from the document root. They mirror the page layout one-to-one, so any
// change of the donor markup requires updating them.
$deznetPagePath = array(1, 8, 5, 0);
$deznetProductPath = array_merge($deznetPagePath, array(3, 0, 0, 0, 0, 0, 0, 0, 0));

// Import every queued page: parse the stored HTML, resolve the category chain
// from the breadcrumbs, then create/update the product and mark the queue row.
foreach ((is_array($LinkList) ? $LinkList : array()) as $i => $row) {
    // Pages are stored base64-encoded and zlib-compressed. A row that cannot be
    // decoded is treated as an empty page and ends up marked with add=2 below.
    $html = @gzuncompress(base64_decode((string) $row['html']));
    $a = (is_string($html) && $html !== '') ? html_to_obj($html) : array('tag' => null);

    $title = deznet_node($a, array_merge($deznetPagePath, array(1, 0, 1, 0, 1, 0)), 'html');
    $kroshki = deznet_node($a, array_merge($deznetPagePath, array(1, 0, 0, 0)), 'children');
    echo "Название - " . $title . "\n";

    // Walk the breadcrumbs (from the 5th entry on) and find or create the matching
    // category chain. $cat_id always holds the deepest category resolved so far,
    // and is reset for every product so nothing leaks from the previous one.
    // NOTE(review): 4433 is presumably the root category for Deznet goods — confirm.
    $cat_id = 4433;
    $kroshkiCount = is_array($kroshki) ? count($kroshki) : 0;
    for ($j = 4; $j < $kroshkiCount; $j++) {
        $b = isset($kroshki[$j]['children'][0]['title']) ? $kroshki[$j]['children'][0]['title'] : null;
        if (!$b) {
            continue;
        }
        $parentId = $cat_id;
        $cat_id = DB::getValue("SELECT `id` FROM `tovar_category` WHERE `category`=? AND `status`=1 AND `partner`='deznet' AND `title`=? LIMIT 1", array($parentId, $b));
        if (!$cat_id) {
            $cat_id = DB::add("INSERT INTO `tovar_category` (`title`, `partner`, `status`, `category`) VALUES (?, ?, ?, ?)", array($b, 'deznet', 1, $parentId));
        }
    }

    $artikul = deznet_node($a, array_merge($deznetProductPath, array(2, 0, 5, 1, 0, 0, 1, 0, 1, 0, 1)), 'html');
    $cena = deznet_node($a, array_merge($deznetProductPath, array(1, 0)), 'content');
    echo "Артикул - " . $artikul . "\n";
    echo "Цена - " . (is_numeric($cena) ? $cena * 1.5 : 0) . "\n";

    $img = deznet_node($a, array_merge($deznetProductPath, array(2, 0, 5, 0, 0, 0, 1, 0, 0)), 'href');
    echo "Картинка - " . $img . "\n";

    $opis = nl2br((string) deznet_node($a, array_merge($deznetProductPath, array(2, 0, 3)), 'content'));
    $hars = deznet_node($a, array_merge($deznetProductPath, array(5, 0, 1, 1, 1, 0, 0, 0)), 'children');

    unset($tovarID);
    if ($title && $artikul) {
        $tovarID = setTovar($title, $artikul);
        setCena($tovarID, $cena);
        updateImg($tovarID, $img);
        DB::set("UPDATE `tovar` SET `txt`=?, `category`=?, `status`=?, `donor`=? WHERE `id`=?", array($opis, $cat_id, 1, $row['link'], $tovarID));
        // A page without a characteristics table yields null; pass an empty list instead.
        setHars(is_array($hars) ? $hars : array(), $tovarID);
        DB::set("UPDATE `sm2` SET `add`=1 WHERE `id`=?", $row['id']);
    } else {
        // Not a product page (or the layout did not match): mark it as skipped.
        DB::set("UPDATE `sm2` SET `add`=2 WHERE `id`=?", $row['id']);
    }

    // NOTE: an earlier, disabled variant of this loop (which also downloaded the
    // image with wget and registered it in `tovar_img`) lived here as
    // commented-out code; it is available in the VCS history.
}
//sleep(120);
//exec ('php desnet_parce.php > 2/1.txt')
?>