<?php
// Run silently: PHP diagnostics are neither displayed nor logged, and no
// error-log destination is configured.
foreach (['display_errors' => '0', 'log_errors' => '0', 'error_log' => ''] as $iniOption => $iniValue) {
    ini_set($iniOption, $iniValue);
}
unset($iniOption, $iniValue);


/**
 * Download a URL with cURL, following HTTP redirects.
 *
 * The request sends a desktop Chrome User-Agent, gives up after 10 seconds,
 * and has TLS peer/host verification switched off.
 *
 * @param string $url Address to fetch.
 * @return string|false The value returned by curl_exec(), or false after
 *                      printing the cURL error message.
 */
function fetchContentWithCurl($url) {
    $options = [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_USERAGENT      => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
        CURLOPT_TIMEOUT        => 10,
    ];

    $handle = curl_init($url);
    foreach ($options as $option => $value) {
        curl_setopt($handle, $option, $value);
    }

    $body = curl_exec($handle);

    // Report any transfer error and normalise the result to false.
    if (curl_errno($handle)) {
        echo "Curl error: " . curl_error($handle) . "\n";
        $body = false;
    }

    curl_close($handle);
    return $body;
}

/**
 * Resolve a single redirect hop for a URL.
 *
 * The URL is requested once without following HTTP redirects. The result is,
 * in order of precedence:
 *   1. the Location header target when the status code is 3xx;
 *   2. the target of a <meta http-equiv="refresh"> tag, when $fetchContent is
 *      false and the page contains one;
 *   3. the raw response (headers followed by body) when $fetchContent is true,
 *      otherwise the original $url.
 *
 * Redirect targets are returned as found; relative targets are not resolved
 * against $url.
 *
 * NOTE(review): TLS peer/host verification is disabled here, matching
 * fetchContentWithCurl(); confirm this is intentional.
 *
 * @param string $url          Address to probe.
 * @param bool   $fetchContent When true, skip meta-refresh detection and
 *                             return the raw response instead of a URL.
 * @return string|false Resolved URL or raw response as described above, or
 *                      false after printing the error when the request fails.
 */
function getRedirectedUrlFromMeta($url, $fetchContent = false) {
    $ch = curl_init($url);
    if ($ch === false) {
        echo "Curl error: could not initialise a request for $url\n";
        return false;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); // Do not follow HTTP redirects
    curl_setopt($ch, CURLOPT_HEADER, true); // Include headers in the output
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    // Same cap as fetchContentWithCurl(); without it a stalled server blocks the whole run.
    curl_setopt($ch, CURLOPT_TIMEOUT, 10);

    $response = curl_exec($ch);

    if ($response === false || curl_errno($ch)) {
        echo "Curl error: " . curl_error($ch) . "\n";
        curl_close($ch);
        return false;
    }

    // Read everything we need from the handle, then release it once so that
    // no return path below can leak it.
    $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    curl_close($ch);

    // Split the combined output so header parsing never looks at the body
    // and HTML parsing never looks at the headers.
    $headerText = (string) substr($response, 0, $headerSize);
    $body = (string) substr($response, $headerSize);

    // Handle HTTP-level redirections
    if ($statusCode >= 300 && $statusCode < 400) {
        foreach (explode("\r\n", $headerText) as $header) {
            if (stripos($header, 'Location:') === 0) {
                $redirectUrl = trim(substr($header, 9));
                echo "... HTTP Redirected to: $redirectUrl\n";
                return $redirectUrl;
            }
        }
    }

    // Handle Meta refresh-based redirects
    if (!$fetchContent && $body !== '') {
        $dom = new DOMDocument();
        @$dom->loadHTML($body);

        foreach ($dom->getElementsByTagName('meta') as $meta) {
            if (strtolower($meta->getAttribute('http-equiv')) !== 'refresh') {
                continue;
            }

            $content = $meta->getAttribute('content');
            if (preg_match('/url=(.*)/i', $content, $matches)) {
                // The target is often quoted (content="0; url='...'"); strip
                // surrounding whitespace and quotes so callers get a bare URL.
                $redirectUrl = trim(html_entity_decode($matches[1]), " \t\n\r\0\x0B'\"");
                echo "... Meta Redirected to: $redirectUrl\n";
                return $redirectUrl;
            }
        }
    }

    return $fetchContent ? $response : $url;
}

/**
 * Find the main image of an article page and save it next to $name.
 *
 * The page is fetched with fetchContentWithCurl(). Site-specific XPath
 * locations are tried first (only the first <img> inside each match), then a
 * list of generic container classes (every <img> inside the first match of
 * each). The first candidate accepted by validateAndDownloadImage2() wins.
 *
 * @param string $url  Article page to scan.
 * @param string $name Target path without extension; the image is written to
 *                     "$name.jpg" by validateAndDownloadImage2().
 * @return string "ok" when an image was saved, "no" otherwise.
 */
function DlImg($url, $name) {
    $response = fetchContentWithCurl($url);

    // An empty body is as useless as a failed request, and on PHP 8
    // DOMDocument::loadHTML() throws a ValueError for an empty string.
    if ($response === false || $response === '') {
        echo "Failed to retrieve content from $url\n";
        return "no";
    }

    $dom = new DOMDocument();
    @$dom->loadHTML($response);
    $xpath = new DOMXPath($dom);

    $customImageXpaths = [
        '//*[@id="news_page_article"]/header/div[4]/figure',
        '/html/body/div/main/div/section[1]/div/section[1]/article/div[2]/figure/a'
    ];

    $generalImageXpaths = [
        "//div[contains(@class, 'item-img')]",
        "//div[contains(@class, 'news-image')]",
        "//div[contains(@class, 'thumbnail')]",
        "//div[contains(@class, 'primary_files res')]",
        "//figure[contains(@class, 'item-img img-md')]",
        "//div[contains(@class, 'content')]"
    ];

    // Filled in by validateAndDownloadImage2() through its by-reference parameter.
    $mainImageUrl = '';

    foreach ($customImageXpaths as $query) {
        $imageNode = $xpath->query($query)->item(0);
        if (!$imageNode) {
            continue;
        }

        // The matched container may hold no <img> at all; skip it instead of
        // dereferencing null.
        $img = $imageNode->getElementsByTagName('img')->item(0);
        if (!$img) {
            continue;
        }

        if (validateAndDownloadImage2($img->getAttribute('src'), $url, $mainImageUrl, $name)) {
            return "ok";
        }
    }

    foreach ($generalImageXpaths as $query) {
        $contentDiv = $xpath->query($query)->item(0);
        if (!$contentDiv) {
            continue;
        }

        foreach ($contentDiv->getElementsByTagName('img') as $img) {
            if (validateAndDownloadImage2($img->getAttribute('src'), $url, $mainImageUrl, $name)) {
                return "ok";
            }
        }
    }

    return "no";
}

/**
 * Download an image candidate and keep it if it is large enough.
 *
 * A candidate is accepted when its URL ends in .jpg/.jpeg/.png and both its
 * width and height exceed 300 pixels. Accepted images are written to
 * "$name.jpg" (the extension is always .jpg, whatever the source format).
 *
 * Non-absolute URLs are resolved against the origin (scheme, host and port)
 * of $baseUrl, not against its path; protocol-relative URLs ("//host/...")
 * inherit the scheme of $baseUrl.
 *
 * @param string $imgUrl       Image URL as found in the page (absolute or relative).
 * @param string $baseUrl      URL of the page the image was found on.
 * @param string $mainImageUrl Set to the absolute image URL on success; untouched otherwise.
 * @param string $name         Target path without extension.
 * @return bool True when the image was saved, false otherwise.
 */
function validateAndDownloadImage2($imgUrl, $baseUrl, &$mainImageUrl, $name) {
    if (!preg_match('/\.(jpg|jpeg|png)$/i', $imgUrl)) {
        return false;
    }

    if (strpos($imgUrl, '//') === 0) {
        // Protocol-relative URL: it already names its host, only the scheme is missing.
        $scheme = parse_url($baseUrl, PHP_URL_SCHEME);
        $imgUrl = ($scheme ? $scheme : 'https') . ':' . $imgUrl;
    } elseif (!preg_match('#^https?://#i', $imgUrl)) {
        $parsedUrl = parse_url($baseUrl);
        if (empty($parsedUrl['scheme']) || empty($parsedUrl['host'])) {
            // Without a usable base there is nothing to resolve against.
            return false;
        }

        $origin = $parsedUrl['scheme'] . '://' . $parsedUrl['host'];
        if (isset($parsedUrl['port'])) {
            $origin .= ':' . $parsedUrl['port'];
        }
        $imgUrl = $origin . '/' . ltrim($imgUrl, '/');
    }

    // Fetch once and measure the bytes in memory, so the file that is checked
    // is exactly the file that gets saved.
    $imageData = file_get_contents($imgUrl);
    if ($imageData === false || $imageData === '') {
        return false;
    }

    $imageSize = getimagesizefromstring($imageData);
    if (!$imageSize || $imageSize[0] <= 300 || $imageSize[1] <= 300) {
        return false;
    }

    // Only report success when the image really reached the disk.
    if (file_put_contents("$name.jpg", $imageData) === false) {
        return false;
    }

    $mainImageUrl = $imgUrl;
    return true;
}

/**
 * Load the news database from a JSON file.
 *
 * Always returns an array so callers can iterate and append safely: a missing,
 * unreadable, empty or malformed file yields an empty array.
 *
 * @param string $databaseFile Path of the JSON file.
 * @return array Decoded entries (associative arrays), or [] when nothing usable was found.
 */
function loadDatabase($databaseFile) {
    if (!file_exists($databaseFile)) {
        return [];
    }

    $data = file_get_contents($databaseFile);
    if ($data === false) {
        echo "Failed to read database file $databaseFile\n";
        return [];
    }

    // json_decode() yields null for empty or invalid JSON and a scalar for
    // inputs such as "42"; neither is a usable database.
    $decoded = json_decode($data, true);
    if (!is_array($decoded)) {
        echo "Database file $databaseFile does not contain a JSON array; starting empty\n";
        return [];
    }

    return $decoded;
}

/**
 * Write the news database to disk as pretty-printed JSON.
 *
 * Entries are written in the reverse of the order they have in $data. If the
 * data cannot be encoded (for example because a scraped headline contains
 * invalid UTF-8), the existing file is left untouched instead of being
 * overwritten with an empty file.
 *
 * NOTE(review): the file is reversed on every save but not on load, so the
 * relative order of older entries flips between runs; confirm whether
 * consumers depend on the stored order.
 *
 * @param array  $data         Entries to store.
 * @param string $databaseFile Path of the JSON file.
 * @return void
 */
function saveDatabase($data, $databaseFile) {
    $json = json_encode(
        array_reverse($data),
        JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES
    );

    // json_encode() returns false on failure; writing that would truncate the
    // database to zero bytes.
    if ($json === false) {
        echo "Failed to encode database as JSON; $databaseFile left unchanged\n";
        return;
    }

    if (file_put_contents($databaseFile, $json) === false) {
        echo "Failed to write database file $databaseFile\n";
    }
}
 
/**
 * Tell whether a headline is already stored.
 *
 * @param string $text     Headline to look for; compared with strict equality.
 * @param array  $database Entries, each read through its 'text' key.
 * @return bool True when some entry's 'text' is identical to $text.
 */
function isTextInDatabase($text, $database) {
    $found = false;

    foreach ($database as $entry) {
        if ($entry['text'] !== $text) {
            continue;
        }

        $found = true;
        break;
    }

    return $found;
}

// Listing page to scrape, the JSON file holding already-seen headlines, and
// the directory article images are saved into.
$url = 'https://parseek.com/Sport/';
$databaseFile = 'news_data.json';
$photoDir = './photos';

$response = file_get_contents($url);

// An empty page cannot be parsed (DOMDocument::loadHTML() rejects '' on PHP 8).
if ($response === false || $response === '') {
    die("Failed to retrieve content from $url");
}

$dom = new DOMDocument();
@$dom->loadHTML($response);
$xpath = new DOMXPath($dom);

$database = loadDatabase($databaseFile);
if (!is_array($database)) {
    // Guard against a corrupt or empty database file.
    $database = [];
}

// Continue numbering after the highest stored ID.
$knownIds = array_column($database, 'id');
$lastId = !empty($knownIds) ? max($knownIds) : 0;
$newsLinks = $xpath->query("//*[contains(concat(' ', normalize-space(@class), ' '), 'newsbox')]//a");

if ($newsLinks instanceof DOMNodeList && $newsLinks->length > 0) {
    // Images are written below $photoDir; make sure it exists before the first download.
    if (!is_dir($photoDir) && !mkdir($photoDir, 0755, true)) {
        echo "Could not create image directory $photoDir\n";
    }

    $linksArray = iterator_to_array($newsLinks); // Convert the DOMNodeList to an array
    // Reverse the page order and keep the last 30 items, i.e. the first 30
    // links of the page processed from bottom to top.
    $reversedLinks = array_slice(array_reverse($linksArray), -30);

    foreach ($reversedLinks as $link) {
        $text = trim($link->nodeValue);

        if (isTextInDatabase($text, $database)) {
            continue;
        }

        // Consume an ID only for entries that are actually stored, so runs
        // that find nothing new do not leave gaps in the numbering.
        $ID = ++$lastId;

        $listingLink = "https://parseek.com" . $link->getAttribute('href');
        $resolved = getRedirectedUrlFromMeta($listingLink);

        // A failed lookup returns false; fall back to the listing link rather
        // than storing an empty URL.
        $OrgLink = is_string($resolved) ? str_replace("'", "", $resolved) : '';
        if ($OrgLink === '') {
            $OrgLink = $listingLink;
        }

        $check = DlImg($OrgLink, "$photoDir/$ID");
        $img = ($check == "no") ? "Default.jpg" : "$ID.jpg";

        $news = [
            'id' => $ID,
            'text' => $text,
            'link' => $OrgLink,
            'img' => $img
        ];

        $database[] = $news;

        // Persist after every entry so an interrupted run keeps what it has collected.
        saveDatabase($database, $databaseFile);
        echo "New entry saved: ID {$news['id']}, Text: {$news['text']}\n";
    }
} else {
    echo "No links found in the news section.\n";
}


?>
