data scraping travel information using PHP

Good morning,

I have a question about data scraping using PHP.
For a travel agency I need to scrape two websites, because my contractor wants information about the competition. Specifically, they need all the trips that are offered, the prices, and the attendance.

I also received a PHP script that I can use, but it is a bit too technical for me.
Is there someone who can help me adjust the PHP script?

Thanks in advance,

I tried to adjust the script, but unfortunately it didn't work.

<?php


namespace ScraperNRV;

use GuzzleHttp\Client;
use Psr\Http\Message\ResponseInterface;

class Sawadee extends Scraper
{
    /**
     * HTTP client instance handed out by getClient() when it is set.
     */
    private $client;

    /**
     * Page-number limit used by the search-result loop in fetchUris().
     */
    private $maxPages = 30;

    /**
     * Cookie store: update() is fed every response and toString() supplies
     * the value of the Cookie request header (see getOptions()).
     *
     * @var Cookie
     */
    private $cookies;

    /**
     * Sawadee constructor.
     *
     * @param Cookie $cookies Cookie store used for the requests of this scraper.
     */
    public function __construct(Cookie $cookies)
    {
        $this->cookies = $cookies;
    }

    /**
     * Runs one complete scrape.
     *
     * Collects the product URLs from the search pages, fetches the details
     * of every product found, and finally calls generateCsv() and sendMail()
     * (both defined outside this class).
     *
     * An error raised while processing a single product is printed and the
     * loop carries on with the next product.
     */
    public function scrape()
    {
        $this->fetchUris();

        // foreach works on a snapshot of the array, so updateProductInfo()
        // may rewrite entries of $this->products while we iterate.
        foreach ($this->products as $product) {
            try {
                $this->updateProductInfo($product['url']);
            } catch (\Throwable $e) {
                // Fully qualified on purpose: inside a namespace a bare
                // "Throwable" would resolve to a non-existent local class
                // and this handler would never run.
                echo $e->getMessage() . "\n";
            }
        }

        $this->generateCsv();
        $this->sendMail();
    }

    /** Relative URL requested first by fetchUris(). */
    private $iniUrl = 'reizen/?_smstate=1$5_28_65';

    /** Relative URL prefix of the follow-up search pages; the page number is appended. */
    private $searchPageUrl = 'reizen/?page=';

    /**
     * Collects product URLs from the search-result pages.
     *
     * The first page is requested through $iniUrl, pages 2 up to and
     * including $maxPages through $searchPageUrl. Every response is handed
     * to the cookie store and to parseSearchPage(), which fills
     * $this->products.
     *
     * The loop stops at the first page that does not answer with HTTP 200.
     * An exception on a follow-up page is printed and the next page is tried.
     * An exception on the first page is not caught here.
     */
    private function fetchUris()
    {
        $response = $this->getClient()->request("GET", $this->iniUrl, []);
        $this->cookies->update($response);
        $this->parseSearchPage($response);

        // "<=" so that page number $maxPages itself is requested as well;
        // the progress message below counts "page p of maxPages".
        for ($p = 2; $p <= $this->maxPages; $p++) {
            try {
                $response = $this->getClient()->request("GET", $this->searchPageUrl . $p, $this->getOptions(true));
                if ($response->getStatusCode() !== 200) {
                    echo "Parsed page $p gives http code " . $response->getStatusCode() . "\n";
                    break;
                }
                $this->cookies->update($response);
                $this->parseSearchPage($response);
                $wait = rand(1, 5);
                echo "Parsed page $p of {$this->maxPages}, waiting $wait sec.\n";
                // NOTE(review): throttling is currently switched off, so the
                // message above is informational only. Call sleep($wait) here
                // to really pause between pages.
            } catch (\Throwable $e) {
                echo $e->getMessage() . "\n" . $e->getTraceAsString() . "\n";
            }
        }
    }

    /** Relative URL that is POSTed to first to obtain the initial price grid. */
    private $initGridUrl  = 'data/pricegrid/pricegridprices/?clearprices=1';

    /** Relative URL that is POSTed to in order to move the price grid earlier/later. */
    private $selectionUrl = 'data/pricegrid/selection/';

    /** Relative URL used by getReceipt() to request the details of one price. */
    private $receiptUrl   = 'data/pricegrid/priceselect/';

    /**
     * Fetches one product page and stores its name, country and prices in
     * $this->products[$url].
     *
     * Steps:
     *  1. GET the product page; read the "data-tmd" attribute of <html>
     *     (expected form "theme_type_id"), the first <h1> and the first
     *     crumb-trail item.
     *  2. POST to $initGridUrl and parse the returned price grid.
     *  3. Repeatedly POST to $selectionUrl, first with direction "earlier",
     *     then with "later", parsing every grid that comes back. A direction
     *     is finished as soon as the response repeats the previous one or
     *     carries no "pricegrid" entry.
     *
     * Non-200 responses and missing page data are printed and end the method
     * early. Exceptions of the first two requests are not caught here.
     *
     * @param string $url Product URL, also the key in $this->products.
     */
    public function updateProductInfo(string $url)
    {
        $startTime = microtime(true);
        echo "Get page $url - " . date("Y-m-d H:i:s") . "\n";
        $this->products[$url] = [
            'url'    => $url,
            'airline' => "",
            'prices' => [],
        ];

        $response = $this->getClient()->request("GET", $url, $this->getOptions(true));
        $this->cookies->update($response);
        if ($response->getStatusCode() !== 200) {
            echo "Parsed page $url gives http code " . $response->getStatusCode() . "\n";
            return;
        }

        $pageHtml = $response->getBody()->getContents();
        if ($pageHtml === '') {
            // DOMDocument::loadHTML() refuses an empty string on PHP 8.
            echo "Parsed page $url has an empty body\n";
            return;
        }
        $dom = new \DOMDocument();
        @$dom->loadHTML($pageHtml);
        $xpath = new \DOMXPath($dom);

        // Name and country are filled in first (empty when the node is
        // missing) so that the product entry has these keys in every case.
        $heading = $xpath->query("//h1")->item(0);
        $this->products[$url]['name'] = $heading !== null ? trim($heading->nodeValue) : '';
        $firstCrumb = $xpath->query("//ul[contains(@class,'crumbtrail')]/li")->item(0);
        $this->products[$url]['country'] = $firstCrumb !== null ? trim($firstCrumb->nodeValue) : '';

        $htmlNode = $xpath->query("//html")->item(0);
        $entityId = $htmlNode !== null ? $htmlNode->getAttribute('data-tmd') : '';
        $entityParts = explode("_", $entityId);
        if (count($entityParts) < 3) {
            echo "Parsed page $url has no usable data-tmd attribute\n";
            return;
        }
        list($theme, $type, $id) = $entityParts;

        $response = $this->getClient()->request("POST", $this->initGridUrl, $this->getOptions(true, true, [
            'masterentitytype' => $type,
            'masterentityid'   => $id,
            'theme'            => $theme,
            'firstview'        => 'true',
        ]));
        $this->cookies->update($response);
        if ($response->getStatusCode() !== 200) {
            echo "Parsed page $url gives http code " . $response->getStatusCode() . "\n";
            return;
        }
        $initialGrid = $response->getBody()->getContents();
        if ($initialGrid !== '') {
            $this->parsePriceInfo($url, $entityId, $initialGrid);
        }

        $direction = 'earlier';
        $previousMessage = '';
        while (true) {
            echo "Price page $url -> $direction\n";
            try {
                $response = $this->getClient()->request("POST", $this->selectionUrl, $this->getOptions(true, true, [
                    'MoveRelativeDates' => $direction,
                    'Entity'            => $entityId,
                ]));
            } catch (\Throwable $e) {
                echo $e->getMessage() . "\n";
                break;
            }

            $this->cookies->update($response);
            if ($response->getStatusCode() !== 200) {
                echo "Parsed page $url gives http code " . $response->getStatusCode() . "\n";
                break;
            }

            // $grid stays null when this response brings nothing new:
            // either it repeats the previous response or it has no grid.
            $grid = null;
            $content = $response->getBody()->getContents();
            if ($content !== $previousMessage) {
                $previousMessage = $content;
                $json = json_decode($content, true);
                $candidate = is_array($json) ? ($json['pricegrid'] ?? null) : null;
                if (is_string($candidate) && $candidate !== '') {
                    $grid = $candidate;
                }
            }

            if ($grid === null) {
                if ($direction === 'later') {
                    break;
                }
                // "earlier" is exhausted, continue in the other direction.
                $direction = 'later';
                continue;
            }

            $this->parsePriceInfo($url, $entityId, $grid);
            echo "Price page $url -> $direction done\n";
        }
        echo "UpdateProductInfo - " . (microtime(true) - $startTime) . "\n";
    }

    /**
     * Extracts the prices from one price-grid HTML fragment and keeps, per
     * departure date, the cheapest one in $this->products[$url]['prices'].
     *
     * Every parent of an element carrying "data-tui-tooltip-element" is one
     * price: its first <span> holds the price text and, in "rev", the price
     * id; its table rows hold label/value pairs. The labels are matched on
     * Dutch keywords (bezet, duur, verzorging, vervoer, datum, vanaf).
     *
     * For a date seen for the first time, or for a price lower than the
     * stored one, getReceipt() is called to add transport and passenger
     * details.
     *
     * @param string $url      Key of the product in $this->products.
     * @param string $entityId Entity id passed on to getReceipt().
     * @param string $content  HTML of the price grid.
     */
    private function parsePriceInfo(string $url, string $entityId, string $content)
    {
        $startTime = microtime(true);
        if ($content === '') {
            // DOMDocument::loadHTML() refuses an empty string on PHP 8.
            echo "parsePriceInfo - " . (microtime(true) - $startTime) . "\n";
            return;
        }
        $dom = new \DOMDocument();
        @$dom->loadHTML($content);
        $xpath = new \DOMXPath($dom);
        $toolTips = $xpath->query("//*[boolean(@data-tui-tooltip-element)]/..");

        foreach ($toolTips as $details) {
            $priceNode = $details->getElementsByTagName('span')->item(0);
            if ($priceNode === null) {
                // No price element, nothing to record for this entry.
                continue;
            }
            $priceId = $priceNode->getAttribute('rev');
            $price = $priceNode->nodeValue;
            $date = "";
            $occupation = "";
            $duration = "";
            $board = "";
            $transport = "";
            $departure = "";
            foreach ($details->getElementsByTagName('tr') as $row) {
                $fields = $row->getElementsByTagName('td');
                if ($fields->length < 2) {
                    // Header or spacer row without a label/value pair.
                    continue;
                }
                $name   = $fields->item(0)->nodeValue;
                $value  = $fields->item(1)->nodeValue;
                if (stripos($name, 'bezet') !== false) {
                    // Removing "volwassene" leaves the plural "n" of
                    // "volwassenen" behind, hence the second replace.
                    $occupation = trim(str_ireplace('n', '', str_ireplace('volwassene', '', $value)));
                } elseif (stripos($name, 'duur') !== false) {
                    // The value is a number of days ("dgn"); one less is stored.
                    $duration = (int) trim(str_ireplace('dgn', '', $value)) - 1;
                } elseif (stripos($name, 'verzorging') !== false) {
                    $board = $value;
                } elseif (stripos($name, 'vervoer') !== false) {
                    $transport = $value;
                } elseif (stripos($name, 'datum') !== false) {
                    $date = $this->parseDate($value);
                } elseif (stripos($name, 'vanaf') !== false) {
                    $departure = $value;
                }
            }

            $known = $this->products[$url]['prices'][$date] ?? null;
            if ($known !== null
                && $this->comparablePrice((string) $price) >= $this->comparablePrice((string) $known['price'])
            ) {
                // The stored price for this date is at least as cheap.
                continue;
            }

            $info = $this->getReceipt($entityId, $priceId);
            $this->products[$url]['prices'][$date] = [
                'first'      => false,
                'date'       => $date,
                'occupation' => $occupation,
                'duration'   => $duration,
                'board'      => $board,
                'transport'  => $transport,
                'departure'  => $departure,
                'price'      => $price,
                'outbound'   => $info['outbound'] ?? '',
                'inbound'    => $info['inbound'] ?? '',
                'pax'        => $info['pax'] ?? '',
            ];
        }
        echo "parsePriceInfo - " . (microtime(true) - $startTime) . "\n";
    }

    /**
     * Turns a scraped price text into a number that can be compared.
     *
     * Comparing the raw texts with "<" is lexicographic as soon as they
     * contain a currency sign, and reads "1.299" as 1.299 instead of 1299.
     * NOTE(review): assumes Dutch notation ("." groups thousands, "," is the
     * decimal mark) - confirm against the real markup.
     *
     * @param string $price Price text, e.g. "1.299" or "999,-".
     * @return float 0.0 when the text contains no digits.
     */
    private function comparablePrice(string $price): float
    {
        $clean = (string) preg_replace('/[^0-9.,]/', '', $price);
        if (strpos($clean, ',') !== false) {
            $clean = str_replace(',', '.', str_replace('.', '', $clean));
        } elseif (preg_match('/^\d{1,3}(\.\d{3})+$/', $clean) === 1) {
            $clean = str_replace('.', '', $clean);
        }
        return (float) $clean;
    }

    /**
     * Requests the "receipt" of one price and extracts the transport legs and
     * the passenger count from the returned HTML.
     *
     * For each of the classes "dep-loc", "arr-loc" and "trnsprt" exactly two
     * matching nodes are expected: the first belongs to the outbound leg, the
     * second to the inbound leg. The passenger count is read from the
     * "data-totalpassengers" attribute of the "grp-cmpstn-cnt" element.
     *
     * @param string $entityId Entity id of the product.
     * @param string $priceId  Price id taken from the price grid.
     * @return array Always has the keys 'outbound', 'inbound' and 'pax';
     *               all are empty strings when nothing could be retrieved.
     */
    private function getReceipt(string $entityId, string $priceId): array
    {
        $startTime = microtime(true);
        $nothing = ['outbound' => '', 'inbound' => '', 'pax' => ''];

        // Compared with '' rather than empty(): an id of "0" is a valid value.
        if ($priceId === '') {
            echo "Empty price id given\n";
            return $nothing;
        }

        $response = $this->getClient()->request("POST", $this->receiptUrl, $this->getOptions(true, true, [
            'PriceSelectionId'  => $priceId,
            'Entity'            => $entityId,
        ]));
        $this->cookies->update($response);
        if ($response->getStatusCode() !== 200) {
            echo "Parsed receipt for price id $priceId gives http code " . $response->getStatusCode() . "\n";
            return $nothing;
        }

        $json = json_decode($response->getBody()->getContents(), true);
        $grid = is_array($json) ? ($json['pricegrid'] ?? null) : null;
        if (!is_string($grid) || $grid === '') {
            return $nothing;
        }

        $dom = new \DOMDocument();
        @$dom->loadHTML($grid);
        $xpath = new \DOMXPath($dom);

        $outbound = [];
        $inbound = [];
        foreach (['dep-loc', 'arr-loc', 'trnsprt'] as $classname) {
            $nodes = $xpath->query("//*[contains(@class, '$classname')]");
            // Only an unambiguous out/in pair is used.
            if ($nodes->length == 2) {
                $outbound[] = $nodes->item(0)->nodeValue;
                $inbound[] = $nodes->item(1)->nodeValue;
            }
        }

        $pax = "";
        $nodes = $xpath->query("//*[contains(@class, 'grp-cmpstn-cnt')]/@data-totalpassengers");
        if ($nodes->length == 1) {
            $pax = $nodes->item(0)->nodeValue;
        }

        echo "getReceipt - " . (microtime(true) - $startTime) . "\n";
        return ['outbound' => implode("- ", $outbound), 'inbound' => implode("- ", $inbound), 'pax' => $pax];
    }

    /**
     * Registers every product linked from one search-result page.
     *
     * Each <a> directly inside a <div> whose class contains "pricelabel" is
     * one product; its href becomes both the key in $this->products and the
     * 'url' value. An existing entry with the same href is replaced.
     *
     * @param ResponseInterface $response Response holding the search page HTML.
     */
    private function parseSearchPage(ResponseInterface $response)
    {
        $body = $response->getBody()->getContents();
        if ($body === '') {
            // DOMDocument::loadHTML() refuses an empty string on PHP 8.
            return;
        }

        $dom = new \DOMDocument();
        @$dom->loadHTML($body);
        $xpath = new \DOMXPath($dom);

        foreach ($xpath->query("//div[contains(@class,'pricelabel')]/a") as $button) {
            $href = $button->getAttribute('href');
            if ($href === '') {
                // A link without target cannot be fetched later on.
                continue;
            }
            $this->products[$href] = [
                'url'    => $href,
                'prices' => [],
            ];
        }
    }

    /**
     * Returns the HTTP client, creating it on first use.
     *
     * The client is stored in $this->client so that all requests share one
     * instance. The options of getOptions() are merged into the client
     * configuration as top-level keys; nested under a numeric key they would
     * not be recognised as request defaults.
     *
     * @return Client
     */
    private function getClient(): Client
    {
        if ($this->client === null) {
            $this->client = new Client(array_merge(
                // Base URI is used with relative requests
                ['base_uri' => "https://www.tui.nl/"],
                $this->getOptions()
            ));
        }
        return $this->client;
    }

    /**
     * Builds the request options array passed to the HTTP client.
     *
     * @param bool  $addCookie When true, adds a Cookie header taken from the cookie store.
     * @param bool  $ajax      When true, adds the two headers that mark the request as AJAX.
     * @param array $params    Form fields; when not empty they are sent as 'form_params'.
     * @return array Options with the keys 'headers', 'decode_content' and, optionally, 'form_params'.
     */
    private function getOptions(bool $addCookie = false, bool $ajax = false, array $params = []): array
    {
        $headers = [
            'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Safari/605.1.15',
            'Host' => 'www.tui.nl',
            'Accept-Encoding' => 'gzip, deflate, br',
            'Connection' => 'keep-alive',
        ];

        if ($addCookie) {
            $headers['Cookie'] = $this->cookies->toString();
        }

        if ($ajax) {
            $headers += [
                'X-TS-AJAX-Request' => "true",
                'X-Requested-With' => "XMLHttpRequest",
            ];
        }

        $requestOptions = [
            'headers' => $headers,
            'decode_content' => true,
        ];

        if ($params === []) {
            return $requestOptions;
        }

        return $requestOptions + ['form_params' => $params];
    }

    /** Dutch month names mapped to their two-digit month number. */
    private $months = [
        'januari'   => '01',
        'februari'  => '02',
        'maart'     => '03',
        'april'     => '04',
        'mei'       => '05',
        'juni'      => '06',
        'juli'      => '07',
        'augustus'  => '08',
        'september' => '09',
        'oktober'   => '10',
        'november'  => '11',
        'december'  => '12',
    ];

    /**
     * Converts a Dutch date text into "YYYY-MM-DD".
     *
     * The first "<day> <month name> <year>" sequence in the text is used, so
     * a leading weekday ("za 12 juni 2021"), surrounding whitespace and
     * repeated spaces are all accepted. The month name is matched
     * case-insensitively against $months.
     *
     * @param string $date Date text as found in the price grid.
     * @return string The ISO date, or an empty string when the text holds no
     *                recognisable date.
     */
    private function parseDate(string $date): string
    {
        $found = preg_match('/(\d{1,2})[\s\x{A0}]+([a-z]+)[\s\x{A0}]+(\d{4})/iu', $date, $match);
        if ($found !== 1) {
            return '';
        }

        $month = $this->months[strtolower($match[2])] ?? null;
        if ($month === null) {
            return '';
        }

        return sprintf("%s-%s-%02d", $match[3], $month, $match[1]);
    }
}