Photo) === 0) {
            break;
        }

        // Loop back photos
        foreach ($sxe->Photo as $photo) {
            // SimpleXML to stdClass
            yield json_decode(json_encode($photo));
        }

        // If less photos than pageSize, break the loop to not have to go to an empty page
        if (count($sxe->Photo) < $pageSize) {
            break;
        }

        $page++;
    }

    curl_close($curl);
}

/**
 * Load a gallery page and derive gallery metadata from its <title> element.
 *
 * The title is stripped of the site prefix, searched for a date written as
 * YYYY-MM-DD or YYYYMMDD (dashes optional), and whatever remains after
 * removing the date and digit runs becomes the group name.
 *
 * @param string      $url     Gallery page URL, loaded with DOMDocument::loadHTMLFile().
 * @param string|null $takenOn Fallback date (any string strtotime() can parse); only
 *                             used when the page title contains no date.
 *
 * @return stdClass Object with the properties `url`, `date` (Y-m-d, or an empty
 *                  string when no date could be determined) and `group`.
 */
function getPhotoGalleryMetadataByUrl(string $url, ?string $takenOn = null) : stdClass
{
    // Set up a dom document and fetch the url
    $doc = new DOMDocument();
    $doc->loadHTMLFile($url);

    // The page may have no <title> (or may have failed to load); treat that as an empty title
    $titleNode = $doc->getElementsByTagName('title')->item(0);
    $pageTitle = $titleNode !== null ? (string) $titleNode->textContent : '';
    $pageTitle = trim(str_replace('Sportgardens fotogalleri |', '', $pageTitle));

    // Match out date from title; stays empty when the title carries no date
    $galleryDate = '';
    if (preg_match('#(\d\d\d\d-?\d\d-?\d\d)#', $pageTitle, $matches) === 1) {
        $galleryDate = $matches[0];
    }

    // Clean the date out of the title
    $galleryTitle = $pageTitle;
    if ($galleryDate !== '') {
        $galleryTitle = str_replace($galleryDate, '', $galleryTitle);
    }
    // Drop everything from the first to the last remaining digit, stray " -" separators,
    // and collapse whitespace runs
    $galleryTitle = preg_replace('#\d.+\d#', '', $galleryTitle);
    $galleryTitle = preg_replace('#\s-#', '', $galleryTitle);
    $galleryTitle = preg_replace('#\s+#', ' ', $galleryTitle);

    if ($galleryDate !== '') {
        // The pattern allows each dash to be absent independently (e.g. "2016-0101"),
        // so normalise via the bare digits instead of only handling the 8-character form
        $digits = str_replace('-', '', $galleryDate);
        $galleryDate = sprintf(
            '%s-%s-%s',
            substr($digits, 0, 4),
            substr($digits, 4, 2),
            substr($digits, 6, 2)
        );
    } elseif ($takenOn) {
        // Fall back on the photo date, but never turn an unparsable value into 1970-01-01
        $timestamp = strtotime($takenOn);
        if ($timestamp !== false) {
            $galleryDate = date('Y-m-d', $timestamp);
        }
    }

    // Put metadata together
    return (object) [
        'url' => $url,
        'date' => $galleryDate,
        'group' => trim($galleryTitle),
    ];
}

/**
 * Yield the metadata of every distinct gallery that the photos of a photo set belong to.
 *
 * The gallery URL is the photo's PageUrl with its last path segment removed; each
 * gallery URL is resolved once and yielded once.
 *
 * @param int $photoSetId Identifier passed on to getPhotosByPhotoSetId().
 *
 * @return \Generator Yields the stdClass objects built by getPhotoGalleryMetadataByUrl().
 */
function getPhotoSetUrlsByPhotoSetId(int $photoSetId) : \Generator
{
    $photoSetUrls = [];

    foreach (getPhotosByPhotoSetId($photoSetId) as $photo) {
        // NOTE(review): if photos come from a json_decode(json_encode(SimpleXMLElement))
        // round-trip, an empty XML element arrives as an empty stdClass rather than a
        // string - confirm. Such values are skipped / ignored instead of passed on.
        $pageUrl = $photo->PageUrl ?? null;
        if (!is_string($pageUrl) || $pageUrl === '') {
            continue;
        }
        $takenOn = $photo->TakenOn ?? null;
        if (!is_string($takenOn)) {
            $takenOn = null;
        }

        $galleryUrl = preg_replace('#/\w+$#', '', $pageUrl);

        if (!isset($photoSetUrls[$galleryUrl])) {
            $photoSetUrls[$galleryUrl] = getPhotoGalleryMetadataByUrl($galleryUrl, $takenOn);
            yield $photoSetUrls[$galleryUrl];
        }
    }
}

// Map years to photo gallery ids
$photoGalleryIds = [
    2016 => 3742586428944470221,
    2017 => 3742586429170821662,
    2018 => 3742586428889977272,
];

// Check arguments; both may be absent when the script is run without parameters
$option = $argv[1] ?? null;
$year = $argv[2] ?? null;

if ($option === '--year' && $year !== null && isset($photoGalleryIds[$year])) {
    // Loop through all galleries
    foreach (getPhotoSetUrlsByPhotoSetId($photoGalleryIds[$year]) as $photoGallery) {
        echo $photoGallery->group.' - '.$photoGallery->date.' - '.$photoGallery->url.PHP_EOL;
    }

    return;
}

// Print help
echo 'Usage: php '.$argv[0].' --year [year]'.PHP_EOL;
echo 'Valid years: '.implode(', ', array_keys($photoGalleryIds)).PHP_EOL;