File: /www/wwwroot/healthyton.com/wp-scrap/spinbot_get_heightline.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
/**
 * Remove blacklisted elements from an HTML fragment, including everything
 * between their opening and closing tags.
 *
 * @param string $html HTML fragment to clean.
 * @param string $tags Tag name, or a regex alternation of tag names
 *                     (e.g. "script|style"). It is inserted into the pattern
 *                     as-is, so it must not contain the "/" delimiter.
 *
 * @return string The fragment with every matching element removed. If the
 *                regex engine reports an error the input is returned unchanged.
 */
function strip_tags_blacklist($html, $tags) {
    // The tag list is wrapped in a capture group so that an alternation such as
    // "a|b" applies to the tag name only (ungrouped, the "|" would split the whole
    // pattern), and the closing tag is tied to the opening one through \1.
    $pattern = '/<(' . $tags . ')\b[^>]*>.*?<\/\1>/is';
    $cleaned = preg_replace($pattern, '', $html);

    // preg_replace() returns null on failure (e.g. backtrack limit); keep the input then.
    return $cleaned === null ? $html : $cleaned;
}
/**
 * Print a JSON document with the given HTTP status code.
 *
 * @param mixed $payload Data to encode.
 * @param int   $status  HTTP status code to send.
 */
function spinbot_send_json($payload, $status = 200) {
    $json = json_encode($payload);
    if ($json === false) {
        // json_encode() returns false on failure (e.g. invalid UTF-8); report
        // that explicitly instead of printing an empty body.
        $status = 500;
        $json = json_encode(array('error' => 'Could not encode response: ' . json_last_error_msg()));
    }
    http_response_code($status);
    header('Content-Type: application/json');
    echo $json;
}

/**
 * Print a JSON error document and stop the script.
 *
 * @param string $message Human-readable error description.
 * @param int    $status  HTTP status code to send.
 */
function spinbot_fail($message, $status) {
    spinbot_send_json(array('error' => $message), $status);
    exit;
}

/**
 * Return the text of the first node matching a CSS selector.
 *
 * @param object $crawler  DomCrawler instance returned by the Goutte client.
 * @param string $selector CSS selector to look up.
 *
 * @return string|null Node text, or null when nothing matches.
 */
function spinbot_first_text($crawler, $selector) {
    // extract() is given an array: that form is accepted by old and new DomCrawler
    // releases and still yields a flat list when a single attribute is requested.
    $texts = $crawler->filter($selector)->extract(array('_text'));

    return count($texts) > 0 ? $texts[0] : null;
}

/**
 * Reduce the scraped article markup to the plain content that is sent to the spinner.
 *
 * @param string $html Inner HTML of the article container.
 *
 * @return string Cleaned, trimmed markup.
 */
function spinbot_clean_article_html($html) {
    $removals = array(
        // <script>/<style> elements together with their content.
        '/<(script|style)\b[^>]*>.*?<\/\1\s*>/is',
        // Links including their anchor text. \b keeps <abbr>, <article>, ... from being
        // mistaken for <a>; the "s" flag lets a link span several lines.
        '#<a\b[^>]*>.*?</a\s*>#is',
        // Image captions including their text.
        '#<figcaption\b[^>]*>.*?</figcaption\s*>#is',
        // Opening/closing tags of layout elements; their inner content is kept.
        '#<\s*/?(?:center|style|span|ins|div|script|a|img|input|button|figure)\b[^>]*>#i',
    );
    foreach ($removals as $pattern) {
        $result = preg_replace($pattern, '', $html);
        if ($result !== null) {
            // null signals a PCRE failure; in that case keep what we had.
            $html = $result;
        }
    }

    // Labels that introduced the related-article links removed above.
    $html = str_replace(array('See Also:', 'Read Also:'), '', $html);

    // Drop elements left empty by the removals: <strong> first, so that a paragraph
    // which only wrapped an empty <strong> becomes empty and is dropped as well.
    foreach (array('strong', 'p') as $tag) {
        $result = preg_replace('#<' . $tag . '\b[^>]*>(?:\s|&nbsp;|\xC2\xA0)*</' . $tag . '\s*>#i', '', $html);
        if ($result !== null) {
            $html = $result;
        }
    }

    return trim($html);
}

// ---- 1. Resolve and validate the article URL ------------------------------------
$sourceUrl = 'https://heightline.com/ariana-grande-height-weight-dress-and-shoe-size/';
if (isset($_GET['url'])) {
    // "?url[]=..." arrives as an array; anything that is not a string is rejected below.
    $sourceUrl = is_string($_GET['url']) ? trim($_GET['url']) : '';
}
$urlParts = parse_url($sourceUrl);
$urlScheme = isset($urlParts['scheme']) ? strtolower($urlParts['scheme']) : '';
if (empty($urlParts['host']) || !in_array($urlScheme, array('http', 'https'), true)) {
    spinbot_fail('The "url" parameter must be an absolute http(s) URL.', 400);
}
// NOTE(review): the URL is caller-controlled, so this endpoint can be used to make the
// server fetch arbitrary hosts (SSRF). Consider restricting the host to an allow-list.

// ---- 2. Scrape the article -------------------------------------------------------
try {
    $client = new Client();
    $crawler = $client->request('GET', $sourceUrl);

    $name = spinbot_first_text($crawler, 'h1.tdb-title-text');
    $category = spinbot_first_text($crawler, 'a.tdb-entry-category');

    // attr() and html() throw on an empty node list, so check the match count first.
    $ogImage = $crawler->filterXPath('//meta[@property="og:image"]');
    $image = count($ogImage) > 0 ? $ogImage->attr('content') : '';

    $contentNode = $crawler->filter('div.tdb_single_content');
    $html = count($contentNode) > 0 ? $contentNode->html() : '';
} catch (Exception $e) {
    spinbot_fail('Could not fetch the article: ' . $e->getMessage(), 502);
}

$textToSpin = spinbot_clean_article_html($html);
if ($textToSpin === '') {
    spinbot_fail('No article content found at the given URL.', 502);
}

// ---- 3. Send the text to Spinbot -------------------------------------------------
// NOTE(review): the key below is committed to source control; it should be rotated and
// supplied only through the SPINBOT_API_KEY environment variable. The literal is kept
// as a fallback so existing deployments keep working.
$spinbotApiKey = getenv('SPINBOT_API_KEY');
if ($spinbotApiKey === false || $spinbotApiKey === '') {
    $spinbotApiKey = '87b0d1bb9b3c420381ee141837cbc7f5';
}

$header = array(
    "x-auth-key:$spinbotApiKey",
    // optional header values
    'x-spin-cap-words:true',
    'x-words-to-skip:rewrit,nonExistentWordPart',
    'x-min-percent-change-per-sentence:any',
);

$ch = curl_init();
curl_setopt_array($ch, array(
    CURLOPT_URL            => 'https://api.spinbot.com',
    CURLOPT_POST           => true,
    CURLOPT_POSTFIELDS     => $textToSpin,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HEADER         => true, // the response headers carry the remaining-spins counter
    CURLOPT_HTTPHEADER     => $header,
    // Bound the request so a stalled API call cannot hang the script indefinitely.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT        => 120,
));
$response = curl_exec($ch);
if ($response === false) {
    $curlError = curl_error($ch);
    curl_close($ch);
    spinbot_fail('Spinbot request failed: ' . $curlError, 502);
}
// Total size of all header blocks; must be read before the handle is closed.
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
curl_close($ch);

// ---- 4. Separate headers from body -----------------------------------------------
// Splitting at the reported header size works for any number of interim header blocks
// (presumably a "100 Continue" block precedes the final one for larger bodies) and
// never cuts into a body that itself contains a blank line.
$rawHeaders = trim((string) substr($response, 0, $headerSize));
$strResponseBody = (string) substr($response, $headerSize);

// Only the last header block belongs to the final response.
$headerBlocks = explode("\r\n\r\n", $rawHeaders);
$strResponseHeaders = end($headerBlocks);

// Header names are case-insensitive, so normalise the keys before the lookup.
$aHeaders = array_change_key_case(putHeadersTextIntoArray($strResponseHeaders), CASE_LOWER);

// ---- 5. Emit the result ----------------------------------------------------------
$fields = array();
$fields['name'] = trim((string) $name);
$fields['image'] = $image;
$fields['category'] = $category;
$fields['body'] = $strResponseBody;
$fields['available-spins'] = isset($aHeaders['available-spins']) ? $aHeaders['available-spins'] : null;
spinbot_send_json($fields);
/**
 * Parse one raw HTTP header block into an associative array.
 *
 * The first line is stored under the "http_code" key; every following
 * "Name: value" line is stored under its header name (case preserved). When a
 * header name occurs more than once, the last occurrence wins.
 *
 * @param string $header_text Header block with lines separated by CRLF.
 *
 * @return array Map of header name to header value, plus "http_code".
 */
function putHeadersTextIntoArray($header_text) {
    $headers = array();
    foreach (explode("\r\n", $header_text) as $i => $line) {
        if ($i === 0) {
            $headers['http_code'] = $line;
            continue;
        }

        // Split on the first colon only, so values that contain ": " themselves
        // stay intact and "Name:value" (no space) is still understood.
        $parts = explode(':', $line, 2);
        if (count($parts) !== 2) {
            // Blank separator line or malformed header: nothing to record.
            continue;
        }

        $headers[trim($parts[0])] = trim($parts[1]);
    }
    return $headers;
}
?>