File: /www/wwwroot/healthyton.com/wp-scrap/api_healthyceleb.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
/**
 * Remove every <tag ...>...</tag> element, including its content, from an HTML string.
 *
 * The tag name is spliced into the pattern verbatim (it is not escaped). Matching is
 * case-insensitive and the content part may span several lines.
 *
 * @param string $html HTML to clean.
 * @param string $tags Tag name to remove.
 * @return string|null Cleaned HTML (null if the regular expression fails).
 */
function strip_tags_blacklist($html, $tags) {
    $opening = '<' . $tags . '\b[^>]*>';
    $closing = '<\/' . $tags . '>';

    return preg_replace('/' . $opening . '(.*?)' . $closing . '/is', "", $html);
}
// ---------------------------------------------------------------------------
// Scrape one healthyceleb.com profile page and print it as JSON:
//   { "name": ..., "image": ..., "meta": { ... }, "body": ... }
// On failure a JSON object of the form { "error": "<message>" } is printed.
// ---------------------------------------------------------------------------

// Emit a JSON error payload and stop the script.
$fail = function ($message) {
    header('Content-Type: application/json');
    echo json_encode(array('error' => $message));
    exit;
};

$url = 'https://healthyceleb.com/myra-molloy/';
if (isset($_GET['url'])) {
    $url = $_GET['url'];
}

// SECURITY: the URL comes straight from the query string and is fetched server-side.
// Only http(s) URLs on healthyceleb.com (or a sub-domain) are accepted so the script
// cannot be used to probe internal or arbitrary hosts.
$urlParts  = is_string($url) ? parse_url($url) : false;
$urlScheme = isset($urlParts['scheme']) ? strtolower($urlParts['scheme']) : '';
$urlHost   = isset($urlParts['host']) ? strtolower($urlParts['host']) : '';
if (!in_array($urlScheme, array('http', 'https'), true)
    || !preg_match('/(^|\.)healthyceleb\.com$/', $urlHost)) {
    $fail('Invalid URL');
}

$prefix = 'hton_';
$meta = array();

$client = new Client();
try {
    $crawler = $client->request('GET', $url);
} catch (Exception $e) {
    // Network / HTTP client failure: answer with JSON instead of an uncaught exception.
    $fail('Unable to fetch URL');
}

// The first ".column-1" cell of the info table is expected to read "<Full Name> Quick Info".
// extract() gets an array so the call works on DomCrawler versions that require one, and the
// result is stored in a variable because array_shift() on a function result raises a notice.
$titleCells = $crawler->filter('.tablepress .odd .column-1')->extract(array('_text'));
$full_name = isset($titleCells[0]) ? (string) $titleCells[0] : '';
// Strict comparison: a marker found at offset 0 must not be mistaken for "not found".
if (strpos($full_name, 'Quick Info') === false) {
    $fail('Data not found');
}
$full_name = trim(str_replace('Quick Info', '', $full_name));
$meta[$prefix . 'background_full_name'] = $full_name;

$titles = $crawler->filter('h1.entry-title')->extract(array('_text'));
$name = isset($titles[0]) ? $titles[0] : null;

// META FOR YOAST
$meta_desc = 'Who is '.$name.' ? View the latest Biography of '.$name.' and also find Personal Life, estimated Net Worth, Salary, Age, Career & More.';
$meta['_yoast_wpseo_metadesc'] = $meta_desc;

// Featured image from the Open Graph tag; empty string when the tag is missing.
try {
    $image = $crawler->filterXpath('//meta[@property="og:image"]')->attr('content');
} catch (Exception $e) {
    $image = '';
}

// PULL META
// Walk every cell of the info table in document order. Cell 1 is accepted as the height
// when it contains "ft", cell 3 as the weight when it contains "kg", and cell 5 as the
// date of birth when it parses as a date.
// NOTE(review): presumably each row is a (label, value) pair - confirm against the page.
$count = 0;
$crawler->filter('.tablepress tr td')->each(function ($node) use (&$meta, &$count, $prefix) {
    if ($count == 1) { // Height
        $height = convertHeight($node->text());
        if ($height) {
            $meta[$prefix . 'body_height'] = $height;
        }
    } elseif ($count == 3) { // Weight
        $weight = convertWeight($node->text());
        if ($weight) {
            $meta[$prefix . 'body_weight'] = $weight;
        }
    } elseif ($count == 5) { // Date of birth
        $date = convertDOB($node->text());
        if ($date) {
            $meta[$prefix . 'background_dob'] = $date;
        }
    }
    $count++;
});

// Article body. html() throws on an empty selection, so check first.
$content = $crawler->filter('.td-post-content');
if (count($content) === 0) {
    $fail('Data not found');
}
$html = $content->html();

// First paragraph into H3. Only done when a complete "<p>...</p>" pair exists, and only
// that first occurrence is rewritten.
$start = strpos($html, '<p>');
$end = ($start === false) ? false : strpos($html, '</p>', $start);
if ($end !== false) {
    $paragraph = substr($html, $start, $end - $start + 4);
    $paragraph_h3 = str_replace(array('<p>', '</p>'), array('<h3>', '</h3>'), $paragraph);
    $html = substr_replace($html, $paragraph_h3, $start, strlen($paragraph));
}

// Drop these elements together with their content.
$remove_tags_and_content = array('table', 'figcaption', 'div', 'iframe', 'font', 'noscript');
foreach ($remove_tags_and_content as $tg) {
    $html = strip_tags_and_content($html, $tg);
}

// Empty the content of <script>/<style>; the tags themselves are stripped below.
$html = preg_replace('/(<(script|style)\b[^>]*>).*?(<\/\2>)/is', '$1$3', $html);

// Remove links together with their text. The word boundary keeps tags that merely start
// with "a" (<abbr>, <article>, ...) from being treated as the start of a link.
$html = preg_replace('#<a\b.*?>.*?</a>#i', '', $html);

// Strip the remaining listed tags but keep what is inside them.
$strip_tags = "center|style|span|ins|div|script|a|img|input|button|figure|strong|em";
$html = preg_replace("#<\s*\/?(".$strip_tags.")\s*[^>]*?>#im", '', $html);

// Remove paragraphs holding only whitespace, non-breaking spaces (entity or raw UTF-8
// bytes) or <br> tags.
$html = preg_replace('#<p[^>]*>(?:\s|&nbsp;|\xC2\xA0|</?\s?br\s?/?>)*</?p>#', '', $html);
$html = str_replace("<p></p>", "", $html);
$html = str_replace("<p><br></p>", "", $html);

// Collapse runs of whitespace (including new lines) into a single space.
$html = preg_replace('/\s{2,}/', ' ', $html);

// Remove HTML comments. ".*?" with the "s" flag matches the same text as "(.|\s)*?" without
// the per-character alternation that can exhaust PCRE limits on long comments.
$html = preg_replace('/<!--.*?-->/s', '', $html);
$html = trim($html);

$fields = array(
    'name'  => $name,
    'image' => $image,
    'meta'  => $meta,
    'body'  => $html,
);
header('Content-Type: application/json');
echo json_encode($fields);
exit;
/**
 * Date convert: turn a free-form date string into a formatted date.
 *
 * The string is parsed once with strtotime(). The result is compared strictly against
 * false so that a valid timestamp of 0 (1970-01-01 00:00:00 UTC) is not rejected.
 *
 * @param string $datestr Date text, e.g. "March 5, 1990".
 * @param string $format  date() format for the result.
 * @return string|false Formatted date, or false when the text cannot be parsed.
 */
function convertDOB($datestr,$format = 'Y-m-d'){
    $timestamp = strtotime($datestr);
    if ($timestamp === false) {
        return false;
    }
    return date($format, $timestamp);
}
/**
 * Accept a weight string only when it mentions kilograms.
 *
 * @param string $weight Raw cell text.
 * @return string|false The text unchanged when it contains "kg", otherwise false.
 */
function convertWeight($weight){
    $mentionsKg = strpos($weight, 'kg') !== false;

    return $mentionsKg ? $weight : false;
}
/**
 * Accept a height string only when it mentions feet.
 *
 * @param string $height Raw cell text.
 * @return string|false The text unchanged when it contains "ft", otherwise false.
 */
function convertHeight($height){
    // Guard clause: anything without the "ft" marker is rejected.
    if (strpos($height, 'ft') === false) {
        return false;
    }

    return $height;
}
/**
 * Remove every <tag ...>...</tag> element, including its content, from an HTML string.
 *
 * The tag name is escaped and followed by a word boundary, so asking for "p" no longer
 * also removes <pre>, and matching is case-insensitive so <DIV> is handled like <div>.
 * The match is non-greedy: with nested elements of the same tag, removal stops at the
 * first closing tag.
 *
 * @param string $html HTML to clean.
 * @param string $tag  Name of the tag to remove.
 * @return string Cleaned HTML; the input is returned unchanged if the regular expression fails.
 */
function strip_tags_and_content($html,$tag){
    $name = preg_quote($tag, '/');
    $pattern = '/<' . $name . '\b[^>]*>[\s\S]*?<\/' . $name . '\b[^>]*>/i';

    $cleaned = preg_replace($pattern, '', $html);

    // preg_replace() returns null on a PCRE error; do not throw the document away.
    return $cleaned === null ? $html : $cleaned;
}
/**
 * Connectivity check: fetch https://healthyton.com with cURL.
 *
 * @return string|bool Response body, or false when the transfer fails.
 */
function curlTest(){
    $connectTimeout = 10;
    $target = 'https://healthyton.com';

    $handle = curl_init();
    curl_setopt($handle, CURLOPT_URL, $target);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, $connectTimeout);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}
/**
 * Send text to the Spinbot API and return the raw response.
 *
 * The response contains the HTTP response headers followed by the body, because
 * CURLOPT_HEADER is enabled.
 *
 * TLS certificate and host verification are enabled: the request carries an API key and
 * must not be sent to an unverified peer. Verbose cURL logging is off because it writes
 * the request headers, including the key, to stderr.
 *
 * @param string $textToSpin Text posted as the request body.
 * @return string|bool Raw response (headers + body), or false when the transfer fails.
 */
function spinbot($textToSpin){
    $url = 'https://api.spinbot.com';

    // NOTE(review): a credential is hard-coded in source. It is kept as a fallback so
    // existing deployments keep working, but SPINBOT_API_KEY in the environment wins.
    $envKey = getenv('SPINBOT_API_KEY');
    $spinbotApiKey = ($envKey !== false && $envKey !== '')
        ? $envKey
        : '87b0d1bb9b3c420381ee141837cbc7f5';

    $header = array();
    $header[] = "x-auth-key:$spinbotApiKey";
    $header[] = 'x-spin-cap-words:true';
    $header[] = 'x-words-to-skip:rewrit,nonExistentWordPart';
    $header[] = 'x-min-percent-change-per-sentence:any';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $textToSpin);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Verify the certificate chain and that the certificate matches the host name.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

    $response = curl_exec($ch);
    curl_close($ch);

    return $response;
}
/**
 * Normalise scraped text: convert to ISO-8859-1, drop non-breaking spaces, collapse
 * whitespace and trim.
 *
 * Non-breaking spaces are removed both as the "&nbsp;" entity and as the single 0xA0 byte
 * they become after conversion to ISO-8859-1. Ordinary whitespace is kept and every run
 * of it is collapsed to one space.
 *
 * utf8_decode() is deprecated as of PHP 8.2, so mb_convert_encoding() is used when the
 * mbstring extension is available.
 *
 * @param string $str UTF-8 input text.
 * @return string Cleaned ISO-8859-1 text.
 */
function stringClean($str)
{
    if (function_exists('mb_convert_encoding')) {
        $str = mb_convert_encoding($str, 'ISO-8859-1', 'UTF-8');
    } else {
        $str = utf8_decode($str);
    }

    $str = str_replace(array('&nbsp;', "\xA0"), '', $str);
    $str = preg_replace("/\s+/", " ", $str);

    return trim($str);
}
/**
 * Normalise a human-written date and return it as Y-m-d.
 *
 * <sup> tags, commas and periods are removed. Ordinal suffixes (st, nd, rd, th) are
 * removed only when they directly follow a digit, so month names such as "August" stay
 * intact.
 *
 * @param string $date Date text, e.g. "August 1st, 1990" or "5<sup>th</sup> May 2001".
 * @return string|false Date formatted as Y-m-d. When the text cannot be parsed the
 *                      exception is handed to debug(); false is returned if that call returns.
 */
function cleanDate($date){
    $date = trim($date);
    // Unwrap <sup>..</sup> first so "5<sup>th</sup>" becomes "5th".
    $date = preg_replace('#</?sup\b[^>]*>#i', '', $date);
    $date = str_replace(array(',', '.'), '', $date);
    // Ordinal suffix only when attached to a day number.
    $date = preg_replace('/(\d)(?:st|nd|rd|th)\b/i', '$1', $date);

    try {
        $dt = new DateTime($date);
    } catch (Exception $e) {
        debug($e);
        // Only reached if debug() returns; avoids using an undefined $dt.
        return false;
    }

    return $dt->format('Y-m-d');
}
/**
 * Parse a raw HTTP header block into an associative array.
 *
 * The first line is stored under "http_code". Every following line is split at its first
 * colon into name and value, both trimmed. Lines without a colon, such as the blank line
 * ending the header block, are skipped.
 *
 * @param string $header_text Header lines separated by "\r\n".
 * @return array Header values keyed by header name, plus "http_code".
 */
function putHeadersTextIntoArray($header_text) {
    $headers = array();
    foreach (explode("\r\n", $header_text) as $i => $line) {
        if ($i === 0) {
            $headers['http_code'] = $line;
            continue;
        }

        // Limit of 2 keeps colons inside the value (URLs, timestamps) intact.
        $pair = explode(':', $line, 2);
        if (count($pair) !== 2) {
            continue;
        }
        $headers[trim($pair[0])] = trim($pair[1]);
    }
    return $headers;
}
/**
 * Print a value with print_r() inside <pre> tags and, by default, stop the script.
 *
 * @param mixed $arr  Value to dump.
 * @param bool  $exit When true (the default) the script exits after printing.
 * @return void
 */
function debug($arr, $exit = true){
    echo '<pre>' . print_r($arr, true) . '</pre>';

    if (!$exit) {
        return;
    }
    exit;
}
/**
 * Return the text of the first <td itemprop="..."> cell matching the given itemprop.
 *
 * The extracted list is stored in a variable before array_shift() is applied, because
 * array_shift() takes its argument by reference and raises a notice on a function result.
 * extract() is given an array so the call also works where a string is not accepted.
 *
 * @param object $crawler   Crawler-like object exposing filterXpath()->extract().
 * @param string $meta_name Value of the itemprop attribute to look for (inserted into the
 *                          XPath expression as-is).
 * @return string|null Text of the first matching cell, or null when nothing matches.
 */
function get_meta($crawler, $meta_name){
    $texts = $crawler->filterXpath('//td[@itemprop="'.$meta_name.'"]')->extract(array('_text'));

    return array_shift($texts);
}
/**
 * Split a full name into first, middle and last name.
 *
 * The name is split on runs of whitespace and empty pieces are ignored, so leading,
 * trailing or repeated spaces do not produce empty name parts. The first word is the first
 * name, the last remaining word the last name, and everything in between the middle name.
 *
 * @param string $name Full name.
 * @return array Keys "firstname" (string, '' for empty input), "lastname" (string, or null
 *               when there is no second word) and "middlename" (string, possibly '').
 */
function splitName($name) {
    $parts = preg_split('/\s+/', trim((string) $name), -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        $parts = array();
    }

    $first = $parts ? array_shift($parts) : '';
    $last = array_pop($parts);

    return array(
        'firstname' => $first,
        'lastname' => $last,
        'middlename' => implode(' ', $parts)
    );
}