File: /www/wwwroot/healthyton.com/wp-scrap/api_mddailyrecord.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
/**
 * Remove blacklisted elements (opening tag, content and closing tag) from HTML.
 *
 * @param string          $html HTML fragment to clean.
 * @param string|string[] $tags A tag name, a "|"-separated list of tag names
 *                              (e.g. "script|style"), or an array of tag names.
 * @return string The HTML without the blacklisted elements. The input is
 *                returned unchanged when no tag is given or the regex fails.
 */
function strip_tags_blacklist($html, $tags) {
    if (is_array($tags)) {
        $quoted = array();
        foreach ($tags as $tag) {
            $tag = trim((string) $tag);
            if ($tag !== '') {
                $quoted[] = preg_quote($tag, '/');
            }
        }
        $tags = implode('|', $quoted);
    }
    if ($tags === null || $tags === '') {
        return $html;
    }
    // The alternation must be grouped, otherwise "a|b" splits the whole
    // pattern in two. \1 makes the closing tag match the opening one.
    $result = preg_replace('/<(' . $tags . ')\b[^>]*>.*?<\/\1\s*>/is', '', $html);
    // preg_replace() yields null on PCRE errors; never lose the document.
    return $result === null ? $html : $result;
}
// ---------------------------------------------------------------------------
// Endpoint: fetch one profile page, pull the fact table and the article body
// out of it, and print the result as JSON: {name, image, meta, body}.
// ---------------------------------------------------------------------------

// Default page, used when the caller does not pass ?url=...
$url = 'https://mddailyrecord.com/nazanin-kavari-net-worth-boyfriend-husband-kids-height-weight-age-lesbian-bio-2021-2022-2023';
if (isset($_GET['url'])) {
    $url = $_GET['url'];
}

// The URL is caller-supplied, so only plain web URLs may be fetched.
// NOTE(review): the host is not restricted, so this endpoint can still be
// pointed at internal services; consider an allow-list of hosts.
$scheme = is_string($url) ? parse_url($url, PHP_URL_SCHEME) : null;
if (!is_string($scheme) || !in_array(strtolower($scheme), array('http', 'https'), true)) {
    header('Content-Type: application/json');
    echo json_encode(array('error' => 'Invalid URL'));
    exit;
}

$prefix = 'hton_';
$meta = array();
$name = '';

$client = new Client();
$crawler = $client->request('GET', $url);

// Social preview image; pages without an og:image tag yield an empty string.
try {
    $image = $crawler->filterXpath('//meta[@property="og:image"]')->attr('content');
} catch (Exception $e) {
    $image = '';
}

// Table label => meta key suffix, for values that are copied verbatim.
$plainFields = array(
    'Real Name/Full Name' => 'background_full_name',
    'Birth Place'         => 'background_birthplace',
    'Eye Color'           => 'body_eye_color',
    'Hair Color'          => 'body_hair_color',
    'School'              => 'education_high_school',
    'Religion'            => 'background_religion',
    'Nationality'         => 'background_nationality',
    'Profession'          => 'background_profession',
    'Net Worth'           => 'net_worth',
    'Gender'              => 'body_gender',
);
// Table label => meta key suffix, for values whose line breaks are joined
// with ", ".
$multiLineFields = array(
    'Height/How Tall' => 'body_height',
    'Weight'          => 'body_weight',
);

foreach (getTableData($crawler) as $key => $val) {
    // Numeric-looking labels become integer array keys; compare as strings so
    // they can never loosely match a field label.
    $key = (string) $key;
    if (isset($plainFields[$key])) {
        $meta[$prefix . $plainFields[$key]] = $val;
    } elseif (isset($multiLineFields[$key])) {
        $meta[$prefix . $multiLineFields[$key]] = preg_replace('~[\r\n]+~', ', ', $val);
    } elseif ($key === 'Nick Name/Celebrated Name') {
        $name = $val;
    } elseif ($key === 'Date Of Birth/Birthday') {
        $meta[$prefix . 'background_dob'] = convertDOB($val);
    }
}

// Article body. html() throws when the container is missing, so report that
// as a JSON error instead of dying with an uncaught exception.
try {
    $html = $crawler->filter('.td-ss-main-content')->html();
} catch (Exception $e) {
    header('Content-Type: application/json');
    echo json_encode(array('error' => 'Data not found'));
    exit;
}

// Promote the first plain <p>...</p> to an <h3>. Skipped when either tag is
// absent; otherwise the offsets would be false and an arbitrary slice of the
// document would be rewritten.
$start = strpos($html, '<p>');
$end = ($start === false) ? false : strpos($html, '</p>', $start);
if ($start !== false && $end !== false) {
    $paragraph = substr($html, $start, $end - $start + 4);
    $heading = str_replace(array('<p>', '</p>'), array('<h3>', '</h3>'), $paragraph);
    // Replace in place so an identical paragraph further down stays a <p>.
    $html = substr_replace($html, $heading, $start, strlen($paragraph));
}

// Elements removed together with their content.
foreach (array('table', 'figcaption', 'div', 'iframe', 'font', 'noscript') as $tg) {
    $html = strip_tags_and_content($html, $tg);
}
// Empty out <script>/<style> bodies (the bare tags are stripped below).
$html = preg_replace('/(<(script|style)\b[^>]*>).*?(<\/\2>)/is', '$1$3', $html);
// Drop links with their text. \b keeps this from matching <abbr>, <article>,
// <aside>... and deleting everything up to the next </a>.
$html = preg_replace('#<a\b[^>]*>.*?</a>#i', '', $html);
// Tags that are removed while their content is kept.
$strip_tags = "center|style|span|ins|div|script|a|img|input|button|figure|strong|em|meta|footer|form";
$html = preg_replace("#<\s*\/?(".$strip_tags.")\s*[^>]*?>#im", '', $html);
// Paragraphs holding only whitespace, non-breaking spaces (entity or raw
// UTF-8 bytes) or line breaks.
$html = preg_replace('#<p[^>]*>(\s|&nbsp;|\xC2\xA0|</?\s?br\s?/?>)*</?p>#', '', $html);
$html = str_replace("<p></p>","",$html);
$html = str_replace("<p><br></p>","",$html);
// Collapse runs of whitespace into a single space.
$html = preg_replace('/[\s\t\n]{2,}/', ' ', $html);
$html = str_replace("Save my name, email, and website in this browser for the next time I comment.","",$html);
// Remove HTML comments. A lazy dot with /s is used instead of (.|\s)*? so a
// long comment cannot exhaust PCRE limits and turn the body into null.
$html = preg_replace('/<!--.*?-->/s', '', $html);
$html = trim($html);

// Yoast SEO fields.
$meta['_yoast_wpseo_metadesc'] = 'Who is '.$name.' ? Also find Personal Life, estimated Net Worth, Salary, Age, Career & Full Biography of '.$name.'.';
$meta['_yoast_wpseo_title'] = $name.' - Bio, Net Worth, Salary Age, Height, Weight, Wiki, Health, Facts and Family';

$fields = array(
    'name'  => $name,
    'image' => $image,
    'meta'  => $meta,
    'body'  => $html,
);
header('Content-Type: application/json');
echo json_encode($fields);
exit;
/**
 * Collect the text of every "table tr td" cell and pair the cells up as
 * label => value, in document order.
 *
 * Cells are consumed two at a time: the first of each pair is the label (with
 * leading/trailing ":" removed), the second is the value. A trailing label
 * without a value cell is dropped.
 *
 * @param object $crawler Crawler exposing filter()->each() over nodes with text().
 * @return array Map of label => value.
 */
function getTableData($crawler){
    // each() hands back the callback results in node order.
    $cells = $crawler->filter('table tr td')->each(function ($cell) {
        return $cell->text();
    });

    $pairs = array();
    foreach (array_chunk($cells, 2) as $pair) {
        if (isset($pair[1])) {
            $pairs[trim($pair[0], ':')] = $pair[1];
        }
    }
    return $pairs;
}
/**
 * Replace every UTF-8 non-breaking space (U+00A0) with a regular space.
 *
 * The bytes are replaced directly instead of round-tripping the text through
 * htmlentities()/html_entity_decode(): that round trip passed null as the
 * flags argument (deprecated since PHP 8.1) and returned an empty string for
 * input that is not valid UTF-8.
 *
 * @param string $content UTF-8 text.
 * @return string The text with non-breaking spaces turned into spaces.
 */
function replaceNbspWithSpace($content){
    // "\xC2\xA0" is the UTF-8 encoding of U+00A0.
    return str_replace("\xC2\xA0", ' ', (string) $content);
}
// Date conversion
/**
 * Convert a human-written date such as "12th May 1990" into a formatted date.
 *
 * Non-breaking spaces, commas and ordinal suffixes (st/nd/rd/th after a
 * digit) are normalised before parsing, and the result honours $format.
 *
 * @param string $datestr Date text as scraped from the page.
 * @param string $format  date()-style output format.
 * @return string The formatted date, or '' when the text cannot be parsed.
 */
function convertDOB($datestr,$format = 'Y-m-d'){
    // Non-breaking spaces (raw UTF-8 bytes or the entity) act as separators.
    $clean = str_replace(array("\xC2\xA0", '&nbsp;'), ' ', (string) $datestr);
    // Strip the ordinal suffix only when it follows a digit, so month and
    // day names are never touched.
    $clean = preg_replace('/(\d)(?:st|nd|rd|th)\b/i', '$1', $clean);
    // Commas and whitespace runs collapse into single spaces.
    $clean = trim(preg_replace('/[\s,]+/', ' ', $clean));
    if ($clean === '') {
        // date_create('') would silently mean "now".
        return '';
    }
    $date = date_create($clean);
    if ($date === false) {
        return '';
    }
    return $date->format($format);
}
/**
 * Normalise a scraped string: convert it from UTF-8 to ISO-8859-1, delete
 * every occurrence of the filler literal, squeeze each whitespace run into a
 * single space and trim the ends.
 *
 * NOTE(review): the search literal may be a non-breaking space rather than a
 * plain space - confirm the actual character in the file.
 *
 * @param string $str UTF-8 input.
 * @return string The cleaned, ISO-8859-1 encoded string.
 */
function clean($str)
{
    $decoded = utf8_decode($str);
    $squeezed = preg_replace("/\s+/", " ", str_replace(" ", "", $decoded));
    return trim($squeezed);
}
/**
 * Accept a weight only when it is expressed in kilograms.
 *
 * @param string $weight Weight text.
 * @return string|false The unchanged text when it contains "kg", false otherwise.
 */
function convertWeight($weight){
    return strpos($weight, 'kg') === false ? false : $weight;
}
/**
 * Accept a height only when it is expressed in feet.
 *
 * @param string $height Height text.
 * @return string|false The unchanged text when it contains "ft", false otherwise.
 */
function convertHeight($height){
    $hasFeet = strpos($height, 'ft') !== false;
    if (!$hasFeet) {
        return false;
    }
    return $height;
}
/**
 * Remove every <tag ...>...</tag> element, including its content, from HTML.
 *
 * The tag name is matched as a whole word and case-insensitively, so
 * stripping "b" no longer starts at "<br>" and swallows the text up to the
 * next "</b>".
 *
 * @param string $html HTML fragment.
 * @param string $tag  Name of the element to remove, e.g. "div".
 * @return string The HTML without those elements; the input is returned
 *                unchanged if the regex engine fails.
 */
function strip_tags_and_content($html,$tag){
    $name = preg_quote($tag, '/');
    $stripped = preg_replace(
        '/<' . $name . '\b[^>]*>[\s\S]*?<\/' . $name . '\s*>/i',
        '',
        $html
    );
    // preg_replace() returns null on PCRE errors (e.g. backtrack limit on a
    // huge page); keep the original markup rather than wiping it.
    return $stripped === null ? $html : $stripped;
}
/**
 * Connectivity check: GET https://healthyton.com with a 10 second connect
 * timeout and hand back whatever curl_exec() returns.
 *
 * @return string|bool Result of curl_exec() with CURLOPT_RETURNTRANSFER set.
 */
function curlTest(){
    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => 'https://healthyton.com',
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => 10,
    ));
    $body = curl_exec($handle);
    curl_close($handle);
    return $body;
}
/**
 * POST text to the Spinbot API and return the raw HTTP response.
 *
 * CURLOPT_HEADER is enabled, so the returned string contains the response
 * headers followed by the body.
 *
 * @param string $textToSpin Text sent as the request body.
 * @return string|bool Raw response (headers + body), or false when the
 *                     transfer fails.
 */
function spinbot($textToSpin){
    // NOTE(review): this key is committed to source control. It can be
    // overridden with the SPINBOT_API_KEY environment variable; the literal
    // is kept only as a backward-compatible fallback and should be rotated.
    $spinbotApiKey = getenv('SPINBOT_API_KEY');
    if ($spinbotApiKey === false || $spinbotApiKey === '') {
        $spinbotApiKey = '87b0d1bb9b3c420381ee141837cbc7f5';
    }

    $header = array(
        "x-auth-key:$spinbotApiKey",
        'x-spin-cap-words:true',
        'x-words-to-skip:rewrit,nonExistentWordPart',
        'x-min-percent-change-per-sentence:any',
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, 'https://api.spinbot.com');
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $textToSpin);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Same connect timeout as curlTest(), so an unreachable host cannot hang
    // the request indefinitely.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    // Certificate and host name are verified: the request carries the API
    // key, so it must not be sent to an unauthenticated peer.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);

    $response = curl_exec($ch);
    curl_close($ch);
    return $response;
}
/**
 * Convert a UTF-8 string to ISO-8859-1, delete every occurrence of the filler
 * literal, reduce each whitespace run to one space and trim the result.
 *
 * NOTE(review): the search literal may be a non-breaking space rather than a
 * plain space - confirm the actual character in the file.
 *
 * @param string $str UTF-8 input.
 * @return string The cleaned, ISO-8859-1 encoded string.
 */
function stringClean($str)
{
    $str = str_replace(" ", "", utf8_decode($str));
    return trim(preg_replace("/\s+/", " ", $str));
}
/**
 * Normalise a free-form date ("1st August, 1990", "12<sup>th</sup> May 1990")
 * to Y-m-d.
 *
 * Commas and dots are removed, <sup> markup is dropped, and ordinal suffixes
 * (st/nd/rd/th) are stripped only when they follow a digit, so names such as
 * "August" stay intact.
 *
 * @param string $date Date text.
 * @return string|false The date as Y-m-d. When the text cannot be parsed the
 *                      exception is passed to debug(), which prints it and
 *                      exits by default; false is returned if it does not.
 */
function cleanDate($date){
    $date = trim($date);
    $date = str_replace(array(',', '.'), '', $date);
    // "12<sup>th</sup>" -> "12th", so the suffix rule below can see it.
    $date = preg_replace('#</?sup[^>]*>#i', '', $date);
    $date = preg_replace('/(\d)(?:st|nd|rd|th)\b/i', '$1', $date);
    try {
        $dt = new DateTime($date);
    } catch (Exception $e) {
        debug($e);
        // Only reached if debug() is changed to not exit.
        return false;
    }
    return $dt->format('Y-m-d');
}
/**
 * Parse a raw HTTP header block into an associative array.
 *
 * The first line is stored under "http_code"; every following
 * "Name: value" line becomes Name => value. Lines without a colon (such as
 * the blank line that ends the header block) are ignored, and only the first
 * colon separates name from value, so values may contain ": " themselves.
 *
 * @param string $header_text Header lines separated by CRLF.
 * @return array Map of header name => value, plus "http_code".
 */
function putHeadersTextIntoArray($header_text) {
    $headers = array();
    foreach (explode("\r\n", $header_text) as $i => $line) {
        if ($i === 0) {
            $headers['http_code'] = $line;
            continue;
        }
        $parts = explode(':', $line, 2);
        if (count($parts) !== 2) {
            continue;
        }
        $key = trim($parts[0]);
        if ($key === '') {
            continue;
        }
        $headers[$key] = trim($parts[1]);
    }
    return $headers;
}
/**
 * Dump a value inside <pre> tags and, unless told otherwise, stop the script.
 *
 * @param mixed $arr  Value to print with print_r().
 * @param bool  $exit When true (the default) the script exits after printing.
 */
function debug($arr, $exit = true){
    echo '<pre>', print_r($arr, true), '</pre>';
    if (!$exit) {
        return;
    }
    exit;
}
/**
 * Return the text of the first <td itemprop="..."> cell on the page.
 *
 * @param object $crawler   Crawler exposing filterXpath()->extract().
 * @param string $meta_name Value of the itemprop attribute to look for.
 * @return string|null Text of the first matching cell, or null when there is none.
 */
function get_meta($crawler, $meta_name){
    // extract() is given an array because newer DomCrawler releases no longer
    // accept a bare string. The result is stored in a variable because
    // array_shift() takes its argument by reference and raised a notice when
    // fed a function result directly.
    $values = $crawler->filterXpath('//td[@itemprop="'.$meta_name.'"]')->extract(array('_text'));
    if (!is_array($values) || count($values) === 0) {
        return null;
    }
    return reset($values);
}
/**
 * Split a full name into first, last and middle parts.
 *
 * The name is split on runs of whitespace, so leading, trailing or repeated
 * spaces no longer produce empty parts. The first word is the first name,
 * the final word the last name, and whatever remains is the middle name.
 *
 * @param string $name Full name.
 * @return array ['firstname' => string, 'lastname' => string|null,
 *               'middlename' => string]; lastname is null when the name has
 *               fewer than two words.
 */
function splitName($name) {
    $parts = preg_split('/\s+/', trim((string) $name), -1, PREG_SPLIT_NO_EMPTY);
    if ($parts === false) {
        $parts = array();
    }
    $first = array_shift($parts);
    $last = array_pop($parts);
    return array(
        // An empty input still yields an empty-string first name.
        'firstname' => $first === null ? '' : $first,
        'lastname' => $last,
        'middlename' => join(' ', $parts)
    );
}