File: /www/wwwroot/healthyton.com/wp-scrap/single.php
<?php
require 'vendor/autoload.php';
use Goutte\Client;
// Site base URL (used when building GUIDs) and MySQL connection settings.
$base_url = "http://netwp.test/";
$hostname = "localhost";
$username = "root";
$password = "root";
$dbname = "project_netwp";

// Open the database handle shared by every helper below; stop if it is unavailable.
$dbhandle = mysqli_connect($hostname, $username, $password, $dbname);
if (!$dbhandle) {
    die("Unable to connect to MySQLi");
}
// SAVE THUMBNAIL IMAGE
// Downloads a remote image into wp-content/uploads/<year>/<month>/ and inserts
// a matching 'attachment' row into wp_posts.
$now = time();
$post_fields = array(
    'post_author'       => 1,
    // post_date/post_modified hold local time; the *_gmt columns hold UTC.
    'post_date'         => date('Y-m-d H:i:s', $now),
    'post_date_gmt'     => gmdate('Y-m-d H:i:s', $now),
    'post_modified'     => date('Y-m-d H:i:s', $now),
    'post_modified_gmt' => gmdate('Y-m-d H:i:s', $now),
    'post_title'        => 'Ashok PHOTO URL', // @Todo photo
    'post_type'         => 'attachment',
    'post_mime_type'    => 'image/jpeg',
    'post_status'       => 'inherit',
    'post_parent'       => 57, // @Todo parent id
    'comment_status'    => 'closed',
    'ping_status'       => 'closed',
);
$post_fields['post_name'] = seo_dash_url($post_fields['post_title']);

$url = 'https://networthpost.com/wp-content/uploads/profiles/2015-11-16/James-Arness-net-worth.jpeg';
$file_ext = pathinfo($url, PATHINFO_EXTENSION);
// Path relative to the site root; reused for both the file on disk and the GUID.
$image_path = 'wp-content/uploads/'.date('Y', $now).'/'.date('m', $now).'/'.$post_fields['post_name'].'.'.$file_ext;
// Absolute destination: the site root is taken to be the parent of this script's directory.
$img = dirname(__DIR__).'/'.$image_path;

// fopen() cannot create missing parent directories, so make the year/month folder first.
$img_dir = dirname($img);
if (!is_dir($img_dir) && !mkdir($img_dir, 0755, true)) {
    die("Unable to create upload directory: ".$img_dir);
}
$fp = fopen($img, 'wb');
if ($fp === false) {
    die("Unable to open image file for writing: ".$img);
}
$ch = curl_init($url);
if ($ch === false) {
    fclose($fp);
    unlink($img);
    die("Unable to initialise cURL for: ".$url);
}
curl_setopt($ch, CURLOPT_FILE, $fp);
curl_setopt($ch, CURLOPT_HEADER, 0);
// Treat HTTP error responses as failures so an error page is not stored as the image.
curl_setopt($ch, CURLOPT_FAILONERROR, true);
$downloaded = curl_exec($ch);
$curl_error = curl_error($ch);
curl_close($ch);
fclose($fp);
if ($downloaded === false) {
    // Remove the empty/partial file before giving up.
    unlink($img);
    die("Unable to download image: ".$curl_error);
}

$post_fields['guid'] = $base_url.$image_path;
// NOTE: the exit below ends the script here; nothing after this line runs while it is in place.
createWPPost($dbhandle, $post_fields); exit;
// echo getCategoryId($dbhandle,'Politician');exit;
// SCRAPE PROFILE PAGE
// Fetches one profile page and stores its title, category and tags as a draft post.
$url = "https://networthpost.com/james-arness-net-worth/";
$fields = array();
// Labels found in the page's info table => meta key each value is stored under.
$mapping = array(
    'Full Name'      => 'qfacts_full_name',
    'Net Worth'      => 'qfacts_net_worth',
    'Date Of Birth'  => 'qfacts_dob',
    'Died'           => 'qfacts_died_on',
    'Place Of Birth' => 'qfacts_birth_place',
    'Height'         => 'qfacts_height',
    'Weight'         => 'qfacts_weight',
    'Profession'     => 'qfacts_profession',
    'Education'      => 'qfacts_education',
    'Nationality'    => 'qfacts_nationality',
    'Spouse'         => 'qfacts_spouse',
    'Children'       => 'qfacts_children',
    'Parents'        => 'qfacts_parents',
    'Siblings'       => 'qfacts_siblings',
    'Nicknames'      => 'qfacts_nicknames',
    'Awards'         => 'qfacts_awards',
    'Nominations'    => 'qfacts_nominations',
    'Movies'         => 'qfacts_movies',
    'TV Shows'       => 'qfacts_tv_shows',
);

$client = new Client();
$crawler = $client->request('GET', $url);

// extract() returns one entry per matched node; use the first match when there is one.
$titles = $crawler->filter('h1.post-title')->extract('_text');
$name = str_replace(' Net Worth', '', isset($titles[0]) ? $titles[0] : '');
$images = $crawler->filter('.entry-content img.g_banner_div')->extract('src');
$image = isset($images[0]) ? $images[0] : null;

// attr() throws on an empty node list, so only read it when the meta tag exists.
$category_meta = $crawler->filterXpath('//meta[@property="article:section"]');
$category = $category_meta->count() > 0 ? (string) $category_meta->attr('content') : '';

// GET TAGS
$tags = $crawler->filter('span.tagcloud a')->extract('_text');

// Start from an empty array so the attachment-only values set for the thumbnail
// (post_type, post_mime_type, post_parent, guid) do not leak into this post.
$now = time();
$post_fields = array(
    'post_author'       => 1,
    // post_date/post_modified hold local time; the *_gmt columns hold UTC.
    'post_date'         => date('Y-m-d H:i:s', $now),
    'post_date_gmt'     => gmdate('Y-m-d H:i:s', $now),
    'post_modified'     => date('Y-m-d H:i:s', $now),
    'post_modified_gmt' => gmdate('Y-m-d H:i:s', $now),
    'post_title'        => $name,
    // Stated explicitly instead of relying on the column default.
    'post_type'         => 'post',
    'post_status'       => 'draft',
    'post_name'         => seo_dash_url($name),
    'comment_status'    => 'closed',
    'ping_status'       => 'closed',
    'tags'              => $tags,
);
// createWPPost() only links a category when the key is present, so omit it when the page has none.
if ($category !== '') {
    $post_fields['category'] = $category;
}
// NOTE: the exit below ends the script here; the tab parsing that follows never runs while it is in place.
createWPPost($dbhandle, $post_fields); exit;
// CHECK IF NAME EXISTS
// if(!isPostExists($dbhandle,'Ashok Basnet')){
// echo 'new data';
// }
// PARSE PROFILE TABS
// Walks the tab labels of the scraped page and collects each tab's table cells into $fields.
$tabs = $crawler->filter('.container-wrapper ul.tabs-menu li')->extract('_text');

// Tabs handled identically: every second cell (index 1, 3, 5, ...) of the matched
// table is kept. tab label => array(cell selector, key under $fields).
$simple_tabs = array(
    'Trademarks' => array('table.trademarks td', 'trademarks'),
    'Quotes'     => array('table.quotes td', 'quotes'),
    'Facts'      => array('table.interesting_facts td', 'quick_facts'),
);

foreach ($tabs as $tab) {
    if (isset($simple_tabs[$tab])) {
        list($selector, $field_key) = $simple_tabs[$tab];
        $cells = $crawler->filter($selector)->extract('_text');
        $cell_count = count($cells);
        $list = array();
        for ($i = 1; $i < $cell_count; $i += 2) {
            $list[] = $cells[$i];
        }
        $fields[$field_key] = json_encode($list);
        continue;
    }

    switch ($tab) {
        case 'Structural Info':
            // Cells alternate label, value; only labels present in $mapping are stored.
            $cells = $crawler->filter('#tab-content-0 table td')->extract('_text');
            $cell_count = count($cells);
            for ($i = 0; $i + 1 < $cell_count; $i += 2) {
                $label = $cells[$i];
                if (isset($mapping[$label])) {
                    $fields[$mapping[$label]] = $cells[$i + 1];
                }
            }
            break;

        case 'Filmography':
            // Each heading names a group; its table carries the heading's seourl() form as a class.
            $list = array();
            $headings = $crawler->filter('#tab-content-6 h3.toggle-head')->extract('_text');
            foreach ($headings as $heading) {
                $group = seourl($heading);
                if ($group === '') {
                    // An empty class name would produce an invalid selector.
                    continue;
                }
                $cells = $crawler->filter('table.'.$group.' td')->extract('_text');
                $rows = cellsToRows($cells, array('title', 'year', 'status', 'character'));
                if (count($rows) > 0) {
                    $list[$group] = $rows;
                }
            }
            $fields['filmography'] = json_encode($list);
            break;

        case 'Awards':
            $list = array();
            $award_keys = array('year', 'award', 'ceremony', 'nomination', 'movie');
            $won_cells = $crawler->filter('#tab-content-7 .toggle:nth-child(1) table td')->extract('_text');
            $won = cellsToRows($won_cells, $award_keys);
            if (count($won) > 0) {
                $list['won'] = $won;
            }
            // NOTE(review): assumes the nominations table sits in the second .toggle
            // of #tab-content-7 — confirm against the page markup.
            $nominated_cells = $crawler->filter('#tab-content-7 .toggle:nth-child(2) table td')->extract('_text');
            $nominated = cellsToRows($nominated_cells, $award_keys);
            if (count($nominated) > 0) {
                $list['nominated'] = $nominated;
            }
            $fields['awards'] = json_encode($list);
            break;

        default:
            // Other tabs are not imported.
            break;
    }
}

// Dump the tab labels found on the page.
debug($tabs);

// $name and $image are plain strings (or unset/null when the page had no match).
$fields['name'] = (isset($name) && is_string($name)) ? $name : '';
$fields['image'] = (isset($image) && is_string($image)) ? $image : '';
debug($fields);

/**
 * Groups a flat list of table cell texts into rows keyed by column name.
 *
 * Cells are consumed count($keys) at a time; a trailing partial row is padded
 * with null so every row has the same keys.
 *
 * @param array $cells Flat list of cell texts in document order.
 * @param array $keys  Column names, one per cell of a row (must not be empty).
 * @return array List of rows, each an array of column name => cell text.
 */
function cellsToRows($cells, $keys) {
    $width = count($keys);
    $rows = array();
    foreach (array_chunk($cells, $width) as $chunk) {
        $rows[] = array_combine($keys, array_pad($chunk, $width, null));
    }
    return $rows;
}
/**
 * Inserts a row into wp_posts and links it to its category and tags.
 *
 * @param mysqli $dbhandle    Open database connection.
 * @param array  $post_fields Column => value pairs for wp_posts. The optional keys
 *                            'category' (term name) and 'tags' (list of term names)
 *                            are not columns; they are resolved through
 *                            getCategoryId() / getTagId() and stored as relationships.
 * @return int ID of the inserted post, or 0 when nothing was inserted.
 */
function createWPPost($dbhandle, $post_fields){
    // @Todo
    // Create thumbnail as wordpress post and attach to the post in wp_post_meta
    // @Todo
    // Save Other fields of meta in CMB2

    // wp_posts declares these text columns NOT NULL without a default, so MySQL
    // strict mode rejects an INSERT that leaves them out.
    $post_fields += array(
        'post_content'          => '',
        'post_title'            => '',
        'post_excerpt'          => '',
        'to_ping'               => '',
        'pinged'                => '',
        'post_content_filtered' => '',
    );

    $assignments = array();
    foreach ($post_fields as $key => $field) {
        if ($key === 'category' || $key === 'tags') {
            continue;
        }
        // Values come from scraped pages, so escape them before they reach the SQL string.
        $column = '`'.str_replace('`', '``', $key).'`';
        $assignments[] = $column." = '".mysqli_real_escape_string($dbhandle, (string) $field)."'";
    }

    $sql = "INSERT INTO wp_posts SET ".implode(', ', $assignments);
    if (!mysqli_query($dbhandle, $sql)) {
        return 0;
    }
    $last_id = (int) mysqli_insert_id($dbhandle);
    if ($last_id === 0) {
        return 0;
    }

    if (isset($post_fields['category'])) {
        // INSERT CATEGORY
        linkPostToTerm($dbhandle, $last_id, getCategoryId($dbhandle, $post_fields['category']), 'category');
    }
    if (isset($post_fields['tags'])) {
        // INSERT TAGS
        foreach ((array) $post_fields['tags'] as $tag) {
            linkPostToTerm($dbhandle, $last_id, getTagId($dbhandle, $tag), 'post_tag');
        }
    }

    // Update GUID — only for the row just inserted, and only when it is a regular
    // post, so the GUIDs of existing posts and of attachments are left alone.
    global $base_url;
    $sql = "UPDATE `wp_posts` SET `guid` = CONCAT('".mysqli_real_escape_string($dbhandle, (string) $base_url)."?p=', ID)"
         . " WHERE `ID` = ".$last_id." AND `post_type` = 'post'";
    mysqli_query($dbhandle, $sql);

    return $last_id;
}

/**
 * Links a post to a term in wp_term_relationships.
 *
 * wp_term_relationships stores term_taxonomy_id rather than term_id, so the
 * matching wp_term_taxonomy row is looked up inside the INSERT itself.
 * INSERT IGNORE keeps a repeated term from failing on the table's primary key.
 *
 * @param mysqli     $dbhandle Open database connection.
 * @param int        $post_id  wp_posts.ID of the post.
 * @param int|string $term_id  wp_terms.term_id; 0 or an empty value is skipped.
 * @param string     $taxonomy Taxonomy the term belongs to ('category' or 'post_tag').
 * @return void
 */
function linkPostToTerm($dbhandle, $post_id, $term_id, $taxonomy) {
    $term_id = (int) $term_id;
    if ($term_id === 0) {
        return;
    }
    $sql = "INSERT IGNORE INTO wp_term_relationships (object_id, term_taxonomy_id)"
         . " SELECT ".(int) $post_id.", term_taxonomy_id FROM wp_term_taxonomy"
         . " WHERE term_id = ".$term_id
         . " AND taxonomy = '".mysqli_real_escape_string($dbhandle, $taxonomy)."'";
    mysqli_query($dbhandle, $sql);
}
/**
 * Builds an underscore-separated, lower-case identifier from a phrase.
 *
 * Characters other than letters, digits, hyphens and whitespace are dropped;
 * runs of whitespace/hyphens become a single separator; the result is cut to
 * $maxLength characters and the separators are written as underscores.
 *
 * @param string $phrase    Text to convert.
 * @param int    $maxLength Maximum length applied before underscores are inserted.
 * @return string
 */
function seourl($phrase, $maxLength = 100000000000000) {
    $lowered = strtolower($phrase);
    $stripped = preg_replace("~[^A-Za-z0-9-\s]~", "", $lowered);
    $collapsed = trim(preg_replace("~[\s-]+~", " ", $stripped));
    $shortened = trim(substr($collapsed, 0, $maxLength));

    return preg_replace("~\s~", "_", $shortened);
}
/**
 * Builds a lower-case, dash-separated slug from a phrase.
 *
 * Every run of characters other than letters, digits and dashes becomes a
 * single dash, and dashes at either end are removed.
 *
 * @param string $phrase Text to convert.
 * @return string
 */
function seo_dash_url($phrase) {
    $dashed = preg_replace('/[^A-Za-z0-9-]+/', '-', $phrase);
    // All whitespace is already a dash at this point, so the dashes are what must be trimmed.
    return strtolower(trim($dashed, '-'));
}
/**
 * Prints a value for inspection in the browser.
 *
 * Arrays are printed with print_r() inside a <pre> element; any other value
 * is echoed followed by a <br>.
 *
 * @param mixed $data Value to print.
 * @return void
 */
function debug( $data = ''){
    if (!is_array($data)) {
        echo $data.'<br>';
        return;
    }

    echo '<pre>'.print_r($data, true).'</pre>';
}
/**
 * Tells whether a top-level post (post_parent = 0) with the given title exists.
 *
 * The title is matched with LIKE, so '%' and '_' inside $name act as wildcards.
 *
 * @param mysqli $dbhandle Open database connection.
 * @param string $name     Post title to look for.
 * @return bool True when at least one row matches; false otherwise or when the query fails.
 */
function isPostExists($dbhandle,$name){
    $sql = "SELECT ID FROM wp_posts WHERE post_parent = 0 AND post_title LIKE '"
         . mysqli_real_escape_string($dbhandle, (string) $name)."' LIMIT 1";
    $result = mysqli_query($dbhandle, $sql);
    if (!$result) {
        return false;
    }
    $exists = mysqli_num_rows($result) > 0;
    mysqli_free_result($result);
    return $exists;
}
/**
 * Returns the term_id of the category with the given name, creating it when missing.
 *
 * @param mysqli $dbhandle Open database connection.
 * @param string $name     Category name; matched with LIKE against wp_terms.name.
 * @return int|string term_id of the existing or newly created category, or 0 when
 *                    the name is blank or a query fails.
 */
function getCategoryId($dbhandle,$name){
    $name = (string) $name;
    if (trim($name) === '') {
        return 0;
    }
    // The name comes from scraped markup, so escape it once for every statement below.
    $safe_name = mysqli_real_escape_string($dbhandle, $name);

    $sql = "SELECT wt.term_id FROM wp_terms wt"
         . " INNER JOIN wp_term_taxonomy wtt ON wtt.term_id = wt.term_id"
         . " WHERE wtt.taxonomy = 'category' AND wt.name LIKE '".$safe_name."' LIMIT 1";
    $result = mysqli_query($dbhandle, $sql);
    if (!$result) {
        // A failed lookup must not fall through and create a duplicate term.
        return 0;
    }
    $row = mysqli_fetch_assoc($result);
    mysqli_free_result($result);
    if ($row) {
        return $row['term_id'];
    }

    // Create category and return
    $sql = "INSERT INTO wp_terms SET name = '".$safe_name."', slug = '"
         . mysqli_real_escape_string($dbhandle, seo_dash_url($name))."'";
    if (!mysqli_query($dbhandle, $sql)) {
        return 0;
    }
    $last_id = (int) mysqli_insert_id($dbhandle);
    if ($last_id === 0) {
        return 0;
    }
    // description is NOT NULL without a default in wp_term_taxonomy, so set it explicitly.
    $sql = "INSERT INTO wp_term_taxonomy SET taxonomy = 'category', term_id = ".$last_id.", description = ''";
    if (!mysqli_query($dbhandle, $sql)) {
        return 0;
    }
    return $last_id;
}
/**
 * Returns the term_id of the tag with the given name, creating it when missing.
 *
 * @param mysqli $dbhandle Open database connection.
 * @param string $name     Tag name; matched with LIKE against wp_terms.name.
 * @return int|string term_id of the existing or newly created tag, or 0 when
 *                    the name is blank or a query fails.
 */
function getTagId($dbhandle,$name){
    $name = (string) $name;
    if (trim($name) === '') {
        return 0;
    }
    // The name comes from scraped markup, so escape it once for every statement below.
    $safe_name = mysqli_real_escape_string($dbhandle, $name);

    $sql = "SELECT wt.term_id FROM wp_terms wt"
         . " INNER JOIN wp_term_taxonomy wtt ON wtt.term_id = wt.term_id"
         . " WHERE wtt.taxonomy = 'post_tag' AND wt.name LIKE '".$safe_name."' LIMIT 1";
    $result = mysqli_query($dbhandle, $sql);
    if (!$result) {
        // A failed lookup must not fall through and create a duplicate term.
        return 0;
    }
    $row = mysqli_fetch_assoc($result);
    mysqli_free_result($result);
    if ($row) {
        return $row['term_id'];
    }

    // Create tag and return
    $sql = "INSERT INTO wp_terms SET name = '".$safe_name."', slug = '"
         . mysqli_real_escape_string($dbhandle, seo_dash_url($name))."'";
    if (!mysqli_query($dbhandle, $sql)) {
        return 0;
    }
    $last_id = (int) mysqli_insert_id($dbhandle);
    if ($last_id === 0) {
        return 0;
    }
    // description is NOT NULL without a default in wp_term_taxonomy, so set it explicitly.
    $sql = "INSERT INTO wp_term_taxonomy SET taxonomy = 'post_tag', term_id = ".$last_id.", description = ''";
    if (!mysqli_query($dbhandle, $sql)) {
        return 0;
    }
    return $last_id;
}