File: /www/wwwroot/healthyton.com/wp-content/plugins/scraper.php
<?php
/*
Plugin Name: Scraper Plugin
Plugin URI: http://ashokbasnet.com.np
description: Manages Scraping of data
a plugin to create awesomeness and spread joy
Version: 1.0
Author: Mr. Ashok Basnet
Author URI: http://ashokbasnet.com.np
License: GPL2
*/
// INTERFACE
//require '../../wp-scrap/vendor/autoload.php';
//use Goutte\Client;
/**
 * Renders the "Spinbot" admin page and handles its form submission.
 *
 * The form accepts a Gossip Gist URL and/or a Height Line URL. On submit the
 * Height Line URL takes precedence; the chosen URL is passed to the matching
 * script under /wp-scrap/ on this site, the JSON reply is decoded, a post is
 * created through postToWP() and a preview of the result is printed.
 *
 * The submission is protected by a nonce, and everything echoed back into the
 * page is escaped for the context it is printed in.
 */
function scraper_page(){
    // WordPress adds slashes to $_POST, so unslash before using or re-displaying the values.
    $gossipgist_url = (isset($_POST['gossipgist_url']) && is_string($_POST['gossipgist_url']))
        ? trim(wp_unslash($_POST['gossipgist_url']))
        : '';
    $heightline_url = (isset($_POST['url']) && is_string($_POST['url']))
        ? trim(wp_unslash($_POST['url']))
        : '';
    ?>
    <h1>Spinbot URL</h1><hr>
    <div class="wrap">
        <form method="POST">
            <?php wp_nonce_field('scraper_submit_url', 'scraper_nonce'); ?>
            <label for="gossipgist_url">Enter the Gossip Gist URL [e.g. https://gossipgist.com/lori-harvey]:</label>
            <input type="text" id="gossipgist_url" name="gossipgist_url" class="large-text" value="<?php echo esc_attr($gossipgist_url); ?>" placeholder="Enter URL"/><br /><br />
            <label for="heightline_url">Enter the Height Line URL [e.g. https://heightline.com/ariana-grande-height-weight-dress-and-shoe-size/]:</label>
            <input type="text" id="heightline_url" name="url" class="large-text" value="<?php echo esc_attr($heightline_url); ?>" placeholder="Enter URL"/><br /><br />
            <!-- <label name="medical_news_url">Enter Medical News Today URL [e.g. https://www.medicalnewstoday.com/articles/324376.php]:</label>-->
            <!-- <input type="text" name="medical_news_url" class="large-text" value="--><?php //echo isset($_POST['medical_news_url']) ? $_POST['medical_news_url'] : '';?><!--" placeholder="Enter URL"/><br /><br />-->
            <input type="submit" name="submit_scraper_url" value="Submit" class="button button-primary">
        </form>
    </div>
    <hr>
    <?php
    // Kept for backward compatibility: this helper becomes globally available once
    // the page has rendered. The guard prevents a "cannot redeclare" fatal error
    // if the page callback runs more than once in a request.
    if (!function_exists('strip_tags_blacklist')) {
        /**
         * Removes every <$tags>...</$tags> element (including its content) from $html.
         * $tags is inserted into the regular expression as-is.
         */
        function strip_tags_blacklist($html, $tags) {
            $html = preg_replace('/<'. $tags .'\b[^>]*>(.*?)<\/'. $tags .'>/is', "", $html);
            return $html;
        }
    }

    if (!array_key_exists('submit_scraper_url', $_POST)) {
        return;
    }

    // Reject forged submissions (CSRF); terminates the request when the nonce is invalid.
    check_admin_referer('scraper_submit_url', 'scraper_nonce');

    // Same precedence as before: the Height Line field wins over the Gossip Gist field.
    // The URL is encoded so characters such as "&" or "#" survive as part of the value.
    $endpoint = '';
    if ('' !== $heightline_url) {
        $endpoint = 'spinbot_get_heightline.php?url=' . rawurlencode($heightline_url);
    } elseif ('' !== $gossipgist_url) {
        $endpoint = 'spinbot_get_gossipgist.php?url=' . rawurlencode($gossipgist_url);
    }

    $data = null;
    if ('' !== $endpoint) {
        $response = file_get_contents(get_site_url() . '/wp-scrap/' . $endpoint);
        if (false !== $response) {
            $data = json_decode($response, true);
        }
    }

    $has_body = is_array($data) && !empty($data['body']);
    $postId   = $has_body ? postToWP($data) : 0;
    $created  = is_int($postId) && $postId > 0;

    // Only announce success once a post has really been created.
    if ($created) {
        ?>
        <div id="setting-error-settings_updated" class="update_settings-error notice is-dismissible">
            <p><strong>Successfully Spinbot completed.</strong></p>
        </div>
        <?php
    }

    echo '<div class="card" style="max-width: 100%;">';
    if ('' === $endpoint) {
        echo 'No URL selected';
    } elseif (!$has_body) {
        echo 'ERROR FETCHING DATA';
    } elseif (!$created) {
        echo 'ERROR CREATING POST';
    } else {
        $name     = isset($data['name']) ? $data['name'] : '';
        $category = isset($data['category']) ? $data['category'] : '';
        $spins    = isset($data['available-spins']) ? $data['available-spins'] : '';

        echo '<a class="button button-primary" href="' . esc_url(admin_url('post.php?post=' . $postId . '&action=edit')) . '" target="_blank">OPEN POST</a>';
        echo '<h2>' . esc_html($name) . '</h2>';
        echo get_the_post_thumbnail($postId) . '<br>';
        echo '<h3>' . esc_html($category) . '</h3>';
        // The body is scraped third-party HTML: keep its markup but drop anything
        // that is not allowed in post content (scripts, event handlers, ...).
        echo wp_kses_post($data['body']);
        echo '<hr><h3 style="color:red;">Available SPINS Left: ' . esc_html($spins) . '</h3>';
    }
    echo '</div>';
}
/**
 * Creates a draft post from scraped data and attaches its featured image.
 *
 * Keys read from $data: 'name' (title), 'body' (content), 'image' (remote
 * image URL) and, optionally, 'meta' (passed through as meta_input).
 *
 * If a post with the same title already exists the function prints
 * "already exists" and terminates the request (unchanged legacy behaviour).
 *
 * @param array $data Decoded scraper response.
 * @return int ID of the new post, or 0 when no post could be created.
 */
function postToWP($data){
    if (!is_array($data) || empty($data['name'])) {
        return 0;
    }

    // NOTE(review): get_page_by_title() is deprecated in recent WordPress releases;
    // consider replacing it with a title query once the minimum version is known.
    $post = get_page_by_title($data['name'], OBJECT, 'post');
    if($post){
        echo 'already exists';exit;
    }

    $post_fields = array(
        'post_author'    => get_current_user_id(),
        'post_title'     => $data['name'],
        'post_status'    => 'draft',
        'post_content'   => isset($data['body']) ? $data['body'] : '',
        'post_name'      => strtolower(trim(preg_replace('/[^A-Za-z0-9-]+/', '-', $data['name']))),
        'comment_status' => 'closed',
        'ping_status'    => 'closed',
        'post_category'  => array(2), // celeb health
    );
    if (isset($data['meta'])) {
        $post_fields['meta_input'] = $data['meta'];
    }

    $postId = wp_insert_post($post_fields);
    if (!$postId) {
        // Insertion failed: do not upload an image for a post that does not exist.
        return 0;
    }

    if (!empty($data['image'])) {
        $thumbnail_id = uploadRemoteImageAndAttach($data['image'], $data['name'], $postId);
        // The upload helper returns false on failure; only set a real attachment.
        if ($thumbnail_id) {
            set_post_thumbnail($postId, $thumbnail_id);
        }
    }

    return $postId;
}
// Add to admin menu
/**
 * Registers the "Spinbot" page under the Settings menu.
 *
 * The page is rendered by scraper_page() and requires the 'publish_posts'
 * capability. add_submenu_page() takes the menu position as its seventh
 * argument, so 111 is passed there directly (the previous call passed an
 * empty string in that slot followed by an extra, ignored argument).
 */
function scraper_add__menu(){
    add_submenu_page('options-general.php', 'Spinbot', 'Spinbot', 'publish_posts', 'spinbot', 'scraper_page', 111);
}
add_action('admin_menu','scraper_add__menu');
/// SCRAPER PART
// Hook se67095_add_query_vars() into the 'query_vars' filter so that the
// "scraper" and "random" query variables it adds are available on $wp_query.
add_filter( 'query_vars', 'se67095_add_query_vars');
/**
 * Appends this plugin's query variable names ("scraper" and "random") to the
 * list received from the 'query_vars' filter and returns the extended list.
 */
function se67095_add_query_vars($vars){
    foreach (array("scraper", "random") as $plugin_var) {
        $vars[] = $plugin_var;
    }
    return $vars;
}
add_action('template_redirect', 'se67905_random_template');
/**
 * Redirects the visitor to a randomly chosen published post when the "random"
 * query variable is present on the request.
 *
 * The redirect targets the post's permalink (the guid column is an identifier,
 * not a reliable URL) and the request ends right after the redirect header is
 * sent, so no template is rendered behind it.
 *
 * @param mixed $template Value passed by the hook; returned unchanged when no redirect happens.
 * @return mixed
 */
function se67905_random_template($template) {
    global $wp_query;

    if (!isset($wp_query->query['random'])) {
        return $template;
    }

    $posts = get_posts(array(
        'numberposts' => 1,
        'order'       => 'ASC',
        'post_status' => 'publish',
        'post_type'   => 'post',
        'orderby'     => 'rand',
    ));

    if (isset($posts[0])) {
        $permalink = get_permalink($posts[0]);
        if ($permalink) {
            wp_safe_redirect($permalink);
            exit;
        }
    }

    return $template;
}
add_action('template_redirect', 'se67905_my_template');
/**
 * Front-end maintenance endpoint driven by the "scraper" query variable.
 *
 * Supported values:
 *  - "scrap":       import one article (URL from ?url=, with a built-in default) as a published post
 *  - "publish":     publish one randomly chosen draft post
 *  - "corona":      refresh the {prefix}corona_countries table from the remote feed
 *  - "update_date": bump the modified date of up to 500 posts last modified before 2022-09-21
 *
 * Each action prints a short status message and ends the request. Any other
 * value (or no value) leaves the request untouched.
 *
 * NOTE(review): these actions run for any visitor, without authentication or a
 * secret token. That is preserved here so existing callers keep working, but it
 * should be restricted.
 *
 * @param mixed $template Value passed by the hook; returned unchanged when no action matches.
 * @return mixed
 */
function se67905_my_template($template) {
    global $wp_query;

    if (!isset($wp_query->query['scraper']) || !is_string($wp_query->query['scraper'])) {
        return $template;
    }

    switch ($wp_query->query['scraper']) {
        case 'scrap':
            $url = "https://healthyceleb.com/sora-simmons/";
            if (isset($_GET['url']) && is_string($_GET['url'])) {
                // WordPress adds slashes to $_GET; remove them before reuse.
                $url = wp_unslash($_GET['url']);
            }
            echo se67905_scraper_import_post($url);
            exit;

        case 'publish':
            echo se67905_scraper_publish_random_draft();
            exit;

        case 'corona':
            echo se67905_scraper_import_corona();
            exit;

        case 'update_date':
            echo se67905_scraper_refresh_modified_dates();
            exit;
    }

    return $template;
}

/**
 * Fetches one article through the local scraper API and inserts it as a published post.
 *
 * @param string $url Source article URL handed to the scraper API.
 * @return string Status message for the caller to print.
 */
function se67905_scraper_import_post($url) {
    // Encode the URL so characters such as "&" or "#" stay part of the parameter value.
    $raw  = file_get_contents(get_site_url() . '/wp-scrap/api_mddailyrecord.php?url=' . rawurlencode($url));
    $data = (false === $raw) ? null : json_decode($raw, true);

    // A failed request or invalid JSON must not end up as an empty post.
    if (!is_array($data) || isset($data['error']) || empty($data['name'])) {
        return 'Data not available';
    }

    if (get_page_by_title($data['name'], OBJECT, 'post')) {
        return 'already exists';
    }

    $image_title = $data['name'];
    if (isset($data['meta']['hton_background_full_name'])) {
        $image_title = $data['meta']['hton_background_full_name'];
    }

    $post_fields = array(
        'post_category'  => array(2),
        'post_author'    => 6,
        'post_title'     => $data['name'],
        'post_content'   => isset($data['body']) ? $data['body'] : '',
        'post_status'    => 'publish',
        'post_name'      => strtolower(trim(preg_replace('/[^A-Za-z0-9-]+/', '-', $data['name']))),
        'comment_status' => 'closed',
        'ping_status'    => 'closed',
    );
    if (isset($data['meta']) && is_array($data['meta'])) {
        $post_fields['meta_input'] = $data['meta'];
    }

    $postId = wp_insert_post($post_fields);
    if (!$postId) {
        return 'Post could not be inserted';
    }

    if (!empty($data['image'])) {
        $thumbnail_id = uploadRemoteImageAndAttach($data['image'], $image_title, $postId);
        // The upload helper returns false on failure; only set a real attachment.
        if ($thumbnail_id) {
            set_post_thumbnail($postId, $thumbnail_id);
        }
    }

    return 'Post Inserted Successfully';
}

/**
 * Publishes one randomly selected draft post, dated "now".
 *
 * @return string Status message for the caller to print.
 */
function se67905_scraper_publish_random_draft() {
    $drafts = get_posts(array(
        'numberposts' => 1,
        'order'       => 'ASC',
        'post_status' => 'draft',
        'post_type'   => 'post',
        'orderby'     => 'rand',
    ));

    foreach ($drafts as $draft) {
        // post_date is stored in the site's timezone and post_date_gmt in UTC, so
        // the two values must come from current_time() rather than one date() call.
        // wp_update_post() refreshes the modified timestamps on its own.
        wp_update_post(array(
            'ID'            => $draft->ID,
            'post_status'   => 'publish',
            'post_date'     => current_time('mysql'),
            'post_date_gmt' => current_time('mysql', true),
        ));
    }

    return 'published';
}

/**
 * Downloads the per-country figures and upserts them into {prefix}corona_countries.
 *
 * @return string Status message for the caller to print.
 */
function se67905_scraper_import_corona() {
    global $wpdb;

    $response = wp_remote_get('https://healthyton.com/wp-scrap/corona_v1.php');
    if (is_wp_error($response)) {
        return 'import failed';
    }

    $countries_data = json_decode(wp_remote_retrieve_body($response), true);
    if (!is_array($countries_data)) {
        return 'import failed';
    }

    $table_name = $wpdb->prefix . "corona_countries";
    foreach ($countries_data as $country) {
        if (!is_array($country) || !isset($country['country'])) {
            continue;
        }

        $row = array(
            'name'      => $country['country'],
            'confirmed' => isset($country['confirmed']) ? $country['confirmed'] : null,
            'recovered' => isset($country['recovered']) ? $country['recovered'] : null,
            'deaths'    => isset($country['deaths']) ? $country['deaths'] : null,
        );

        // Prepared statement: country names may contain quotes (e.g. "Cote d'Ivoire").
        $existing_id = $wpdb->get_var(
            $wpdb->prepare("SELECT id FROM $table_name WHERE name = %s", $row['name'])
        );

        if (null === $existing_id) {
            $wpdb->insert($table_name, $row);
        } else {
            $wpdb->update($table_name, $row, array('id' => $existing_id));
        }
    }

    return 'import success';
}

/**
 * Sets the modified date of up to 500 random published posts, last modified
 * before 2022-09-21, to the current time.
 *
 * @return string Status message for the caller to print.
 */
function se67905_scraper_refresh_modified_dates() {
    global $wpdb;

    $posts = get_posts(array(
        'numberposts' => 500,
        'post_status' => 'publish',
        'post_type'   => 'post',
        'date_query'  => array(
            'column' => 'post_modified',
            'before' => '2022-09-21',
        ),
        'orderby'     => 'rand',
    ));

    // Local and GMT timestamps are taken straight from WordPress instead of
    // converting a date() result a second time.
    $modified     = current_time('mysql');
    $modified_gmt = current_time('mysql', true);

    $count = 0;
    foreach ($posts as $post) {
        $result = $wpdb->update(
            $wpdb->posts,
            array(
                'post_modified'     => $modified,
                'post_modified_gmt' => $modified_gmt,
            ),
            array('ID' => $post->ID),
            array('%s', '%s'),
            array('%d')
        );

        if (false !== $result) {
            // The row was changed behind WordPress' back; drop the cached copy.
            clean_post_cache($post->ID);
            $count++;
        }
    }

    return 'updated '.$count. ' posts successfully !!!';
}
/**
 * Prints a human-readable dump of a value wrapped in <pre> tags.
 *
 * @param mixed $arr  Value to dump with print_r().
 * @param bool  $exit When truthy (the default) the script stops after printing.
 */
function debug($arr, $exit=true){
    $dump = print_r($arr, true);
    echo '<pre>' . $dump . '</pre>';

    if ($exit) {
        exit;
    }
}
/**
 * Downloads a remote image into the uploads directory and registers it as an
 * attachment of the given post.
 *
 * @param string $image_url Remote image URL.
 * @param string $post_name Used as the attachment title and (sanitized) as the file name.
 * @param int    $parent_id Post the attachment belongs to.
 * @return int|false Attachment ID, or false when the download, the file write
 *                   or the attachment insert failed.
 */
function uploadRemoteImageAndAttach($image_url, $post_name, $parent_id){
    if (!is_string($image_url) || '' === $image_url) {
        return false;
    }

    $response = wp_remote_get($image_url);
    if (is_wp_error($response)) {
        return false;
    }

    // Do not store an error page as if it were the image.
    $status = (int) wp_remote_retrieve_response_code($response);
    if ($status < 200 || $status >= 300) {
        return false;
    }

    $type = wp_remote_retrieve_header($response, 'content-type');
    if (is_array($type)) {
        $type = reset($type);
    }
    if (!$type) {
        return false;
    }
    // Drop parameters such as "; charset=..." so only the MIME type is stored.
    $type_parts = explode(';', $type);
    $type       = trim($type_parts[0]);

    // Take the extension from the URL path only, so a query string never leaks into it.
    $path     = parse_url($image_url, PHP_URL_PATH);
    $file_ext = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
    if ('' === $file_ext) {
        // Extension-less URL: fall back to the MIME type for common image formats.
        $known_types = array(
            'image/jpeg' => 'jpg',
            'image/png'  => 'png',
            'image/gif'  => 'gif',
            'image/webp' => 'webp',
        );
        $mime_key = strtolower($type);
        if (isset($known_types[$mime_key])) {
            $file_ext = $known_types[$mime_key];
        }
    }
    $image_name = sanitize_file_name($post_name).'.'.$file_ext;

    $mirror = wp_upload_bits($image_name, null, wp_remote_retrieve_body($response));
    if (!empty($mirror['error']) || empty($mirror['file'])) {
        return false;
    }

    $attachment = array(
        'post_title'     => $post_name,
        'post_mime_type' => $type,
    );
    $attach_id = wp_insert_attachment($attachment, $mirror['file'], $parent_id);
    if (!$attach_id || is_wp_error($attach_id)) {
        return false;
    }

    // wp_generate_attachment_metadata() lives in an admin include that is not
    // loaded on front-end requests.
    require_once(ABSPATH . 'wp-admin/includes/image.php');
    $attach_data = wp_generate_attachment_metadata($attach_id, $mirror['file']);
    wp_update_attachment_metadata($attach_id, $attach_data);

    return $attach_id;
}
// https://stackoverflow.com/questions/41524931/how-to-set-featured-image-programmatically-from-url
/**
 * Downloads an image, stores it in the uploads directory and sets it as the
 * featured image of a post.
 *
 * @param int    $post_id   Post that receives the featured image.
 * @param string $image_url Remote image URL.
 * @param string $post_name Base name for the stored file.
 * @return int|false Attachment ID on success, false when the download, the
 *                   file write or the attachment insert failed.
 */
function featured_image($post_id,$image_url,$post_name){
    // Take the extension from the URL path only, so a query string never leaks into it.
    $path       = parse_url($image_url, PHP_URL_PATH);
    $file_ext   = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
    $image_name = $post_name.'.'.$file_ext;

    $image_data = file_get_contents($image_url); // Get image data
    if (false === $image_data || '' === $image_data) {
        // Nothing was downloaded: do not create an empty attachment.
        return false;
    }

    $upload_dir       = wp_upload_dir(); // Set upload folder
    $unique_file_name = wp_unique_filename( $upload_dir['path'], $image_name ); // Generate unique name
    $filename         = basename( $unique_file_name ); // Create image file name

    // Check folder permission and define file location
    if( wp_mkdir_p( $upload_dir['path'] ) ) {
        $file = $upload_dir['path'] . '/' . $filename;
    } else {
        $file = $upload_dir['basedir'] . '/' . $filename;
    }

    // Create the image file on the server
    if (false === file_put_contents( $file, $image_data )) {
        return false;
    }

    // Check image file type
    $wp_filetype = wp_check_filetype( $filename, null );

    // Set attachment data
    $attachment = array(
        'post_mime_type' => $wp_filetype['type'],
        'post_title'     => sanitize_file_name( $filename ),
        'post_content'   => '',
        'post_status'    => 'inherit'
    );

    // Create the attachment
    $attach_id = wp_insert_attachment( $attachment, $file, $post_id );
    if (!$attach_id || is_wp_error($attach_id)) {
        return false;
    }

    // wp_generate_attachment_metadata() is defined in this admin include
    require_once(ABSPATH . 'wp-admin/includes/image.php');

    // Define attachment metadata and assign it to the attachment
    $attach_data = wp_generate_attachment_metadata( $attach_id, $file );
    wp_update_attachment_metadata( $attach_id, $attach_data );

    // And finally assign featured image to post
    set_post_thumbnail( $post_id, $attach_id );

    return $attach_id;
}