« Utilisateur:Amgine/basebot.php » : différence entre les versions

Contenu supprimé Contenu ajouté
Amgine (discuter | contributions)
init
(Aucune différence)

Version du 30 décembre 2009 à 23:48

<?php
/**
 * The base script for a php-based Mediawiki bot
 **
 * This script is an example of a php bot designed to retrieve content
 * from one website and upload it to a Mediawiki website.
 * 
 * Dependencies:
 * 	PHP must be built with cURL support (--with-curl)
 **
 * @author Amgine <amgine.saewyc@gmail.com>
 * @copyright 2009 Amgine <amgine.saewyc@gmail.com>
 **/

// Include Mediawiki API class
require_once( 'includes/mwapiclass.php' );

/**
 * getUrl function
 **
 * Retrieve the contents of a webpage using cURL.
 *
 * The function always hands back a string: if the cURL handle cannot be
 * created, or the transfer itself fails, the problem is written to the
 * error log and an empty string is returned instead of the boolean
 * false that curl_exec() reports on failure.
 **
 * @param 	$url	string	Url to retrieve (no validation performed here)
 * @return	$val	string	Webpage contents, or '' when the request failed
 */
function getUrl( $url ){
	$ch = curl_init( $url );
	if ( $ch === false ) {
		error_log( 'getUrl: could not initialise cURL for ' . $url );
		return '';
	}

	// Have curl_exec() return the response body instead of printing it.
	curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, true );
	$val = curl_exec ( $ch );

	if ( $val === false ) {
		// Read the error before the handle is closed.
		error_log( 'getUrl: request for ' . $url . ' failed: ' . curl_error( $ch ) );
		$val = '';
	}

	curl_close ( $ch );
	return $val;
}

/**
 * parseWebPage function
 **
 * Parse through the returned webpage. Many ways to do this, and you can 
 * use this function to call different functions for different sites.
 *
 * This is a skeleton: each case is a placeholder where site-specific
 * parsing should fill $data. Until a case does so, the function returns
 * the empty array it starts with rather than an undefined variable.
 **
 * @param	$webpage	string	Raw html
 * @param	$site		string	Url of the source, in case you need 
 * 						different parse routines for each.
 * @return	array/string	depends on the format of the data you're 
 * 							working with; an empty array when nothing
 * 							was parsed.
 **/
function parseWebPage( $webpage, $site = '' ){
	// Defined up front so the return below never reads an unset variable.
	$data = array();

	switch( $site ){
		case 'http://some.website/page':
			// parse this website
			break;
		case 'http://some.other.web.com/page=Hockey_scores':
			// parse this website
			break;
		default:
			// some way to process other websites, or report a typo
			break;
	}
	return $data;
}

/**
 * makeWnHockeyArticle function
 **
 * This is a fake function where you would create a Mediawiki article
 * using content scraped from a website. You might need several different
 * versions of this function for different types of data.
 *
 * As a skeleton it builds nothing yet, so it returns the empty string
 * it starts with rather than an undefined variable.
 **
 * @param	$data	array/string	Depends on what you're doing
 * @return	string	MW-syntax output ('' until an implementation is added).
 */
function makeWnHockeyArticle( $data ){
	// Defined up front so the return below never reads an unset variable.
	$article = '';

	// do something to create the article based on the data
	return $article;
}

// Main loop of the bot

// Account details handed to the mwAPI constructor.
// NOTE(review): placeholder values -- replace before running, and
// presumably these are the wiki login; confirm in includes/mwapiclass.php.
$user = array(
	'username' => 'MyUsername',
	'password' => 'MyPassword',
);
$newAPIconnection = new mwAPI( $user );

// Source pages the bot walks through, one per iteration.
$urls = array(
	'http://some.website/page',
	'http://some.other.web.com/page=Hockey_scores',
);

foreach($urls AS $site ){
	$page_text = getUrl( $site );
	$new_article = makeWnHockeyArticle( parseWebpage( getUrl( $site ) ) );
	
	$newAPIconnection->newPage( $new_article );