<?php
/**
* The base script for a PHP-based MediaWiki bot
**
* This script is an example of a PHP bot designed to retrieve content
* from one website and upload it to a MediaWiki website.
*
* Dependencies:
* PHP must be built with cURL support (--with-curl)
**
* @author Amgine <amgine.saewyc@gmail.com>
* @copyright 2009 Amgine <amgine.saewyc@gmail.com>
**/
// Include Mediawiki API class
require_once( 'includes/mwapiclass.php' );
/**
 * getUrl function
 *
 * Retrieve the contents of a webpage using cURL.
 *
 * @param string $url Url to retrieve (no validation performed here)
 * @return string|false Webpage contents, or false when the cURL handle
 *                      could not be created or the transfer failed.
 */
function getUrl( $url ){
    $ch = curl_init( $url );
    // curl_init() returns false when it cannot create a handle; stop here
    // rather than hand a non-handle to the remaining curl_* calls.
    if ( $ch === false ) {
        return false;
    }
    // Make curl_exec() return the response body instead of printing it.
    curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
    // curl_exec() itself yields false on failure; that value is passed
    // through unchanged so callers can detect a failed fetch.
    $val = curl_exec( $ch );
    curl_close( $ch );
    return $val;
}
/**
 * parseWebPage function
 *
 * Parse through the returned webpage. Many ways to do this, and you can
 * use this function to call different functions for different sites.
 *
 * @param string $webpage Raw html
 * @param string $site    Url of the source, in case you need different
 *                        parse routines for each.
 * @return array|string|null Parsed data; the format depends on the data
 *                           you're working with. null when no branch has
 *                           assigned a result.
 */
function parseWebPage( $webpage, $site = '' ){
    // Start from a defined value so the function never returns an
    // undefined variable while the branches below are still placeholders.
    $data = null;
    switch( $site ){
        case 'http://some.website/page':
            // parse this website
            break;
        case 'http://some.other.web.com/page=Hockey_scores':
            // parse this website
            break;
        default:
            // some way to process other websites, or report a typo
            break;
    }
    return $data;
}
/**
 * makeWnHockeyArticle function
 *
 * This is a fake function where you would create a MediaWiki article
 * using content scraped from a website. You might need several different
 * versions of this function for different types of data.
 *
 * @param array|string $data Depends on what you're doing
 * @return string MW-syntax output; an empty string until the article
 *                building logic is filled in.
 */
function makeWnHockeyArticle( $data ){
    // Start from an empty article so the documented string return holds
    // even though the body below is still a placeholder.
    $article = '';
    // do something to create the article based on the data
    return $article;
}
// Main loop of the bot
// Source pages to scrape; these are the urls parseWebPage() switches on.
$urls = array(
    'http://some.website/page',
    'http://some.other.web.com/page=Hockey_scores',
);
// Placeholder credentials handed to the mwAPI constructor.
// NOTE(review): substitute real values from outside this file rather than
// committing a password to source control.
$user = array(
    'username' => 'MyUsername',
    'password' => 'MyPassword',
);
$newAPIconnection = new mwAPI( $user );
foreach ( $urls as $site ) {
    // Fetch each page exactly once and reuse the result below.
    $page_text = getUrl( $site );
    if ( $page_text === false ) {
        // The fetch failed: skip this site instead of uploading an
        // article built from nothing.
        error_log( 'Could not retrieve ' . $site );
        continue;
    }
    // Pass $site along so parseWebPage() can select the matching
    // site-specific parse routine instead of always hitting its default.
    $new_article = makeWnHockeyArticle( parseWebPage( $page_text, $site ) );
    $newAPIconnection->newPage( $new_article );
}