// MediaWiki:Common.js/Wikinews:Article principal/leadGenerator
// Load the helper library from English Wikinews (presumably the provider of the
// global api() used below - verify against user:Bawolff/mwapilib2.js).
mw.loader.load('//en.wikinews.org/w/index.php?title=user:Bawolff/mwapilib2.js&action=raw&ctype=text/javascript');

// Create the shared namespace only if another script has not created it already.
if (!window.Bawolff) {
    window.Bawolff = {};
}

// Example API query (English Wikinews lead templates):
// //en.wikinews.org/w/api.php?action=query&prop=revisions&titles=template:Lead%20article%201|template:Lead%20article%202|template:Lead%20article%203|template:Lead%20article%204|template:Lead%20article%205&rvprop=timestamp|content

/**
 * Builds the wikitext of a lead-article template for a given page.
 *
 * Call as Bawolff.leadGen(pageName, 1, alert), replacing 1 with the lead
 * number and alert with your callback.
 *
 * The page is fetched, run through Bawolff.leadGen.extract (with title and
 * summaryMethod as extra arguments), then through Bawolff.leadGen.create
 * (with leadNumb), and finally handed to callback (with title).
 * NOTE(review): this assumes lift() passes the previous step's result as the
 * first argument, which matches the parameter order of extract and create.
 *
 * @param {string} title Name of the article to build the lead from.
 * @param {number|string} leadNumb Which lead to fill. The original note says
 *     1-5; the lead templates queried elsewhere in this file are Modèle:Une 1-3.
 * @param {Function} callback Receives the generated wikitext.
 * @param {number} [summaryMethod] Summary method, 0-4 (see
 *     Bawolff.leadGen.takeIntro). Must be a number.
 */
Bawolff.leadGen = function (title, leadNumb, callback, summaryMethod) {
    api(title).getPage().lift(Bawolff.leadGen.extract, title, summaryMethod).lift(Bawolff.leadGen.create, leadNumb).lift(callback, title).exec();
};
/********
 * Maps category / infobox (template) names to generic images.
 * Keys are compared after upper-casing the first character of the name
 * found in the page (see Bawolff.leadGen.extractImg).
 *
 * Note: the category map only works with categories explicitly included in
 * the page; it does not count categories added by templates.
 * Note: templates used with parameters are not considered.
 ********/
Bawolff.leadGen.imgMap = {
    "Brésil": "Flag of Brazil.svg",
    "Canada": "Flag of Canada.svg",
    "États-Unis d'Amérique": "Flag of the United States.svg",
    "France": "Flag of France.svg",
    "Informatique": "Computer-aj aj ashton 01.svg",
    "Mexique": "Flag of Mexico.svg",
    "Nécrologie": "Wikinews tag obituary.png",
    "Ontario": "Flag of Ontario.svg",
    "Québec": "Flag of Quebec.svg",
    "Royaume-Uni": "Flag of the United Kingdom.svg",
    "Football": "Wikinews-football.svg",
    "Science et technologie": "Science-symbol-2.svg"
};
/**
 * Takes the source of a wiki page and chooses an image for it.
 *
 * Order of preference:
 *   1. the first [[Image:...]], [[File:...]] or [[Fichier:...]] link, returned
 *      without the leading namespace;
 *   2. the generic image mapped (Bawolff.leadGen.imgMap) to the first
 *      parameterless template found in the page;
 *   3. the generic image mapped to the first matching category;
 *   4. "Wikinews-logo.png" as the default.
 *
 * @param {string} page Wikitext of the page.
 * @returns {string} Image file name without namespace prefix.
 */
Bawolff.leadGen.extractImg = function (page) {
    var imgRegex = /\[\[(?:[iI][mM][aA][gG][eE]\:|[fF][Ii][lL][eE]\:|[Ff][Ii][cC][hH][iI][eE][rR]\:)((?:[^\|\]])*?\.[pPsSjJgG][nNvVpPiI][gGeEfF][gG]?)[\|\]]?/;
    var img = imgRegex.exec(page);
    if (img && img.length >= 2) {
        return img[1];
    }

    var map = Bawolff.leadGen.imgMap;

    // Scans every match of a global regex and returns the mapped image of the
    // first captured name that is a key of imgMap, or null when none is.
    var findMapped = function (regex) {
        var found, name;
        // Older engines may share regex literal objects between calls, so
        // start from a known position.
        regex.lastIndex = 0;
        while ((found = regex.exec(page))) {
            name = found[1];
            name = name.charAt(0).toUpperCase() + name.substring(1, name.length);
            // Own-property check so names such as "__proto__" cannot resolve
            // to inherited (non-string) values.
            if (Object.prototype.hasOwnProperty.call(map, name) && map[name]) {
                return map[name];
            }
        }
        return null;
    };

    // Note: the template pattern does not match infoboxes with parameters.
    var infoboxRegex = /\{\{([^|}]*)\}\}/g;
    var categoryRegex = /\[\[[cC]at[eé]gor[yi]e?:([^|\]]*)(?:\|[^\]]*)?\]\]/g;

    return findMapped(infoboxRegex) || findMapped(categoryRegex) || "Wikinews-logo.png"; // default
};
/**
 * Extracts the audio file name from a {{Version audio|...}} template,
 * e.g. {{Version audio|Ares I-X lancement réussi.ogg}}.
 *
 * @param {string} page Wikitext of the page.
 * @returns {string} First parameter of the template, or "" when absent.
 */
Bawolff.leadGen.extractAudio = function (page) {
    var res = page.match(/\{\{[vV]ersion audio\|([^|}]*)\|?[^}]*\}\}/);
    if (res) {
        return res[1];
    }
    return "";
};

/**
 * Extracts the first parameter of the {{Date|...}} template.
 *
 * @param {string} page Wikitext of the page.
 * @returns {string} First parameter of the template, or "" when absent.
 */
Bawolff.leadGen.extractDate = function (page) {
    var ress = page.match(/\{\{[Dd]ate\|([^|}]*)\|?[^}]*\}\}/);
    if (ress) {
        return ress[1];
    }
    return "";
};

/**
 * Guesses the article type from the templates present in the page.
 *
 * Valid types are: breaking, special, original, exclusive, urgent or none.
 * This currently does not detect special or urgent.
 *
 * @param {string} page Wikitext of the page.
 * @returns {string} "breaking", "exclusive", "original" or "none".
 */
Bawolff.leadGen.extractType = function (page) {
    if (page.match(/\{\{[eE]n(?: cours)?\}\}/)) {
        return "breaking";
    } else if (page.match(/\{\{[iI]nterview(?:\|[^}]*)?\}\}/i)) {
        return "exclusive";
    } else if (page.match(/\{\{[rR]eportage(?: original)?(?:\|[^}]*)?\}\}/i)) {
        return "original";
    } else {
        // default
        return "none";
    }
};
/**
 * Produces a plain-text synopsis from the wikitext of an article.
 *
 * Templates, references and images are stripped, links are replaced by their
 * visible text, and then a leading portion of the text is selected.
 *
 * Known limits: does not handle links that render as [1]. Intentionally does
 * not strip ' characters (bold or italic), as these often mark things that
 * contain periods (E. coli).
 *
 * @param {string} pageText Wikitext of the article.
 * @param {number} [method] Must be a number (no type conversion is performed):
 *     0 (or anything else): 1st sentence
 *     1: 1st two sentences
 *     2: 1st paragraph
 *     3: 1st 250 characters (plus a few so we don't end mid-word), or paragraph
 *     4: 1st 500 characters-ish, or paragraph
 * @returns {string} The synopsis. When the selected pattern finds nothing
 *     (for instance a text with no sentence-ending period followed by
 *     whitespace), the first non-blank line is returned, or "" if there is none.
 * @throws {Error} When pageText is a redirect (after alerting the user).
 */
Bawolff.leadGen.takeIntro = function (pageText, method) {
    // First test for redirects.
    var isRedirect = pageText.match(/^#redirect\s?\[\[([^\]]*)\]\]/i);
    if (isRedirect) {
        alert("Il semble que vous essayez d\'utiliser la création d\'articles principaux sur un page de redirection. Utiliser s\'il vous plaît le nom réel de la page au lieu de (" + isRedirect[1] + ").");
        throw new Error("La page est une redirection. Merci de résoudre manuellement à : " + isRedirect[1]);
    }

    var fixWLink = /\{\{[wW]\|([^\}]+)\}\}/g;
    var fixformatnum = /\{\{formatnum\:([^\}]+)\}\}/g;
    // Matches a template with up to three further levels of nested templates.
    var stripTemplates = /\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*\}\})?\}\})?\}\})?\}\}/g;
    // Note: this misinterprets template:""
    var stripCitations = /\{\{(?:"\s?"|Guil|Citation1?)\|([^}]*)\}\}/g;
    var stripRefs = /\<ref[^>]*\>[\s\S]*?\<\/ref>/g;
    // The image regex looks for the start of an image, then allows nested
    // internal links, external links and single ] until the closing ]] of the
    // caption. Templates should already be stripped at this point.
    // (The French image namespace is "Fichier".)
    var img = /\[\[[IifF][mMIi][aALlcC][GgEehH][eEiI]?[eE]?[rR]?\:(?:\[\[(?:[^\]]*)\]\]|[^\]]|\](?!\]))*]]/g;

    // Keep the useful text of {{w|...}}, {{formatnum:...}} and quotation
    // templates before every remaining template is removed.
    pageText = pageText.replace(fixWLink, '$1');
    pageText = pageText.replace(fixformatnum, '$1');
    // Note: these are run multiple times to deal with nesting.
    pageText = pageText.replace(stripCitations, '« $1 »');
    pageText = pageText.replace(stripCitations, '« $1 »');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripRefs, '');
    pageText = pageText.replace(img, '');

    // Replace links by their visible text.
    var pipedLink = /\[\[[^\]\|]*\|([^\]\|]*)\]\]/g;
    pageText = pageText.replace(pipedLink, '$1');
    var normLink = /\[\[([^\]\|]*)\]\]/g;
    pageText = pageText.replace(normLink, '$1');
    var extLink = /\[(?:http|ftp|gopher|irc|https)\:[^\]\s]*\s?([^\]]*)]/g;
    pageText = pageText.replace(extLink, '$1');

    // Returns the text matched by pattern followed by suffix. When the pattern
    // matches nothing, falls back to the first non-blank line (without suffix)
    // instead of failing on a null match.
    var select = function (pattern, suffix) {
        var found = pageText.match(pattern);
        if (found) {
            return found[0] + suffix;
        }
        var fallback = pageText.match(/\S[^\n]*/);
        return fallback ? fallback[0] : "";
    };

    switch (method) {
    case 1: // 1st two sentences, or 1st paragraph
        return select(/[^\n]+?\s[^\s\.]*\.(?=\s)(?:[^\n]+?\s[^\s\.]*\.(?=\s)|(?=\s))/, "");
    case 2: // 1st paragraph
        return select(/[^\n]+?(?=\n)/, "");
    case 3: // 1st 250 characters-ish
        return select(/[^\n]{2,250}.*?\b/, "...");
    case 4: // 1st 500 characters-ish
        return select(/[^\n]{2,500}.*?\b/, "...");
    default: // aka case 0: 1st sentence
        return select(/[^\n]+?\s[^\s\.]*\.(?=\s)/, "");
    }
};
/**
 * Collects from an article everything needed to fill a lead template.
 *
 * @param {string} pageText Wikitext of the article.
 * @param {string} pageName Title of the article.
 * @param {number} [summaryMethod] Summary method passed on to
 *     Bawolff.leadGen.takeIntro.
 * @returns {{width: string, image: string, title: string, synopsis: string,
 *     edit_this: string, audio: string, date: string}} The lead description.
 * @throws {Error} When the page is a redirect (raised by takeIntro).
 */
Bawolff.leadGen.extract = function (pageText, pageName, summaryMethod) {
    // edit_this is only a placeholder: per the original note, the edit link
    // is overridden later.
    return {
        width: '200',
        image: Bawolff.leadGen.extractImg(pageText),
        title: pageName,
        synopsis: Bawolff.leadGen.takeIntro(pageText, summaryMethod),
        edit_this: 'Wikinews:Bac_à_sable',
        audio: Bawolff.leadGen.extractAudio(pageText),
        date: Bawolff.leadGen.extractDate(pageText)
    };
};
/**
 * Serialises a lead description into the wikitext of the
 * "Article principal" template.
 *
 * @param {Object} leadObj Lead description; the fields read are image, width,
 *     date, title, audio and synopsis.
 * @param {number|string} leadNumb Lead number; used for the modif parameter
 *     ("Modèle:Une " + leadNumb).
 * @returns {string} Wikitext of the template call followed by the
 *     documentation template.
 */
Bawolff.leadGen.create = function (leadObj, leadNumb) {
    // The double braces are assembled from two pieces, presumably so the wiki
    // parser does not expand them when this script page itself is rendered.
    var res = '{' + '{Article principal';
    res += "\n |modif=Modèle:Une " + leadNumb;
    res += "\n |Image=" + leadObj.image;
    res += "\n |Image width=" + leadObj.width;
    res += "\n |Image text="; // default to blank for now.
    res += "\n |bordure=1";
    res += "\n |thème="; // default to blank for now.
    res += "\n |thème nom="; // default to blank for now.
    res += "\n |date=" + leadObj.date;
    res += "\n |Titre=" + leadObj.title;
    res += "\n |audio=" + leadObj.audio;
    res += "\n |Synopsis=" + leadObj.synopsis;
    // NOTE(review): the upstream gadget may wrap the documentation template in
    // <noinclude> tags that were lost when this copy was extracted - confirm.
    res += "\n}}\n{{";
    res += "Modèle:Article principal/Documentation}}";
    return res;
};

//calls its argument giving it an object with meta info about the current leads.
//probably want to use Bawolff.leadGen.makeLeadTable instead.
/**
 * Fetches the current lead templates (Modèle:Une 1 to 3) and calls callback
 * with an object describing them. You probably want
 * Bawolff.leadGen.makeLeadTable instead.
 *
 * The object is keyed by template title; each value is
 * {timestamp, title}, where timestamp is the value returned by
 * Bawolff.mwapi.parseAPIDate for the latest revision and title is the value
 * of the template's |Titre= parameter.
 *
 * Side effect: stores the title of the least recently edited lead template in
 * Bawolff.leadGen.oldestLead.
 *
 * @param {Function} callback Receives the meta object.
 */
Bawolff.leadGen.makeLeadMetaObject = function (callback) {
    var leadObj = function (doc) {
        // Pulls the value of |Titre= out of the template wikitext and trims
        // trailing whitespace. Text without |Titre= is returned whole.
        var exTitle = function (text) {
            text = text.replace(/[\s\S]*?\|Titre=([^\|]*)[\s\S]*/, '$1');
            text = text.replace(/\s*$/, '');
            return text;
        };

        var pages = doc.getElementsByTagName('page');
        var obj = {}, time, time2 = Infinity;
        for (var i = 0; i < pages.length; i++) {
            var rev = pages[i].getElementsByTagName('rev')[0];
            if (!rev) {
                // No revision element (e.g. the template does not exist):
                // skip it instead of failing on undefined.
                continue;
            }
            // Merge adjacent text nodes so firstChild holds the whole content.
            rev.normalize();
            time = Bawolff.mwapi.parseAPIDate(rev.getAttribute('timestamp'));
            // This is really ugly...
            // Put the oldest lead in a global variable.
            if (time < time2) { // this compares milliseconds after epoch
                Bawolff.leadGen.oldestLead = pages[i].getAttribute('title');
                time2 = time;
            }
            obj[pages[i].getAttribute('title')] = {
                timestamp: time,
                title: exTitle(rev.firstChild ? rev.firstChild.data : '')
            };
        }
        return obj;
    };

    api().makeRequest({
        action: 'query',
        prop: 'revisions',
        titles: 'Modèle:Une 1|Modèle:Une 2|Modèle:Une 3',
        rvprop: 'timestamp|content',
        redirects: true
    }, leadObj).lift(callback).exec();
};

// Title of the least recently edited lead template; set by makeLeadMetaObject.
Bawolff.leadGen.oldestLead = null;
/**
 * Builds an HTML table summarising the current leads and passes it to
 * callback as a string. Columns: #, Position, Article, Âge (age of the latest
 * revision of the lead template, in hours).
 *
 * NOTE(review): the HTML markup of this function was destroyed when this copy
 * was extracted; it is reconstructed here from the visible header cells and
 * statement order. Any attributes on the original <table> element (class,
 * border, ...) are unknown - compare with the live gadget.
 *
 * @param {Function} callback Receives the HTML string.
 * @throws {Error} From Bawolff.leadGen.leadToPosition when a lead title does
 *     not end in a known lead number.
 */
Bawolff.leadGen.makeLeadTable = function (callback) {
    // The article title comes from editable wiki content, so neutralise HTML
    // metacharacters before inserting it into markup.
    var escapeHtml = function (text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    };

    var wrapper = function (leadObj) {
        var html = '<table>';
        html += '<thead><tr><th>#</th><th>Position</th><th>Article</th><th>Âge</th></tr></thead><tbody>';
        for (var i in leadObj) {
            if (leadObj.hasOwnProperty(i)) {
                var numb = i.charAt(i.length - 1); // last character
                html += '<tr><td>';
                html += numb;
                html += '</td><td>';
                html += Bawolff.leadGen.leadToPosition(numb);
                html += '</td><td>';
                html += escapeHtml(leadObj[i].title);
                html += '</td><td>';
                var time = leadObj[i].timestamp.getTime(); // convert to milliseconds
                var d = (new Date).getTime();
                var delta = Math.round((d - time) / (1000*60*60)); // whole hours
                var timeStr;
                if (delta === 1) {
                    timeStr = delta + ' heure';
                } else {
                    timeStr = delta + ' heures';
                }
                html += timeStr;
                html += "</td></tr>";
            }
        }
        html += '</tbody></table>';
        callback(html);
    };
    Bawolff.leadGen.makeLeadMetaObject(wrapper);
};
/**
 * Translates a lead number into the name of its position on the page.
 *
 * @param {number|string} numb Lead number 1-4 (compared loosely, so "1" is
 *     accepted as well), or the string 'e', which is treated like 1.
 * @returns {string} "En haut", "Milieu", "Milieu 2" or "En bas".
 * @throws {Error} When numb is none of the accepted values.
 */
Bawolff.leadGen.leadToPosition = function (numb) {
    var labels = ["En haut", "Milieu", "Milieu 2", "En bas"];
    for (var slot = 0; slot < labels.length; slot++) {
        // Loose equality is intentional: it converts from string.
        if (numb == slot + 1 || (slot === 0 && numb === 'e')) {
            return labels[slot];
        }
    }
    throw new Error("Chiffre de l\'article principal invalide (" + numb +") trasmis à Bawolff.leadGen.leadToPosition");
};