Meego Wiki
Views

Metrics/MediaWiki queries

From MeeGo wiki
Jump to: navigation, search
A word of warning with MediaWiki: when initialising the database, a table prefix is used for table names, which is configured on a per-wiki database (this allows different MediaWiki instances to be stored in the same database instance, which is kind of a useless feature, but anyway...). On wiki.meego.org, this prefix is set to "", while the detault MediaWiki prefix is "mw_", and this is what I used. Ideally, we would do like Wikipedia, and have a set of parameters containing the table names which would be used:
${revision}
instead of
mw_revision

Contents

Wiki editors per month


select
	user_edits.monthnum,
	user_edits.monthstr,
	count(*) as editor_count,
	user_active_edits.active_editor_count as active_editor_count
from
(
	select
		date_format(rev_timestamp,'%Y%m') as monthnum,
		date_format(rev_timestamp,'%b %y') as monthstr,
	    	CAST(rev_user_text as CHAR) AS username,
		count(*) as editcount
	from
		`mw_revision`
	group by 
		monthnum,username
	HAVING monthnum > 201001
		and monthnum <= date_format(${last_month},'%Y%m')
	order by 
		monthnum,editcount desc
)
as user_edits
inner join
(
	select
		monthnum,
		count(*) as active_editor_count
	from
	(
		select
			date_format(rev_timestamp,'%Y%m') as monthnum,
		    	CAST(rev_user_text as CHAR) AS username,
			count(*) as editcount
		from
			`mw_revision`
		group by 
			monthnum,username
		having 
			editcount >1
			and monthnum > 201001
			and monthnum <= date_format(${last_month},'%Y%m')
	)
	as
		active_editors
	group by 
		monthnum
	order by 
		monthnum
)
as user_active_edits
on user_active_edits.monthnum=user_edits.monthnum
group by
	user_edits.monthnum
order by
	user_edits.monthnum;

Wiki edits per month


SELECT 
		date_format(rev_timestamp,'%Y%m') as monthnum, 
		count(*) as edit_count 
	FROM `mw_revision` 
	GROUP BY monthnum 
	HAVING monthnum > 201001 and
		monthnum <= date_format(${last_month},'%Y%m')

Wiki top editors last month


SELECT
     CAST(rev_user_text as CHAR) AS user,
     COUNT(*) AS edits
FROM `mw_revision`
WHERE date_format(rev_timestamp, '%Y%m') = date_format(${last_month}, '%Y%m') 
GROUP BY user
ORDER BY edits desc
LIMIT 20;

Edits all time

A test query, unused in the report.


SELECT
     COUNT(*) AS edits
FROM
     `mw_revision` rev

Wiki edits and new pages per month


select 
	new_pages.monthnum,
	new_pages.monthstr,
	edit_count,
	count(*) as new_page_count 
from 
(
	select 
		page_id,
		date_format(min(rev_timestamp),'%Y%m') as monthnum,
		date_format(min(rev_timestamp),'%b %y') as monthstr 
	from 
		mw_page, 
		mw_revision 
	where 
		page_id=rev_page and 
		date_format(rev_timestamp,'%Y%m')>'201001'
	group by page_id
) as new_pages 
inner join 
(
	SELECT 
		date_format(rev_timestamp,'%Y%m') as monthnum, 
		count(*) as edit_count 
	FROM `mw_revision` 
	GROUP BY monthnum 
	HAVING monthnum > 201001
) as edits 
on 
	edits.monthnum=new_pages.monthnum and
	new_pages.monthnum <= date_format(${last_month},'%Y%m') 
group by 
	new_pages.monthnum 
order by 
	new_pages.monthnum;

Wiki new pages per month


SELECT
     COUNT(*) AS pages,
     date_format(rev_timestamp,'%Y%m') as monthnum, 
     date_format(rev_timestamp, '%M %Y') as monthstr,
     year(rev_timestamp) as year,
     monthname(rev_timestamp) as month
FROM
     `mw_revision` rev
WHERE
     date_format(rev_timestamp,'%Y%m') <= date_format(${last_month},'%Y%m')
GROUP BY
     year,
     month
HAVING
     monthnum > 201001
ORDER BY monthnum

Wiki editors >1 edits per month


select
	monthnum,
	count(*) as active_editor_count
from
(
	select
		date_format(rev_timestamp,'%Y%m') as monthnum,
	    	CAST(rev_user_text as CHAR) AS username,
		count(*) as editcount
	from
		`mw_revision`
	group by 
		monthnum,username
	having 
		editcount >1
	order by 
		monthnum,editcount desc
)
as user_edits
group by
	monthnum
order by
	monthnum

Top editors all time


SELECT CAST(rev_user_text as CHAR) AS user,
       COUNT(*) AS c
FROM `mw_revision` AS rev
GROUP BY user
ORDER BY c desc
LIMIT 10;

MediaWiki database schema

MediaWiki uses 41 different tables in version 1.15. Not all of them are useful for community metrics. Those with potentially useful information are:

  • mw_page: Each page in the wiki has one entry in the "page" table, containing the name of its creator, a unique ID, and a pointer to the latest revision.
  • mw_revision: Every edit made in the wiki is recorded in the revision page. Information recorded includes the author, timestamp, and a pointer to the text of the page after the revision
  • mw_text: Contains the page text for a given revision of a given page. Potentially content analysis possible.
  • mw_user: Allows cross-referencing of usernames and user IDs, among other things.


What follows is the schema for the "important" tables (user, page, revision, text)

Table 'user'


CREATE TABLE `mw_user` (
  `user_id` int(10) unsigned NOT NULL,
  `user_name` varbinary(255) NOT NULL DEFAULT '',
  `user_real_name` varbinary(255) NOT NULL DEFAULT '',
  `user_password` tinyblob NOT NULL,
  `user_newpassword` tinyblob NOT NULL,
  `user_newpass_time` binary(14) DEFAULT NULL,
  `user_email` tinyblob NOT NULL,
  `user_options` blob NOT NULL,
  `user_touched` binary(14) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
  `user_token` binary(32) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
  `user_email_authenticated` binary(14) DEFAULT NULL,
  `user_email_token` binary(32) DEFAULT NULL,
  `user_email_token_expires` binary(14) DEFAULT NULL,
  `user_registration` binary(14) DEFAULT NULL,
  `user_editcount` int(11) DEFAULT NULL,
  PRIMARY KEY (`user_id`),
  UNIQUE KEY `user_name` (`user_name`),
  KEY `user_email_token` (`user_email_token`)
);

Table 'page'


CREATE TABLE `mw_page` (
  `page_id` int(10) unsigned NOT NULL,
  `page_namespace` int(11) NOT NULL,
  `page_title` varbinary(255) NOT NULL,
  `page_restrictions` tinyblob NOT NULL,
  `page_counter` bigint(20) unsigned NOT NULL DEFAULT '0',
  `page_is_redirect` tinyint(3) unsigned NOT NULL DEFAULT '0',
  `page_is_new` tinyint(3) unsigned NOT NULL DEFAULT '0',
  `page_random` double unsigned NOT NULL,
  `page_touched` binary(14) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
  `page_latest` int(10) unsigned NOT NULL,
  `page_len` int(10) unsigned NOT NULL,
  PRIMARY KEY (`page_id`),
  UNIQUE KEY `name_title` (`page_namespace`,`page_title`),
  KEY `page_random` (`page_random`),
  KEY `page_len` (`page_len`)
);

Table 'revision'


CREATE TABLE `mw_revision` (
  `rev_id` int(10) unsigned NOT NULL,
  `rev_page` int(10) unsigned NOT NULL,
  `rev_text_id` int(10) unsigned NOT NULL,
  `rev_comment` tinyblob NOT NULL,
  `rev_user` int(10) unsigned NOT NULL DEFAULT '0',
  `rev_user_text` varbinary(255) NOT NULL DEFAULT '',
  `rev_timestamp` binary(14) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0',
  `rev_minor_edit` tinyint(3) unsigned NOT NULL DEFAULT '0',
  `rev_deleted` tinyint(3) unsigned NOT NULL DEFAULT '0',
  `rev_len` int(10) unsigned DEFAULT NULL,
  `rev_parent_id` int(10) unsigned DEFAULT NULL,
  PRIMARY KEY (`rev_id`),
  UNIQUE KEY `rev_page_id` (`rev_page`,`rev_id`),
  KEY `rev_timestamp` (`rev_timestamp`),
  KEY `page_timestamp` (`rev_page`,`rev_timestamp`),
  KEY `user_timestamp` (`rev_user`,`rev_timestamp`),
  KEY `usertext_timestamp` (`rev_user_text`,`rev_timestamp`)
);

Table 'text'


CREATE TABLE `mw_text` (
  `old_id` int(10) unsigned NOT NULL,
  `old_text` mediumblob NOT NULL,
  `old_flags` tinyblob NOT NULL,
  PRIMARY KEY (`old_id`)
);

Personal tools