(MLStats queries) |
(Add schema for mlstats) |
||
| Line 245: | Line 245: | ||
list ASC, | list ASC, | ||
c ASC; | c ASC; | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | == MLStats database schema == | ||
| + | |||
| + | === Table compressed_files === | ||
| + | |||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `compressed_files` ( | ||
| + | `url` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `status` enum('new','visited','failed') DEFAULT NULL, | ||
| + | `last_analysis` datetime DEFAULT NULL, | ||
| + | PRIMARY KEY (`url`), | ||
| + | KEY `mailing_list_url` (`mailing_list_url`), | ||
| + | CONSTRAINT `compressed_files_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`) | ||
| + | ); | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | === Table mailing_lists === | ||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `mailing_lists` ( | ||
| + | `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `mailing_list_name` varchar(255) CHARACTER SET utf8 DEFAULT 'NULL', | ||
| + | `project_name` varchar(255) CHARACTER SET utf8 DEFAULT 'NULL', | ||
| + | `last_analysis` datetime DEFAULT NULL, | ||
| + | PRIMARY KEY (`mailing_list_url`) | ||
| + | ); | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | === Table mailing_lists_people === | ||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `mailing_lists_people` ( | ||
| + | `email_address` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | PRIMARY KEY (`email_address`,`mailing_list_url`), | ||
| + | KEY `mailing_list_url` (`mailing_list_url`), | ||
| + | CONSTRAINT `mailing_lists_people_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`) ON DELETE CASCADE ON UPDATE CASCADE, | ||
| + | CONSTRAINT `mailing_lists_people_ibfk_2` FOREIGN KEY (`email_address`) REFERENCES `people` (`email_address`) ON DELETE CASCADE ON UPDATE CASCADE | ||
| + | ); | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | === Table messages === | ||
| + | |||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `messages` ( | ||
| + | `message_ID` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `mailing_list` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `first_date` datetime DEFAULT NULL, | ||
| + | `first_date_tz` int(11) DEFAULT NULL, | ||
| + | `arrival_date` datetime DEFAULT NULL, | ||
| + | `arrival_date_tz` int(11) DEFAULT NULL, | ||
| + | `subject` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `message_body` text CHARACTER SET utf8, | ||
| + | `is_response_of` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `mail_path` text CHARACTER SET utf8, | ||
| + | PRIMARY KEY (`message_ID`), | ||
| + | KEY `response` (`is_response_of`), | ||
| + | KEY `mailing_list_url` (`mailing_list_url`), | ||
| + | CONSTRAINT `messages_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`) ON DELETE CASCADE ON UPDATE CASCADE | ||
| + | ); | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | === Table messages_people === | ||
| + | |||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `messages_people` ( | ||
| + | `type_of_recipient` enum('From','To','Cc') NOT NULL DEFAULT 'From', | ||
| + | `message_id` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `email_address` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | PRIMARY KEY (`type_of_recipient`,`message_id`,`email_address`), | ||
| + | KEY `m_id` (`message_id`), | ||
| + | KEY `message_id` (`message_id`), | ||
| + | KEY `email_address` (`email_address`), | ||
| + | CONSTRAINT `messages_people_ibfk_1` FOREIGN KEY (`message_id`) REFERENCES `messages` (`message_ID`) ON DELETE CASCADE ON UPDATE CASCADE, | ||
| + | CONSTRAINT `messages_people_ibfk_2` FOREIGN KEY (`email_address`) REFERENCES `people` (`email_address`) ON DELETE CASCADE ON UPDATE CASCADE | ||
| + | ); | ||
| + | </nowiki> | ||
| + | </pre> | ||
| + | |||
| + | === Table people === | ||
| + | |||
| + | <pre> | ||
| + | <nowiki> | ||
| + | CREATE TABLE `people` ( | ||
| + | `email_address` varchar(255) CHARACTER SET utf8 NOT NULL, | ||
| + | `name` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `username` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `domain_name` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | `top_level_domain` varchar(255) CHARACTER SET utf8 DEFAULT NULL, | ||
| + | PRIMARY KEY (`email_address`) | ||
| + | ); | ||
</nowiki> | </nowiki> | ||
</pre> | </pre> | ||
These are all of the queries which we have used for mailing list statistics in the monthly report:
Note: last_month is a parameter set once in the master report representing the month and year of the previous month, in the format 'YYYYMM'. For example, in September 2011, lastmonth is '201108'.
SELECT
if(messages.mailing_list_url like '%-community%' or
messages.mailing_list_url like '%meego-dev%' or
messages.mailing_list_url like '%-sdk%' or
messages.mailing_list_url like '%-packaging%' or
messages.mailing_list_url like '%-qa%',
substring_index(TRIM(TRAILING '/' FROM `messages`.`mailing_list_url`), '/', -1),
'Other') as list,
year(first_date) AS y,
monthname(first_date) AS mon,
month(first_date) AS m,
date_format(first_date, '%b %Y') as monthstr,
date_format(first_date,'%Y%m') as monthnum,
count(*) AS posts
FROM
`messages`
WHERE
month(first_date)>=MONTH(DATE_SUB(NOW(), INTERVAL 4 MONTH)) and
year(first_date)>=YEAR(DATE_SUB(NOW(), INTERVAL 4 MONTH)) and
mailing_list_url not like '%meego-commits%' and
date_format(first_date,'%Y%m') <= date_format(${last_month},'%Y%m')
GROUP BY
list,
y,m
ORDER BY
list ASC,
monthnum ASC
This query is unused in the report, I think. Left in for posterity.
SELECT
substring_index(TRIM(TRAILING '/' FROM `messages`.`mailing_list_url`), '/', -1) AS list,
year(first_date) AS y,
monthname(first_date) AS mon,
month(first_date) AS m,
date_format(first_date, '%M %Y') as monthstr,
date_format(first_date,'%Y%m') as monthnum,
count(*) AS c
FROM
`messages`
WHERE
year(first_date) > 1979 and
mailing_list_url not like '%meego-commits%'
GROUP BY
`messages`.`mailing_list_url`,
y,m
ORDER BY
list ASC,
monthnum ASC,
c ASC
Provides the number of active mailing list participants - sending more than one email per month.
select
monthname(${last_month}) as monthname,
year(${last_month}) as y,
count(*) as c
from
( select
p.email_address as member,
count(*) AS c
from messages as m,
messages_people as p
where
m.message_id=p.message_ID and
month(m.first_date)=month(${last_month}) and
year(m.first_date)=year(${last_month}) and
p.email_address NOT LIKE '%no_reply@%'
group by p.email_address
)
as repeat_posters
where c>=2;
SELECT
monthname(${last_month}) as monthname,
count(*) AS message_count
FROM
`messages`
WHERE
month(first_date) = month(${last_month}) and
year(first_date) = year(${last_month}) and
mailing_list_url not like '%meego-commits%'
select
subject,
count(*) as c
from messages
where year(first_date)=year(${last_month})
and month(first_date)=month(${last_month})
group by subject
order by c desc
limit 10;
select
p.email_address,
count(*) as c
from
messages as m,
messages_people as p
where
m.message_id=p.message_ID
and month(m.first_date)=month(${last_month})
and year(m.first_date)=year(${last_month})
and p.email_address NOT LIKE '%no_reply@%'
group by p.email_address
order by c desc
limit 10;
This query is used to graph the evolution of each mailing list in a graph. The top 5 lists are taken out, and the others are grouped together.
select
if(messages.mailing_list_url like '%-community%' or
messages.mailing_list_url like '%meego-dev%' or
messages.mailing_list_url like '%-sdk%' or
messages.mailing_list_url like '%-packaging%' or
messages.mailing_list_url like '%-qa%',
substring_index(TRIM(TRAILING '/' FROM `messages`.`mailing_list_url`), '/', -1),
'Other') as list,
year(first_date) AS y,
monthname(first_date) AS mon,
month(first_date) AS m,
date_format(first_date, '%b %Y') as monthstr,
date_format(first_date,'%Y%m') as monthnum,
count(*) AS c
FROM
`messages`
WHERE
year(first_date) > 1979 and
date_format(first_date,'%Y%m') <= date_format(${last_month},'%Y%m') and
messages.mailing_list_url not like '%meego-commit%'
GROUP BY
list,y,m
ORDER BY
monthnum ASC,
list asc,
c asc;
I should really have done a "top N posters last month" and made N a parameter. This was the quick copy & paste way to go.
select
p.email_address,
count(*) as c
from
messages as m,
messages_people as p
where
m.message_id=p.message_ID
and month(m.first_date)=month(${last_month})
and year(m.first_date)=year(${last_month})
and p.email_address NOT LIKE '%no_reply@%'
group by p.email_address
order by c desc
limit 20;
select
count(distinct p.email_address) as member_count,
monthname(${last_month}) AS monthname
from
messages as m,
messages_people as p
where
m.message_id=p.message_ID
and month(m.first_date)=month(${last_month})
and year(m.first_date)=year(${last_month})
and p.email_address NOT LIKE '%no_reply@%';
All mailing lists. The graph was far too cluttered.
SELECT
substring_index(TRIM(TRAILING '/' FROM `messages`.`mailing_list_url`), '/', -1) AS list,
year(first_date) AS y,
monthname(first_date) AS mon,
month(first_date) AS m,
date_format(first_date, '%b %Y') as monthstr,
date_format(first_date,'%Y%m') as monthnum,
count(*) AS c
FROM
`messages`
WHERE
year(first_date) > 1979 and
mailing_list_url not like '%meego-commits%' and
date_format(first_date,'%Y%m') <= date_format(${last_month},'%Y%m')
GROUP BY
list,
y,m
ORDER BY
monthnum ASC,
list ASC,
c ASC;
CREATE TABLE `compressed_files` (
`url` varchar(255) CHARACTER SET utf8 NOT NULL,
`mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL,
`status` enum('new','visited','failed') DEFAULT NULL,
`last_analysis` datetime DEFAULT NULL,
PRIMARY KEY (`url`),
KEY `mailing_list_url` (`mailing_list_url`),
CONSTRAINT `compressed_files_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`)
);
CREATE TABLE `mailing_lists` ( `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, `mailing_list_name` varchar(255) CHARACTER SET utf8 DEFAULT 'NULL', `project_name` varchar(255) CHARACTER SET utf8 DEFAULT 'NULL', `last_analysis` datetime DEFAULT NULL, PRIMARY KEY (`mailing_list_url`) );
CREATE TABLE `mailing_lists_people` ( `email_address` varchar(255) CHARACTER SET utf8 NOT NULL, `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, PRIMARY KEY (`email_address`,`mailing_list_url`), KEY `mailing_list_url` (`mailing_list_url`), CONSTRAINT `mailing_lists_people_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`) ON DELETE CASCADE ON UPDATE CASCADE, CONSTRAINT `mailing_lists_people_ibfk_2` FOREIGN KEY (`email_address`) REFERENCES `people` (`email_address`) ON DELETE CASCADE ON UPDATE CASCADE );
CREATE TABLE `messages` ( `message_ID` varchar(255) CHARACTER SET utf8 NOT NULL, `mailing_list_url` varchar(255) CHARACTER SET utf8 NOT NULL, `mailing_list` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `first_date` datetime DEFAULT NULL, `first_date_tz` int(11) DEFAULT NULL, `arrival_date` datetime DEFAULT NULL, `arrival_date_tz` int(11) DEFAULT NULL, `subject` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `message_body` text CHARACTER SET utf8, `is_response_of` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `mail_path` text CHARACTER SET utf8, PRIMARY KEY (`message_ID`), KEY `response` (`is_response_of`), KEY `mailing_list_url` (`mailing_list_url`), CONSTRAINT `messages_ibfk_1` FOREIGN KEY (`mailing_list_url`) REFERENCES `mailing_lists` (`mailing_list_url`) ON DELETE CASCADE ON UPDATE CASCADE );
CREATE TABLE `messages_people` (
`type_of_recipient` enum('From','To','Cc') NOT NULL DEFAULT 'From',
`message_id` varchar(255) CHARACTER SET utf8 NOT NULL,
`email_address` varchar(255) CHARACTER SET utf8 NOT NULL,
PRIMARY KEY (`type_of_recipient`,`message_id`,`email_address`),
KEY `m_id` (`message_id`),
KEY `message_id` (`message_id`),
KEY `email_address` (`email_address`),
CONSTRAINT `messages_people_ibfk_1` FOREIGN KEY (`message_id`) REFERENCES `messages` (`message_ID`) ON DELETE CASCADE ON UPDATE CASCADE,
CONSTRAINT `messages_people_ibfk_2` FOREIGN KEY (`email_address`) REFERENCES `people` (`email_address`) ON DELETE CASCADE ON UPDATE CASCADE
);
CREATE TABLE `people` ( `email_address` varchar(255) CHARACTER SET utf8 NOT NULL, `name` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `username` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `domain_name` varchar(255) CHARACTER SET utf8 DEFAULT NULL, `top_level_domain` varchar(255) CHARACTER SET utf8 DEFAULT NULL, PRIMARY KEY (`email_address`) );