I played a bit with database.
Here is something that looks like Wikipedia database schema but
compiles in Posgres (and no, Wikipedia script won't be able to use
that yet):
CREATE TABLE user_table (
user_id serial NOT NULL PRIMARY KEY,
user_name varchar(255) NOT NULL default '',
user_rights varchar(255) NOT NULL default '',
user_password varchar(255) NOT NULL default '',
user_newpassword varchar(255) NOT NULL default '',
user_email varchar(255) NOT NULL default '',
user_options varchar(65535) NOT NULL default '',
user_newtalk smallint NOT NULL default '1'
);
CREATE TABLE cur (
cur_id serial NOT NULL PRIMARY KEY,
cur_namespace smallint NOT NULL default '0',
cur_title varchar(255) NOT NULL default '',
cur_text text NOT NULL default '',
cur_comment varchar(255) NOT NULL default '',
cur_user int NOT NULL default '0',
cur_user_text varchar(255) NOT NULL default '',
cur_timestamp char(14) NOT NULL default '',
cur_restrictions varchar(255) NOT NULL default '',
cur_counter bigint NOT NULL default '0',
cur_ind_title varchar(255) NOT NULL default '',
cur_ind_text text NOT NULL default '',
cur_is_redirect boolean NOT NULL default 'f',
cur_minor_edit boolean NOT NULL default 'f',
cur_is_new boolean NOT NULL default 'f'
);
CREATE TABLE old_table (
old_id serial NOT NULL PRIMARY KEY,
old_namespace smallint NOT NULL default '0',
old_title varchar(255) NOT NULL default '',
old_text text NOT NULL default '',
old_comment varchar(255) NOT NULL default '',
old_user int NOT NULL default '0',
old_user_text varchar(255) NOT NULL,
old_timestamp char(14) NOT NULL default '',
old_minor_edit boolean NOT NULL default 'f',
old_flags varchar(255) NOT NULL default ''
);
CREATE TABLE archive (
ar_namespace smallint NOT NULL default '0',
ar_title varchar(255) NOT NULL default '',
ar_text text NOT NULL default '',
ar_comment varchar(255) NOT NULL default '',
ar_user int NOT NULL default '0',
ar_user_text varchar(255) NOT NULL,
ar_timestamp char(14) NOT NULL default '',
ar_minor_edit boolean NOT NULL default 'f',
ar_flags varchar(255) NOT NULL default ''
);
CREATE TABLE links (
l_from varchar(255) NOT NULL default '',
l_to int NOT NULL default '0'
);
CREATE TABLE brokenlinks (
bl_from int NOT NULL default '0',
bl_to varchar(255) NOT NULL default ''
);
CREATE TABLE imagelinks (
il_from varchar(255) NOT NULL default '',
il_to varchar(255) NOT NULL default ''
);
CREATE TABLE site_stats (
ss_row_id int NOT NULL PRIMARY KEY,
ss_total_views bigint default '0',
ss_total_edits bigint default '0',
ss_good_articles bigint default '0'
);
CREATE TABLE ipblocks (
ipb_address varchar(40) NOT NULL default '',
ipb_user int NOT NULL default '0',
ipb_by int NOT NULL default '0',
ipb_reason varchar(255) NOT NULL default '',
ipb_timestamp char(14) NOT NULL default ''
);
CREATE TABLE image (
img_name varchar(255) NOT NULL default '',
img_size int NOT NULL default '0',
img_description varchar(255) NOT NULL default '',
img_user int NOT NULL default '0',
img_user_text varchar(255) NOT NULL default '',
img_timestamp char(14) NOT NULL default ''
);
CREATE TABLE oldimage (
oi_name varchar(255) NOT NULL default '',
oi_archive_name varchar(255) NOT NULL default '',
oi_size int NOT NULL default 0,
oi_description varchar(255) NOT NULL default '',
oi_user int NOT NULL default '0',
oi_user_text varchar(255) NOT NULL default '',
oi_timestamp char(14) NOT NULL default ''
);
CREATE TABLE random (
ra_current smallint NOT NULL default 0,
ra_title varchar(255) NOT NULL default ''
);
CREATE TABLE recentchanges (
rc_timestamp varchar(14) NOT NULL default '',
rc_cur_time varchar(14) NOT NULL default '',
rc_user int NOT NULL default '0',
rc_user_text varchar(255) NOT NULL default '',
rc_namespace smallint NOT NULL default '0',
rc_title varchar(255) NOT NULL default '',
rc_comment varchar(255) NOT NULL default '',
rc_minor boolean NOT NULL default 'f',
rc_bot boolean NOT NULL default 'f',
rc_new boolean NOT NULL default 'f',
rc_cur_id int NOT NULL default '0',
rc_this_oldid int NOT NULL default '0',
rc_last_oldid int NOT NULL default '0'
);
CREATE TABLE watchlist (
wl_user int NOT NULL,
wl_namespace smallint NOT NULL default '0',
wl_title varchar(255) NOT NULL default ''
);
CREATE TABLE math (
math_inputhash char(16) NOT NULL PRIMARY KEY,
math_outputhash char(16) NOT NULL,
math_conservative bool NOT NULL,
math_html text NOT NULL
);
Of course, this isn't "real" code, and many changes will be necessary
to make it any useful. Anyway:
* user and old are reserved keywords in Postgres
* why are we using tinyblob(1) instead of bool for boolean data ?
* why are we creating so many indexes ? are all of them necessary ?
* shouldn't we use database-provided timestamps instead of varchar(14) ?
* rights and restrictions don't seem to be represented very efficiently.
using integer flag for that seems like a better idea to me
* it might be good idea to consider making our own searching system
instead of relying on something provided by database
Here is something that looks like Wikipedia database schema but
compiles in Posgres (and no, Wikipedia script won't be able to use
that yet):
CREATE TABLE user_table (
user_id serial NOT NULL PRIMARY KEY,
user_name varchar(255) NOT NULL default '',
user_rights varchar(255) NOT NULL default '',
user_password varchar(255) NOT NULL default '',
user_newpassword varchar(255) NOT NULL default '',
user_email varchar(255) NOT NULL default '',
user_options varchar(65535) NOT NULL default '',
user_newtalk smallint NOT NULL default '1'
);
CREATE TABLE cur (
cur_id serial NOT NULL PRIMARY KEY,
cur_namespace smallint NOT NULL default '0',
cur_title varchar(255) NOT NULL default '',
cur_text text NOT NULL default '',
cur_comment varchar(255) NOT NULL default '',
cur_user int NOT NULL default '0',
cur_user_text varchar(255) NOT NULL default '',
cur_timestamp char(14) NOT NULL default '',
cur_restrictions varchar(255) NOT NULL default '',
cur_counter bigint NOT NULL default '0',
cur_ind_title varchar(255) NOT NULL default '',
cur_ind_text text NOT NULL default '',
cur_is_redirect boolean NOT NULL default 'f',
cur_minor_edit boolean NOT NULL default 'f',
cur_is_new boolean NOT NULL default 'f'
);
CREATE TABLE old_table (
old_id serial NOT NULL PRIMARY KEY,
old_namespace smallint NOT NULL default '0',
old_title varchar(255) NOT NULL default '',
old_text text NOT NULL default '',
old_comment varchar(255) NOT NULL default '',
old_user int NOT NULL default '0',
old_user_text varchar(255) NOT NULL,
old_timestamp char(14) NOT NULL default '',
old_minor_edit boolean NOT NULL default 'f',
old_flags varchar(255) NOT NULL default ''
);
CREATE TABLE archive (
ar_namespace smallint NOT NULL default '0',
ar_title varchar(255) NOT NULL default '',
ar_text text NOT NULL default '',
ar_comment varchar(255) NOT NULL default '',
ar_user int NOT NULL default '0',
ar_user_text varchar(255) NOT NULL,
ar_timestamp char(14) NOT NULL default '',
ar_minor_edit boolean NOT NULL default 'f',
ar_flags varchar(255) NOT NULL default ''
);
CREATE TABLE links (
l_from varchar(255) NOT NULL default '',
l_to int NOT NULL default '0'
);
CREATE TABLE brokenlinks (
bl_from int NOT NULL default '0',
bl_to varchar(255) NOT NULL default ''
);
CREATE TABLE imagelinks (
il_from varchar(255) NOT NULL default '',
il_to varchar(255) NOT NULL default ''
);
CREATE TABLE site_stats (
ss_row_id int NOT NULL PRIMARY KEY,
ss_total_views bigint default '0',
ss_total_edits bigint default '0',
ss_good_articles bigint default '0'
);
CREATE TABLE ipblocks (
ipb_address varchar(40) NOT NULL default '',
ipb_user int NOT NULL default '0',
ipb_by int NOT NULL default '0',
ipb_reason varchar(255) NOT NULL default '',
ipb_timestamp char(14) NOT NULL default ''
);
CREATE TABLE image (
img_name varchar(255) NOT NULL default '',
img_size int NOT NULL default '0',
img_description varchar(255) NOT NULL default '',
img_user int NOT NULL default '0',
img_user_text varchar(255) NOT NULL default '',
img_timestamp char(14) NOT NULL default ''
);
CREATE TABLE oldimage (
oi_name varchar(255) NOT NULL default '',
oi_archive_name varchar(255) NOT NULL default '',
oi_size int NOT NULL default 0,
oi_description varchar(255) NOT NULL default '',
oi_user int NOT NULL default '0',
oi_user_text varchar(255) NOT NULL default '',
oi_timestamp char(14) NOT NULL default ''
);
CREATE TABLE random (
ra_current smallint NOT NULL default 0,
ra_title varchar(255) NOT NULL default ''
);
CREATE TABLE recentchanges (
rc_timestamp varchar(14) NOT NULL default '',
rc_cur_time varchar(14) NOT NULL default '',
rc_user int NOT NULL default '0',
rc_user_text varchar(255) NOT NULL default '',
rc_namespace smallint NOT NULL default '0',
rc_title varchar(255) NOT NULL default '',
rc_comment varchar(255) NOT NULL default '',
rc_minor boolean NOT NULL default 'f',
rc_bot boolean NOT NULL default 'f',
rc_new boolean NOT NULL default 'f',
rc_cur_id int NOT NULL default '0',
rc_this_oldid int NOT NULL default '0',
rc_last_oldid int NOT NULL default '0'
);
CREATE TABLE watchlist (
wl_user int NOT NULL,
wl_namespace smallint NOT NULL default '0',
wl_title varchar(255) NOT NULL default ''
);
CREATE TABLE math (
math_inputhash char(16) NOT NULL PRIMARY KEY,
math_outputhash char(16) NOT NULL,
math_conservative bool NOT NULL,
math_html text NOT NULL
);
Of course, this isn't "real" code, and many changes will be necessary
to make it any useful. Anyway:
* user and old are reserved keywords in Postgres
* why are we using tinyblob(1) instead of bool for boolean data ?
* why are we creating so many indexes ? are all of them necessary ?
* shouldn't we use database-provided timestamps instead of varchar(14) ?
* rights and restrictions don't seem to be represented very efficiently.
using integer flag for that seems like a better idea to me
* it might be good idea to consider making our own searching system
instead of relying on something provided by database