我有 3 个表,我试图在其中执行连接,并将结果数据插入到另一个表中。根据数据集,查询需要 15-30 分钟。我从中选择和加入的表每个至少有 25k 条记录,但很快就会增长到 500k+。
我尝试在字段上添加索引,但仍然没有太大帮助。还有其他我可以尝试的事情吗,或者这种规模的加入需要这么长时间吗?
这是我要执行的查询:
INSERT INTO audience.topitem
(runs_id, total_training_count, item, standard_index_value, significance, seed_count, nonseed_count, prod, model_type, level_1, level_2, level_3, level_4, level_5)
SELECT 5, seed_count + nonseed_count AS total_training_count,
ii.item, standard_index_value, NULL, seed_count, nonseed_count,
standard_index_value * seed_count AS prod, 'site', topic_L1, topic_L2, topic_L3, topic_L4, topic_L5
FROM audience.item_indexes ii
LEFT JOIN audience.usercounts uc ON ii.item = uc.item AND ii.runs_id = uc.runs_id
LEFT JOIN categorization.categorization at on ii.item = at.url
WHERE ii.runs_id = 5
表:audience.item_indexes
CREATE TABLE `item_indexes` (
`item` varchar(1024) DEFAULT NULL,
`standard_index_value` float DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
KEY `item_idx` (`item`(333))
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
表:audience.usercounts
CREATE TABLE `usercounts` (
`item` varchar(1024) DEFAULT NULL,
`seed_count` int(11) DEFAULT NULL,
`nonseed_count` int(11) DEFAULT NULL,
`significance` float(19,6) DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
KEY `item_idx` (`item`(333))
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
表:audience.topitem
CREATE TABLE `topitem` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`total_training_count` int(11) DEFAULT NULL,
`item` varchar(1024) DEFAULT NULL,
`standard_index_value` float(19,6) DEFAULT NULL,
`significance` float(19,6) DEFAULT NULL,
`seed_count` int(11) DEFAULT NULL,
`nonseed_count` int(11) DEFAULT NULL,
`prod` float(19,6) DEFAULT NULL,
`cat_type` varchar(32) DEFAULT NULL,
`cat_level` int(11) DEFAULT NULL,
`conf` decimal(19,9) DEFAULT NULL,
`level_1` varchar(64) DEFAULT NULL,
`level_2` varchar(64) DEFAULT NULL,
`level_3` varchar(64) DEFAULT NULL,
`level_4` varchar(64) DEFAULT NULL,
`level_5` varchar(64) DEFAULT NULL,
`runs_id` int(11) DEFAULT NULL,
`model_type` enum('site','term','combo') DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=825 DEFAULT CHARSET=utf8;
表:categorization.categorization
CREATE TABLE `AT_categorization` (
`url` varchar(760) NOT NULL ,
`language` varchar(10) DEFAULT NULL,
`category` text,
`entity` text,
`source` varchar(255) DEFAULT NULL,
`topic_L1` varchar(45) NOT NULL DEFAULT '',
`topic_L2` varchar(45) NOT NULL DEFAULT '',
`topic_L3` varchar(45) NOT NULL DEFAULT '',
`topic_L4` varchar(45) NOT NULL DEFAULT '',
`topic_L5` varchar(45) NOT NULL DEFAULT '',
`last_refreshed` datetime DEFAULT NULL,
PRIMARY KEY (`url`,`topic_L1`,`topic_L2`,`topic_L3`,`topic_L4`,`topic_L5`),
UNIQUE KEY `inx_url` (`url`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;