我的 MySQL 数据仓库数据库中有大约 30 张用于分析数据的表。目前大约有 200 万行数据,但我相信它很快就会增长到数十亿行。难点在于查询必须快速返回数据。下面这个简单的查询,处理这 200 万行就需要 60 多秒:
-- Counts the distinct non-NULL dim_pageview_id values among fact_master rows
-- whose dim_site_id is 552.
-- NOTE(review): both LEFT JOINs match on the joined table's primary key and no
-- column of `visit` or `datetim` is read, so they can neither add nor remove
-- fact rows; they look removable without changing the result.
SELECT
    COUNT(DISTINCT fact.dim_pageview_id)
FROM `datawarehouse_schema_alpha`.`fact_master` AS fact
LEFT JOIN dim_visit AS visit
    ON visit.dim_visit_id = fact.dim_visit_id
LEFT JOIN dim_datetime AS datetim
    ON datetim.dim_datetime_id = fact.dim_datetime_id
WHERE fact.dim_site_id = 552
该查询的 EXPLAIN 输出结果:
1 SIMPLE fact ref fk_fact_bb_pageview_dim_bb_site,fk_fact_master_dim_site fk_fact_bb_pageview_dim_bb_site 5 const 17490 Using where
1 SIMPLE visit eq_ref PRIMARY PRIMARY 4 datawarehouse_schema_alpha.fact.dim_visit_id 1 Using index
1 SIMPLE datetim eq_ref PRIMARY PRIMARY 4 datawarehouse_schema_alpha.fact.dim_datetime_id 1 Using index
以下是示例数据库结构:
--
-- Table structure for table `dim_datetime`
--
-- Date/time dimension keyed by an auto-increment surrogate id.
-- The three descriptive attributes are nullable latin1 varchar(45) columns,
-- while the table default character set is big5.
-- NOTE(review): the column names suggest date/time values stored as strings;
-- confirm whether native DATE/DATETIME types were intended.
CREATE TABLE IF NOT EXISTS `dim_datetime` (
    `dim_datetime_id` int(11)     NOT NULL AUTO_INCREMENT,
    `datetime_date`   varchar(45) CHARACTER SET latin1 DEFAULT NULL,
    `datetime_year`   varchar(45) CHARACTER SET latin1 DEFAULT NULL,
    `datetime_full`   varchar(45) CHARACTER SET latin1 DEFAULT NULL,
    PRIMARY KEY (`dim_datetime_id`)
)
    ENGINE = InnoDB
    DEFAULT CHARSET = big5
    AUTO_INCREMENT = 4568326;
-- --------------------------------------------------------
--
-- Table structure for table `dim_visit`
--
-- Visit dimension keyed by an auto-increment surrogate id, with nullable
-- start/end timestamps and a duration column.
-- NOTE(review): visit_duration is a varchar(45); its unit and format are not
-- visible here, so confirm before doing arithmetic on it.
CREATE TABLE IF NOT EXISTS `dim_visit` (
    `dim_visit_id`     int(11)     NOT NULL AUTO_INCREMENT,
    `visit_start_time` datetime    DEFAULT NULL,
    `visit_end_time`   datetime    DEFAULT NULL,
    `visit_duration`   varchar(45) DEFAULT NULL,
    PRIMARY KEY (`dim_visit_id`)
)
    ENGINE = InnoDB
    DEFAULT CHARSET = big5
    AUTO_INCREMENT = 1295102;
-- --------------------------------------------------------
--
-- Table structure for table `dim_site`
--
-- Site dimension keyed by an auto-increment surrogate id.
-- The name, URL and key columns are nullable latin1 text, while the table
-- default character set is big5.
CREATE TABLE IF NOT EXISTS `dim_site` (
    `dim_site_id` int(11)      NOT NULL AUTO_INCREMENT,
    `site_name`   varchar(255) CHARACTER SET latin1 DEFAULT NULL,
    `site_url`    text         CHARACTER SET latin1,
    `site_key`    text         CHARACTER SET latin1,
    PRIMARY KEY (`dim_site_id`)
)
    ENGINE = InnoDB
    DEFAULT CHARSET = big5
    AUTO_INCREMENT = 870;
--
-- Table structure for table `fact_master`
--
-- Fact table keyed by an auto-increment surrogate id. Each row carries
-- nullable references to the pageview, visit, site and datetime dimensions,
-- plus an IP, a spent-time value and a timestamp.
-- Fix: the last KEY definition used to end with a comma directly before the
-- closing parenthesis, which is a syntax error; the comma is removed.
CREATE TABLE IF NOT EXISTS `fact_master` (
    `fact_master_id`    int(11)      NOT NULL AUTO_INCREMENT,
    `dim_pageview_id`   int(11)      DEFAULT NULL,
    `dim_visit_id`      int(11)      DEFAULT NULL,
    `dim_site_id`       int(11)      DEFAULT NULL,
    `dim_datetime_id`   int(11)      DEFAULT NULL,
    `master_ip`         varchar(255) DEFAULT NULL,
    -- NOTE(review): stored as a string; unit and format are not visible here.
    `master_spent_time` varchar(255) DEFAULT NULL,
    `master_datetime`   datetime     DEFAULT NULL,
    PRIMARY KEY (`fact_master_id`),
    -- NOTE(review): dim_visit_id, dim_datetime_id and dim_pageview_id are each
    -- indexed twice (fk_fact_bb_pageview_* and fk_fact_master_*). The duplicates
    -- are kept because index names may be referenced elsewhere; consider
    -- dropping one index of each pair once that is confirmed.
    KEY `fk_fact_bb_pageview_dim_bb_visit` (`dim_visit_id`),
    KEY `fk_fact_bb_pageview_dim_bb_datetime` (`dim_datetime_id`),
    KEY `fk_fact_bb_pageview_dim_bb_pageview` (`dim_pageview_id`),
    KEY `fk_fact_master_dim_pageview` (`dim_pageview_id`),
    KEY `fk_fact_master_dim_visit` (`dim_visit_id`),
    KEY `fk_fact_master_dim_datetime` (`dim_datetime_id`),
    KEY `fk_fact_master_dim_site` (`dim_site_id`)
) ENGINE=InnoDB DEFAULT CHARSET=big5 AUTO_INCREMENT=1;
--
-- Constraints for dumped tables
--
--
-- Constraints for table `fact_master`
--
-- Adds the four foreign keys from fact_master to its dimension tables.
-- Every constraint uses NO ACTION for both DELETE and UPDATE on the parent.
-- NOTE(review): `dim_pageview` is referenced here but its definition is not
-- part of the visible dump; it must exist before this statement runs.
ALTER TABLE `fact_master`
    ADD CONSTRAINT `fk_fact_master_dim_datetime`
        FOREIGN KEY (`dim_datetime_id`)
        REFERENCES `dim_datetime` (`dim_datetime_id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION,
    ADD CONSTRAINT `fk_fact_master_dim_pageview`
        FOREIGN KEY (`dim_pageview_id`)
        REFERENCES `dim_pageview` (`dim_pageview_id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION,
    ADD CONSTRAINT `fk_fact_master_dim_site`
        FOREIGN KEY (`dim_site_id`)
        REFERENCES `dim_site` (`dim_site_id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION,
    ADD CONSTRAINT `fk_fact_master_dim_visit`
        FOREIGN KEY (`dim_visit_id`)
        REFERENCES `dim_visit` (`dim_visit_id`)
        ON DELETE NO ACTION
        ON UPDATE NO ACTION;
请问可能是哪里出了问题?如何利用索引和视图来非常快速地获取数据?除了使用索引或视图之外,也欢迎任何其他优化速度的建议。谢谢!