I've written a Python script to insert some data (300 million rows) into a MySQL table:
#!/usr/bin/python
import os
import MySQLdb
from multiprocessing import Pool

class DB(object):
    def __init__(self):
        self.conn = MySQLdb.connect(host='localhost', user='root', passwd='xxx', db='xdd', port=3306)
        self.cur = self.conn.cursor()

    def insert(self, arr):
        self.cur.execute('insert into RAW_DATA values(null,%s,%s,%s,%s,%s,%s,%s)', arr)

    def close(self):
        self.conn.commit()
        self.cur.close()
        self.conn.close()

def Import(fname):
    db = DB()
    print 'importing ', fname
    with open('data/' + fname, 'r') as f:
        for line in f:
            arr = line.split()
            db.insert(arr)
    db.close()

if __name__ == '__main__':
    # 800+ files
    files = [d for d in os.listdir('data') if d[-3:] == 'txt']
    pool = Pool(processes=10)
    pool.map(Import, files)
The problem is that the script runs very, very slowly. Is there any obvious misuse of multiprocessing here?
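For reference, one variant I've been considering is batching the rows and sending them with executemany instead of one execute and implicit commit bookkeeping per line. This is only a minimal, untested sketch: the function name ImportBatched and the batch size of 1000 are my own choices, and it assumes the same RAW_DATA schema and connection settings as the script above.

    def ImportBatched(fname, batch_size=1000):
        # Same work as Import, but rows are grouped and inserted in batches.
        conn = MySQLdb.connect(host='localhost', user='root', passwd='xxx',
                               db='xdd', port=3306)
        cur = conn.cursor()
        sql = 'insert into RAW_DATA values(null,%s,%s,%s,%s,%s,%s,%s)'
        batch = []
        with open('data/' + fname, 'r') as f:
            for line in f:
                batch.append(line.split())
                if len(batch) >= batch_size:
                    cur.executemany(sql, batch)  # send the whole batch at once
                    conn.commit()                # commit per batch, not per row
                    batch = []
        if batch:                                # flush any leftover rows
            cur.executemany(sql, batch)
            conn.commit()
        cur.close()
        conn.close()

Would that kind of batching help here, or is the bottleneck more likely in how the worker processes share the database?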