import pandas
import pygrametl
import psycopg2
from pygrametl.tables import SlowlyChangingDimension,CachedDimension,BulkDimension
from pygrametl.datasources import CSVSource
# Open the PostgreSQL session through psycopg2.
connection = psycopg2.connect(
    user="postgres",
    password="tekihcan",
    database="postgres",
    host="localhost",
)
# Wrap the raw DB-API connection for pygrametl; the bulk loader in this
# file obtains its cursor from this wrapper.
connect = pygrametl.ConnectionWrapper(connection)
def pgcopybulkloader(name, atts, fieldsep, rowsep, nullval, filehandle):
    """Bulk-load a delimited file into a table using psycopg2's COPY support.

    This function is passed as the ``bulkloader`` callback of a pygrametl
    bulk-loadable table, which calls it with the arguments below.

    Args:
        name: Name of the target table.
        atts: Column names, in the order the fields appear in the file.
        fieldsep: String separating the fields of a row.
        rowsep: String separating rows; not used by this loader.
        nullval: String that represents NULL in the file. When it is
            ``None`` the literal ``'null'`` is used, which is what this
            loader always used before.
        filehandle: File-like object holding the rows to load.

    Raises:
        Exception: Any error raised while copying is reported on stdout and
            then re-raised, so a failed load is never mistaken for success.
    """
    # Here we use driver-specific code to get fast bulk loading.
    # You can change this method if you use another driver or you can
    # use the FactTable or BatchFactTable classes (which don't require
    # use of driver-specific code) instead of the BulkFactTable class.
    null_marker = 'null' if nullval is None else str(nullval)
    curs = connect.cursor()
    try:
        curs.copy_from(file=filehandle, table=name, sep=fieldsep,
                       columns=atts, null=null_marker)
    except Exception as error:
        # The original handler named psycopg2.Database, which does not
        # exist, so the handler itself failed with AttributeError.
        print("Error %s" % error)
        raise
    finally:
        curs.close()
# Surrogate key and attribute columns of the date_dim table.
# NOTE(review): the first attribute was previously spelled 'd_date_id (B)';
# the " (B)" suffix looked like an annotation copied from a schema listing
# rather than part of the column name -- confirm the table column really is
# d_date_id.
DATE_DIM_KEY = 'd_date_sk'
DATE_DIM_ATTRIBUTES = [
    'd_date_id',
    'd_date',
    'd_month_seq',
    'd_week_seq',
    'd_quarter_seq',
    'd_year',
    'd_dow',
    'd_moy',
    'd_dom',
    'd_qoy',
    'd_fy_year',
    'd_fy_quarter_seq',
    'd_fy_week_seq',
    'd_day_name',
    'd_quarter_name',
    'd_holiday',
    'd_weekend',
    'd_following_holiday',
    'd_first_dom',
    'd_last_dom',
    'd_same_day_ly',
    'd_same_day_lq',
    'd_current_day',
    'd_current_week',
    'd_current_month',
    'd_current_quarter',
    'd_current_year',
]

# Date dimension, bulk-loaded through the driver-specific pgcopybulkloader.
date_dim = BulkDimension(
    name='date_dim',
    key=DATE_DIM_KEY,
    attributes=DATE_DIM_ATTRIBUTES,
    lookupatts=['d_date_id'],
    bulkloader=pgcopybulkloader,
)

# The data file has no header row, so the column names are given explicitly;
# otherwise the first data row would be consumed as the header and the row
# dicts would not contain the attribute names above.
# Assumes the file's columns are the key followed by the attributes in the
# order listed above -- TODO confirm against the data file.
# The path uses forward slashes throughout: the former 'HP\Documents'
# contained '\D', which is not a valid string escape sequence.
date_dim_source = CSVSource(
    open('C:/Users/HP/Documents/v2.13.0rc1/data/date_dim.csv', 'r', 16384),
    fieldnames=[DATE_DIM_KEY] + DATE_DIM_ATTRIBUTES,
    delimiter='|',
)
def main():
    """Insert every row of ``date_dim_source`` into ``date_dim`` and commit.

    Each row yielded by the CSV source is handed to the dimension's
    ``insert``; afterwards the work is committed through the pygrametl
    connection wrapper.
    """
    for row in date_dim_source:
        date_dim.insert(row)
    # Per the pygrametl documentation, bulk-loadable tables buffer inserted
    # rows, and committing through the ConnectionWrapper loads what is still
    # buffered before committing the transaction. Without this call the
    # inserted rows never reach the database.
    connect.commit()
代码在运行时因错误而失败(具体错误信息未在此处给出)。
据我了解,该错误是由目标表为空引起的。另外,CSV 源文件没有标题行,这会影响代码的运行吗?开发这段代码时参考的文档链接如下:https://chrthomsen.github.io/pygrametl/