I have a requirement to extract data from numerous SQL Server tables and create
.csv files from it. I created a Python script to do this, which uses a pyodbc/turbodbc connection with the SQL Server ODBC drivers. It sometimes works fine; however, it disconnects when it hits a large table (over 11M), and performance-wise it is very slow. I also tried FreeTDS, but it looks the same as pyodbc in terms of performance.
This is my connection:
# Illustrative connection call: the Microsoft ODBC Driver 17 shared library is
# referenced by its full path, and the server name and credentials are passed
# as keyword arguments. The returned connection is not bound to a name here.
pyodbc.connect(Driver='/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.5.so.2.1',server=systemname,UID=user_name,PWD=pwd)
def connect_to_SQL_Server(logins):
    '''Connect to SQL Server through pyodbc.

    Args:
        logins: Mapping that provides the 'hostname', 'sql_username' and
            'snow_password' entries used to open the connection.

    Returns:
        The connection object, or None when the connection could not be
        established (the error is logged at CRITICAL level).
    '''
    try:
        hostname = logins['hostname']
        username = logins['sql_username']
        # NOTE(review): the SQL Server password is read from the
        # 'snow_password' key -- confirm this is the intended entry.
        password = logins['snow_password']
        # Alternative drivers/connectors that were tried:
        #con = turbodbc.connect(Driver='/usr/lib64/libtdsodbc.so',server=hostname,UID=username,PWD=password,TDS_Version=8.0)
        #con = pyodbc.connect(Driver='/usr/lib64/libtdsodbc.so',server=hostname,UID=username,PWD=password,TDS_Version=8.0,Trace='Yes',ForceTrace='Yes',TraceFile='/maxbill_mvp_data/all_data/sql.log')
        #con = turbodbc.connect(Driver='/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.5.so.2.1',server=hostname,UID=username,PWD=password)
        #con = pyodbc.connect(DSN='MSSQLDEV',server=hostname,UID=username,PWD=password)
        return pyodbc.connect(Driver='/opt/microsoft/msodbcsql17/lib64/libmsodbcsql-17.5.so.2.1',server=hostname,UID=username,PWD=password)
    except Exception as error:
        # Exception already covers pyodbc.ProgrammingError, so a missing
        # login key and a driver failure are both reported the same way.
        logging.critical(error)
        return None
# Export the query result to gzip-compressed, \x01-delimited files, one file
# per chunk of 300000 rows, so the full table is never held in memory at once.
sqlCon = connect_to_SQL_Server(logins)
if sqlCon is None:
    # connect_to_SQL_Server() returns None on failure; stop here with a clear
    # message instead of failing later inside pandas.
    raise RuntimeError('Could not connect to SQL Server; see the log for details')
sql = 'select * from table'
try:
    for i, partial_df in enumerate(pd.read_sql(sql, sqlCon, chunksize=300000)):
        #chunk.to_csv(f+'_'+str(i)+'.csv',index = False,header = False,sep = ',',mode = 'a+')
        # NOTE(review): mode='a+' appends to a file left over from an earlier
        # run with the same name -- confirm that is intended.
        partial_df.to_csv(filenamewithpath + '_'+str(i)+'.csv.gz', compression='gzip', index=False, sep='\01', header= False, mode='a+')
finally:
    # Always release the connection, even when a chunk fails mid-export.
    sqlCon.close()
Are there any parameters I can try to improve performance? Just to let you know, these Python scripts run on a Linux cloud instance, which is a different server from the one hosting SQL Server.