我的数据类似于下面的示例数据。当我运行下面的代码、按 prod cat 分组统计不重复的 Product ID 数量时，收到了下面的错误。而使用 Pandas 的 groupby 配合 nunique 则没有任何问题。有人知道问题出在哪里吗？Product ID 看起来并不是特别大的整数。
样本数据:
print data[['Product ID','prod cat']].head()
   Product ID   prod cat
0     3488319  kew_31839
1     5250340   kew_6086
2     3500693  kew_30077
3     3500693  kew_30077
4     3500693  kew_30077
代码:
import pandasql
from pandasql import sqldf
# Convenience wrapper: run the SQL text `q` through pandasql's sqldf, passing
# the global namespace as the environment in which table names such as `data`
# are looked up.
pysqldf = lambda q: sqldf(q, globals())
# Count the distinct [Product ID] values within each [prod cat] and order the
# groups by that count, largest first.
# NOTE(review): per the traceback in this post, sqldf hands the DataFrame
# referenced in the query to pandas' to_sql, and the OverflowError is raised
# during that insert (cursor.executemany). Presumably some value in `data`
# (not necessarily in these two columns) is outside SQLite's INTEGER range;
# confirm by checking data.dtypes and the column min/max values.
prod_df=pysqldf("""select count(distinct([Product ID])) as Prod_Cnt
,[prod cat]
from data
group by [prod cat]
order by
count(distinct([Product ID])) desc""")
# Show the first rows of the result.
prod_df.head()
错误:
ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 58))
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
<ipython-input-31-c1f2ccaca168> in <module>()
4 group by [prod cat]
5 order by
----> 6 count(distinct([Product ID])) desc""")
7
8 prod_df.head()
<ipython-input-12-54596a728697> in <lambda>(q)
1 import pandasql
2 from pandasql import sqldf
----> 3 pysqldf = lambda q: sqldf(q, globals())
/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in sqldf(query, env, db_uri)
154 >>> sqldf("select avg(x) from df;", locals())
155 """
--> 156 return PandaSQL(db_uri)(query, env)
/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in __call__(self, query, env)
56 continue
57 self.loaded_tables.add(table_name)
---> 58 write_table(env[table_name], table_name, conn)
59
60 try:
/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in write_table(df, tablename, conn)
119 message='The provided table name \'%s\' is not found exactly as such in the database' % tablename)
120 to_sql(df, name=tablename, con=conn,
--> 121 index=not any(name is None for name in df.index.names)) # load index into db if all levels are named
122
123
/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
469 pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
470 index_label=index_label, schema=schema,
--> 471 chunksize=chunksize, dtype=dtype)
472
473
/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
1149 schema=schema, dtype=dtype)
1150 table.create()
-> 1151 table.insert(chunksize)
1152 if (not name.isdigit() and not name.islower()):
1153 # check for potentially case sensitivity issues (GH7815)
/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in insert(self, chunksize)
664
665 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 666 self._execute_insert(conn, keys, chunk_iter)
667
668 def _query_iterator(self, result, chunksize, columns, coerce_float=True,
/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in _execute_insert(self, conn, keys, data_iter)
639 def _execute_insert(self, conn, keys, data_iter):
640 data = [dict((k, v) for k, v in zip(keys, row)) for row in data_iter]
--> 641 conn.execute(self.insert_statement(), data)
642
643 def insert(self, chunksize=None):
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
943 raise exc.ObjectNotExecutableError(object)
944 else:
--> 945 return meth(self, multiparams, params)
946
947 def _execute_function(self, func, multiparams, params):
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/sql/elements.pyc in _execute_on_connection(self, connection, multiparams, params)
261 def _execute_on_connection(self, connection, multiparams, params):
262 if self.supports_execution:
--> 263 return connection._execute_clauseelement(self, multiparams, params)
264 else:
265 raise exc.ObjectNotExecutableError(self)
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_clauseelement(self, elem, multiparams, params)
1051 compiled_sql,
1052 distilled_params,
-> 1053 compiled_sql, distilled_params
1054 )
1055 if self._has_events or self.engine._has_events:
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1187 parameters,
1188 cursor,
-> 1189 context)
1190
1191 if self._has_events or self.engine._has_events:
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
1403 )
1404 else:
-> 1405 util.reraise(*exc_info)
1406
1407 finally:
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
1157 statement,
1158 parameters,
-> 1159 context)
1160 elif not parameters and context.no_parameters:
1161 if self.dialect._has_events:
/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/default.pyc in do_executemany(self, cursor, statement, parameters, context)
465
466 def do_executemany(self, cursor, statement, parameters, context=None):
--> 467 cursor.executemany(statement, parameters)
468
469 def do_execute(self, cursor, statement, parameters, context=None):
OverflowError: Python int too large to convert to SQLite INTEGER