0

我的数据类似于下面的示例数据。当我运行下面的代码,想按 prod cat 分组统计不重复的 Product ID 数量时,出现了下面的错误。而用 Pandas 的 groupby 配合 nunique 做同样的统计则没有任何问题。有人能看出问题出在哪里吗?Product ID 看起来并不是特别大的整数。

示例数据:

print data[['Product ID','prod cat']].head()



   Product ID prod cat
0     3488319        kew_31839
1     5250340         kew_6086
2     3500693        kew_30077
3     3500693        kew_30077
4     3500693        kew_30077

代码:

import pandasql
from pandasql import sqldf
# Convenience wrapper: run a SQL query through pandasql, resolving table names
# (here `data`) against this module's global variables.
pysqldf = lambda q: sqldf(q, globals())

# Count the distinct Product IDs per product category, largest count first.
# The [bracketed] identifiers are the column names that contain spaces.
# NOTE(review): the traceback shows the OverflowError is raised while pandasql
# copies the whole `data` frame into SQLite (write_table -> to_sql ->
# cursor.executemany), not while evaluating this query. Presumably some integer
# column of `data` other than the two printed above holds a value outside
# SQLite's INTEGER range -- confirm by inspecting data.dtypes and the
# per-column min/max.
prod_df=pysqldf("""select count(distinct([Product ID])) as Prod_Cnt
                          ,[prod cat]
                        from data
                        group by [prod cat]
                        order by
                        count(distinct([Product ID])) desc""")

# Preview the top of the aggregated result.
prod_df.head()

错误:

ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 58))


---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
<ipython-input-31-c1f2ccaca168> in <module>()
      4                         group by [prod cat]
      5                         order by
----> 6                         count(distinct([Product ID])) desc""")
      7 
      8 prod_df.head()

<ipython-input-12-54596a728697> in <lambda>(q)
      1 import pandasql
      2 from pandasql import sqldf
----> 3 pysqldf = lambda q: sqldf(q, globals())

/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in sqldf(query, env, db_uri)
    154     >>> sqldf("select avg(x) from df;", locals())
    155     """
--> 156     return PandaSQL(db_uri)(query, env)

/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in __call__(self, query, env)
     56                     continue
     57                 self.loaded_tables.add(table_name)
---> 58                 write_table(env[table_name], table_name, conn)
     59 
     60             try:

/Users/sname/anaconda2/lib/python2.7/site-packages/pandasql/sqldf.pyc in write_table(df, tablename, conn)
    119                        message='The provided table name \'%s\' is not found exactly as such in the database' % tablename)
    120         to_sql(df, name=tablename, con=conn,
--> 121                index=not any(name is None for name in df.index.names))  # load index into db if all levels are named
    122 
    123 

/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in to_sql(frame, name, con, flavor, schema, if_exists, index, index_label, chunksize, dtype)
    469     pandas_sql.to_sql(frame, name, if_exists=if_exists, index=index,
    470                       index_label=index_label, schema=schema,
--> 471                       chunksize=chunksize, dtype=dtype)
    472 
    473 

/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in to_sql(self, frame, name, if_exists, index, index_label, schema, chunksize, dtype)
   1149                          schema=schema, dtype=dtype)
   1150         table.create()
-> 1151         table.insert(chunksize)
   1152         if (not name.isdigit() and not name.islower()):
   1153             # check for potentially case sensitivity issues (GH7815)

/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in insert(self, chunksize)
    664 
    665                 chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
--> 666                 self._execute_insert(conn, keys, chunk_iter)
    667 
    668     def _query_iterator(self, result, chunksize, columns, coerce_float=True,

/Users/sname/anaconda2/lib/python2.7/site-packages/pandas/io/sql.pyc in _execute_insert(self, conn, keys, data_iter)
    639     def _execute_insert(self, conn, keys, data_iter):
    640         data = [dict((k, v) for k, v in zip(keys, row)) for row in data_iter]
--> 641         conn.execute(self.insert_statement(), data)
    642 
    643     def insert(self, chunksize=None):

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in execute(self, object, *multiparams, **params)
    943             raise exc.ObjectNotExecutableError(object)
    944         else:
--> 945             return meth(self, multiparams, params)
    946 
    947     def _execute_function(self, func, multiparams, params):

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/sql/elements.pyc in _execute_on_connection(self, connection, multiparams, params)
    261     def _execute_on_connection(self, connection, multiparams, params):
    262         if self.supports_execution:
--> 263             return connection._execute_clauseelement(self, multiparams, params)
    264         else:
    265             raise exc.ObjectNotExecutableError(self)

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_clauseelement(self, elem, multiparams, params)
   1051             compiled_sql,
   1052             distilled_params,
-> 1053             compiled_sql, distilled_params
   1054         )
   1055         if self._has_events or self.engine._has_events:

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1187                 parameters,
   1188                 cursor,
-> 1189                 context)
   1190 
   1191         if self._has_events or self.engine._has_events:

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _handle_dbapi_exception(self, e, statement, parameters, cursor, context)
   1403                 )
   1404             else:
-> 1405                 util.reraise(*exc_info)
   1406 
   1407         finally:

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/base.pyc in _execute_context(self, dialect, constructor, statement, parameters, *args)
   1157                         statement,
   1158                         parameters,
-> 1159                         context)
   1160             elif not parameters and context.no_parameters:
   1161                 if self.dialect._has_events:

/Users/sname/anaconda2/lib/python2.7/site-packages/sqlalchemy/engine/default.pyc in do_executemany(self, cursor, statement, parameters, context)
    465 
    466     def do_executemany(self, cursor, statement, parameters, context=None):
--> 467         cursor.executemany(statement, parameters)
    468 
    469     def do_execute(self, cursor, statement, parameters, context=None):

OverflowError: Python int too large to convert to SQLite INTEGER
4

0 回答 0