我已经从数据库中生成了一个包含三列(id、author、title)的 Parquet 文件,现在想按条件(title='Learn Python')读取该 Parquet 文件。以下是我用于此 POC 的 Python 代码。
import pyarrow as pa
import pyarrow.parquet as pq
import pandas as pd
import pyodbc
def write_to_parquet(df, out_path, compression='SNAPPY'):
    """Write a pandas DataFrame to a Parquet file via pyarrow.

    Args:
        df: DataFrame to convert with ``pa.Table.from_pandas``.
        out_path: Destination handed to ``pq.write_table``.
        compression: Codec name forwarded to ``pq.write_table``. The value
            ``'UNCOMPRESSED'`` is translated to ``None`` before forwarding.
    """
    # Translate the 'UNCOMPRESSED' sentinel to None; any other value is
    # passed through to pyarrow untouched.
    codec = None if compression == 'UNCOMPRESSED' else compression
    pq.write_table(
        pa.Table.from_pandas(df),
        out_path,
        use_dictionary=False,  # dictionary encoding is explicitly disabled
        compression=codec,
    )
def read_pyarrow(path, nthreads=1, filters=None):
    """Read a Parquet file into a pandas DataFrame, optionally filtering rows.

    Args:
        path: Location of the Parquet file, handed to ``pq.read_table``.
        nthreads: Kept for backward compatibility. Current pyarrow exposes
            only a boolean ``use_threads`` switch, so any value greater
            than 1 enables multi-threaded reading and anything else
            disables it.
        filters: Optional row predicate forwarded to ``pq.read_table``, in
            pyarrow's filter format (a list of ``(column, op, value)``
            tuples), e.g. ``[('title', '=', 'Learn Python')]``. ``None``
            (the default) reads every row.

    Returns:
        The table converted with ``Table.to_pandas()``.
    """
    # `nthreads` is no longer accepted by pq.read_table; map it onto the
    # boolean `use_threads` flag instead of passing it through.
    use_threads = nthreads is not None and nthreads > 1
    table = pq.read_table(path, use_threads=use_threads, filters=filters)
    return table.to_pandas()
# Proof-of-concept driver: dump a SQL Server table to Parquet, then read the
# file back into a DataFrame.
path = './test.parquet'
sql = "SELECT * FROM [dbo].[Book] (NOLOCK)"

# The connection string must be a single literal: a line break inside the
# quotes is a syntax error, and the ODBC driver name is "{SQL Server}".
conn = pyodbc.connect(
    r'Driver={SQL Server};Server=.;Database=APP_BBG_RECN;Trusted_Connection=yes;'
)

# pd.read_sql is the public entry point for the same reader.
df = pd.read_sql(sql, conn)
write_to_parquet(df, path)
df1 = read_pyarrow(path)
如何在 read_pyarrow 方法中添加条件(title='Learn Python')?