0

我正在尝试使用 pandas 从 Excel 文件中读取特定的列。当我尝试将这些记录添加到 Solr 时，出现了 TypeError。

我正在使用的代码 -

from __future__ import print_function
import pysolr
import pandas as pd

def _strip_line_breaks(value):
    """Return *value* without newline, carriage-return or ``<br/>`` markers.

    Non-string values (numbers, NaN from blank cells, ...) are returned
    unchanged so that a blank or numeric cell does not raise.
    """
    if not isinstance(value, str):
        return value
    # Same three removals, in the same order, as the original per-row loops.
    for token in ('\n', '\r', '<br/>'):
        value = value.replace(token, '')
    return value


# Load the workbook.  The `encoding` keyword is intentionally not passed:
# it is not a documented `read_excel` parameter.
# NOTE(review): newer pandas releases reject it with a TypeError -- confirm
# against the installed pandas version.
df_my = pd.read_excel('C:\\Users\\shantanu.nandan\\Desktop\\sample.xlsx')

# Work on an explicit copy so the column assignment below modifies
# `df_selected` itself and does not trigger a chained-assignment warning.
df_selected = df_my.loc[:, ['column_1', 'column_2', 'column_3', 'column_4']].copy()

# `iterrows()` can yield copies of each row, so assigning to `row[...]`
# inside such a loop is not guaranteed to reach the DataFrame.  Write the
# cleaned values back to the column in a single pass instead.
df_selected['column_2'] = df_selected['column_2'].map(_strip_line_breaks)

# One dict per row.  Fields whose cell is missing (NaN/NaT) are left out of
# the document instead of being sent to Solr as a literal "nan" value.
# NOTE(review): assumes an absent field is preferable to "nan" for this
# schema -- confirm.
df_dict = [
    {field: value for field, value in record.items() if pd.notna(value)}
    for record in df_selected.to_dict('records')
]

solr = pysolr.Solr('http://localhost:8983/solr/My_Data')
solr.add(df_dict)

错误的堆栈跟踪如下 -

TypeError                                 Traceback (most recent call last)
<ipython-input-62-00ae8c4ee938> in <module>
----> 1 solr.add(df_dict)

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in add(self, docs, commit, boost, commitWithin, waitFlush, waitSearcher)
    747         end_time = time.time()
    748         self.log.debug("Built add request of %s docs in %0.2f seconds.", len(docs), end_time - start_time)
--> 749         return self._update(m, commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher)
    750 
    751     def delete(self, id=None, q=None, commit=True, waitFlush=None, waitSearcher=None):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _update(self, message, clean_ctrl_chars, commit, waitFlush, waitSearcher)
    357             message = sanitize(message)
    358 
--> 359         return self._send_request('post', path, message, {'Content-type': 'text/xml; charset=utf-8'})
    360 
    361     def _extract_error(self, resp):

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _send_request(self, method, path, body, headers, files)
    288 
    289         if int(resp.status_code) != 200:
--> 290             error_message = self._extract_error(resp)
    291             self.log.error(error_message, extra={'data': {'headers': resp.headers,
    292                                                           'response': resp.content}})

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _extract_error(self, resp)
    367 
    368         if reason is None:
--> 369             reason, full_html = self._scrape_response(resp.headers, resp.content)
    370 
    371         msg = "[Reason: %s]" % reason

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _scrape_response(self, headers, response)
    430                 full_html = "%s" % response
    431 
--> 432         full_html = full_html.replace('\n', '')
    433         full_html = full_html.replace('\r', '')
    434         full_html = full_html.replace('<br/>', '')

TypeError: a bytes-like object is required, not 'str'
4

0 回答 0