我正在尝试使用 pandas 从 Excel 文件中读取特定列。当我尝试将这些记录添加到 Solr 时,出现了 TypeError。
我正在使用的代码 -
from __future__ import print_function
import pysolr
import pandas as pd
# Load the spreadsheet and index the selected columns into Solr.
#
# `encoding` is not a documented read_excel parameter (xlsx content is
# already Unicode) and newer pandas releases reject unknown keywords, so it
# is omitted here.
df_my = pd.read_excel('C:\\Users\\shantanu.nandan\\Desktop\\sample.xlsx')

# .copy() gives an independent frame so the column assignment below does not
# trigger SettingWithCopyWarning.
df_selected = df_my.loc[:, ['column_1', 'column_2', 'column_3', 'column_4']].copy()

# iterrows() yields a copy of each row, so assigning to `row[...]` inside the
# loop never changes the DataFrame. Use the vectorized string accessor and
# write the result back to the column instead. regex=False makes every token
# a literal substring, matching str.replace semantics.
for token in ('\n', '\r', '<br/>'):
    df_selected['column_2'] = df_selected['column_2'].str.replace(token, '', regex=False)

# Empty Excel cells become NaN, which is not a meaningful Solr field value;
# drop those keys so each document only carries the fields it actually has.
# NOTE(review): a rejected document (non-200 response) is what sends pysolr
# into its error-scraping path, where the TypeError in the traceback is
# raised - presumably a str/bytes bug in the installed pysolr version;
# confirm and consider upgrading pysolr to see the real Solr error message.
df_dict = [
    {key: value for key, value in record.items() if pd.notna(value)}
    for record in df_selected.to_dict('records')
]

solr = pysolr.Solr('http://localhost:8983/solr/My_Data')
solr.add(df_dict)
错误的堆栈跟踪如下 -
TypeError Traceback (most recent call last)
<ipython-input-62-00ae8c4ee938> in <module>
----> 1 solr.add(df_dict)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in add(self, docs, commit, boost, commitWithin, waitFlush, waitSearcher)
747 end_time = time.time()
748 self.log.debug("Built add request of %s docs in %0.2f seconds.", len(docs), end_time - start_time)
--> 749 return self._update(m, commit=commit, waitFlush=waitFlush, waitSearcher=waitSearcher)
750
751 def delete(self, id=None, q=None, commit=True, waitFlush=None, waitSearcher=None):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _update(self, message, clean_ctrl_chars, commit, waitFlush, waitSearcher)
357 message = sanitize(message)
358
--> 359 return self._send_request('post', path, message, {'Content-type': 'text/xml; charset=utf-8'})
360
361 def _extract_error(self, resp):
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _send_request(self, method, path, body, headers, files)
288
289 if int(resp.status_code) != 200:
--> 290 error_message = self._extract_error(resp)
291 self.log.error(error_message, extra={'data': {'headers': resp.headers,
292 'response': resp.content}})
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _extract_error(self, resp)
367
368 if reason is None:
--> 369 reason, full_html = self._scrape_response(resp.headers, resp.content)
370
371 msg = "[Reason: %s]" % reason
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pysolr.py in _scrape_response(self, headers, response)
430 full_html = "%s" % response
431
--> 432 full_html = full_html.replace('\n', '')
433 full_html = full_html.replace('\r', '')
434 full_html = full_html.replace('<br/>', '')
TypeError: a bytes-like object is required, not 'str'