我正在编写一个 python 脚本来解析用户代理字符串并将它们简化为“家庭”(即 chrome、firefox、safari 等)。
我有一个脚本在对 csv 文件运行时完全正常,但是当我对 .tsv 文件运行文件时,它给了我以下错误:
TypeError:预期的字符串或缓冲区
还有其他人遇到这个问题吗?示例代码如下。
import pandas as pd
import numpy as np
import glob as glob
from ua_parser import user_agent_parser as uaparser
#THIS WORKS FINE:
def parse_uagent():
ua_list = []
uadf = pd.DataFrame()
for datafile in glob.glob("*.csv"):
df = pd.read_csv(datafile, sep=',')
df = df[['user_agent','date_time','user_name']]
ua = df[df.columns[0]].values
for line in ua:
uagent = uaparser.ParseUserAgent(line)
ua_list.append(uagent)
uadf = uadf.append(ua_list)
print uadf
#THIS GIVES AN ERROR:
def parse_uagent():
ua_list = []
uadf = pd.DataFrame()
for datafile in glob.glob("*.tsv"):
df = pd.read_csv(datafile, sep='\t')
df = df[['user_agent','date_time','user_name']]
ua = df[df.columns[0]].values
for line in ua:
uagent = uaparser.ParseUserAgent(line)
ua_list.append(uagent)
uadf = uadf.append(ua_list)
print uadf
追溯:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-92-14c05dc8ee13> in <module>()
29
30
---> 31 parse_uagent()
32
<ipython-input-92-14c05dc8ee13> in parse_uagent()
19 ua = df[df.columns[0]].values
20 for line in ua:
---> 21 uagent = uaparser.ParseUserAgent(line)
22 ua_list.append(uagent)
23 uadf = uadf.append(ua_list)
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in ParseUserAgent(user_agent_string, **jsParseBits)
247 else:
248 for uaParser in USER_AGENT_PARSERS:
--> 249 family, v1, v2, v3 = uaParser.Parse(user_agent_string)
250 if family:
251 break
/anaconda2/lib/python2.7/site-packages/ua_parser/user_agent_parser.pyc in Parse(self, user_agent_string)
49 def Parse(self, user_agent_string):
50 family, v1, v2, v3 = None, None, None, None
---> 51 match = self.user_agent_re.search(user_agent_string)
52 if match:
53 if self.family_replacement:
TypeError: expected string or buffer