我尝试了以下代码
import pandas as pd
import os
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader
class ArticleDataset(Dataset):
def __init__(self):
# super().__init__()
df= pd.read_csv('https://drive.google.com/uc?export?format=csv&=download&id=*********', sep = "delimiter", index_col=0, header=None, engine='python')
self.articles_list=df
def __len__(self):
return len(self.articles_list)
def __getitem__(self, item):
return self.articles_list.index[item]
执行df.head()时,数据框有数据,打印数据。但它给出了错误
TypeError Traceback (most recent call last)
<ipython-input-74-08e914e52560> in <module>()
1 dataset =ArticleDataset()
----> 2 article_loader=DataLoader(dataset, batch_size=1, shuffle=True)
2 frames
/usr/local/lib/python3.6/dist-packages/torch/utils/data/sampler.py in num_samples(self)
98 # dataset size might change at runtime
99 if self._num_samples is None:
--> 100 return len(self.data_source)
101 return self._num_samples
102
TypeError: object of type 'ArticleDataset' has no len()
什么时候
dataset =ArticleDataset()
article_loader=DataLoader(dataset, batch_size=1, shuffle=True)
被执行。
但是根据在这里找到的正确答案, len(DataFrame.index) 给了我预期的不。数据框中的行数。并且 df.index 1打印数据帧的第一行,但无法找出问题所在。
我也尝试将数据框转为列表,但给出了相同的错误。代码中可能有一些我没有看到的错误,而且我是 python 新手。任何形式的帮助表示赞赏。
编辑(取消缩进一次后):
import pandas as pd
import os
from torch.utils.data import Dataset
from torch.utils.data import Dataset, DataLoader
class ArticleDataset(Dataset):
def __init__(self):
# super().__init__()
df= pd.read_csv('https://drive.google.com/uc?export?format=csv&=download&id=*******', sep = "delimiter", index_col=0, header=None, engine='python')
self.articles_list=df
def __len__(self):
return len(self.articles_list)
def __getitem__(self, item):
return self.articles_list.index[item]
这将给出以下错误
File "<ipython-input-104-165bbc37826a>", line 13
def __len__(self):
^
IndentationError: unindent does not match any outer indentation level