0

我正在尝试将数据保存为 h5ad 格式的文件,但保存时抛出了如下错误:`ValueError: '_index' is a reserved name for dataframe columns.`('_index' 是数据框列的保留名称)。

import pandas as pd
import scanpy as sc
import numpy as np
# Load the dataset and attach a per-cell 'celltype' label, looked up by
# barcode from the tab-separated annotation table.
data = sc.read_h5ad('f.h5ad')
annotation = pd.read_csv('n.tsv', sep='\t')
annotation_dict = {item['barcodes']:item['celltype'] for item in annotation.to_dict('records')}
# Copy the obs index into a regular column so it can be mapped/sampled below.
data.obs['barcodes'] = data.obs.index
# Barcodes missing from the annotation table map to NaN.
data.obs['celltype'] = data.obs['barcodes'].map(annotation_dict)

# Filter genes/cells, normalize per cell, log-transform, and flag the top
# 1000 highly variable genes.
sc.pp.filter_genes(data,min_cells=686)
sc.pp.filter_cells(data,min_genes=10)
sc.pp.normalize_per_cell(data,20000)
sc.pp.log1p(data)
sc.pp.highly_variable_genes(data,n_top_genes=1000)
# Undo the log1p (exp(x) - 1) on a dense copy of X.
# NOTE(review): .toarray() assumes data.X is a sparse matrix -- confirm.
data.X = np.exp(data.X.toarray())-1
# Keep only the highly variable genes, then re-normalize per cell.
data=data[:,data.var['highly_variable']]
sc.pp.normalize_per_cell(data,3800)

# Re-read the same file to compute Louvain clusters on a separate object.
clustered = sc.read_h5ad('f.h5ad')
# NOTE(review): this filters `data` a second time, not the freshly loaded
# `clustered` -- confirm which object was intended.
sc.pp.filter_cells(data,min_genes=10)
sc.pp.recipe_zheng17(clustered)

sc.tl.pca(clustered, n_comps=50)
sc.pp.neighbors(clustered, n_pcs=50)
sc.tl.louvain(clustered, resolution=0.15)
# Per-cluster counts; the result is not stored (only visible interactively).
clustered.obs.groupby('louvain').count()
# list(...) drops the index, so labels are assigned by position.
# NOTE(review): this requires `data` and `clustered` to hold the same cells
# in the same order -- verify, since only `data` had cells filtered above.
data.obs['louvain'] = list(clustered.obs['louvain'])
# Randomly mark 20% of the barcodes as 'test'; all remaining cells are 'train'.
split = pd.DataFrame(data.obs['barcodes'])
test = split.sample(frac=0.2)
d_split = {item:'test' for item in test['barcodes']}
data.obs['split'] = data.obs['barcodes'].map(d_split).fillna('train')
# Fixed: the file name was missing its opening quote (a SyntaxError).
# This write is the step that raises the reported
# "ValueError: '_index' is a reserved name for dataframe columns".
data.write_h5ad('e.h5ad')
4

1 回答 1

0

这可能与 AnnData.raw 对象的一个已知问题有关。以下是两种解决方法(摘自相关讨论):

# Workaround 1: rename the '_index' column of the raw object's var table to
# 'features'. The private `_raw` / `_var` attributes are read and written
# directly through each object's __dict__.
data.__dict__['_raw'].__dict__['_var'] = data.__dict__['_raw'].__dict__['_var'].rename(columns={'_index': 'features'})
 
# Workaround 2 (alternative to 1): delete the backed-up raw information
del data.raw
于 2021-12-19T16:45:53.720 回答