In the special case where the columns are a MultiIndex but the index is a simple one, you can transpose the DataFrame and use index_label (when writing) and index_col (when reading) as follows:
import numpy as np
import pandas as pd

# Two-level column labels: the first list is the outer level, the second
# list is the inner level.
cols = pd.MultiIndex.from_arrays(
    [
        ["foo", "foo", "bar", "bar"],
        ["a", "b", "c", "d"],
    ]
)
df = pd.DataFrame(np.random.randn(5, 4), index=range(5), columns=cols)

# Transposing moves the column MultiIndex onto the rows, so to_csv can write
# it out as two labelled index columns.
transposed = df.T
transposed.to_csv("/tmp/df.csv", index_label=["first", "second"])

# Read those two columns back as the row index, then transpose again so the
# MultiIndex ends up on the columns once more.
restored = pd.read_csv("/tmp/df.csv", index_col=["first", "second"])
df_new = restored.T

# The column labels (as tuples) must survive the round trip unchanged.
same_labels = df.columns.values == df_new.columns.values
assert np.all(same_labels)
Unfortunately, this raises the question of what to do if both the index and the columns are MultiIndex objects.
Here is one hacky workaround:
import numpy as np
import pandas as pd
import ast  # kept from the original snippet; the header is no longer parsed by hand

# Build a frame with a two-level MultiIndex on the columns and a plain
# integer index.
cols = pd.MultiIndex.from_arrays([["foo", "foo", "bar", "bar"],
                                  ["a", "b", "c", "d"]])
df = pd.DataFrame(np.random.randn(5, 4), index=range(5), columns=cols)
print(df)

# to_csv writes one header row per column level, followed by a row that
# carries the index label.
df.to_csv('/tmp/df.csv', index_label='index')

# Tell read_csv how many header rows make up the column MultiIndex and which
# column holds the row index. Parsing stringified tuples out of a single
# header row no longer works, because current pandas does not write the
# header that way.
header_rows = list(range(cols.nlevels))
df_new = pd.read_csv('/tmp/df.csv', header=header_rows, index_col=0)

# The index label only existed to name the CSV column; drop it so the
# restored frame matches the original, whose index is unnamed.
df_new.index.name = None
print(df_new)
assert np.all(df.columns.values == df_new.columns.values)
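Of course, if you just want to store the DataFrame in a file and the format does not matter, then pickling it with df.save and pd.load (named DataFrame.to_pickle and pd.read_pickle in current pandas) provides a more pleasant solution: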
import numpy as np
import pandas as pd

# Build a frame with a two-level MultiIndex on the columns and a plain
# integer index.
cols = pd.MultiIndex.from_arrays([["foo", "foo", "bar", "bar"],
                                  ["a", "b", "c", "d"]])
df = pd.DataFrame(np.random.randn(5, 4), index=range(5), columns=cols)

# DataFrame.save / pd.load were removed from pandas; to_pickle / read_pickle
# are the replacements. Pickling stores the whole object, so the column
# MultiIndex comes back without any header handling.
df.to_pickle('/tmp/df.df')

# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
df_new = pd.read_pickle('/tmp/df.df')
assert np.all(df.columns.values == df_new.columns.values)