python - 如何轻松地从 Python 生成包含 matplotlib 图表和 pandas 表格的报告

Question

我有一段用于计算性能报告的代码（报告包括文本、pandas 表格和 matplotlib 图表）。我想把这份报告导出到文件（html/pdf 等）。

我曾尝试使用pweave，但无法使其正常工作（网站中的示例有效，但是当我尝试在具有类和函数的代码中使用它时，它似乎不起作用）。此外，pweave 似乎不支持 pandas 表。

我对 markdown 相关的包不熟悉，但它与 mpld3 包搭配使用可能是解决问题的关键。有人可以帮我举个例子吗？

谢谢，哈南。

score 0 · Accepted Answer

迟到的回答你的问题：

Pweave 运行良好，即使是您自己的类。只要确保你的类文件的目录在 python 的导入路径中。一种方法是在 python 块中添加目录，如下所示：

```python, echo=False
# Append the directory that holds your own modules to Python's import path
# so the chunk can import them.
import sys
sys.path.append('/path/to/your/python/files/')
# Import your module; the line after it is a placeholder for using its classes.
import myfile
myfile.myclass...
```

Pweave 可以输出为 Markdown 格式（选项-f pandoc），然后您可以使用pandoc将其处理为 HTML 或通过 LaTeX 处理为 PDF。

至于 pandas 表格：在 python 代码块中通过 tabulate 包将它们转换为 markdown。

score 0 · Accepted Answer

来自http://buddapie.com/2015/10/31/report-with-matplotlib-tables-and-plots/

生成完整报告的代码如下。对于表格，我编写了一个算法来将长列分成多个较短的列。我正在使用 Seaborn 库，因为我真的很喜欢这种格式，而且设置起来非常容易。

import locale

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.table as tbl
import seaborn as sns

#range for floats
def frange(x, y, jump):
    """Yield x, x + jump, x + 2*jump, ... for as long as the value is <= y.

    The end point ``y`` is inclusive. Values are produced by repeated
    addition, so floating-point rounding accumulates with each step.

    Raises:
        ValueError: if ``x <= y`` and ``jump <= 0``, because the sequence
            would never pass ``y``. As this is a generator, the error is
            raised when iteration starts, not when ``frange`` is called.
    """
    #a non-positive step can never move x past y, so the loop would not end
    if x <= y and jump <= 0:
        raise ValueError('jump must be positive when x <= y')
    while x <= y:
        yield x
        x += jump

#function to return the table, modifying if needed due to size etc
def get_table(ax, ts_list, list_column_labels, list_col_widths, list_col_formats):
    """Create a table on ``ax`` from parallel series, wrapping long columns.

    The series are cut into chunks of at most 19 rows. Each chunk becomes a
    group of ``len(list_col_formats)`` side-by-side columns, so a long series
    is shown as several shorter column groups instead of one tall column.

    Args:
        ax: object providing ``table(cellText=..., colWidths=..., colLabels=...,
            loc=...)``; ``write_report`` passes a matplotlib axes.
        ts_list: indexable collection of series, one per column of a group.
            The length of ``ts_list[1]`` is used as the length of every
            series; it must not be empty (an empty series makes the chunk
            size 0 and the group count division fails).
        list_column_labels: header label for each series.
        list_col_widths: column width for each series.
        list_col_formats: printf-style format for each series; its length
            defines how many columns form one group.

    Returns:
        The table object returned by ``ax.table``.
    """
    divisor = len(list_col_formats)
    n_values = len(ts_list[1])

    #rows per column group: capped at 19, fewer when the series is shorter
    max_length = min(19, n_values)

    #ceiling division, so an exact multiple of max_length does not produce
    #an extra column group that contains nothing but padding
    n_groups = -(-n_values // max_length)
    n_columns = divisor*n_groups

    columns = []
    for i in range(n_columns):
        section = list(get_section(ts_list, i, max_length, n_values, list_col_formats, divisor))
        #the last group can be shorter; pad with blank cells rather than
        #zeros so the padding cannot be mistaken for data
        section.extend([''] * (max_length - len(section)))
        columns.append(section)

    #width and label of each column repeat with a period of `divisor`
    table_widths = [list_col_widths[i % divisor] for i in range(n_columns)]
    table_labels = [list_column_labels[i % divisor] for i in range(n_columns)]

    #cellText is given row by row, so transpose the per-column lists
    cell_text = [list(row) for row in zip(*columns)]

    the_table = ax.table(cellText=cell_text, colWidths=table_widths, colLabels=table_labels, loc='center')
    the_table.auto_set_font_size(False)
    the_table.set_fontsize(6)

    #get_celld() maps (row, col) to each cell; the old
    #properties()['child_artists'] lookup no longer exists in current matplotlib
    for cell in the_table.get_celld().values():
        cell.set_height(0.05)

    return the_table

#formats number
def format(j, format):
    """Return number ``j`` rendered with the printf-style spec ``format``.

    Digit grouping follows the active locale (``grouping=True``); in the
    default "C" locale no thousands separator is inserted.
    """
    #locale.format() was deprecated in Python 3.7 and removed in 3.12;
    #format_string() is the supported call with the same arguments
    return locale.format_string(format, j, grouping=True)

#util function: formats one column chunk of the wrapped table
def get_section(list_ts, i, max_length, l, col_formats, d):
    """Return the formatted values for flat column index ``i``.

    ``i`` is split into a group number (``i // d``) and a series index
    (``i % d``). The group selects rows ``group*max_length`` up to
    ``(group+1)*max_length`` (capped at ``l``) of that series, and each value
    is formatted with the matching entry of ``col_formats``.
    """
    group, series_idx = divmod(i, d)
    start = group * max_length
    stop = min(start + max_length, l)

    formatted = []
    for value in list_ts[series_idx][start:stop]:
        formatted.append(format(value, col_formats[series_idx]))
    return formatted

#function to write report
def write_report(list_plots):
    """Plot each series next to a table of its values and save the figure.

    Args:
        list_plots: sequence of entries, one per report row. An entry is
            either a single 1-D series or a pair of equally long series; for
            a pair, the second series is drawn in red on a twin y-axis and
            gets its own table column. The grid, labels, titles and column
            settings below are hard-coded for three entries.

    The figure is laid out on a 3x2 grid (plot on the left, table on the
    right) and saved with ``plt.savefig('TestReport', dpi=400)``.
    """
    #sets the grid shape
    #tuple for shape of figure
    grid_shape = (3,2)
    fig, axes = plt.subplots(nrows=grid_shape[0], ncols=grid_shape[1], figsize=(8, 10))

    #lists for changing parameters
    list_labels = [['periods', 'cash flows (m)', 'balance (m)'], ['periods', 'cashflows'], ['periods', 'cash flows']]
    list_titles = ['Simulation 1', 'Simulation 2', 'Simulation 3']

    #where we position the plots and the tables
    axes_plots = [0,2,4]
    axes_tables = [1,3,5]

    #line graphs
    fontsize = 5
    labelsize = 4
    for i, series in enumerate(list_plots):
        #a 2-D shape means the entry is a pair of series
        has_second_series = len(np.array(series).shape)==2
        first_ts = series[0] if has_second_series else series

        ax1 = axes.flat[axes_plots[i]]
        ax1.plot(first_ts, c='blue', linewidth=1)
        ax1.set_xlabel(list_labels[i][0], fontsize=fontsize)
        ax1.set_ylabel(list_labels[i][1], fontsize=fontsize)
        ax1.yaxis.label.set_color('blue')
        ax1.tick_params(axis='both', which='major', labelsize=labelsize)
        ax1.set_title(list_titles[i] , fontsize=7)

        #if the other axis is relevant, builds it
        if has_second_series:
            ax2 = ax1.twinx()
            ax2.plot(series[1], c='red', linewidth=1)
            ax2.set_ylabel(list_labels[i][2], fontsize=fontsize)
            ax2.yaxis.label.set_color('red')
            ax2.tick_params(axis='both', which='major', labelsize=labelsize)

    col_widths = [[0.1, 0.095, 0.15], [0.125, 0.11, 0.15], [0.125, 0.13]]
    col_formats = [['%.d', '%.2f', '%.2f'], ['%.d', '%.2f'], ['%.d', '%.2f']]
    col_labels = [['period', 'cf', 'balance'], ['period (y)', 'cf'], ['period (y)', 'cf']]

    #asset simulation tables
    for i, series in enumerate(list_plots):
        ax1 = axes.flat[axes_tables[i]]
        #hides the axis etc
        ax1.set_frame_on(False)
        ax1.get_xaxis().set_visible(False)
        ax1.get_yaxis().set_visible(False)

        if len(np.array(series).shape)==2:
            periods = list(range(len(series[0])))
            columns = [periods, series[0], series[1]]
        else:
            periods = list(range(len(series)))
            columns = [periods, series]

        #identity test: `!= None` on a numpy array compares element by
        #element, and the resulting array cannot be used as a condition
        t_array = np.array([column for column in columns if column is not None])

        #gets the table; get_table builds it with ax.table(), which already
        #attaches it to the axes, so it is not added a second time here
        get_table(ax1, t_array, col_labels[i], col_widths[i], col_formats[i])
        ax1.set_title(list_titles[i], fontsize=7)

    plt.tight_layout()
    plt.savefig('TestReport', dpi=400)

if __name__=='__main__':

    #example timeseries
    #1) a pair of series: period index plus an exponentially decaying value
    n_periods = 50
    decay = np.array([np.power(100, -k*0.01) for k in range(n_periods)])
    paired_series = [np.arange(n_periods), decay]

    #2) a single linear series
    linear_series = np.arange(25)

    #3) a single series of cubes sampled on a float grid
    cubic_series = [np.power(value, 3) for value in frange(3, 200, 3.5)]

    example_series = [paired_series, linear_series, cubic_series]
    write_report(example_series)

python - 如何轻松地从 Python 生成包含 matplotlib 图表和 pandas 表格的报告

2 回答 2

Related

Reference