0

我正在尝试将我的 python 脚本转换为 EXE。

该脚本对 Excel 文件（实际读取的是 CSV 文件）进行基本分析，并生成 PDF 格式的报告。

此外，脚本会先生成一个 PNG 图片文件，然后再把它嵌入到 PDF 报告中。

我正在尝试将 .py 文件转换为 EXE，但没有成功 :(

脚本（直接作为 .py 文件运行时一切正常）：

import os
import sys
import tkinter as tk
from datetime import datetime
from tkinter import *

import matplotlib.pyplot as plt
import numpy as nu
import pandas as pd
from fpdf import FPDF
from pandasql import sqldf


def start_gui(root):
    """Populate *root* with the report window and run its event loop.

    The window holds a greeting label (grid row 0) and a 'Produce Report'
    button (grid row 50) whose command is ``main``. The call blocks inside
    ``root.mainloop()`` and returns ``None`` once the loop ends.
    """
    greeting = Label(root, text='Hi! Here you can output the sessions report')
    greeting.grid(row=0, column=0)

    report_button = Button(root, text='Produce Report', padx=30, pady=20, command=main, fg='blue')
    report_button.grid(row=50, column=0)

    root.mainloop()


def print_full_results(df):
    """Print *df* with no row or column truncation.

    Both display limits are lifted for the duration of the print and are then
    reset to the pandas defaults (not to whatever value they held before).
    """
    display_limits = ('display.max_rows', 'display.max_columns')

    for option_name in display_limits:
        pd.set_option(option_name, None)

    print(df)

    for option_name in display_limits:
        pd.reset_option(option_name)


def load_data(path):
    """Read the CSV located at *path* and return it as a pandas DataFrame."""
    return pd.DataFrame(pd.read_csv(path))


def clean_raw_data(raw_data):
    """Return a cleaned, renamed and type-converted copy of the raw export.

    Steps, in order:
      1. drop rows, then columns, in which every value is missing;
      2. renumber the rows from zero (the old index is discarded);
      3. rename the export's column headers to SQL-friendly identifiers;
      4. run ``convert_relevant_types`` and then ``fill_null_emails``.
    """
    renamed_columns = {
        'Meeting ID': 'Meeting_ID',
        'User Name': 'Admin_User_Name',
        'Uzer Eam1l': 'Admin_Email',
        'Has Zoom Rooms?': 'Has_Zoom_Rooms',
        'Creation Time': 'Meeting_Creation_Time',
        'Start Time': 'Meeting_Start_Time',
        'End Time': 'Meeting_End_Time',
        'Duration (Minutes)': 'Meeting_Duration_min',
        'Ncmf (prjgjncl Ncmf)': 'User_Name',
        'Usfr fncil': 'User_Email',
        'Join Time': 'User_Join_Time',
        'Leave Time': 'User_Leave_Time',
        'Duration (Minutes).1': 'User_Duration_min',
    }

    cleaned = (
        raw_data
        .dropna(how='all')            # rows with no data at all
        .dropna(axis=1, how='all')    # columns with no data at all
        .reset_index()                # renumber after the dropped rows ...
        .drop(columns=['index'])      # ... and discard the old index column
        .rename(columns=renamed_columns)
    )

    return fill_null_emails(convert_relevant_types(cleaned))


def convert_relevant_types(db):
    """Convert the timestamp and duration columns of *db* to proper types.

    The five time columns are parsed from ``'%m/%d/%y %H:%M'`` strings into
    datetimes, and the two duration columns are cast to ``int``. Whole columns
    are converted and assigned back at once. This replaces element-wise
    chained assignment (``db[col][i] = ...``), which is deprecated in pandas
    and has no effect under copy-on-write. It also means the frame no longer
    needs a 0..n-1 index, and no global pandas option is changed.

    Args:
        db: DataFrame containing the columns ``Meeting_Creation_Time``,
            ``Meeting_Start_Time``, ``Meeting_End_Time``, ``User_Join_Time``,
            ``User_Leave_Time``, ``Meeting_Duration_min`` and
            ``User_Duration_min``.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        ValueError: if a timestamp does not match the expected format or a
            duration cannot be converted to an integer.
    """
    datetime_columns = (
        'Meeting_Creation_Time',
        'Meeting_Start_Time',
        'Meeting_End_Time',
        'User_Join_Time',
        'User_Leave_Time',
    )
    duration_columns = ('Meeting_Duration_min', 'User_Duration_min')

    for column in datetime_columns:
        db[column] = pd.to_datetime(db[column], format='%m/%d/%y %H:%M')

    for column in duration_columns:
        db[column] = db[column].astype(int)

    return db


def fill_null_emails(db):
    """Replace missing ``User_Email`` values with ``'<User_Name> Missing Mail'``.

    The column is rebuilt positionally and assigned back in one step. This
    replaces element-wise chained assignment (``db[col][i] = ...``), which is
    deprecated in pandas and has no effect under copy-on-write, and it works
    for any index, not only 0..n-1.

    Args:
        db: DataFrame with ``User_Email`` and ``User_Name`` columns.

    Returns:
        The same DataFrame object, modified in place.

    Raises:
        TypeError: if an e-mail is missing and the matching ``User_Name`` is
            not a string (for example, it is missing as well).
    """
    db['User_Email'] = [
        name + ' Missing Mail' if pd.isnull(email) else email
        for email, name in zip(db['User_Email'], db['User_Name'])
    ]
    return db


def pdff_space_down(pdf):
    """Insert an empty full-width line of height 10 into *pdf* and return *pdf*.

    The blank cell is written with ``ln=1`` so the cursor moves to the start
    of the next line, which produces vertical spacing.
    """
    pdf.cell(w=0, h=10, txt='', ln=1, align='L')
    return pdf


def pdff_write(pdf, text, space=5, align='L'):
    """Write *text* as one full-width line in *pdf* and return *pdf*.

    Args:
        pdf: document object exposing an FPDF-style ``cell`` method.
        text: the string to print.
        space: height of the line.
        align: horizontal alignment passed to ``cell`` (default ``'L'``).
            Previously this argument was accepted but ignored, and ``'L'``
            was always used.

    Returns:
        The same *pdf* object, to allow ``pdf = pdff_write(pdf, ...)``.
    """
    pdf.cell(0, space, text, ln=1, align=align)
    return pdf


def pdff_write_table(pdf, data, spacing=1.5):
    """Draw *data* as a bordered table in *pdf* and return *pdf*.

    Each inner sequence of *data* is one table row and each item is the text
    of one cell. Every cell is ``pdf.w / 4.5`` wide and ``pdf.font_size *
    spacing`` high. After each row the cursor moves down by that same height.
    """
    cell_width = pdf.w / 4.5
    cell_height = pdf.font_size * spacing

    for cells in data:
        for cell_text in cells:
            pdf.cell(cell_width, cell_height, txt=cell_text, border=1)
        # Start the next table row directly below the one just drawn.
        pdf.ln(cell_height)

    return pdf


def create_pdf(today, min_date, max_date, sessions_num, total_cost, costs_table, num_of_users, avg_users_come):
    """Render the Zoom sessions report and write it to a PDF file.

    The report is a single A4 portrait document with a header, a "Sessions
    Analysis" section (session count, the per-day bar chart and the cost
    table) and a "Users Analysis" section.

    The chart is read from ``'sessions_by_day_plot.png'`` in the current
    working directory, so that file must exist before this function is
    called. The output is written to the current working directory as
    ``'Zoom Report_<timestamp>.pdf'``, with the colons of the timestamp
    replaced by dots.

    Args:
        today: date text printed in the top-left corner.
        min_date: first date covered by the report.
        max_date: last date covered by the report.
        sessions_num: total number of sessions.
        total_cost: total cost of all sessions, printed with the 'NIS' suffix.
        costs_table: table rows handed to ``pdff_write_table``; each row is a
            sequence of cell texts.
        num_of_users: number of users engaged.
        avg_users_come: average number of sessions attended per user.
    """
    pdf = FPDF(orientation='p', unit='mm', format='A4')
    pdf.add_page()

    # --- Header ---------------------------------------------------------
    pdf.set_font('Arial', size=10)
    pdf.cell(0, 10, 'Date:{}'.format(today), ln=1, align='L')
    pdf.set_font('times', 'B', size=24)
    pdf.cell(0, 8, 'Home Assignment - Ziv Mor', ln=1, align='C')
    pdf.set_font('times', size=18)
    pdf.cell(0, 10, 'Zoom-Sessions Report (Automated by Python)', ln=1, align='C')
    pdf.cell(0, 10, '({}'.format(min_date) + ' To {})'.format(max_date), ln=1, align='C')

    # --- Sessions analysis ------------------------------------------------
    pdf.set_font('times', 'U', size=15)
    pdff_write(pdf, 'Sessions Analysis', space=20)
    pdf.set_font('times', size=13)
    pdff_write(pdf, 'Total Number of Sessions: {} (Team meetings are not include)'.format(sessions_num), space=15)
    pdf.set_font('times', 'UB', size=13)
    # ``ln`` only accepts 0, 1 or 2; the previous value 1.5 was invalid.
    pdf.cell(0, 10, 'Number Of Sessions By Dates', ln=1, align='C')
    # y=None places the image at the current vertical cursor position.
    pdf.image('sessions_by_day_plot.png', x=55, y=None, w=100, h=70, type='', link='')
    pdff_space_down(pdf)
    pdf.set_font('times', size=13)
    pdff_write(pdf, 'Sessions Participants Segmentation:', space=10)
    pdff_write_table(pdf, costs_table)
    pdf.set_font('times', 'UB', size=13)
    pdf.cell(0, 20, 'Sessions Total Cost: {} NIS'.format(total_cost), ln=1, align='C')

    # --- Users analysis ---------------------------------------------------
    pdf.set_font('times', 'U', size=15)
    pdff_write(pdf, 'Users Analysis', space=17)
    pdf.set_font('times', size=13)
    pdff_write(pdf, 'Total Number of Users Engaged: {}'.format(num_of_users), space=10)
    pdff_write(pdf, 'The Average Frequency of Arrival of Each User : {} Sessions'.format(avg_users_come),
               space=10)

    # ':' is not allowed in Windows file names, so the three colons of the
    # timestamp are replaced with dots.
    pdf.output('Zoom Report_{}.pdf'.format(str(datetime.today()).replace(':', '.', 3)))


def main():
    """Run the whole report pipeline and then terminate the program.

    Steps:
      1. load ``'participant sessions data.csv'`` from the application folder
         and clean it;
      2. run the SQL queries (via ``sqldf``) that produce the report figures;
      3. save the sessions-per-day bar chart as a temporary PNG;
      4. write the PDF report and an Excel workbook with the detail tables
         to the current working directory;
      5. delete the temporary PNG and exit.

    Fixes relative to the previous version, mainly for frozen (EXE) builds:
      * the data folder is the executable's folder when the program is
        frozen, because ``sys.path[0]`` is not guaranteed to be that folder;
      * ``sys.exit()`` replaces the bare ``exit()``, which is supplied by the
        ``site`` module and may be missing in a frozen application;
      * the Excel writer is used as a context manager (``writer.save()`` was
        removed in pandas 2.0);
      * the temporary plot is deleted from the same path it was saved to.

    Raises:
        SystemExit: always, once the reports have been written.
    """
    if getattr(sys, 'frozen', False):
        base_dir = os.path.dirname(sys.executable)
    else:
        base_dir = sys.path[0]
    path = os.path.join(base_dir, 'participant sessions data.csv')

    raw_data = load_data(path)
    # NOTE: pandasql looks table names up among the caller's variables, so
    # zoom_db and the answer_*_table frames must stay local to this function.
    zoom_db = clean_raw_data(raw_data)

    # ------------------------------ SQL queries ------------------------------

    # Sessions per day. Assumption: meetings whose Topic is the Hebrew
    # "team meeting" string are not counted as sessions.
    question_1_query = 'Select date(Meeting_Start_Time)date, count(distinct Meeting_Start_Time)Num_Of_Sessions From zoom_db where Topic <>"פגישת צוות" Group by date(Meeting_Start_Time)'
    answer_1_table = sqldf(question_1_query)
    num_of_sessions = nu.sum(list(answer_1_table['Num_Of_Sessions']))

    # Number of distinct participants in each meeting.
    question_2_query = 'Select Topic, Meeting_Start_Time, count(Distinct User_Email)num_of_Users From zoom_db Group by Meeting_Start_Time, Meeting_ID'
    answer_2_table = sqldf(question_2_query)

    # Number of sessions each user attended, most frequent first. Rows with a
    # user duration of 0 minutes are excluded. Users without an e-mail are
    # identified by the placeholder written by fill_null_emails.
    question_3_query = 'select User_Email, count(*)num_of_arrivals from(Select User_Email, Meeting_Start_Time, Meeting_ID From zoom_db Where User_Duration_min <> 0 Group by User_Email, Meeting_ID , Meeting_Start_Time) group by User_Email Order by num_of_arrivals desc'
    answer_3_table = sqldf(question_3_query)

    # Average attendance per user. The first (most frequent) row is skipped
    # on the assumption that it is the host.
    participants_arrivals_list = list(answer_3_table['num_of_arrivals'])[1:]
    avg_users_come = round((nu.average(participants_arrivals_list)), 2)

    # --------------------- More calculations for the report ---------------------

    # Date range covered by the data.
    min_date_qu = sqldf('select min(date(Meeting_Start_Time)) from zoom_db')
    min_date_qu = list(min_date_qu['min(date(Meeting_Start_Time))'])[0]
    max_date_qu = sqldf('select max(date(Meeting_Start_Time)) from zoom_db')
    max_date_qu = list(max_date_qu['max(date(Meeting_Start_Time))'])[0]

    # Number of sessions in each participant-count bracket.
    num_meetings0_5 = sqldf('select count(*) from answer_2_table where num_of_users<=5 and Topic <>"פגישת צוות"')
    num_meetings0_5 = list(num_meetings0_5['count(*)'])[0]

    num_meetings5_10 = sqldf(
        'select count(*) from answer_2_table where num_of_users>5 and num_of_users<=10 and Topic <>"פגישת צוות"')
    num_meetings5_10 = list(num_meetings5_10['count(*)'])[0]

    num_meetings10_15 = sqldf(
        'select count(*) from answer_2_table where num_of_users>10 and num_of_users<=15 and Topic <>"פגישת צוות"')
    num_meetings10_15 = list(num_meetings10_15['count(*)'])[0]

    num_meetings_15_plus = sqldf('select count(*) from answer_2_table where num_of_users>15 and Topic <>"פגישת צוות"')
    num_meetings_15_plus = list(num_meetings_15_plus['count(*)'])[0]

    # Price per session: 50 / 100 / 150 / 200, depending on the bracket.
    total_cost = 50 * num_meetings0_5 + 100 * num_meetings5_10 + 150 * num_meetings10_15 + 200 * num_meetings_15_plus
    costs_table = [['Session type', 'Number of sessions', 'Cost'],
                   ['0-5 participants', str(num_meetings0_5), str(50 * num_meetings0_5)],
                   ['5-10 participants', str(num_meetings5_10), str(100 * num_meetings5_10)],
                   ['10-15 participants', str(num_meetings10_15), str(150 * num_meetings10_15)],
                   ['15+ participants', str(num_meetings_15_plus), str(200 * num_meetings_15_plus)]]

    # Temporary chart image; create_pdf reads it back under this exact name.
    plot_file = 'sessions_by_day_plot.png'
    answer_1_table.plot.bar(x='date', y='Num_Of_Sessions', rot=80)
    plt.savefig(plot_file)

    num_of_users = sqldf('select count(*) From answer_3_table')
    num_of_users = list(num_of_users['count(*)'])[0]

    today = datetime.today().strftime("%b-%d-%Y")

    # ------------------------------ Output results ------------------------------

    try:
        create_pdf(today=today, max_date=max_date_qu, min_date=min_date_qu, sessions_num=num_of_sessions,
                   total_cost=total_cost, costs_table=costs_table, num_of_users=num_of_users,
                   avg_users_come=avg_users_come)
    finally:
        # The chart is only needed while the PDF is built. It is removed from
        # the same relative path it was saved to, even if the PDF step fails.
        os.remove(plot_file)

    excel_name = 'Zoom Report_{}.xlsx'.format(str(datetime.today()).replace(':', '.', 3))
    with pd.ExcelWriter(excel_name) as writer:
        answer_2_table.to_excel(writer, sheet_name='Sessions Number of Participants')
        answer_3_table.to_excel(writer, sheet_name='Participants show-up')

    sys.exit()


if __name__ == "__main__":
    # Script entry point: create the Tk root window and hand it to the GUI,
    # which runs the event loop. The report itself is produced by the button.
    root = tk.Tk()
    start_gui(root)
4

0 回答 0