0

我正在尝试编写一个脚本来生成数据。我为此使用随机包。我执行脚本,一切正常。但是当我检查结果时,我发现脚本由于某种原因未能生成最后 100 多行。

有人可以建议我为什么会发生这种情况吗?

from __future__ import print_function
from faker import Faker;
import random;

## Vaue declaration
population = 3;
product = 3;
years = 3;
months = 13;
days = 30;
tax= 3.5;

## Define Column Header
Column_Names = "Population_ID",";","Product_Name",";","Product_ID",";","Year",";",
"Month",";","Day","Quantity_sold",";","Sales_Price",";","Discount",
";","Actual_Sales_Price",tax;


## Function to generate sales related information
def sales_data():
    for x in range(0,1):
        quantity_sold = random.randint(5,20);
        discount = random.choice(range(5,11));
        sales_price = random.uniform(20,30);
        return quantity_sold,round(sales_price,2),discount,round((sales_price)-(sales_price*discount)+(sales_price*tax));


## Format the month to quarter and return the value
def quarter(month):
    if month >= 1 and month <= 3:
        return "Q1";
    elif month > 3 and month <= 6:
        return "Q2";
    elif month > 6 and month <= 9:
        return "Q3";
    else:
        return "Q4";

## Generate product_id
def product_name():
    str2 = "PROD";
    sample2 = random.sample([1,2,3,4,5,6,7,8,9],5);
    string_list = [];
    for x in sample2:
        string_list.append(str(x));
    return (str2+''.join(string_list));


### Main starts here ###

result_log = open("C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv",'w')    
print (Column_Names, result_log);

### Loop and Generate Data ###

for pop in range(0,population):     
    pop = random.randint(55000,85000);
    for prod_id in range(0,product):
        product_name2 = product_name();
        for year in range(1,years):
            for month in range(1,months):
                for day in range(1,31):
                    a = sales_data();
                    rows =  str(pop)+";"+product_name2+";"+str(prod_id)+";"+str(year)+";"+str(month)+";"+quarter(month)+";"+str(day)+";"+str(a[0])+";"+str(a[1])+";"+str(a[2])+";"+str(tax)+";"+str(a[3]);
                    print(rows,file=result_log);
                    #print (rows);
    tax = tax+1;
4

1 回答 1

1

您需要关闭文件以刷新缓冲区:

result_log.close()

更好的是,将文件对象用作上下文管理器,并with在块退出时让语句为您关闭它:

filename = "C:/Users/Sangamesh.sangamad/Dropbox/Thesis/Data Preparation/GenData.csv"
with result_log = open(filename, 'w'):
    # code writing to result_log

而不是手动编写带有分隔符的字符串,您应该真正使用该csv模块

import csv

# ..

column_names = (
    "Population_ID", "Product_Name", "Product_ID", "Year",
    "Month", "Day", "Quantity_sold", "Sales_Price", "Discount",
    "Actual_Sales_Price", tax)

# ..

with result_log = open(filename, 'wb'):
    writer = csv.writer(result_log, delimiter=';')
    writer.writerow(column_names)

    # looping
        row = [pop, product_name2, prod_id, year, month, quarter(month), day,
               a[0], a[1], a[2], tax, a[3]]
        writer.writerow(row)
于 2014-08-30T18:01:03.513 回答