我正在编写一个程序,它将生成给定温度数据的多项式模型。您可以在下面的屏幕截图中看到我的程序以及我的数据:
from google.colab import drive
import os as os # for moving around the operating system
import matplotlib.pyplot as plt
import numpy as np
import pickle
import calendar
drive.mount("/content/drive", force_remount=True)
fileName = "/content/drive/My Drive/11th Grade/Independent Study/MBCP2018-2019_iButton1.csv"
## remove excess lines of header data
rawData = open(fileName, 'r');
data = rawData.readlines();
rawData.close()
data = data[15:len(data)];
# ----------------------------------------------------------------------------------------------------------- #
dateTime = [];
unit = [];
T = [];
unit = [];
T = []; # temperature
year = [];
month = [];
day = []; # convert the day into a decimal day value
hour = [];
minute = [];
second = [];
td = []; # Decimal value of year
## use this for converting hours minutes seconds to days https://matplotlib.org/api/dates_api.html
i = 0;
while i < len(data):
dateTimeTemp, unitTemp, T_Temp = data[i].split(',');
monthDayYear, time_Temp, AMPM_Temp = dateTimeTemp.split(' ');
month_Temp,day_Temp,year_Temp = monthDayYear.split('/');
hour_Temp,minute_Temp,second_Temp = time_Temp.split(':');
## clean up the strings to be processed by np.datetime64 - a better solution has got to exist in python...
if int(month_Temp) < 10:
month_Temp = '0' + month_Temp;
if int(day_Temp) < 10:
day_Temp = '0' + day_Temp;
if (int(hour_Temp) < 10) & (int(hour_Temp) != 0):
hour_Temp = '0' + hour_Temp;
# convert the clock to 24hr
if AMPM_Temp == 'PM':
if int(hour_Temp) == 12:
hour_Temp = '12';
else:
hour_Temp = str(int(hour_Temp)+12);
elif int(hour_Temp) == 12: ## when it's in the mo'nin' and clock should read 00:00
hour_Temp = '00';
dateTime.append(dateTimeTemp);
year.append(int('20'+year_Temp));
month.append(int(month_Temp));
day.append(int(day_Temp));
hour.append(int(hour_Temp));
minute.append(int(minute_Temp));
second.append(int(second_Temp));
## time difference from the beginning of the year.
td_Temp = np.datetime64('20' + year_Temp + '-' + month_Temp + '-' + day_Temp + 'T' + hour_Temp + ':' + minute_Temp + ':' + second_Temp)- np.datetime64('20' + year_Temp + '-01-01T00:00:00');
td_Temp = td_Temp / np.timedelta64(1,'s'); ## converts this into a decimal seconds value
# converts this into a decimal year value.
if calendar.isleap(year[i]):
td_Temp = td_Temp/86400/366;
else:
td_Temp = td_Temp/86400/365;
td.append(td_Temp);
unit.append(unitTemp);
T.append(float(T_Temp));
i = i + 1;
np.asarray(td);
np.asarray(T);
np.asarray(year);
td = np.add(td,year);
plt.plot(td,T)
plt.xlabel('Decimal year')
plt.ylabel('Temperature (C)')
plt.xlim(left=2018.5464,right=2019.5395)
# ------------------------------------------------------------------------------------- #
# Machine Learning Stuff! #
# SPLICING WORK #
td = np.round(td,4)
# only have to change these two parameters, but make sure these values exist in the csv file
leftBound = 2018.5464
rightBound = 2019.5395
# returns as a tuple
xLowerBound = np.where(td == leftBound)
xUpperBound = np.where(td == rightBound)
# splicing the array
td = td[xLowerBound[0][0]:xUpperBound[0][0]]
T = T[xLowerBound[0][0]:xUpperBound[0][0]]
# MACHINE LEARNING ALGORITHM #
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
X1,X2,Y1,Y2 = train_test_split(td, T, random_state=0, train_size=0.5)
X1 = X1.reshape(-1,1)
X2 = X2.reshape(-1,1)
print("X1: " + str(X1))
print("X2: " + str(X2))
print("Y1: " + str(Y1))
print("Y2: " + str(Y2))
degree=2
polyreg=make_pipeline(PolynomialFeatures(degree),LinearRegression())
polyreg.fit(X1,Y1)
plt.figure(1)
plt.scatter(X1,Y1)
plt.plot(X2,polyreg.predict(X2),color="black")
plt.title("Polynomial regression with degree "+str(degree))
plt.show()
但是,当我生成模型并更改模型的度数时,我得到了这些非常潦草、时髦的图表。问题是什么?我真的很感激一个解决方案,因为我已经被这个问题困扰了一周。