In [238]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
In [239]:
# Read the wind-turbine measurements CSV into the working frame `df`.
# The location is an absolute, machine-specific Windows path.
path = r"C:\Users\venkatesh\Desktop\pandasimpor\T11.csv"
df = pd.read_csv(filepath_or_buffer=path)
In [240]:
# Preview the first 500 rows (the notebook display truncates the middle).
df.head(n=500)
Out[240]:
Date/Time LV ActivePower (kW) Wind Speed (m/s) Theoretical_Power_Curve (KWh) Wind Direction (°)
0 01 01 2018 00:00 380.047791 5.311336 416.328908 259.994904
1 01 01 2018 00:10 453.769196 5.672167 519.917511 268.641113
2 01 01 2018 00:20 306.376587 5.216037 390.900016 272.564789
3 01 01 2018 00:30 419.645905 5.659674 516.127569 271.258087
4 01 01 2018 00:40 380.650696 5.577941 491.702972 265.674286
... ... ... ... ... ...
495 04 01 2018 13:20 0.000000 2.484865 0.000000 142.863403
496 04 01 2018 13:30 0.000000 2.764512 0.000000 133.911102
497 04 01 2018 13:40 0.000000 2.507089 0.000000 103.273804
498 04 01 2018 13:50 0.000000 3.441704 45.742725 79.094109
499 04 01 2018 14:00 0.000000 2.484878 0.000000 106.450302

500 rows × 5 columns

In [241]:
# Single-column frames (2-D, as scikit-learn expects):
#   x -> predictor, wind speed
#   y -> target, measured active power
x = df.loc[:, ['Wind Speed (m/s)']]
y = df.loc[:, ['LV ActivePower (kW)']]
In [242]:
# Ordinary least-squares estimator; it is refitted on every polynomial
# expansion tried further down in the notebook.
lm = LinearRegression()
In [243]:
# Sweep the polynomial degree, recording the training MSE of each fit, then
# plot MSE against degree.
#
# The sweep has to run BEFORE `cost` / `degree` are converted to arrays and
# plotted; doing it the other way round only works when those names are left
# over from an earlier kernel session and raises NameError on a fresh
# "Restart & Run All". Sweep and summary plot therefore share one cell.
#
# NOTE(review): in the recorded output the training MSE rises again for the
# higher degrees. Adding terms to a least-squares fit should not increase the
# training error, so this likely reflects ill-conditioning from raw powers of
# the wind speed -- consider scaling x before expanding it.
MAX_DEGREE = 30  # degrees 0 .. MAX_DEGREE - 1 are tried

cost = []
degree = []
for i in range(MAX_DEGREE):
    # Expand x into polynomial terms up to degree i and fit a linear model on them.
    poly_reg = PolynomialFeatures(degree=i)
    input_reg = poly_reg.fit_transform(x)
    lm.fit(input_reg, y)
    p = lm.predict(input_reg)

    # Compute the error once; scikit-learn's argument order is (y_true, y_pred).
    mse = mean_squared_error(y, p)
    cost.append(mse)
    degree.append(i)

    print(i)
    print(" ")
    print(mse)

    # Measurements with the fitted values (red) drawn on top.
    plt.scatter(x, y)
    plt.scatter(x, p, color='r')
    plt.xlabel('Input')
    plt.ylabel('output')
    plt.show()

# Array form for the summary plot and for the cells below.
cost = np.array(cost)
degree = np.array(degree)

plt.scatter(degree, cost)
plt.title("degree vs Mean Square Error")
plt.xlabel("degree")
plt.ylabel("Mean Square Error")
plt.show()
0
 
1722515.1726089804
1
 
287389.75992399297
2
 
283801.2530865484
3
 
182392.22619537456
4
 
168109.0218138836
5
 
161270.8911902387
6
 
156483.70271468573
7
 
155748.6676050543
8
 
154896.67491289933
9
 
154434.31444870358
10
 
153981.4714173322
11
 
153627.19275326448
12
 
153741.25550626346
13
 
153927.7455724994
14
 
153447.50881802136
15
 
160007.09717426196
16
 
168451.12610774164
17
 
183899.78195152752
18
 
209588.05099577547
19
 
248714.7293670994
20
 
303303.0633678924
21
 
648406.2730546015
22
 
758565.1528829635
23
 
864950.4920537206
24
 
963629.962190674
25
 
1052116.9898924348
26
 
1129711.8541660463
27
 
1196667.9799520336
28
 
1254013.9749663228
29
 
1303042.6086160154
In [246]:
# Smallest training MSE observed across the degree sweep.
cost = np.asarray(cost)
np.min(cost)
Out[246]:
153447.50881802136
In [247]:
# Refit with the degree that reached the lowest training MSE in the sweep
# (degree 14 in the recorded run) instead of hard-coding that number, so this
# cell stays in step with the sweep if the data or degree range changes.
best_degree = int(degree[int(np.argmin(cost))])

# poly_reg and lm are reused by the evaluation cells further down.
poly_reg = PolynomialFeatures(degree=best_degree)
input_reg = poly_reg.fit_transform(x)
lm.fit(input_reg, y)
p = lm.predict(input_reg)

print(' ')
print(mean_squared_error(y, p))  # scikit-learn's order is (y_true, y_pred)

# Measurements with the fitted values (red) drawn on top.
plt.scatter(x, y)
plt.scatter(x, p, color='r')
plt.xlabel('Input')
plt.ylabel('output')
plt.show()
 
153447.50881802136
In [248]:
# Second read of the same CSV, kept in its own frame (`df1`) for the
# evaluation slice taken in the next cell.
path = r"C:\Users\venkatesh\Desktop\pandasimpor\T11.csv"
df1 = pd.read_csv(filepath_or_buffer=path)
In [249]:
# Evaluation window: positional rows 18073..18443 (371 rows; the recorded
# output shows 11 05 2018 04:40 through 13 05 2018 18:20), with the last
# column dropped.
#
# The slice is taken from `df` -- loaded from the same CSV path and not
# modified by any cell above -- rather than from df1 itself. Reassigning df1
# from its own slice made the cell non-repeatable: a second run would slice
# the already-sliced frame, giving an empty result and dropping another column.
df1 = df.iloc[18073:18444, :-1]
df1
Out[249]:
Date/Time LV ActivePower (kW) Wind Speed (m/s) Theoretical_Power_Curve (KWh)
18073 11 05 2018 04:40 226.784698 4.925289 317.855511
18074 11 05 2018 04:50 91.152710 3.845725 100.283272
18075 11 05 2018 05:00 53.089458 3.672670 73.724219
18076 11 05 2018 05:10 154.288696 4.391197 199.318102
18077 11 05 2018 05:20 193.218903 4.795403 287.281942
... ... ... ... ...
18439 13 05 2018 17:40 347.659485 5.463120 458.453007
18440 13 05 2018 17:50 238.012299 4.870736 304.867198
18441 13 05 2018 18:00 106.696999 4.099254 141.650063
18442 13 05 2018 18:10 12.002850 2.522152 0.000000
18443 13 05 2018 18:20 0.000000 1.561904 0.000000

371 rows × 4 columns

In [250]:
# Evaluation inputs and targets taken from the df1 slice.
# `xx` keeps the raw wind-speed column for plotting, because `x_test` is
# replaced by its polynomial expansion in the next cell.
x_test = df1[['Wind Speed (m/s)']]
xx = df1[['Wind Speed (m/s)']]
y_test = df1[['LV ActivePower (kW)']]
In [251]:
# Expand the raw evaluation inputs with the transformer that was already
# fitted on the training inputs. `transform` (rather than `fit_transform`)
# avoids refitting on evaluation data, and reading from `xx` -- the raw
# wind-speed column, which is never overwritten -- keeps this cell safe to
# re-run instead of expanding an already-expanded x_test.
x_test = poly_reg.transform(xx)
In [252]:
# Predicted active power for the polynomial-expanded evaluation inputs.
y_output = lm.predict(x_test)
In [253]:
# MSE on the evaluation window, then predictions and measurements plotted
# against the raw wind speed in two separate figures.
#
# NOTE(review): df1 is a slice of the same CSV the model was fitted on (the
# fit used every row of df), so this figure is not an out-of-sample error.
print(mean_squared_error(y_test, y_output))  # scikit-learn's order is (y_true, y_pred)

# Model output versus wind speed.
plt.scatter(xx, y_output)
plt.xlabel('  Input')
plt.ylabel('Polynomial Regression Based output')
plt.show()

# Measured output versus wind speed.
plt.scatter(xx, y_test, color='r')
plt.xlabel('Input')
plt.ylabel('output')
plt.show()
33296.86525272274
In [ ]:
 
In [ ]: