Predicting Pollutant Levels from Weather Conditions

Using the weather condition data, we trained a model externally; here we load its fitted parameters into a linear regression estimator.

In [1]:
import pandas as pd
from vega_datasets import data
import altair as alt
from altair import expr, datum
import numpy as np
In [2]:
paraDict = {'PM2.5': [[1.6611484593581733e-11, -82.29455576348255, -8.792145340651148, -6.8741956509437125, -54.70124449793916, -134.56525939438185, 22.759082459121217, -0.4251239081851825, 0.23199710054282213, 0.25538831218102764, 6.7835977165817285, 12.032720915307873, -5.2386128801877865, 0.8806400389543716, 0.4830544327906852, -0.6914287813469701, -0.19665159849409633, -0.28380430139344526, -0.08367141167246661, 3.90541436921075, -5.090568388401967, 0.5096972529568387, -1.454401595682728, 22.723681537802914, -3.1891908030308653, 4.2409089350125315, 10.779442548050273, -0.142109126009746, 0.535284743068051, -0.005116691474398749, -0.09672130656319311, -0.32192467322784357, -1.2716072752929788, 0.44632376509149596, 0.0023815812969179007, -0.025288761238222435, 0.03263684809759293, -0.11746096004009893, 0.022160087258288996, 0.05769081191339609, -0.08325439065004936, 1.0080229364800735, -0.09474198968995573, -0.2671553432253188, 0.764340093117642, 0.0015174341878068098, 0.04677362323567282, -0.857491557065488, 0.028841314661262274, -0.022739616106647986, -0.014718233362231103, 0.00602855844971617, 0.021764745322534473, 0.005603152373538478, -0.0038267916415080094, 0.011464630906123041, -0.08395848969015907, 0.006312837688753348, -0.01756306432858945, -0.022699708903226368, -0.01437328175120367, 0.02240727035628287, 0.11942492428009688, -0.0038055173478852566, -0.00134434570718156, -0.07958823871060483, -0.1584203700576724, -0.011802125099677596, 0.03444904995108039, 0.10035589060071878, 0.1568626065408285, -0.3419363054131945, 0.4863203299307871, 0.018344290399527452, 0.24622629353595726, 0.14454881683738002, -0.009428108253477285, -0.05525057748520478, -1.4479264725470722, -0.044378769091222736, -0.05075457468941941, 0.20528918884411712, -0.42306574794298124, -0.006608719908918204]], 'PM10': [[1.9652858001079642e-11, -87.89630601286993, -7.646739676358217, 1.0152397990735387, -55.559379402597465, -158.27065577696482, 17.48074654358579, 0.8854281683122807, 
0.5756287482311925, -0.7774694610617898, 2.8158270620752113, 13.697647261154982, -5.313931031090208, 0.752375688107261, 0.18826139021193256, -2.0388451536545373, 0.4909257007708139, -0.15506239767382013, -0.0282631367160997, 3.77208533813393, 0.6667040358127481, 0.8797910881337127, 1.5406870560838901, 27.416196463528188, -2.7069591683921583, 2.6814429304802414, 7.500905421132543, -0.46532980529507273, 0.4678955795958615, -0.015572055834594423, -0.046607420442855485, -0.076368524175765, -1.4372887935958318, 0.475766817081956, -0.00436627487798269, -0.013322009825099457, 0.010938261457240417, -0.1609041644517128, 0.017234238903622412, 0.04949434315741286, -0.062355213949344845, 0.8472454798831046, -0.09855102331380028, -0.3155398768126911, 1.2412960698021074, -0.043680672161871215, 0.18466485000190005, -0.7924607908059444, 0.03864835371991582, -0.02011481039291141, -0.004110985295362468, 0.06889869223526657, -0.00647854863655728, -0.0014722307839463156, -0.004793027979431486, 0.007888046101321544, 0.007776659224026261, 0.008967813958307147, -0.012565986349530545, -0.017578993463531145, -0.0035949251230160684, 0.022600270099461084, 0.03974997157980775, -0.004886129568426156, -0.003309423783548482, -0.08047064538392457, -0.41047519310070224, -0.016306234407323927, 0.15310906476795172, -0.273842249117597, 0.1401223746187834, -0.45122082697644716, 0.8194516110314709, 0.026476185923302253, 0.01801960092674687, 0.11129528566481228, -0.0913033855026435, -0.15731389147525526, -1.3612969737229963, -0.02336283034306952, -0.05455786188570047, 0.3596995809449284, -0.557913777266217, -0.011064698646521265]], 'SO2': [[4.1477147071952384e-12, -14.048533416925107, -1.7285530892050907, 5.40988311282001, -11.28437621758516, -34.673850369937036, -3.1496436417462745, -0.0827635311597211, 0.2912552121003448, -0.612574079017647, 2.8107070967948404, 8.092144635547601, -0.07775873781226203, 0.20763427034145043, 0.12960780178839357, -0.38034082928145074, -0.31788886163796803, 
-0.14814302730697992, -0.301411662997894, 0.4088834119988849, -0.07583583230258698, 0.6151672718679928, -0.699671758310567, 3.8681806440193243, -0.24390430620281017, 0.39389910976494064, 0.6053514345620021, -0.3034717172861639, 0.08946793075876602, -0.028670461338351938, 0.03208957794392708, -0.2046512061625248, -0.48345531057690155, 0.013334729873052176, 0.003152926401647882, -0.00485101829238932, 0.020075533527773137, 0.00940996670623409, 0.0036340872175379957, 0.010181479564133511, -0.004966931641423866, -0.0360477837289056, -0.016279098928175584, 0.009547161432614124, -0.09548333635761448, 0.01625002956640842, -0.04189648845077041, 0.010125717103962793, 0.009491622030543878, -0.006399500453412088, -0.004918342367185247, 0.003028707408470721, 0.001053716039468744, 0.003967050128100169, -0.0021801910648271416, 0.01710248429576533, 0.0032455452779979743, 0.005081464352086229, -0.020096291857824508, 0.04794800412052705, -0.014655293584331819, -0.0009965301347508587, 0.0060697279004791604, -0.0027609141885598376, 0.00492514952816947, -0.007068549967939042, 0.0048036019337569305, -0.015155221104112826, -0.033166545959736726, -0.0113222625016108, 0.006624565310459074, 0.05141293545145578, -0.04294662045448971, 0.013545095055626843, 0.14030237300826096, 0.08504260880445395, 0.007672515632256516, -0.0007917005162801326, -0.19495568370575636, 0.0016793637347787271, 0.008520512901243539, -0.07423935960488312, 0.0386971870543014, -0.003727480220151236]], 'CO': [[2.240321600407482e-10, -1225.952278469486, -78.86559791156917, -206.45015003430737, -861.3845296609677, -1833.9974696884938, 256.62889271754335, 4.551628651538043, 3.914504812941773, 38.79052247983111, 143.20266837031292, 244.16614240998447, -50.94684421657351, 8.089667287525991, 5.223305491768963, -2.2169648087833167, 1.9137467467820168, -3.122192485669096, 2.0406030891127074, 32.006037007428446, -112.05006315109591, -0.7513573432314313, 5.386747568345018, 428.0272667558207, -23.125317569144627, 20.0937608033041, 
187.44415794451882, -0.17499879558970477, 7.536540883902589, -0.2919098852077866, -4.028029635964884, -8.798006177895616, -22.258192526264274, 4.630167650931327, 0.11915142128478351, -0.37112573782557945, 0.47892627455583536, -1.1502403104366081, 0.3066638408869835, 0.7943451074257837, -0.20040362036145254, 13.289202082115533, -1.149061666290379, -3.2852003902072475, -16.7488252892817, -0.06075544822345336, -0.07387592373754837, -7.508166399967124, 0.36068385116954005, -0.2217383136259059, -0.1941973140878872, -0.06797677387043422, -0.19443348767094548, 0.10297927652948147, -0.029745137135591904, 0.3906574369694944, -0.7418746052461493, 0.040248494351772024, -1.1747582724307453, 2.0355704296475476, -0.4319261344032981, 0.09525614906935598, 1.2235329472219547, -0.01942535668808887, -0.1110673116252201, -0.7253581610203241, 1.6128440658804957, 0.09308463993144755, -0.44642294876691846, -0.15969420877173657, 1.204237130884266, -1.8344995432274782, -2.3827826342632523, 0.036741960814140384, 3.3989949993438557, -3.5014492807904336, -0.12894025299694079, 1.2234807279220936, -16.02064846819309, -0.1634391636285244, -0.23528535963134, 1.0703384865824521, -1.9234989835171818, -0.013337530675300968]], 'NO2': [[3.968090788028884e-12, -17.44832957752581, -1.9632618451408732, 2.7529566457688768, -27.08443441966133, -44.29488883917191, 0.1656208293540129, -0.09518019377688144, -0.08085329101794786, -0.2489570408211356, 2.6660626246573713, 3.9862206193295395, -0.5678061548615748, 0.16084942139198433, 0.09814962498653851, 0.034444668975875545, 0.5311920994175339, -0.10784852238667342, -0.16922679920406292, 0.4220587712587283, -2.3874317994974668, 0.4762168231927388, 0.2616461479551287, 11.46017515819665, -0.5616023399571081, 0.9604769642983996, 3.873247888228764, -0.28846814764544804, 0.116827893003709, 0.014013272335454019, -0.0001453980872678784, -0.1671394439803519, -0.41693244716378053, 0.0592726697032373, -0.0012981944024366143, -0.0013194446469895205, -0.006016571330371867, 
0.006246884412886081, 0.004982305763588268, 0.002294694954226628, 0.028701185265002316, 0.19030336579784005, -0.011602431451107176, -0.07283199651545394, 0.11233705026690324, -0.021535620933806454, 0.01811619425707487, -0.1347968219925358, 0.00655667199646448, -0.0016492856537695388, -0.0008874470269049084, -0.02300903433469914, -0.02125935165181625, 0.0012848187246312648, -0.004784790350901957, 0.010290803740053645, -0.026396922545597168, 0.003437550965472269, 0.05823568689356534, 0.0043106045775461385, 0.0014509246136302778, -0.00023582449682857387, 0.02943563397388116, -0.0008453273137192241, 0.003461394053193345, -0.00014664545284936992, -0.012139815130827208, -0.011072798932873162, -0.05106365770154353, 0.0519330071908479, -0.0010998025064432593, -0.10076707122149484, 0.10841131734069534, 0.010684347959459162, 0.11728380819837067, -0.40084140287806247, 0.057371804225025747, -0.03134211250582285, -0.49451451242910205, 0.01335469180406701, -0.015074328176060525, 0.08043540745214922, -0.12678013442982072, -0.0034529460535495105]], 'O3': [[2.157105360329609e-12, 16.828252570808825, -11.601077723475168, 1.1717458514327006, 11.922877186018242, -12.846012416949598, -0.6372908407953215, -1.9519342501922914, 0.09271226424382578, -0.9263686757190486, -1.0117308378213876, 11.393272845522036, -0.0012067691849317885, 1.200401921746459, 0.28120041609708324, -0.6486106803263719, -0.7177499687096124, -0.34430877061945747, 0.08560191833949651, 1.0814729095239604, 8.122472496383418, 0.2030962855555689, -0.5418510580649216, -5.743506608410425, -0.6892220172082439, -3.0057543219159224, -7.829093578107546, -0.15810127225458648, 0.061330637815660616, 0.00034529265664140496, 0.0625746004134875, 0.0366263071760739, -0.845577527683766, 0.012296024685507945, -0.003981912803450172, -0.007772102970800887, 0.027453286389521603, -0.057548267043509904, 0.006273581870499423, -0.0010580671519972995, -0.058096858436602394, -0.8612601330020627, -0.02144293215604698, 0.07122280670664745, 
0.3374416210411078, 0.02304643847434229, 0.08241677694374282, 0.9329681707343066, 0.010558816141621463, -0.033745334276451544, -0.008354135049797762, 0.026606572512395066, 0.02753753287628547, 0.007775106574301027, 0.0033206910128473766, 0.018950253864760347, 0.13282947350901275, 0.011790152579346902, -0.08833488561887246, 0.08955408216426756, -0.022816130157939818, 0.011308569546120466, -0.13220781149847707, -0.008817508376199212, -0.0023018416711790834, -0.012559406146110952, -0.4017523895718383, -0.0011770729270121905, -0.05843259910897006, -0.0025323503510215686, 0.04864922255430715, -0.005901299204482041, 0.7513850361302525, 0.013475987152461677, 0.05446792656703674, 0.6936506464468829, 0.0030937724908725733, -0.06590689709140427, -0.05173671809864869, -0.03161485788511209, 0.03681549386844947, 0.05314441331581858, -0.39218971055322127, -0.008115257281969601]]}

# Intercept term for each pollutant. It becomes the "intercept" column of
# paraDf and is added to the polynomial sum when the prediction is built.
interceptDict = {'PM2.5': 451.5494046072333, 'PM10': 463.2892450354731, 'SO2': 61.240610451389934, 'CO': 6403.735081626501, 'NO2': 140.52967162255118, 'O3': -5.6935059417963885}

# Labels of the polynomial terms, position-aligned with every coefficient list
# in paraDict (they are used as the coefficient column names of paraDf).
# 'xN' is the N-th input variable, 'xN^K' raises it to the power K, factors
# separated by a space are multiplied, and '1' is the constant term.
symbolList = ['1', 'x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x0^2', 'x0 x1', 'x0 x2', 'x0 x3', 'x0 x4', 'x0 x5', 'x1^2', 'x1 x2', 'x1 x3', 'x1 x4', 'x1 x5', 'x2^2', 'x2 x3', 'x2 x4', 'x2 x5', 'x3^2', 'x3 x4', 'x3 x5', 'x4^2', 'x4 x5', 'x5^2', 'x0^3', 'x0^2 x1', 'x0^2 x2', 'x0^2 x3', 'x0^2 x4', 'x0^2 x5', 'x0 x1^2', 'x0 x1 x2', 'x0 x1 x3', 'x0 x1 x4', 'x0 x1 x5', 'x0 x2^2', 'x0 x2 x3', 'x0 x2 x4', 'x0 x2 x5', 'x0 x3^2', 'x0 x3 x4', 'x0 x3 x5', 'x0 x4^2', 'x0 x4 x5', 'x0 x5^2', 'x1^3', 'x1^2 x2', 'x1^2 x3', 'x1^2 x4', 'x1^2 x5', 'x1 x2^2', 'x1 x2 x3', 'x1 x2 x4', 'x1 x2 x5', 'x1 x3^2', 'x1 x3 x4', 'x1 x3 x5', 'x1 x4^2', 'x1 x4 x5', 'x1 x5^2', 'x2^3', 'x2^2 x3', 'x2^2 x4', 'x2^2 x5', 'x2 x3^2', 'x2 x3 x4', 'x2 x3 x5', 'x2 x4^2', 'x2 x4 x5', 'x2 x5^2', 'x3^3', 'x3^2 x4', 'x3^2 x5', 'x3 x4^2', 'x3 x4 x5', 'x3 x5^2', 'x4^3', 'x4^2 x5', 'x4 x5^2', 'x5^3']

# [min, max] pair for each pollutant. The pairs become the "min_range" and
# "max_range" columns of paraDf, which bound the rescaled prediction.
# NOTE(review): presumably the observed value range in the training data -- confirm.
rangeDict = {'CO':[100,9700],'NO2':[2,228],'O3':[0.321300,334.000000],'PM10':[3.000000,951.500000],'PM2.5':[3.000000,970.000000],'SO2':[0.571200,325.000000]}

# paraDict["CO"][0] = [value/1000 for value in paraDict["CO"][0]]

# import numpy as np
# # for polution in paraDict:
#     paraDict[polution] = [np.random.random(len(paraDict[polution][0])).tolist()]

# One row per pollutant: its polynomial coefficients, followed by the
# intercept, the pollutant name and the [min, max] output range.
paraData = [
    paraDict[pollutant][0] + [interceptDict[pollutant], pollutant] + rangeDict[pollutant]
    for pollutant in paraDict
]

# Coefficient columns are named after the polynomial terms so that each one
# can later be looked up by its term label.
paraDf = pd.DataFrame(
    data=paraData,
    columns=symbolList + ["intercept", "Polution", "min_range", "max_range"],
)
paraDf
Out[2]:
1 x0 x1 x2 x3 x4 x5 x0^2 x0 x1 x0 x2 ... x3 x4 x5 x3 x5^2 x4^3 x4^2 x5 x4 x5^2 x5^3 intercept Polution min_range max_range
0 1.661148e-11 -82.294556 -8.792145 -6.874196 -54.701244 -134.565259 22.759082 -0.425124 0.231997 0.255388 ... -1.447926 -0.044379 -0.050755 0.205289 -0.423066 -0.006609 451.549405 PM2.5 3.0000 970.0
1 1.965286e-11 -87.896306 -7.646740 1.015240 -55.559379 -158.270656 17.480747 0.885428 0.575629 -0.777469 ... -1.361297 -0.023363 -0.054558 0.359700 -0.557914 -0.011065 463.289245 PM10 3.0000 951.5
2 4.147715e-12 -14.048533 -1.728553 5.409883 -11.284376 -34.673850 -3.149644 -0.082764 0.291255 -0.612574 ... -0.194956 0.001679 0.008521 -0.074239 0.038697 -0.003727 61.240610 SO2 0.5712 325.0
3 2.240322e-10 -1225.952278 -78.865598 -206.450150 -861.384530 -1833.997470 256.628893 4.551629 3.914505 38.790522 ... -16.020648 -0.163439 -0.235285 1.070338 -1.923499 -0.013338 6403.735082 CO 100.0000 9700.0
4 3.968091e-12 -17.448330 -1.963262 2.752957 -27.084434 -44.294889 0.165621 -0.095180 -0.080853 -0.248957 ... -0.494515 0.013355 -0.015074 0.080435 -0.126780 -0.003453 140.529672 NO2 2.0000 228.0
5 2.157105e-12 16.828253 -11.601078 1.171746 11.922877 -12.846012 -0.637291 -1.951934 0.092712 -0.926369 ... -0.051737 -0.031615 0.036815 0.053144 -0.392190 -0.008115 -5.693506 O3 0.3213 334.0

6 rows × 88 columns

In [3]:
def parseSymbol(symbol):
    """Parse a polynomial-term label into ``(variable index, exponent)`` pairs.

    A label is a whitespace-separated product of factors, each written ``xN``
    or ``xN^K`` (for example ``"x0^2 x3"``). The label ``"1"`` denotes the
    constant term.

    Args:
        symbol: Term label such as ``"1"``, ``"x4"`` or ``"x0 x1^2"``.

    Returns:
        A list of ``(N, K)`` integer tuples, one per factor, in the order the
        factors appear. A factor without ``^`` has exponent 1. The constant
        term (and a blank label) yields an empty list.

    Raises:
        ValueError: If a factor's index or exponent is not an integer, or a
            factor contains more than one ``^``.
    """
    # split() without an argument ignores repeated, leading and trailing
    # whitespace; split(" ") would emit empty factors and make int() fail.
    elementList = symbol.split()
    if elementList == ["1"]:
        return []

    ret = []
    for element in elementList:
        if "^" in element:
            var, power = element.split("^")
            ret.append((int(var[1:]), int(power)))
        else:
            # Bare "xN" means the variable appears to the first power.
            ret.append((int(element[1:]), 1))
    return ret

def calculate(valueList, paraList, symbolList):
    """Evaluate a polynomial as the sum of coefficient * monomial.

    Args:
        valueList: Variable values, indexable by the variable index that
            appears in the term labels (``x0`` reads ``valueList[0]``).
            Entries may be numbers or any objects that overload ``**`` and
            ``*`` (the notebook passes Altair expressions).
        paraList: Coefficients, paired positionally with ``symbolList``.
        symbolList: Term labels understood by ``parseSymbol``.

    Returns:
        ``0`` plus, for every (label, coefficient) pair, the coefficient times
        the product of the label's variables raised to their exponents. The
        intercept is not included.
    """
    def monomial(label):
        # Product of value ** exponent over the factors of one term; the
        # constant term has no factors and therefore evaluates to 1.
        product = 1
        for index, power in parseSymbol(label):
            product *= valueList[index] ** power
        return product

    total = 0
    for label, coefficient in zip(symbolList, paraList):
        total += coefficient * monomial(label)
    return total
        
        
# calculate([1,2,3,4,5,6], PM25, symbolList)

# (min, max) slider bounds for each weather feature.
featureRange = {
    'month': (2, 12),
    'hour': (0, 24),
    'PRES': (986.0, 1042.0),
    'TEMP': (-16.0, 40.0),
    'WSPM': (2.0, 13.0),
    'RAIN': (0.0, 29.0),
    'DEWP': (-35.0, 27.0),
}
# Slider increment for each weather feature.
featureStep = {
    'month': 1,
    'hour': 1,
    'PRES': 0.1,
    'TEMP': 0.1,
    'WSPM': 0.1,
    'RAIN': 0.1,
    'DEWP': 0.01,
}

# Features fed to the model. Their position here is the variable index used by
# the polynomial term labels (x0 = first entry, x1 = second, ...).
featureList = ['month', 'hour', 'TEMP', 'WSPM', 'RAIN', 'DEWP']

# One slider-bound selection per feature, initialised to the midpoint of its range.
selectorDict = {}
for feature in featureList:
    lower, upper = featureRange[feature]
    selectorDict[feature] = alt.selection_single(
        name="select%s" % feature,
        fields=[feature],
        bind=alt.binding_range(min=lower, max=upper, step=featureStep[feature], name='%s:' % feature),
        init={feature: (lower + upper) / 2},
    )

selectorList = [selectorDict[feature] for feature in featureList]

# Expression references to the current slider values, in featureList order.
selectorValueList = [selectorDict[feature][feature] for feature in featureList]

# Expression references to the per-row coefficient columns, in term order.
datumList = [datum[label] for label in symbolList]

# Raw polynomial output for the row's pollutant, built as an Altair expression.
rawValue = calculate(selectorValueList, datumList, symbolList) + datum["intercept"]
# Squash the raw output through a logistic curve scaled to [min_range, max_range].
predictValue = (datum["max_range"] - datum["min_range"]) / (1+expr.exp(-rawValue/100)) + datum["min_range"]



# Select Altair's 'notebook' renderer.
# NOTE(review): this is the renderer intended for the classic Jupyter Notebook
# front end; other front ends may need a different one -- confirm for your setup.
alt.renderers.enable('notebook')

# One bar per pollutant row of paraDf. The bar height is the prediction
# expression, re-evaluated in the browser whenever a feature slider moves.
# (The unused demo slider/selector and the throwaway `data` frame that used to
# sit here were dropped: `data` shadowed the `vega_datasets` import.)
alt.Chart(paraDf).mark_bar(size=50).encode(
    x="Polution",
    y="value:Q",
).transform_calculate(
    value=predictValue
).add_selection(*selectorList).properties(
    title="Predicted Value of Polution",
    height=500,
    width=500,
)#.save("predict.html")
Out[3]: