VerticaPy

Python API for Vertica Data Science at Scale

Regression

This example demonstrates various Regressor methods using the Winequality dataset.

In [19]:
from verticapy.learn.datasets import load_winequality
winequality = load_winequality()
display(winequality)
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
13.8000.31000.02011.1000.03620.00114.000.992483.7500.44012.460white
23.9000.22500.4004.2000.0329.00118.000.9893.5700.36012.881white
34.2000.17000.3601.8000.02993.00161.000.989993.6500.89012.071white
44.2000.21500.2305.1000.04164.00157.000.996883.4200.4408.030white
54.4000.32000.3904.3000.0331.00127.000.989043.4600.36012.881white
64.4000.46000.1002.8000.02431.00111.000.988163.4800.34013.160white
74.4000.54000.0905.1000.03852.0097.000.990223.4100.40012.271white
84.5000.19000.2100.9500.03389.00159.000.993323.3400.4208.050white
94.6000.44500.0001.4000.05311.00178.000.994263.7900.55010.250white
104.6000.52000.1502.1000.0548.0065.000.99343.9000.56013.140red
114.7000.14500.2901.0000.04235.0090.000.99083.7600.49011.360white
124.7000.33500.1401.3000.03669.00168.000.992123.4700.46010.550white
134.7000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white
144.7000.60000.1702.3000.05817.00106.000.99323.8500.60012.960red
154.7000.67000.0901.0000.025.009.000.987223.3000.34013.650white
164.7000.78500.0003.4000.03623.00134.000.989813.5300.92013.860white
174.8000.13000.3201.2000.04240.0098.000.98983.4200.64011.871white
184.8000.17000.2802.9000.0322.00111.000.99023.3800.34011.371white
194.8000.21000.21010.2000.03717.00112.000.993243.6600.48012.271white
204.8000.22500.3801.2000.07447.00130.000.991323.3100.40010.360white
214.8000.26000.23010.6000.03423.00111.000.992743.4600.28011.571white
224.8000.29000.2301.1000.04438.00180.000.989243.2800.34011.960white
234.8000.33000.0006.5000.02834.00163.000.99373.3500.6109.950white
244.8000.34000.0006.5000.02833.00163.000.99393.3600.6109.960white
254.8000.65000.1201.1000.0134.0010.000.992463.3200.36013.540white
264.9000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white
274.9000.33000.3101.2000.01639.00150.000.987133.3300.59014.081white
284.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white
294.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white
304.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white
314.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white
324.9000.42000.0002.1000.04816.0042.000.991543.7100.74014.071red
334.9000.47000.1701.9000.03560.00148.000.989643.2700.35011.560white
345.0000.17000.5601.5000.02624.00115.000.99063.4800.39010.871white
355.0000.20000.4001.9000.01520.0098.000.98973.3700.55012.0560white
365.0000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white
375.0000.24000.1905.0000.04317.00101.000.994383.6700.57010.050white
385.0000.24000.2102.2000.03931.00100.000.990983.6900.62011.760white
395.0000.24000.3401.1000.03449.00158.000.987743.3200.32013.171white
405.0000.25500.2202.7000.04346.00153.000.992383.7500.76011.360white
415.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white
425.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white
435.0000.27000.4001.2000.07642.00124.000.992043.3200.47010.160white
445.0000.29000.5405.7000.03554.00155.000.989763.2700.34012.981white
455.0000.30000.3303.7000.0354.00173.000.98873.3600.30013.071white
465.0000.31000.0006.4000.04643.00166.000.9943.3000.6309.960white
475.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
485.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
495.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
505.0000.33000.1804.6000.03240.00124.000.991143.1800.40011.060white
515.0000.33000.23011.8000.0323.00158.000.993223.4100.64011.860white
525.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white
535.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white
545.0000.38000.0101.6000.04826.0060.000.990843.7000.75014.060red
555.0000.40000.5004.3000.04629.0080.000.99023.4900.66013.660red
565.0000.42000.2402.0000.0619.0050.000.99173.7200.74014.081red
575.0000.44000.04018.6000.03938.00128.000.99853.3700.57010.260white
585.0000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white
595.0000.55000.1408.3000.03235.00164.000.99183.5300.51012.581white
605.0000.61000.1201.3000.00965.00100.000.98743.2600.37013.550white
615.0000.74000.0001.2000.04116.0046.000.992584.0100.59012.560red
625.0001.02000.0401.4000.04541.0085.000.99383.7500.48010.540red
635.0001.04000.2401.6000.0532.0096.000.99343.7400.62011.550red
645.1000.11000.3201.6000.02812.0090.000.990083.5700.52012.260white
655.1000.14000.2500.7000.03915.0089.000.99193.2200.4309.260white
665.1000.16500.2205.7000.04742.00146.000.99343.1800.5509.960white
675.1000.21000.2801.4000.04748.00148.000.991683.5000.49010.450white
685.1000.23000.1801.0000.05313.0099.000.989563.2200.39011.550white
695.1000.25000.3601.3000.03540.0078.000.98913.2300.64012.171white
705.1000.26000.3301.1000.02746.00113.000.989463.3500.43011.471white
715.1000.26000.3406.4000.03426.0099.000.994493.2300.4109.260white
725.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white
735.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white
745.1000.30000.3002.3000.04840.00150.000.989443.2900.46012.260white
755.1000.30500.1301.7500.03617.0073.000.993.4000.51012.333333333333350white
765.1000.31000.3000.9000.03728.00152.000.9923.5400.56010.160white
775.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
785.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
795.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
805.1000.33000.2706.7000.02244.00129.000.992213.3600.39011.071white
815.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
825.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
835.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
845.1000.39000.2101.7000.02715.0072.000.98943.5000.45012.560white
855.1000.42000.0001.8000.04418.0088.000.991573.6800.73013.671red
865.1000.42000.0101.5000.01725.00102.000.98943.3800.36012.371white
875.1000.47000.0201.3000.03418.0044.000.99213.9000.62012.860red
885.1000.51000.1802.1000.04216.00101.000.99243.4600.87012.971red
895.1000.52000.0602.7000.05230.0079.000.99323.3200.4309.350white
905.1000.58500.0001.7000.04414.0086.000.992643.5600.94012.971red
915.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white
925.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white
935.2000.16000.3400.8000.02926.0077.000.991553.2500.51010.160white
945.2000.17000.2700.7000.0311.0068.000.992183.3000.4109.850white
955.2000.18500.2201.0000.0347.00123.000.992183.5500.44010.1560white
965.2000.20000.2703.2000.04716.0093.000.992353.4400.53010.171white
975.2000.21000.3101.7000.04817.0061.000.989533.2400.37012.071white
985.2000.22000.4606.2000.06641.00187.000.993623.1900.4209.7333333333333350white
995.2000.24000.1507.1000.04332.00134.000.993783.2400.4809.960white
1005.2000.24000.4503.8000.02721.00128.000.9923.5500.49011.281white
Rows: 1-100 of 6497 | Columns: 14

Let's create a Linear Regression to predict the Wine Quality.

In [22]:
from verticapy.learn.linear_model import LinearRegression
model = LinearRegression("public.LR_winequality")
model.fit("public.winequality", ["alcohol"], "quality")
Out[22]:

=======
details
=======
predictor|coefficient|std_err |t_value |p_value 
---------+-----------+--------+--------+--------
Intercept|  2.40527  | 0.08594|27.98758| 0.00000
 alcohol |  0.32531  | 0.00814|39.97052| 0.00000


==============
regularization
==============
type| lambda 
----+--------
none| 1.00000


===========
call_string
===========
linear_reg('public.LR_winequality', 'public.winequality', '"quality"', '"alcohol"'
USING PARAMETERS optimizer='cgd', epsilon=0.0001, max_iterations=100, regularization='none', lambda=1, alpha=0.5)

===============
Additional Info
===============
       Name       |Value
------------------+-----
 iteration_count  |  1  
rejected_row_count|  0  
accepted_row_count|6497 

Fitting the model creates new model attributes, which make methods easier to use.

In [23]:
model.X
Out[23]:
['"alcohol"']
In [24]:
model.y
Out[24]:
'"quality"'
In [25]:
model.input_relation
Out[25]:
'public.winequality'
In [26]:
model.test_relation
Out[26]:
'public.winequality'

In our case, we did not write the test relation when fitting the model. The model will then consider the training relation as test. These attributes will be used when invoking the different model abstractions. For example, let's compute the R2 of the model.

In [27]:
model.score(method = "r2")
Out[27]:
0.197418947221722

The 'score' method is using the attribute 'y' and the model prediction in the 'testrelation' to compute the accuracy. You can change these at anytime to deploy the models on different columns. The model could also have other useful attributes. In this case, the 'coef' attribute provides us with useful information.

In [28]:
model.coef_
Abc
predictor
Varchar(65000)
123
coefficient
Float
123
std_err
Float
123
t_value
Float
123
p_value
Float
1Intercept2.405268833837810.085940570861419727.98758269498047.70442131066327e-163
2alcohol0.3253120380535420.0081387996607595539.97051796495291.49650934284e-312
Out[28]:

Some other Vertica attributes might be interesting. You can check all the other attributes using the 'get_model_attribute' method.

In [29]:
model.get_model_attribute()
Abc
attr_name
Varchar(128)
Abc
Long varchar(32000000)
123
#_of_rows
Integer
1details2
2regularization1
3iteration_count1
4rejected_row_count1
5accepted_row_count1
6call_string1
Out[29]:

Let's look at the generated SQL code.

In [30]:
display(model.deploySQL())
PREDICT_LINEAR_REG("alcohol" USING PARAMETERS model_name = 'public.LR_winequality', match_by_pos = 'true')

The best way to evaluate your model is with the regression report.

In [31]:
model.regression_report()
value
explained_variance0.197418947221787
max_error3.50420051331244
median_absolute_error0.495799486687561
mean_absolute_error0.618753626974322
mean_squared_error0.611933859491423
r20.197418947221722
Out[31]:

You can also add the prediction to your vDataFrame.

In [32]:
model.predict(winequality, name = "pred_quality")
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
123
pred_quality
Float
13.8000.31000.02011.1000.03620.00114.000.992483.7500.44012.460white6.43913810570173
23.9000.22500.4004.2000.0329.00118.000.9893.5700.36012.881white6.56926292092315
34.2000.17000.3601.8000.02993.00161.000.989993.6500.89012.071white6.30901329048031
44.2000.21500.2305.1000.04164.00157.000.996883.4200.4408.030white5.00776513826615
54.4000.32000.3904.3000.0331.00127.000.989043.4600.36012.881white6.56926292092315
64.4000.46000.1002.8000.02431.00111.000.988163.4800.34013.160white6.66685653233921
74.4000.54000.0905.1000.03852.0097.000.990223.4100.40012.271white6.37407569809102
84.5000.19000.2100.9500.03389.00159.000.993323.3400.4208.050white5.00776513826615
94.6000.44500.0001.4000.05311.00178.000.994263.7900.55010.250white5.72345162198394
104.6000.52000.1502.1000.0548.0065.000.99343.9000.56013.140red6.66685653233921
114.7000.14500.2901.0000.04235.0090.000.99083.7600.49011.360white6.08129486384284
124.7000.33500.1401.3000.03669.00168.000.992123.4700.46010.550white5.8210452334
134.7000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white6.9596373665874
144.7000.60000.1702.3000.05817.00106.000.99323.8500.60012.960red6.6017941247285
154.7000.67000.0901.0000.025.009.000.987223.3000.34013.650white6.82951255136598
164.7000.78500.0003.4000.03623.00134.000.989813.5300.92013.860white6.89457495897669
174.8000.13000.3201.2000.04240.0098.000.98983.4200.64011.871white6.24395088286961
184.8000.17000.2802.9000.0322.00111.000.99023.3800.34011.371white6.08129486384284
194.8000.21000.21010.2000.03717.00112.000.993243.6600.48012.271white6.37407569809102
204.8000.22500.3801.2000.07447.00130.000.991323.3100.40010.360white5.75598282578929
214.8000.26000.23010.6000.03423.00111.000.992743.4600.28011.571white6.14635727145354
224.8000.29000.2301.1000.04438.00180.000.989243.2800.34011.960white6.27648208667496
234.8000.33000.0006.5000.02834.00163.000.99373.3500.6109.950white5.62585801056788
244.8000.34000.0006.5000.02833.00163.000.99393.3600.6109.960white5.62585801056788
254.8000.65000.1201.1000.0134.0010.000.992463.3200.36013.540white6.79698134756063
264.9000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white5.4632019915411
274.9000.33000.3101.2000.01639.00150.000.987133.3300.59014.081white6.9596373665874
284.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white5.81020149879823
294.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white5.81020149879823
304.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white5.69092041817858
314.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white5.69092041817858
324.9000.42000.0002.1000.04816.0042.000.991543.7100.74014.071red6.9596373665874
334.9000.47000.1701.9000.03560.00148.000.989643.2700.35011.560white6.14635727145354
345.0000.17000.5601.5000.02624.00115.000.99063.4800.39010.871white5.91863884481606
355.0000.20000.4001.9000.01520.0098.000.98973.3700.55012.0560white6.32527889238299
365.0000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white5.4632019915411
375.0000.24000.1905.0000.04317.00101.000.994383.6700.57010.050white5.65838921437323
385.0000.24000.2102.2000.03931.00100.000.990983.6900.62011.760white6.21141967906425
395.0000.24000.3401.1000.03449.00158.000.987743.3200.32013.171white6.66685653233921
405.0000.25500.2202.7000.04346.00153.000.992383.7500.76011.360white6.08129486384284
415.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white6.50420051331244
425.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white6.50420051331244
435.0000.27000.4001.2000.07642.00124.000.992043.3200.47010.160white5.69092041817858
445.0000.29000.5405.7000.03554.00155.000.989763.2700.34012.981white6.6017941247285
455.0000.30000.3303.7000.0354.00173.000.98873.3600.30013.071white6.63432532853386
465.0000.31000.0006.4000.04643.00166.000.9943.3000.6309.960white5.62585801056788
475.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
485.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
495.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
505.0000.33000.1804.6000.03240.00124.000.991143.1800.40011.060white5.98370125242677
515.0000.33000.23011.8000.0323.00158.000.993223.4100.64011.860white6.24395088286961
525.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white6.08129486384284
535.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white6.08129486384284
545.0000.38000.0101.6000.04826.0060.000.990843.7000.75014.060red6.9596373665874
555.0000.40000.5004.3000.04629.0080.000.99023.4900.66013.660red6.82951255136598
565.0000.42000.2402.0000.0619.0050.000.99173.7200.74014.081red6.9596373665874
575.0000.44000.04018.6000.03938.00128.000.99853.3700.57010.260white5.72345162198394
585.0000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white6.9596373665874
595.0000.55000.1408.3000.03235.00164.000.99183.5300.51012.581white6.47166930950709
605.0000.61000.1201.3000.00965.00100.000.98743.2600.37013.550white6.79698134756063
615.0000.74000.0001.2000.04116.0046.000.992584.0100.59012.560red6.47166930950709
625.0001.02000.0401.4000.04541.0085.000.99383.7500.48010.540red5.8210452334
635.0001.04000.2401.6000.0532.0096.000.99343.7400.62011.550red6.14635727145354
645.1000.11000.3201.6000.02812.0090.000.990083.5700.52012.260white6.37407569809102
655.1000.14000.2500.7000.03915.0089.000.99193.2200.4309.260white5.3981395839304
665.1000.16500.2205.7000.04742.00146.000.99343.1800.5509.960white5.62585801056788
675.1000.21000.2801.4000.04748.00148.000.991683.5000.49010.450white5.78851402959465
685.1000.23000.1801.0000.05313.0099.000.989563.2200.39011.550white6.14635727145354
695.1000.25000.3601.3000.03540.0078.000.98913.2300.64012.171white6.34154449428567
705.1000.26000.3301.1000.02746.00113.000.989463.3500.43011.471white6.11382606764819
715.1000.26000.3406.4000.03426.0099.000.994493.2300.4109.260white5.3981395839304
725.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white5.98370125242677
735.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white5.98370125242677
745.1000.30000.3002.3000.04840.00150.000.989443.2900.46012.260white6.37407569809102
755.1000.30500.1301.7500.03617.0073.000.993.4000.51012.333333333333350white6.41745063649815
765.1000.31000.3000.9000.03728.00152.000.9923.5400.56010.160white5.69092041817858
775.1000.33000.2201.6000.02718.0089.000.98933.510