VerticaPy

Python API for Vertica Data Science at Scale

Example: Methods in a Regression Model

In this example, we use the 'Winequality' dataset to demonstrate the methods available to regressor models.

In [19]:
from verticapy.datasets import load_winequality
winequality = load_winequality()
display(winequality)
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
13.8000.31000.02011.1000.03620.00114.000.992483.7500.44012.460white
23.9000.22500.4004.2000.0329.00118.000.9893.5700.36012.881white
34.2000.17000.3601.8000.02993.00161.000.989993.6500.89012.071white
44.2000.21500.2305.1000.04164.00157.000.996883.4200.4408.030white
54.4000.32000.3904.3000.0331.00127.000.989043.4600.36012.881white
64.4000.46000.1002.8000.02431.00111.000.988163.4800.34013.160white
74.4000.54000.0905.1000.03852.0097.000.990223.4100.40012.271white
84.5000.19000.2100.9500.03389.00159.000.993323.3400.4208.050white
94.6000.44500.0001.4000.05311.00178.000.994263.7900.55010.250white
104.6000.52000.1502.1000.0548.0065.000.99343.9000.56013.140red
114.7000.14500.2901.0000.04235.0090.000.99083.7600.49011.360white
124.7000.33500.1401.3000.03669.00168.000.992123.4700.46010.550white
134.7000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white
144.7000.60000.1702.3000.05817.00106.000.99323.8500.60012.960red
154.7000.67000.0901.0000.025.009.000.987223.3000.34013.650white
164.7000.78500.0003.4000.03623.00134.000.989813.5300.92013.860white
174.8000.13000.3201.2000.04240.0098.000.98983.4200.64011.871white
184.8000.17000.2802.9000.0322.00111.000.99023.3800.34011.371white
194.8000.21000.21010.2000.03717.00112.000.993243.6600.48012.271white
204.8000.22500.3801.2000.07447.00130.000.991323.3100.40010.360white
214.8000.26000.23010.6000.03423.00111.000.992743.4600.28011.571white
224.8000.29000.2301.1000.04438.00180.000.989243.2800.34011.960white
234.8000.33000.0006.5000.02834.00163.000.99373.3500.6109.950white
244.8000.34000.0006.5000.02833.00163.000.99393.3600.6109.960white
254.8000.65000.1201.1000.0134.0010.000.992463.3200.36013.540white
264.9000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white
274.9000.33000.3101.2000.01639.00150.000.987133.3300.59014.081white
284.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white
294.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white
304.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white
314.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white
324.9000.42000.0002.1000.04816.0042.000.991543.7100.74014.071red
334.9000.47000.1701.9000.03560.00148.000.989643.2700.35011.560white
345.0000.17000.5601.5000.02624.00115.000.99063.4800.39010.871white
355.0000.20000.4001.9000.01520.0098.000.98973.3700.55012.0560white
365.0000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white
375.0000.24000.1905.0000.04317.00101.000.994383.6700.57010.050white
385.0000.24000.2102.2000.03931.00100.000.990983.6900.62011.760white
395.0000.24000.3401.1000.03449.00158.000.987743.3200.32013.171white
405.0000.25500.2202.7000.04346.00153.000.992383.7500.76011.360white
415.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white
425.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white
435.0000.27000.4001.2000.07642.00124.000.992043.3200.47010.160white
445.0000.29000.5405.7000.03554.00155.000.989763.2700.34012.981white
455.0000.30000.3303.7000.0354.00173.000.98873.3600.30013.071white
465.0000.31000.0006.4000.04643.00166.000.9943.3000.6309.960white
475.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
485.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
495.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white
505.0000.33000.1804.6000.03240.00124.000.991143.1800.40011.060white
515.0000.33000.23011.8000.0323.00158.000.993223.4100.64011.860white
525.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white
535.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white
545.0000.38000.0101.6000.04826.0060.000.990843.7000.75014.060red
555.0000.40000.5004.3000.04629.0080.000.99023.4900.66013.660red
565.0000.42000.2402.0000.0619.0050.000.99173.7200.74014.081red
575.0000.44000.04018.6000.03938.00128.000.99853.3700.57010.260white
585.0000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white
595.0000.55000.1408.3000.03235.00164.000.99183.5300.51012.581white
605.0000.61000.1201.3000.00965.00100.000.98743.2600.37013.550white
615.0000.74000.0001.2000.04116.0046.000.992584.0100.59012.560red
625.0001.02000.0401.4000.04541.0085.000.99383.7500.48010.540red
635.0001.04000.2401.6000.0532.0096.000.99343.7400.62011.550red
645.1000.11000.3201.6000.02812.0090.000.990083.5700.52012.260white
655.1000.14000.2500.7000.03915.0089.000.99193.2200.4309.260white
665.1000.16500.2205.7000.04742.00146.000.99343.1800.5509.960white
675.1000.21000.2801.4000.04748.00148.000.991683.5000.49010.450white
685.1000.23000.1801.0000.05313.0099.000.989563.2200.39011.550white
695.1000.25000.3601.3000.03540.0078.000.98913.2300.64012.171white
705.1000.26000.3301.1000.02746.00113.000.989463.3500.43011.471white
715.1000.26000.3406.4000.03426.0099.000.994493.2300.4109.260white
725.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white
735.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white
745.1000.30000.3002.3000.04840.00150.000.989443.2900.46012.260white
755.1000.30500.1301.7500.03617.0073.000.993.4000.51012.333333333333350white
765.1000.31000.3000.9000.03728.00152.000.9923.5400.56010.160white
775.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
785.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
795.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white
805.1000.33000.2706.7000.02244.00129.000.992213.3600.39011.071white
815.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
825.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
835.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white
845.1000.39000.2101.7000.02715.0072.000.98943.5000.45012.560white
855.1000.42000.0001.8000.04418.0088.000.991573.6800.73013.671red
865.1000.42000.0101.5000.01725.00102.000.98943.3800.36012.371white
875.1000.47000.0201.3000.03418.0044.000.99213.9000.62012.860red
885.1000.51000.1802.1000.04216.00101.000.99243.4600.87012.971red
895.1000.52000.0602.7000.05230.0079.000.99323.3200.4309.350white
905.1000.58500.0001.7000.04414.0086.000.992643.5600.94012.971red
915.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white
925.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white
935.2000.16000.3400.8000.02926.0077.000.991553.2500.51010.160white
945.2000.17000.2700.7000.0311.0068.000.992183.3000.4109.850white
955.2000.18500.2201.0000.0347.00123.000.992183.5500.44010.1560white
965.2000.20000.2703.2000.04716.0093.000.992353.4400.53010.171white
975.2000.21000.3101.7000.04817.0061.000.989533.2400.37012.071white
985.2000.22000.4606.2000.06641.00187.000.993623.1900.4209.7333333333333350white
995.2000.24000.1507.1000.04332.00134.000.993783.2400.4809.960white
1005.2000.24000.4503.8000.02721.00128.000.9923.5500.49011.281white
Rows: 1-100 of 6497 | Columns: 14

Let's create a linear regression to predict the quality of each wine.

In [22]:
from verticapy.learn.linear_model import LinearRegression
model = LinearRegression("public.LR_winequality")
model.fit("public.winequality", ["alcohol"], "quality")
Out[22]:

=======
details
=======
predictor|coefficient|std_err |t_value |p_value 
---------+-----------+--------+--------+--------
Intercept|  2.40527  | 0.08594|27.98758| 0.00000
 alcohol |  0.32531  | 0.00814|39.97052| 0.00000


==============
regularization
==============
type| lambda 
----+--------
none| 1.00000


===========
call_string
===========
linear_reg('public.LR_winequality', 'public.winequality', '"quality"', '"alcohol"'
USING PARAMETERS optimizer='cgd', epsilon=0.0001, max_iterations=100, regularization='none', lambda=1, alpha=0.5)

===============
Additional Info
===============
       Name       |Value
------------------+-----
 iteration_count  |  1  
rejected_row_count|  0  
accepted_row_count|6497 

Fitting the model creates new model attributes, which make methods easier to use.

In [23]:
model.X
Out[23]:
['"alcohol"']
In [24]:
model.y
Out[24]:
'"quality"'
In [25]:
model.input_relation
Out[25]:
'public.winequality'
In [26]:
model.test_relation
Out[26]:
'public.winequality'

Since we didn't write a test relation when fitting the model, the model will use the training relation as the test relation.

Let's compute the R2 of the model.

In [27]:
model.score(method = "r2")
Out[27]:
0.197418947221722

The 'score' method uses the 'y' attribute and the model prediction in the 'test_relation' to compute the accuracy of the model. You can change these attributes at any time to deploy the models on different columns.

Models have many useful attributes. For example, the 'coef_' attribute gives us the p-value of the model.

In [28]:
model.coef_
Abc
predictor
Varchar(65000)
123
coefficient
Float
123
std_err
Float
123
t_value
Float
123
p_value
Float
1Intercept2.405268833837810.085940570861419727.98758269498047.70442131066327e-163
2alcohol0.3253120380535420.0081387996607595539.97051796495291.49650934284e-312
Out[28]:

You can view other attributes using the 'get_attr' method.

In [29]:
model.get_attr()
Abc
attr_name
Varchar(128)
Abc
Long varchar(32000000)
123
#_of_rows
Integer
1details2
2regularization1
3iteration_count1
4rejected_row_count1
5accepted_row_count1
6call_string1
Out[29]:

Let's look at the SQL query for our model.

In [30]:
display(model.deploySQL())
PREDICT_LINEAR_REG("alcohol" USING PARAMETERS model_name = 'public.LR_winequality', match_by_pos = 'true')

The best way to evaluate this type of model is with the 'report' method.

In [31]:
model.report()
value
explained_variance0.197418947221787
max_error3.50420051331244
median_absolute_error0.495799486687561
mean_absolute_error0.618753626974322
mean_squared_error0.611933859491423
r20.197418947221722
Out[31]:

You can add this prediction to your vDataFrame.

In [32]:
model.predict(winequality, name = "pred_quality")
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
123
pred_quality
Float
13.8000.31000.02011.1000.03620.00114.000.992483.7500.44012.460white6.43913810570173
23.9000.22500.4004.2000.0329.00118.000.9893.5700.36012.881white6.56926292092315
34.2000.17000.3601.8000.02993.00161.000.989993.6500.89012.071white6.30901329048031
44.2000.21500.2305.1000.04164.00157.000.996883.4200.4408.030white5.00776513826615
54.4000.32000.3904.3000.0331.00127.000.989043.4600.36012.881white6.56926292092315
64.4000.46000.1002.8000.02431.00111.000.988163.4800.34013.160white6.66685653233921
74.4000.54000.0905.1000.03852.0097.000.990223.4100.40012.271white6.37407569809102
84.5000.19000.2100.9500.03389.00159.000.993323.3400.4208.050white5.00776513826615
94.6000.44500.0001.4000.05311.00178.000.994263.7900.55010.250white5.72345162198394
104.6000.52000.1502.1000.0548.0065.000.99343.9000.56013.140red6.66685653233921
114.7000.14500.2901.0000.04235.0090.000.99083.7600.49011.360white6.08129486384284
124.7000.33500.1401.3000.03669.00168.000.992123.4700.46010.550white5.8210452334
134.7000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white6.9596373665874
144.7000.60000.1702.3000.05817.00106.000.99323.8500.60012.960red6.6017941247285
154.7000.67000.0901.0000.025.009.000.987223.3000.34013.650white6.82951255136598
164.7000.78500.0003.4000.03623.00134.000.989813.5300.92013.860white6.89457495897669
174.8000.13000.3201.2000.04240.0098.000.98983.4200.64011.871white6.24395088286961
184.8000.17000.2802.9000.0322.00111.000.99023.3800.34011.371white6.08129486384284
194.8000.21000.21010.2000.03717.00112.000.993243.6600.48012.271white6.37407569809102
204.8000.22500.3801.2000.07447.00130.000.991323.3100.40010.360white5.75598282578929
214.8000.26000.23010.6000.03423.00111.000.992743.4600.28011.571white6.14635727145354
224.8000.29000.2301.1000.04438.00180.000.989243.2800.34011.960white6.27648208667496
234.8000.33000.0006.5000.02834.00163.000.99373.3500.6109.950white5.62585801056788
244.8000.34000.0006.5000.02833.00163.000.99393.3600.6109.960white5.62585801056788
254.8000.65000.1201.1000.0134.0010.000.992463.3200.36013.540white6.79698134756063
264.9000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white5.4632019915411
274.9000.33000.3101.2000.01639.00150.000.987133.3300.59014.081white6.9596373665874
284.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white5.81020149879823
294.9000.33500.1401.3000.03669.00168.000.992123.4700.46010.466666666666750white5.81020149879823
304.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white5.69092041817858
314.9000.34500.3401.0000.06832.00143.000.991383.2400.40010.150white5.69092041817858
324.9000.42000.0002.1000.04816.0042.000.991543.7100.74014.071red6.9596373665874
334.9000.47000.1701.9000.03560.00148.000.989643.2700.35011.560white6.14635727145354
345.0000.17000.5601.5000.02624.00115.000.99063.4800.39010.871white5.91863884481606
355.0000.20000.4001.9000.01520.0098.000.98973.3700.55012.0560white6.32527889238299
365.0000.23500.27011.7500.0334.00118.000.99543.0700.5009.460white5.4632019915411
375.0000.24000.1905.0000.04317.00101.000.994383.6700.57010.050white5.65838921437323
385.0000.24000.2102.2000.03931.00100.000.990983.6900.62011.760white6.21141967906425
395.0000.24000.3401.1000.03449.00158.000.987743.3200.32013.171white6.66685653233921
405.0000.25500.2202.7000.04346.00153.000.992383.7500.76011.360white6.08129486384284
415.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white6.50420051331244
425.0000.27000.3204.5000.03258.00178.000.989563.4500.31012.671white6.50420051331244
435.0000.27000.4001.2000.07642.00124.000.992043.3200.47010.160white5.69092041817858
445.0000.29000.5405.7000.03554.00155.000.989763.2700.34012.981white6.6017941247285
455.0000.30000.3303.7000.0354.00173.000.98873.3600.30013.071white6.63432532853386
465.0000.31000.0006.4000.04643.00166.000.9943.3000.6309.960white5.62585801056788
475.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
485.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
495.0000.33000.1601.5000.04910.0097.000.99173.4800.44010.760white5.88610764101071
505.0000.33000.1804.6000.03240.00124.000.991143.1800.40011.060white5.98370125242677
515.0000.33000.23011.8000.0323.00158.000.993223.4100.64011.860white6.24395088286961
525.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white6.08129486384284
535.0000.35000.2507.8000.03124.00116.000.992413.3900.40011.360white6.08129486384284
545.0000.38000.0101.6000.04826.0060.000.990843.7000.75014.060red6.9596373665874
555.0000.40000.5004.3000.04629.0080.000.99023.4900.66013.660red6.82951255136598
565.0000.42000.2402.0000.0619.0050.000.99173.7200.74014.081red6.9596373665874
575.0000.44000.04018.6000.03938.00128.000.99853.3700.57010.260white5.72345162198394
585.0000.45500.1801.9000.03633.00106.000.987463.2100.83014.071white6.9596373665874
595.0000.55000.1408.3000.03235.00164.000.99183.5300.51012.581white6.47166930950709
605.0000.61000.1201.3000.00965.00100.000.98743.2600.37013.550white6.79698134756063
615.0000.74000.0001.2000.04116.0046.000.992584.0100.59012.560red6.47166930950709
625.0001.02000.0401.4000.04541.0085.000.99383.7500.48010.540red5.8210452334
635.0001.04000.2401.6000.0532.0096.000.99343.7400.62011.550red6.14635727145354
645.1000.11000.3201.6000.02812.0090.000.990083.5700.52012.260white6.37407569809102
655.1000.14000.2500.7000.03915.0089.000.99193.2200.4309.260white5.3981395839304
665.1000.16500.2205.7000.04742.00146.000.99343.1800.5509.960white5.62585801056788
675.1000.21000.2801.4000.04748.00148.000.991683.5000.49010.450white5.78851402959465
685.1000.23000.1801.0000.05313.0099.000.989563.2200.39011.550white6.14635727145354
695.1000.25000.3601.3000.03540.0078.000.98913.2300.64012.171white6.34154449428567
705.1000.26000.3301.1000.02746.00113.000.989463.3500.43011.471white6.11382606764819
715.1000.26000.3406.4000.03426.0099.000.994493.2300.4109.260white5.3981395839304
725.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white5.98370125242677
735.1000.29000.2808.3000.02627.00107.000.993083.3600.37011.060white5.98370125242677
745.1000.30000.3002.3000.04840.00150.000.989443.2900.46012.260white6.37407569809102
755.1000.30500.1301.7500.03617.0073.000.993.4000.51012.333333333333350white6.41745063649815
765.1000.31000.3000.9000.03728.00152.000.9923.5400.56010.160white5.69092041817858
775.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white6.47166930950709
785.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white6.47166930950709
795.1000.33000.2201.6000.02718.0089.000.98933.5100.38012.571white6.47166930950709
805.1000.33000.2706.7000.02244.00129.000.992213.3600.39011.071white5.98370125242677
815.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white6.14635727145354
825.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white6.14635727145354
835.1000.35000.2606.8000.03436.00120.000.991883.3800.40011.560white6.14635727145354
845.1000.39000.2101.7000.02715.0072.000.98943.5000.45012.560white6.47166930950709
855.1000.42000.0001.8000.04418.0088.000.991573.6800.73013.671red6.82951255136598
865.1000.42000.0101.5000.01725.00102.000.98943.3800.36012.371white6.40660690189638
875.1000.47000.0201.3000.03418.0044.000.99213.9000.62012.860red6.56926292092315
885.1000.51000.1802.1000.04216.00101.000.99243.4600.87012.971red6.6017941247285
895.1000.52000.0602.7000.05230.0079.000.99323.3200.4309.350white5.43067078773575
905.1000.58500.0001.7000.04414.0086.000.992643.5600.94012.971red6.6017941247285
915.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white6.27648208667496
925.2000.15500.3301.6000.02813.0059.000.989753.3000.84011.981white6.27648208667496
935.2000.16000.3400.8000.02926.0077.000.991553.2500.51010.160white5.69092041817858
945.2000.17000.2700.7000.0311.0068.000.992183.3000.4109.850white5.59332680676252
955.2000.18500.2201.0000.0347.00123.000.992183.5500.44010.1560white5.70718602008126
965.2000.20000.2703.2000.04716.0093.000.992353.4400.53010.171white5.69092041817858
975.2000.21000.3101.7000.04817.0061.000.989533.2400.37012.071white6.30901329048031
985.2000.22000.4606.2000.06641.00187.000.993623.1900.4209.7333333333333350white5.57163933755895
995.2000.24000.1507.1000.04332.00134.000.993783.2400.4809.960white5.62585801056788
1005.2000.24000.4503.8000.02721.00128.000.9923.5500.49011.281white6.04876366003748
Out[32]:
Rows: 1-100 of 6497 | Columns: 15

The vDataFrame has also a method 'score' to evaluate the quality of your model.

In [33]:
winequality.score("quality", "pred_quality", method = "mse")
Out[33]:
0.611933859491424

Let's examine our prediction with a histogram and regression plot.

In [34]:
winequality.hist(["quality", "pred_quality"], h = (2, 2))
In [35]:
model.plot()