VerticaPy

Python API for Vertica Data Science at Scale

Example: Pipelines

Pipeline models makes model deployment even more efficient by combining several steps of the Data Science process (preprocessing, modeling...). In this example, we use the 'Winequality' dataset to demonstrate the methods available to Pipeline models.

In [20]:
from verticapy.datasets import load_winequality
winequality = load_winequality()
display(winequality)
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
13.80.310.0211.10.03620.0114.00.992483.750.4412.460white
23.90.2250.44.20.0329.0118.00.9893.570.3612.881white
34.20.170.361.80.02993.0161.00.989993.650.8912.071white
44.20.2150.235.10.04164.0157.00.996883.420.448.030white
54.40.320.394.30.0331.0127.00.989043.460.3612.881white
64.40.460.12.80.02431.0111.00.988163.480.3413.160white
74.40.540.095.10.03852.097.00.990223.410.412.271white
84.50.190.210.950.03389.0159.00.993323.340.428.050white
94.60.4450.01.40.05311.0178.00.994263.790.5510.250white
104.60.520.152.10.0548.065.00.99343.90.5613.140red
114.70.1450.291.00.04235.090.00.99083.760.4911.360white
124.70.3350.141.30.03669.0168.00.992123.470.4610.550white
134.70.4550.181.90.03633.0106.00.987463.210.8314.071white
144.70.60.172.30.05817.0106.00.99323.850.612.960red
154.70.670.091.00.025.09.00.987223.30.3413.650white
164.70.7850.03.40.03623.0134.00.989813.530.9213.860white
174.80.130.321.20.04240.098.00.98983.420.6411.871white
184.80.170.282.90.0322.0111.00.99023.380.3411.371white
194.80.210.2110.20.03717.0112.00.993243.660.4812.271white
204.80.2250.381.20.07447.0130.00.991323.310.410.360white
214.80.260.2310.60.03423.0111.00.992743.460.2811.571white
224.80.290.231.10.04438.0180.00.989243.280.3411.960white
234.80.330.06.50.02834.0163.00.99373.350.619.950white
244.80.340.06.50.02833.0163.00.99393.360.619.960white
254.80.650.121.10.0134.010.00.992463.320.3613.540white
264.90.2350.2711.750.0334.0118.00.99543.070.59.460white
274.90.330.311.20.01639.0150.00.987133.330.5914.081white
284.90.3350.141.30.03669.0168.00.992123.470.4610.466666666666750white
294.90.3350.141.30.03669.0168.00.992123.470.4610.466666666666750white
304.90.3450.341.00.06832.0143.00.991383.240.410.150white
314.90.3450.341.00.06832.0143.00.991383.240.410.150white
324.90.420.02.10.04816.042.00.991543.710.7414.071red
334.90.470.171.90.03560.0148.00.989643.270.3511.560white
345.00.170.561.50.02624.0115.00.99063.480.3910.871white
355.00.20.41.90.01520.098.00.98973.370.5512.0560white
365.00.2350.2711.750.0334.0118.00.99543.070.59.460white
375.00.240.195.00.04317.0101.00.994383.670.5710.050white
385.00.240.212.20.03931.0100.00.990983.690.6211.760white
395.00.240.341.10.03449.0158.00.987743.320.3213.171white
405.00.2550.222.70.04346.0153.00.992383.750.7611.360white
415.00.270.324.50.03258.0178.00.989563.450.3112.671white
425.00.270.324.50.03258.0178.00.989563.450.3112.671white
435.00.270.41.20.07642.0124.00.992043.320.4710.160white
445.00.290.545.70.03554.0155.00.989763.270.3412.981white
455.00.30.333.70.0354.0173.00.98873.360.313.071white
465.00.310.06.40.04643.0166.00.9943.30.639.960white
475.00.330.161.50.04910.097.00.99173.480.4410.760white
485.00.330.161.50.04910.097.00.99173.480.4410.760white
495.00.330.161.50.04910.097.00.99173.480.4410.760white
505.00.330.184.60.03240.0124.00.991143.180.411.060white
515.00.330.2311.80.0323.0158.00.993223.410.6411.860white
525.00.350.257.80.03124.0116.00.992413.390.411.360white
535.00.350.257.80.03124.0116.00.992413.390.411.360white
545.00.380.011.60.04826.060.00.990843.70.7514.060red
555.00.40.54.30.04629.080.00.99023.490.6613.660red
565.00.420.242.00.0619.050.00.99173.720.7414.081red
575.00.440.0418.60.03938.0128.00.99853.370.5710.260white
585.00.4550.181.90.03633.0106.00.987463.210.8314.071white
595.00.550.148.30.03235.0164.00.99183.530.5112.581white
605.00.610.121.30.00965.0100.00.98743.260.3713.550white
615.00.740.01.20.04116.046.00.992584.010.5912.560red
625.01.020.041.40.04541.085.00.99383.750.4810.540red
635.01.040.241.60.0532.096.00.99343.740.6211.550red
645.10.110.321.60.02812.090.00.990083.570.5212.260white
655.10.140.250.70.03915.089.00.99193.220.439.260white
665.10.1650.225.70.04742.0146.00.99343.180.559.960white
675.10.210.281.40.04748.0148.00.991683.50.4910.450white
685.10.230.181.00.05313.099.00.989563.220.3911.550white
695.10.250.361.30.03540.078.00.98913.230.6412.171white
705.10.260.331.10.02746.0113.00.989463.350.4311.471white
715.10.260.346.40.03426.099.00.994493.230.419.260white
725.10.290.288.30.02627.0107.00.993083.360.3711.060white
735.10.290.288.30.02627.0107.00.993083.360.3711.060white
745.10.30.32.30.04840.0150.00.989443.290.4612.260white
755.10.3050.131.750.03617.073.00.993.40.5112.333333333333350white
765.10.310.30.90.03728.0152.00.9923.540.5610.160white
775.10.330.221.60.02718.089.00.98933.510.3812.571white
785.10.330.221.60.02718.089.00.98933.510.3812.571white
795.10.330.221.60.02718.089.00.98933.510.3812.571white
805.10.330.276.70.02244.0129.00.992213.360.3911.071white
815.10.350.266.80.03436.0120.00.991883.380.411.560white
825.10.350.266.80.03436.0120.00.991883.380.411.560white
835.10.350.266.80.03436.0120.00.991883.380.411.560white
845.10.390.211.70.02715.072.00.98943.50.4512.560white
855.10.420.01.80.04418.088.00.991573.680.7313.671red
865.10.420.011.50.01725.0102.00.98943.380.3612.371white
875.10.470.021.30.03418.044.00.99213.90.6212.860red
885.10.510.182.10.04216.0101.00.99243.460.8712.971red
895.10.520.062.70.05230.079.00.99323.320.439.350white
905.10.5850.01.70.04414.086.00.992643.560.9412.971red
915.20.1550.331.60.02813.059.00.989753.30.8411.981white
925.20.1550.331.60.02813.059.00.989753.30.8411.981white
935.20.160.340.80.02926.077.00.991553.250.5110.160white
945.20.170.270.70.0311.068.00.992183.30.419.850white
955.20.1850.221.00.0347.0123.00.992183.550.4410.1560white
965.20.20.273.20.04716.093.00.992353.440.5310.171white
975.20.210.311.70.04817.061.00.989533.240.3712.071white
985.20.220.466.20.06641.0187.00.993623.190.429.7333333333333350white
995.20.240.157.10.04332.0134.00.993783.240.489.960white
1005.20.240.453.80.02721.0128.00.9923.550.4911.281white
Rows: 1-100 | Columns: 14

We'll start by doing the following to create a Pipeline model to predict the quality of each wine.

  1. Normalize the data using a standard StandardScaler.
  2. Create a linear regression model to predict wine quality.
In [21]:
from verticapy.learn.linear_model import LinearRegression
from verticapy.learn.preprocessing import StandardScaler
from verticapy.learn.pipeline import Pipeline

model1 = StandardScaler("public.StdWine")
model2 = LinearRegression("public.LR_winequality")

model = Pipeline([("WineNorm", model1),
                  ("WineLR", model2)])
model.fit("public.winequality", 
          ["alcohol"], 
          "quality")

Fitting the model creates new model attributes, which make methods easier to use.

In [22]:
model.X
Out[22]:
['"alcohol"']
In [23]:
model.y
Out[23]:
'"quality"'
In [24]:
model.input_relation
Out[24]:
'(SELECT "fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "alcohol", "quality", "good", "color" FROM public.winequality) VERTICAPY_SUBTABLE'
In [25]:
model.test_relation
Out[25]:
'(SELECT "fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "alcohol", "quality", "good", "color" FROM (SELECT APPLY_NORMALIZE("fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "alcohol", "quality", "good", "color" USING PARAMETERS model_name = \'public.StdWine\', match_by_pos = \'true\', key_columns = \'"fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "quality", "good", "color"\', exclude_columns = \'"fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "quality", "good", "color"\') FROM (SELECT "fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar", "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density", "pH", "sulphates", "alcohol", "quality", "good", "color" FROM public.winequality) VERTICAPY_SUBTABLE) VERTICAPY_SUBTABLE) VERTICAPY_SUBTABLE'

Since we didn't write a test relation when fitting the model, the model will use the training relation as the test relation.

Let's compute the R2 of the model.

In [26]:
model.score(method = "r2")
Out[26]:
0.197418947221811

The 'score' method uses the 'y' attribute and the model prediction in the 'test_relation' to compute the accuracy of the model. You can change these attributes at any time to deploy the models on different columns.

You can access Pipeline models using indexes.

In [27]:
model[-1]
Out[27]:

=======
details
=======
predictor|coefficient|std_err | t_value |p_value 
---------+-----------+--------+---------+--------
Intercept|  5.81838  | 0.00971|599.43139| 0.00000
 alcohol |  0.38800  | 0.00971|39.97052 | 0.00000


==============
regularization
==============
type| lambda 
----+--------
none| 1.00000


===========
call_string
===========
linear_reg('public.LR_winequality', '"public"._VERTICAPY_TEMPORARY_VIEW_dbadmin_912240', '"quality"', '"alcohol"'
USING PARAMETERS optimizer='newton', epsilon=1e-06, max_iterations=100, regularization='none', lambda=1, alpha=0.5)

===============
Additional Info
===============
       Name       |Value
------------------+-----
 iteration_count  |  1  
rejected_row_count|  0  
accepted_row_count|6497 

Models have many useful attributes. For example, the 'coef_' attribute of our last step gives us the p-value of the model.

In [28]:
model[-1].coef_
Out[28]:
Abc
predictor
Varchar(65000)
123
coefficient
Float
123
std_err
Float
123
t_value
Float
123
p_value
Float
1Intercept5.818377712790520.0097064948926276599.4313886890080.0
2alcohol0.3880034898355760.0097072419770922839.97051796495951.496509342523e-312
Rows: 1-2 | Columns: 5

The 'report' method allows to evaluate the quality of our model.

In [30]:
model.report()
Out[30]:
value
explained_variance0.197418947221787
max_error3.50420028103088
median_absolute_error0.495799718969121
mean_absolute_error0.618753622791327
mean_squared_error0.611933859491369
root_mean_squared_error0.7822620146033994
r20.197418947221811
r2_adj0.1972953781605672
Rows: 1-8 | Columns: 2

The Pipeline's final estimator determines which methods you'll have available to you to apply on the vDataFrame. For example, if your Pipeline model's final estimator implements a prediction method, you can use the Pipeline's 'predict' method on your vDataFrame.

In [31]:
model.predict(winequality, 
              name = "pred_quality")
Out[31]:
123
fixed_acidity
Numeric(6,3)
123
volatile_acidity
Numeric(7,4)
123
citric_acid
Numeric(6,3)
123
residual_sugar
Numeric(7,3)
123
chlorides
Float
123
free_sulfur_dioxide
Numeric(7,2)
123
total_sulfur_dioxide
Numeric(7,2)
123
density
Float
123
pH
Numeric(6,3)
123
sulphates
Numeric(6,3)
123
alcohol
Float
123
quality
Int
123
good
Int
Abc
color
Varchar(20)
123
pred_quality
Float
13.80.310.0211.10.03620.0114.00.992483.750.441.5998829311888960white6.43913787342018
23.90.2250.44.20.0329.0118.00.9893.570.361.9352531498344681white6.56926268864158
34.20.170.361.80.02993.0161.00.989993.650.891.2645127125433371white6.30901305819878
44.20.2150.235.10.04164.0157.00.996883.420.44-2.0891894739123230white5.00776490598479
54.40.320.394.30.0331.0127.00.989043.460.361.9352531498344681white6.56926268864158
64.40.460.12.80.02431.0111.00.988163.480.342.1867808138186360white6.66685630005763
74.40.540.095.10.03852.097.00.990223.410.41.4321978218661171white6.37407546580948
84.50.190.210.950.03389.0159.00.993323.340.42-2.0891894739123250white5.00776490598479
94.60.4450.01.40.05311.0178.00.994263.790.55-0.24465327136171350white5.72345138970248
104.60.520.152.10.0548.065.00.99343.90.562.1867808138186340red6.66685630005763
114.70.1450.291.00.04235.090.00.99083.760.490.67761482991359160white6.08129463156133
124.70.3350.141.30.03669.0168.00.992123.470.460.0068743926224610850white5.82104500111853
134.70.4550.181.90.03633.0106.00.987463.210.832.9413638057711571white6.95963713430578
144.70.60.172.30.05817.0106.00.99323.850.62.0190957044958560red6.60179389244693
154.70.670.091.00.025.09.00.987223.30.342.6059935871255950white6.82951231908438
164.70.7850.03.40.03623.0134.00.989813.530.922.7736786964483760white6.89457472669508
174.80.130.321.20.04240.098.00.98983.420.641.0968276032205571white6.24395065058808
184.80.170.282.90.0322.0111.00.99023.380.340.67761482991359171white6.08129463156133
194.80.210.2110.20.03717.0112.00.993243.660.481.4321978218661171white6.37407546580948
204.80.2250.381.20.07447.0130.00.991323.310.4-0.16081071670032160white5.75598259350783
214.80.260.2310.60.03423.0111.00.992743.460.280.84529993923637371white6.14635703917203
224.80.290.231.10.04438.0180.00.989243.280.341.1806701578819460white6.27648185439343
234.80.330.06.50.02834.0163.00.99373.350.61-0.49618093534588650white5.62585777828644
244.80.340.06.50.02833.0163.00.99393.360.61-0.49618093534588660white5.62585777828644
254.80.650.121.10.0134.010.00.992463.320.362.522151032464240white6.79698111527903
264.90.2350.2711.750.0334.0118.00.99543.070.5-0.91539370865284260white5.46320175925969
274.90.330.311.20.01639.0150.00.987133.330.592.9413638057711581white6.95963713430578
284.90.3350.141.30.03669.0168.00.992123.470.46-0.021073125597974350white5.81020126651676
294.90.3350.141.30.03669.0168.00.992123.470.46-0.021073125597974350white5.81020126651676
304.90.3450.341.00.06832.0143.00.991383.240.4-0.32849582602310450white5.69092018589713
314.90.3450.341.00.06832.0143.00.991383.240.4-0.32849582602310450white5.69092018589713
324.90.420.02.10.04816.042.00.991543.710.742.9413638057711571red6.95963713430578
334.90.470.171.90.03560.0148.00.989643.270.350.84529993923637360white6.14635703917203
345.00.170.561.50.02624.0115.00.99063.480.390.25840205660663571white5.91863861253458
355.00.20.41.90.01520.098.00.98973.370.551.3064339898740360white6.32527866010146
365.00.2350.2711.750.0334.0118.00.99543.070.5-0.91539370865284260white5.46320175925969
375.00.240.195.00.04317.0101.00.994383.670.57-0.41233838068449550white5.65838898209179
385.00.240.212.20.03931.0100.00.990983.690.621.0129850485591560white6.21141944678273
395.00.240.341.10.03449.0158.00.987743.320.322.1867808138186371white6.66685630005763
405.00.2550.222.70.04346.0153.00.992383.750.760.67761482991359160white6.08129463156133
415.00.270.324.50.03258.0178.00.989563.450.311.7675680405116871white6.50420028103088
425.00.270.324.50.03258.0178.00.989563.450.311.7675680405116871white6.50420028103088
435.00.270.41.20.07642.0124.00.992043.320.47-0.32849582602310460white5.69092018589713
445.00.290.545.70.03554.0155.00.989763.270.342.0190957044958581white6.60179389244693
455.00.30.333.70.0354.0173.00.98873.360.32.1029382591572471white6.63432509625228
465.00.310.06.40.04643.0166.00.9943.30.63-0.49618093534588660white5.62585777828644
475.00.330.161.50.04910.097.00.99173.480.440.17455950194524360white5.88610740872923
485.00.330.161.50.04910.097.00.99173.480.440.17455950194524360white5.88610740872923
495.00.330.161.50.04910.097.00.99173.480.440.17455950194524360white5.88610740872923
505.00.330.184.60.03240.0124.00.991143.180.40.42608716592941760white5.98370102014528
515.00.330.2311.80.0323.0158.00.993223.410.641.0968276032205560white6.24395065058808
525.00.350.257.80.03124.0116.00.992413.390.40.67761482991359160white6.08129463156133
535.00.350.257.80.03124.0116.00.992413.390.40.67761482991359160white6.08129463156133
545.00.380.011.60.04826.060.00.990843.70.752.9413638057711560red6.95963713430578
555.00.40.54.30.04629.080.00.99023.490.662.6059935871255960red6.82951231908438
565.00.420.242.00.0619.050.00.99173.720.742.9413638057711581red6.95963713430578
575.00.440.0418.60.03938.0128.00.99853.370.57-0.24465327136171360white5.72345138970248
585.00.4550.181.90.03633.0106.00.987463.210.832.9413638057711571white6.95963713430578
595.00.550.148.30.03235.0164.00.99183.530.511.6837254858502981white6.47166907722553
605.00.610.121.30.00965.0100.00.98743.260.372.522151032464250white6.79698111527903
615.00.740.01.20.04116.046.00.992584.010.591.6837254858502960red6.47166907722553
625.01.020.041.40.04541.085.00.99383.750.480.0068743926224610840red5.82104500111853
635.01.040.241.60.0532.096.00.99343.740.620.84529993923637350red6.14635703917203
645.10.110.321.60.02812.090.00.990083.570.521.4321978218661160white6.37407546580948
655.10.140.250.70.03915.089.00.99193.220.43-1.0830788179756360white5.39813935164899
665.10.1650.225.70.04742.0146.00.99343.180.55-0.49618093534588660white5.62585777828644
675.10.210.281.40.04748.0148.00.991683.50.49-0.076968162038929850white5.78851379731318
685.10.230.181.00.05313.099.00.989563.220.390.84529993923637350white6.14635703917203
695.10.250.361.30.03540.078.00.98913.230.641.3483552672047271white6.34154426200413
705.10.260.331.10.02746.0113.00.989463.350.430.76145738457498271white6.11382583536668
715.10.260.346.40.03426.099.00.994493.230.41-1.0830788179756360white5.39813935164899
725.10.290.288.30.02627.0107.00.993083.360.370.42608716592941760white5.98370102014528
735.10.290.288.30.02627.0107.00.993083.360.370.42608716592941760white5.98370102014528
745.10.30.32.30.04840.0150.00.989443.290.461.4321978218661160white6.37407546580948
755.10.3050.131.750.03617.073.00.993.40.511.5439878947479450white6.4174504042166
765.10.310.30.90.03728.0152.00.9923.540.56-0.32849582602310460white5.69092018589713
775.10.330.221.60.02718.089.00.98933.510.381.6837254858502971white6.47166907722553
785.10.330.221.60.02718.089.00.98933.510.381.6837254858502971white6.47166907722553
795.10.330.221.60.02718.089.00.98933.510.381.6837254858502971white6.47166907722553
805.10.330.276.70.02244.0129.00.992213.360.390.42608716592941771white5.98370102014528
815.10.350.266.80.03436.0120.00.991883.380.40.84529993923637360white6.14635703917203
825.10.350.266.80.03436.0120.00.991883.380.40.84529993923637360white6.14635703917203
835.10.350.266.80.03436.0120.00.991883.380.40.84529993923637360white6.14635703917203
845.10.390.211.70.02715.072.00.98943.50.451.6837254858502960white6.47166907722553
855.10.420.01.80.04418.088.00.991573.680.732.6059935871255971red6.82951231908438
865.10.420.011.50.01725.0102.00.98943.380.361.516040376527571white6.40660666961483
875.10.470.021.30.03418.044.00.99213.90.621.9352531498344660red6.56926268864158
885.10.510.182.10.04216.0101.00.99243.460.872.0190957044958571red6.60179389244693
895.10.520.062.70.05230.079.00.99323.320.43-0.99923626331423350white5.43067055545434
905.10.5850.01.70.04414.086.00.992643.560.942.0190957044958571red6.60179389244693
915.20.1550.331.60.02813.059.00.989753.30.841.1806701578819481white6.27648185439343
925.20.1550.331.60.02813.059.00.989753.30.841.1806701578819481white6.27648185439343
935.20.160.340.80.02926.077.00.991553.250.51-0.32849582602310460white5.69092018589713
945.20.170.270.70.0311.068.00.992183.30.41-0.58002349000727750white5.59332657448109
955.20.1850.221.00.0347.0123.00.992183.550.44-0.28657454869240860white5.70718578779981
965.20.20.273.20.04716.093.00.992353.440.53-0.32849582602310471white5.69092018589713
975.20.210.311.70.04817.061.00.989533.240.371.2645127125433371white