VerticaPy

Python API for Vertica Data Science at Scale

Binary Classification

This example demonstrates various Binary Classifier methods with the Titanic dataset

In [1]:
from verticapy.learn.datasets import load_titanic
titanic = load_titanic()
display(titanic)
123
pclass
Int
123
survived
Int
Abc
Varchar(96)
Abc
gender
Char(1)
123
age
Numeric(6,3)
123
sibsp
Int
123
parch
Int
Abc
ticket
Varchar(24)
123
fare
Numeric(7,4)
Abc
cabin
Char(10)
Abc
embarked
Char(1)
Abc
boat
Char(4)
123
body
Int
Abc
homedest
Varchar(64)
123
split
Float
110f2.00012113781151.5500C22 C26 S[null][null]Montreal, PQ / Chesterville, ON0.663092836970463
210m30.00012113781151.5500C22 C26 S[null]135Montreal, PQ / Chesterville, ON0.161721162730828
310f25.00012113781151.5500C22 C26 S[null][null]Montreal, PQ / Chesterville, ON0.527904006186873
410m39.000001120500.0000A36 S[null][null]Belfast, NI0.936014187755063
510m71.00000PC 1760949.5042[null]C[null]22Montevideo, Uruguay0.40874306159094
610m47.00010PC 17757227.5250C62 C64 C[null]124New York, NY0.0771260927431285
710m[null]00PC 1731825.9250[null]S[null][null]New York, NY0.360885151894763
810m24.00001PC 17558247.5208B58 B60 C[null][null]Montreal, PQ0.78418595273979
910m36.000001305075.2417C6 CA [null]Winnipeg, MN0.392075354699045
1010m25.000001390526.0000[null]C[null]148San Francisco, CA0.760954634286463
1110m45.0000011378435.5000T S[null][null]Trenton, NJ0.083728444064036
1210m42.0000011048926.5500D22 S[null][null]London / Winnipeg, MB0.187408909434453
1310m41.0000011305430.5000A21 S[null][null]Pomeroy, WA0.464197367895395
1410m48.00000PC 1759150.4958B10 C[null]208Omaha, NE0.989756429567933
1510m[null]0011237939.6000[null]C[null][null]Philadelphia, PA0.197682256577536
1610m45.0000011305026.5500B38 S[null][null]Washington, DC0.567665293579921
1710m[null]0011379831.0000[null]S[null][null][null]0.221641422947869
1810m33.000006955.0000B51 B53 B5S[null][null]New York, NY0.738475721329451
1910m28.0000011305947.1000[null]S[null][null]Montevideo, Uruguay0.366747971624136
2010m17.0000011305947.1000[null]S[null][null]Montevideo, Uruguay0.268853009678423
2110m49.000001992426.0000[null]S[null][null]Ascot, Berkshire / Rochester, NY0.513710137922317
2210m36.000101987778.8500C46 S[null]172Little Onn Hall, Staffs0.453328180825338
2310m46.00010W.E.P. 573461.1750E31 S[null][null]Amenia, ND0.169697929639369
2410m[null]001120510.0000[null]S[null][null]Liverpool, England / Belfast0.0851756541524082
2510m27.0001013508136.7792C89 C[null][null]Los Angeles, CA0.0152691958937794
2610m[null]0011046552.0000A14 S[null][null]Stoughton, MA0.916533817071468
2710m47.00000572725.5875E58 S[null][null]Victoria, BC0.568090511020273
2810m37.00011PC 1775683.1583E52 C[null][null]Lakewood, NJ0.627309911651537
2910m[null]0011379126.5500[null]S[null][null]Roachdale, IN0.747528738109395
3010m70.00011WE/P 573571.0000B22 S[null]269Milwaukee, WI0.335725951706991
3110m39.00010PC 1759971.2833C85 C[null][null]New York, NY0.481107967440039
3210m31.00010F.C. 1275052.0000B71 S[null][null]Montreal, PQ0.0527018732391298
3310m50.00010PC 17761106.4250C86 C[null]62Deephaven, MN / Cedar Rapids, IA0.97885028552264
3410m39.00000PC 1758029.7000A18 C[null]133Philadelphia, PA0.157812902005389
3510f36.00000PC 1753131.6792A29 C[null][null]New York, NY0.644055349286646
3610m[null]00PC 17483221.7792C95 S[null][null][null]0.279342690482736
3710m30.0000011305127.7500C111 C[null][null]New York, NY0.767035169992596
3810m19.0003219950263.0000C23 C25 C2S[null][null]Winnipeg, MB0.490231013158336
3910m64.0001419950263.0000C23 C25 C2S[null][null]Winnipeg, MB0.235716909635812
4010m[null]0011377826.5500D34 S[null][null]Westcliff-on-Sea, Essex0.390792083926499
4110m[null]001120580.0000B102 S[null][null][null]0.71977791050449
4210m37.0001011380353.1000C123 S[null][null]Scituate, MA0.114295000443235
4310m47.0000011132038.5000E63 S[null]275St Anne's-on-Sea, Lancashire0.831041638739407
4410m24.00000PC 1759379.2000B86 C[null][null][null]0.967385483207181
4510m71.00000PC 1775434.6542A5 C[null][null]New York, NY0.591355679091066
4610m38.00001PC 17582153.4625C91 S[null]147Winnipeg, MB0.870173128321767
4710m46.00000PC 1759379.2000B82 B84 C[null][null]New York, NY0.99003749457188
4810m[null]0011379642.4000[null]S[null][null][null]0.403090257197618
4910m45.000103697383.4750C83 S[null][null]New York, NY0.323395622661337
5010m40.000001120590.0000B94 S[null]110[null]0.170912061352283
5110m55.000111274993.5000B69 S[null]307Montreal, PQ0.737730374792591
5210m42.0000011303842.5000B11 S[null][null]London / Middlesex0.356435600901023
5310m[null]001746351.8625E46 S[null][null]Brighton, MA0.61207019072026
5410m55.0000068050.0000C39 S[null][null]London / Birmingham0.96926672779955
5510m42.0001011378952.0000[null]S[null]38New York, NY0.536786375567317
5610m[null]00PC 1760030.6958[null]C14 [null]New York, NY0.56807480356656
5710f50.00000PC 1759528.7125C49 C[null][null]Paris, France New York, NY0.809493057662621
5810m46.0000069426.0000[null]S[null]80Bennington, VT0.982430178439245
5910m50.0000011304426.0000E60 S[null][null]London0.696268314495683
6010m32.50000113503211.5000C132 C[null]45[null]0.267956059193239
6110m58.000001177129.7000B37 C[null]258Buffalo, NY0.533579262206331
6210m41.000101746451.8625D21 S[null][null]Southington / Noank, CT0.596509403316304
6310m[null]0011302826.5500C124 S[null][null]Portland, OR0.430671361740679
6410m[null]00PC 1761227.7208[null]C[null][null]Chicago, IL0.355260608484969
6510m29.0000011350130.0000D6 S[null]126Springfield, MA0.0263086690101773
6610m30.0000011380145.5000[null]S[null][null]London / New York, NY0.309636837337166
6710m30.0000011046926.0000C106 S[null][null]Brockton, MA0.0152337534818798
6810m19.0001011377353.1000D30 S[null][null]New York, NY0.364044237649068
6910m46.000001305075.2417C6 C[null]292Vancouver, BC0.475262716412544
7010m54.000001746351.8625E46 S[null]175Dorchester, MA0.30979929654859
7110m28.00010PC 1760482.1708[null]C[null][null]New York, NY0.408943912014365
7210m65.000001350926.5500E38 S[null]249East Bridgewater, MA0.478623170405626
7310m44.000201992890.0000C78 Q[null]230Fond du Lac, WI0.148798908572644
7410m55.0000011378730.5000C30 S[null][null]Montreal, PQ0.126385363517329
7510m47.0000011379642.4000[null]S[null][null]Washington, DC0.625826764851809
7610m37.00001PC 1759629.7000C118 C[null][null]Brooklyn, NY0.980584268225357
7710m58.0000235273113.2750D48 C[null]122Lexington, MA0.496911391615868
7810m64.0000069326.0000[null]S[null]263Isle of Wight, England0.189228599891067
7910m65.0000111350961.9792B30 C[null]234Providence, RI0.280916610732675
8010m28.50000PC 1756227.7208D43 C[null]189?Havana, Cuba0.485971889225766
8110m[null]001120520.0000[null]S[null][null]Belfast0.392282282235101
8210m45.5000011304328.5000C124 S[null]166Surbiton Hill, Surrey0.0708799788262695
8310m23.000001274993.5000B24 S[null][null]Montreal, PQ0.089022418949753
8410m29.0001011377666.6000C2 S[null][null]Isleworth, England0.545445928815752
8510m18.00010PC 17758108.9000C65 C[null][null]Madrid, Spain0.513165891636163
8610m47.0000011046552.0000C110 S[null]207Worcester, MA0.164120414061472
8710m38.00000199720.0000[null]S[null][null]Rotterdam, Netherlands0.431144091999158
8810m22.00000PC 17760135.6333[null]C[null]232[null]0.174215304432437
8910m[null]00PC 17757227.5250[null]C[null][null][null]0.641582700423896
9010m31.00000PC 1759050.4958A24 S[null][null]Trenton, NJ0.814205173403025
9110m[null]0011376750.0000A32 S[null][null]Seattle, WA0.195884848246351
9210m36.000001304940.1250A10 C[null][null]Winnipeg, MB0.653516355436295
9310m55.00010PC 1760359.4000[null]C[null][null]New York, NY0.10073209204711
9410m33.0000011379026.5500[null]S[null]109London0.278607521904632
9510m61.00013PC 17608262.3750B57 B59 B6C[null][null]Haverford, PA / Cooperstown, NY0.0629091840237379
9610m50.000101350755.9000E44 S[null][null]Duluth, MN0.963238372234628
9710m56.0000011379226.5500[null]S[null][null]New York, NY0.462777572218329
9810m56.000001776430.6958A7 C[null][null]St James, Long Island, NY0.0639691490214318
9910m24.000101369560.0000C31 S[null][null]Huntington, WV0.0569538441486657
10010m[null]0011305626.0000A19 S[null][null]Streatham, Surrey0.725290278671309
Rows: 1-100 of 1234 | Columns: 15

Let's create a Logistic Regression to predict the survival of the Titanic passengers. We will use the age and the fare as predictors.

In [2]:
from verticapy.learn.linear_model import LogisticRegression
model = LogisticRegression("public.LR_titanic")
model.fit("public.titanic", ["age", "fare"], "survived")
Out[2]:

=======
details
=======
predictor|coefficient|std_err |z_value |p_value 
---------+-----------+--------+--------+--------
Intercept| -0.09135  | 0.15559|-0.58709| 0.55714
   age   | -0.01439  | 0.00475|-3.02599| 0.00248
  fare   |  0.01546  | 0.00212| 7.29445| 0.00000


==============
regularization
==============
type| lambda 
----+--------
 l2 | 1.00000


===========
call_string
===========
logistic_reg('public.LR_titanic', 'public.titanic', '"survived"', '"age", "fare"'
USING PARAMETERS optimizer='cgd', epsilon=0.0001, max_iterations=100, regularization='l2', lambda=1, alpha=0)

===============
Additional Info
===============
       Name       |Value
------------------+-----
 iteration_count  |  5  
rejected_row_count| 238 
accepted_row_count| 996 

Fitting the model creates new model attributes, making methods easier to use.

In [3]:
model.X
Out[3]:
['"age"', '"fare"']
In [4]:
model.y
Out[4]:
'"survived"'
In [5]:
model.input_relation
Out[5]:
'public.titanic'
In [6]:
model.test_relation
Out[6]:
'public.titanic'

In our case, we did not write the test relation when fitting the model. The model will then consider the training relation as test. These attributes will be used when invoking the different model abstractions. For example, let's compute the accuracy of the model.

In [7]:
model.score(method = "accuracy")
Out[7]:
0.6969205834683955

The 'score' method is using the attribute 'y' and the model prediction in the 'testrelation' to compute the accuracy. You can change these at anytime to deploy the models on different columns. The model could also have other useful attributes. In this case, the 'coef' attribute provides us with useful information.

In [8]:
model.coef_
Abc
predictor
Varchar(65000)
123
coefficient
Float
123
std_err
Float
123
z_value
Float
123
p_value
Float
1Intercept-0.09134875833752320.155594583418985-0.5870947196891770.557140093691283
2age-0.01438502352042850.00475381848744904-3.025993432102550.00247817685818187
3fare0.01546036233411470.002119469710611357.294448350316472.99885239324488e-13
Out[8]:

It is also possible to look at some other Vertica attributes using the 'get_model_attribute' method.

In [9]:
model.get_model_attribute()
Abc
attr_name
Varchar(128)
Abc
Long varchar(32000000)
123
#_of_rows
Integer
1details3
2regularization1
3iteration_count1
4rejected_row_count1
5accepted_row_count1
6call_string1
Out[9]:

Many abstraction can help you drawing model evaluation curves like PRC, ROC or Lift Chart.

In [10]:
model.roc_curve()
model.prc_curve()
model.lift_chart()

Looking at the SQL code can help you understand how Vertica works.

In [11]:
display(model.deploySQL())
PREDICT_LOGISTIC_REG("age", "fare" USING PARAMETERS model_name = 'public.LR_titanic', type = 'probability', match_by_pos = 'true')

The classification report is the best way to evaluate your model.

In [12]:
model.classification_report()
value
auc0.6974762740166146
prc_auc0.6003540469187277
accuracy0.6969205834683955
log_loss0.281741003041208
precision0.6194968553459119
recall0.43777777777777777
f1_score0.5769062584198693
mcc0.31193616529653234
informedness0.2834410430839003
markedness0.34329598198346645
csi0.3450087565674256
cutoff0.999
Out[12]:

You can also add the prediction to your vDataFrame.

In [13]:
model.predict(titanic, name = "pred_survived")
123
pclass
Int
123
survived
Int
Abc
Varchar(96)
Abc
gender
Char(1)
123
age
Numeric(6,3)
123
sibsp
Int
123
parch
Int
Abc
ticket
Varchar(24)
123
fare
Numeric(7,4)
Abc
cabin
Char(10)
Abc
embarked
Char(1)
Abc
boat
Char(4)
123
body
Int
Abc
homedest
Varchar(64)
123
split
Float
123
pred_survived
Float
110f2.00012113781151.5500C22 C26 S[null][null]Montreal, PQ / Chesterville, ON0.6630928369704630.902287093955018
210m30.00012113781151.5500C22 C26 S[null]135Montreal, PQ / Chesterville, ON0.1617211627308280.860580339225997
310f25.00012113781151.5500C22 C26 S[null][null]Montreal, PQ / Chesterville, ON0.5279040061868730.868988361039809
410m39.000001120500.0000A36 S[null][null]Belfast, NI0.9360141877550630.342456861096143
510m71.00000PC 1760949.5042[null]C[null]22Montevideo, Uruguay0.408743061590940.414029417624538
610m47.00010PC 17757227.5250C62 C64 C[null]124New York, NY0.07712609274312850.939923180247281
710m[null]00PC 1731825.9250[null]S[null][null]New York, NY0.360885151894763[null]
810m24.00001PC 17558247.5208B58 B60 C[null][null]Montreal, PQ0.784185952739790.967395967987415
910m36.000001305075.2417C6 CA [null]Winnipeg, MN0.3920753546990450.635075716220126
1010m25.000001390526.0000[null]C[null]148San Francisco, CA0.7609546342864630.487751219756109
1110m45.0000011378435.5000T S[null][null]Trenton, NJ0.0837284440640360.45268401682983
1210m42.0000011048926.5500D22 S[null][null]London / Winnipeg, MB0.1874089094344530.429216842625225
1310m41.0000011305430.5000A21 S[null][null]Pomeroy, WA0.4641973678953950.447792562926084
1410m48.00000PC 1759150.4958B10 C[null]208Omaha, NE0.9897564295679330.499713369289623
1510m[null]0011237939.6000[null]C[null][null]Philadelphia, PA0.197682256577536[null]
1610m45.0000011305026.5500B38 S[null][null]Washington, DC0.5676652935799210.418678120562726
1710m[null]0011379831.0000[null]S[null][null][null]0.221641422947869[null]
1810m33.000006955.0000B51 B53 B5S[null][null]New York, NY0.7384757213294510.380187438266754
1910m28.0000011305947.1000[null]S[null][null]Montevideo, Uruguay0.3667479716241360.558247748355842
2010m17.0000011305947.1000[null]S[null][null]Montevideo, Uruguay0.2688530096784230.59683358559402
2110m49.000001992426.0000[null]S[null][null]Ascot, Berkshire / Rochester, NY0.5137101379223170.40269570450922
2210m36.000101987778.8500C46 S[null]172Little Onn Hall, Staffs0.4533281808253380.647904295062985
2310m46.00010W.E.P. 573461.1750E31 S[null][null]Amenia, ND0.1696979296393690.548033368992097
2410m[null]001120510.0000[null]S[null][null]Liverpool, England / Belfast0.0851756541524082[null]
2510m27.0001013508136.7792C89 C[null][null]Los Angeles, CA0.01526919589377940.836841368048104
2610m[null]0011046552.0000A14 S[null][null]Stoughton, MA0.916533817071468[null]
2710m47.00000572725.5875E58 S[null][null]Victoria, BC0.5680905110202730.408093385463076
2810m37.00011PC 1775683.1583E52 C[null][null]Lakewood, NJ0.6273099116515370.659723620113947
2910m[null]0011379126.5500[null]S[null][null]Roachdale, IN0.747528738109395[null]
3010m70.00011WE/P 573571.0000B22 S[null]269Milwaukee, WI0.3357259517069910.499846330243495
3110m39.00010PC 1759971.2833C85 C[null][null]New York, NY0.4811079674400390.610568134710964
3210m31.00010F.C. 1275052.0000B71 S[null][null]Montreal, PQ0.05270187323912980.566271370083388
3310m50.00010PC 17761106.4250C86 C[null]62Deephaven, MN / Cedar Rapids, IA0.978850285522640.69736239132171
3410m39.00000PC 1758029.7000A18 C[null]133Philadelphia, PA0.1578129020053890.451851682120462
3510f36.00000PC 1753131.6792A29 C[null][null]