vDataFrame.outliers

In [ ]:
vDataFrame.outliers(columns: list = [],
                    name: str = "distribution_outliers",
                    threshold: float = 3.0,
                    robust: bool = False)

Adds a new vcolumn labeled with 0 and 1. 1 means that the record is a global outlier.

Parameters

Name Type Optional Description
columns
list
List of the vcolumns names. If empty, all the numerical vcolumns will be used.
name
str
Name of the new vcolumn.
threshold
float
Threshold equals to the critical score.
robust
bool
If set to True, the score used will be the Robust Z-Score instead of the Z-Score.

Returns

vDataFrame : self

Example

In [44]:
from verticapy.datasets import load_titanic
titanic = load_titanic()
display(titanic)
123
fare
Numeric(10,5)
123
survived
Int
Abc
sex
Varchar(20)
Abc
boat
Varchar(100)
123
pclass
Int
123
age
Numeric(6,3)
Abc
ticket
Varchar(36)
Abc
Varchar(164)
Abc
embarked
Varchar(20)
Abc
cabin
Varchar(30)
123
body
Int
123
parch
Int
Abc
home.dest
Varchar(100)
123
sibsp
Int
1151.550000female[null]12.000113781SC22 C26[null]2Montreal, PQ / Chesterville, ON1
2151.550000male[null]130.000113781SC22 C261352Montreal, PQ / Chesterville, ON1
3151.550000female[null]125.000113781SC22 C26[null]2Montreal, PQ / Chesterville, ON1
40.000000male[null]139.000112050SA36[null]0Belfast, NI0
549.504200male[null]171.000PC 17609C[null]220Montevideo, Uruguay0
6227.525000male[null]147.000PC 17757CC62 C641240New York, NY1
725.925000male[null]1[null]PC 17318S[null][null]0New York, NY0
8247.520800male[null]124.000PC 17558CB58 B60[null]1Montreal, PQ0
975.241700maleA136.00013050CC6[null]0Winnipeg, MN0
1026.000000male[null]125.00013905C[null]1480San Francisco, CA0
1135.500000male[null]145.000113784ST[null]0Trenton, NJ0
1226.550000male[null]142.000110489SD22[null]0London / Winnipeg, MB0
1330.500000male[null]141.000113054SA21[null]0Pomeroy, WA0
1450.495800male[null]148.000PC 17591CB102080Omaha, NE0
1539.600000male[null]1[null]112379C[null][null]0Philadelphia, PA0
1626.550000male[null]145.000113050SB38[null]0Washington, DC0
1731.000000male[null]1[null]113798S[null][null]0[null]0
185.000000male[null]133.000695SB51 B53 B55[null]0New York, NY0
1947.100000male[null]128.000113059S[null][null]0Montevideo, Uruguay0
2047.100000male[null]117.000113059S[null][null]0Montevideo, Uruguay0
2126.000000male[null]149.00019924S[null][null]0Ascot, Berkshire / Rochester, NY0
2278.850000male[null]136.00019877SC461720Little Onn Hall, Staffs1
2361.175000male[null]146.000W.E.P. 5734SE31[null]0Amenia, ND1
240.000000male[null]1[null]112051S[null][null]0Liverpool, England / Belfast0
25136.779200male[null]127.00013508CC89[null]0Los Angeles, CA1
2652.000000male[null]1[null]110465SA14[null]0Stoughton, MA0
2725.587500male[null]147.0005727SE58[null]0Victoria, BC0
2883.158300male[null]137.000PC 17756CE52[null]1Lakewood, NJ1
2926.550000male[null]1[null]113791S[null][null]0Roachdale, IN0
3071.000000male[null]170.000WE/P 5735SB222691Milwaukee, WI1
3171.283300male[null]139.000PC 17599CC85[null]0New York, NY1
3252.000000male[null]131.000F.C. 12750SB71[null]0Montreal, PQ1
33106.425000male[null]150.000PC 17761CC86620Deephaven, MN / Cedar Rapids, IA1
3429.700000male[null]139.000PC 17580CA181330Philadelphia, PA0
3531.679200female[null]136.000PC 17531CA29[null]0New York, NY0
36221.779200male[null]1[null]PC 17483SC95[null]0[null]0
3727.750000male[null]130.000113051CC111[null]0New York, NY0
38263.000000male[null]119.00019950SC23 C25 C27[null]2Winnipeg, MB3
39263.000000male[null]164.00019950SC23 C25 C27[null]4Winnipeg, MB1
4026.550000male[null]1[null]113778SD34[null]0Westcliff-on-Sea, Essex0
410.000000male[null]1[null]112058SB102[null]0[null]0
4253.100000male[null]137.000113803SC123[null]0Scituate, MA1
4338.500000male[null]147.000111320SE632750St Anne's-on-Sea, Lancashire0
4479.200000male[null]124.000PC 17593CB86[null]0[null]0
4534.654200male[null]171.000PC 17754CA5[null]0New York, NY0
46153.462500male[null]138.000PC 17582SC911471Winnipeg, MB0
4779.200000male[null]146.000PC 17593CB82 B84[null]0New York, NY0
4842.400000male[null]1[null]113796S[null][null]0[null]0
4983.475000male[null]145.00036973SC83[null]0New York, NY1
500.000000male[null]140.000112059SB941100[null]0
5193.500000male[null]155.00012749SB693071Montreal, PQ1
5242.500000male[null]142.000113038SB11[null]0London / Middlesex0
5351.862500male[null]1[null]17463SE46[null]0Brighton, MA0
5450.000000male[null]155.000680SC39[null]0London / Birmingham0
5552.000000male[null]142.000113789S[null]380New York, NY1
5630.695800male141[null]PC 17600C[null][null]0New York, NY0
5728.712500female[null]150.000PC 17595CC49[null]0Paris, France New York, NY0
5826.000000male[null]146.000694S[null]800Bennington, VT0
5926.000000male[null]150.000113044SE60[null]0London0
60211.500000male[null]132.500113503CC132450[null]0
6129.700000male[null]158.00011771CB372580Buffalo, NY0
6251.862500male[null]141.00017464SD21[null]0Southington / Noank, CT1
6326.550000male[null]1[null]113028SC124[null]0Portland, OR0
6427.720800male[null]1[null]PC 17612C[null][null]0Chicago, IL0
6530.000000male[null]129.000113501SD61260Springfield, MA0
6645.500000male[null]130.000113801S[null][null]0London / New York, NY0
6726.000000male[null]130.000110469SC106[null]0Brockton, MA0
6853.100000male[null]119.000113773SD30[null]0New York, NY1
6975.241700male[null]146.00013050CC62920Vancouver, BC0
7051.862500male[null]154.00017463SE461750Dorchester, MA0
7182.170800male[null]128.000PC 17604C[null][null]0New York, NY1
7226.550000male[null]165.00013509SE382490East Bridgewater, MA0
7390.000000male[null]144.00019928QC782300Fond du Lac, WI2
7430.500000male[null]155.000113787SC30[null]0Montreal, PQ0
7542.400000male[null]147.000113796S[null][null]0Washington, DC0
7629.700000male[null]137.000PC 17596CC118[null]1Brooklyn, NY0
77113.275000male[null]158.00035273CD481222Lexington, MA0
7826.000000male[null]164.000693S[null]2630Isle of Wight, England0
7961.979200male[null]165.000113509CB302341Providence, RI0
8027.720800male[null]128.500PC 17562CD431890?Havana, Cuba0
810.000000male[null]1[null]112052S[null][null]0Belfast0
8228.500000male[null]145.500113043SC1241660Surbiton Hill, Surrey0
8393.500000male[null]123.00012749SB24[null]0Montreal, PQ0
8466.600000male[null]129.000113776SC2[null]0Isleworth, England1
85108.900000male[null]118.000PC 17758CC65[null]0Madrid, Spain1
8652.000000male[null]147.000110465SC1102070Worcester, MA0
870.000000male[null]138.00019972S[null][null]0Rotterdam, Netherlands0
88135.633300male[null]122.000PC 17760C[null]2320[null]0
89227.525000male[null]1[null]PC 17757C[null][null]0[null]0
9050.495800male[null]131.000PC 17590SA24[null]0Trenton, NJ0
9150.000000male[null]1[null]113767SA32[null]0Seattle, WA0
9240.125000male[null]136.00013049CA10[null]0Winnipeg, MB0
9359.400000male[null]155.000PC 17603C[null][null]0New York, NY1
9426.550000male[null]133.000113790S[null]1090London0
95262.375000male[null]161.000PC 17608CB57 B59 B63 B66[null]3Haverford, PA / Cooperstown, NY1
9655.900000male[null]150.00013507SE44[null]0Duluth, MN1
9726.550000male[null]156.000113792S[null][null]0New York, NY0
9830.695800male[null]156.00017764CA7[null]0St James, Long Island, NY0
9960.000000male[null]124.00013695SC31[null]0Huntington, WV1
10026.000000male[null]1[null]113056SA19[null]0Streatham, Surrey0
Rows: 1-100 of 1234 | Columns: 14
In [55]:
titanic.scatter(columns = ["age", "fare"])
titanic.outliers(columns = ["age", "fare"],
                 name = "outliers",
                 threshold = 2.5)
In [56]:
titanic.scatter(columns = ["age", "fare"], 
                catcol = "outliers")

See Also

vDataFrame.normalize Normalizes the input vcolumns.