Multilingual Examples

Language as an Argument

-- English sentence; the language is passed as the second argument.
SELECT
    sentimentanalysis('Cookies are sweet.', 'english')
        OVER (PARTITION BEST);
 sentence | attribute | sentiment_score 
----------+-----------+-----------------
        1 | cookies   |               1
(1 row)

-- Spanish sentence; the language is passed as the second argument.
SELECT
    sentimentanalysis('Las galletas son dulces', 'spanish')
        OVER (PARTITION BEST);
 sentence | attribute | sentiment_score 
----------+-----------+-----------------
        1 | galletas  |               1
(1 row)

The following example shows how to analyze tweets from a table where each tweet record contains the language of the tweet in addition to the text.

-- Each tweet row stores its own language next to the text.
CREATE TABLE myTweets (text VARCHAR(300), language VARCHAR(15));

-- English tweets.
INSERT INTO myTweets (text, language)
VALUES ('Wired reviews Amazon''s tiny-screen Kindle Fire: Web browsing sucks, emotionally draining, makes reading a chore', 'english');

INSERT INTO myTweets (text, language)
VALUES ('Cookies are sweet', 'english');

INSERT INTO myTweets (text, language)
VALUES ('Why does my iPhone have 6 GB of corrupted space I can''t use? That is obnoxious.', 'english');

-- Spanish tweets.
INSERT INTO myTweets (text, language)
VALUES ('Las galletas son dulces', 'spanish');

INSERT INTO myTweets (text, language)
VALUES ('el iPhone es el celular mas popular', 'spanish');

-- The language column is passed as the second argument, so every tweet is
-- analyzed in the language stored on its own row.
SELECT
    sentimentanalysis(text, language)
        OVER (PARTITION BEST)
FROM myTweets;
sentence |   attribute    | sentiment_score 
----------+----------------+-----------------
        1 | reviews amazon |              -1
        1 | kindle fire    |              -1
        1 | web            |              -1
        1 | chore          |              -1
        1 | cookies        |               1
        1 | iphone         |              -1
        1 | gb             |              -1
        1 | space          |              -1
        1 | galletas       |               1
        1 | iphone         |               1
        1 | celular        |               1
(11 rows)

Language as a Parameter

-- Spanish sentence; the language is supplied through the language parameter.
SELECT
    sentimentanalysis('Las galletas son dulces' USING PARAMETERS language='spanish')
        OVER (PARTITION BEST);
 sentence | attribute | sentiment_score 
----------+-----------+-----------------
        1 | galletas  |               1
(1 row)

-- English sentence; the language is supplied through the language parameter.
SELECT
    sentimentanalysis('Cookies are sweet' USING PARAMETERS language='english')
        OVER (PARTITION BEST);
 sentence | attribute | sentiment_score 
----------+-----------+-----------------
        1 | cookies   |               1
(1 row)

Although it is possible to specify the language as a parameter for a specific text given in a query, using the language argument is more appropriate. The language parameter is intended for queries that analyze a set of texts (from a table) that are all written in the same language. Pulse does not use the language parameter to skip texts in other languages, because Pulse does not automatically detect the language of a text. Instead, Pulse uses the language specified as the parameter to analyze every text from the table (consequently, the sentiment scores for texts in other languages may be incorrect).

The following example shows a query that analyzes tweets from a table where the tweets do not have a language value stored in the table, but are all in the same language.

-- The table stores only the tweet text; no language is recorded per row.
CREATE TABLE myTweets (text VARCHAR(300));

INSERT INTO myTweets (text)
VALUES ('Las galletas son dulces');

INSERT INTO myTweets (text)
VALUES ('el iphone es el celular mas popular');

INSERT INTO myTweets (text)
VALUES ('el zorro rapido brinco sobre el perro flojo');

-- One language parameter applies to every row of the table.
SELECT
    sentimentanalysis(text USING PARAMETERS language='spanish')
        OVER (PARTITION BEST)
FROM myTweets;

sentence  |   attribute    | sentiment_score 
----------+----------------+-----------------
        1 | galletas       |               1
        1 | iphone         |               1
        1 | celular        |               1
        1 | zorro          |               1
        1 | perro          |              -1
(5 rows)

The following example shows a query that analyzes tweets from a table that contains tweets in different languages. The Spanish tweets do not have a language value. In a single query, you can specify both an argument and a parameter. The argument takes precedence over the parameter setting; in this case, the parameter is used only when a tweet does not provide a language value.

-- Tweets in mixed languages: the English rows store their language, while the
-- Spanish rows leave it unset so that the language parameter is applied to them.
CREATE TABLE myTweets (doc_id INT, text VARCHAR(300), language VARCHAR(15));

-- English tweets carry an explicit language value.
INSERT INTO myTweets (doc_id, text, language)
VALUES (1, 'Vertica is the best company', 'english');

INSERT INTO myTweets (doc_id, text, language)
VALUES (2, 'Cookies are sweet', 'english');

INSERT INTO myTweets (doc_id, text, language)
VALUES (3, 'The quick brown fox jumped over the lazy dog', 'english');

-- Spanish tweets omit the language column, leaving it NULL.
INSERT INTO myTweets (doc_id, text)
VALUES (4, 'Las galletas son dulces');

INSERT INTO myTweets (doc_id, text)
VALUES (5, 'el iphone es el celular mas popular');

-- The language argument takes precedence; the 'spanish' parameter is used only
-- for rows that provide no language value. doc_id is both selected and used as
-- a partition key so each result row can be traced back to its tweet.
SELECT
    doc_id,
    sentimentanalysis(text, language USING PARAMETERS language='spanish')
        OVER (PARTITION BY doc_id, text)
FROM myTweets;

doc_id    | sentence  | attribute | sentiment_score 
----------+-----------+-----------+-----------------
        1 |          1| vertica   |               1  
        1 |          1| company   |               1  
        2 |          1| cookies   |               1  
        3 |          1| fox       |               1  
        3 |          1| dog       |              -1  
        4 |          1| galletas  |               1  
        5 |          1| iphone    |               1  
        5 |          1| celular   |               1  
(8 rows)

Using the Default Language

-- No language argument or parameter is given, so the default language applies.
SELECT
    sentimentanalysis('Cookies are sweet')
        OVER (PARTITION BEST);
 sentence | attribute | sentiment_score 
----------+-----------+-----------------
        1 | cookies   |               1
(1 row)