# import the graphlab
import graphlab as gl
# reading the csv file
data = gl.SFrame.read_csv("C:\learn\ML\ML00caseStudy\week01Intro\song_data.csv")
[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\Atul\AppData\Local\Temp\graphlab_server_1469128219.log.0 INFO:graphlab.cython.cy_server:GraphLab Create v2.1 started. Logging: C:\Users\Atul\AppData\Local\Temp\graphlab_server_1469128219.log.0
Finished parsing file C:\learn\ML\ML00caseStudy\week01Intro\song_data.csv
Parsing completed. Parsed 100 lines in 4.06285 secs.
This non-commercial license of GraphLab Create for academic use is assigned to atul9806@yahoo.in and will expire on December 09, 2016. ------------------------------------------------------
Read 637410 lines. Lines per second: 192485
Finished parsing file C:\learn\ML\ML00caseStudy\week01Intro\song_data.csv
Parsing completed. Parsed 1000000 lines in 3.68689 secs.
Inferred types from first 100 line(s) of file as column_type_hints=[str,str,str,str,long] If parsing fails due to incorrect types, you can correct the inferred type list above and pass it to read_csv in the column_type_hints argument ------------------------------------------------------
# getting no of rows and columns
data.num_rows(), data.num_columns() , len(data)
(1000000, 5, 1000000)
# saving the dataset
data.save('songs')
# loading the dataset
sdata = gl.load_sframe('songs')
# head 5 rows
sdata.head(5)
song_id | title | release | artist_name | year |
---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 |
# likewise tail 4 rows
sdata.tail(4)
song_id | title | release | artist_name | year |
---|---|---|---|---|
SOXQYIQ12A8C137FBB | Jago Chhadeo | Naale Baba Lassi Pee Gya | Kuldeep Manak | 0 |
SOHODZI12A8C137BB3 | Novemba | Dub_Connected: electronic music ... |
Gabriel Le Mar | 0 |
SOLXGOR12A81C21EB7 | Faraday | The Trance Collection Vol. 2 ... |
Elude | 0 |
SOWXJXQ12AB0189F43 | Fernweh feat. Sektion Kuchikäschtli ... |
So Oder So | Texta | 2004 |
# checking the column names and data type
sdata.column_types(), sdata.column_names()
([str, str, str, str, int], ['song_id', 'title', 'release', 'artist_name', 'year'])
# creating a new columns
sdata['my_rating'] = 0
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
# generating a new colmns called song_age respective to my age
my_age = 1989
sdata['song_age']=sdata['year']-my_age
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
song_age |
---|
14 |
6 |
17 |
14 |
-1989 |
-1989 |
-1989 |
4 |
-1989 |
-1989 |
# generating another column title length
sdata['title length'] = sdata['title'].apply(lambda x : len(x.split()))
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
song_age | title length |
---|---|
14 | 2 |
6 | 2 |
17 | 4 |
14 | 3 |
-1989 | 3 |
-1989 | 9 |
-1989 | 4 |
4 | 4 |
-1989 | 1 |
-1989 | 6 |
# adding one more column
sdata.add_column(sdata.select_column('year').apply(lambda x: x - my_age),name='how_old_was_i')
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
song_age | title length | how_old_was_i |
---|---|---|
14 | 2 | 14 |
6 | 2 | 6 |
17 | 4 | 17 |
14 | 3 | 14 |
-1989 | 3 | -1989 |
-1989 | 9 | -1989 |
-1989 | 4 | -1989 |
4 | 4 | 4 |
-1989 | 1 | -1989 |
-1989 | 6 | -1989 |
# can add multiple columns at a time
sdata[['col1','col2']] = [sdata['title length'],sdata['my_rating']]
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
song_age | title length | how_old_was_i | col1 | col2 |
---|---|---|---|---|
14 | 2 | 14 | 2 | 0 |
6 | 2 | 6 | 2 | 0 |
17 | 4 | 17 | 4 | 0 |
14 | 3 | 14 | 3 | 0 |
-1989 | 3 | -1989 | 3 | 0 |
-1989 | 9 | -1989 | 9 | 0 |
-1989 | 4 | -1989 | 4 | 0 |
4 | 4 | 4 | 4 | 0 |
-1989 | 1 | -1989 | 1 | 0 |
-1989 | 6 | -1989 | 6 | 0 |
#adding some more
#sdata[['col3','col4']] = [[3,4]] this is not allowed, assigned values should be SArrray
#sdata
# deleting column
del sdata['song_age']
# rename columns
sdata.rename({'title length':'title_length'})
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0 |
title_length | how_old_was_i | col1 | col2 |
---|---|---|---|
2 | 14 | 2 | 0 |
2 | 6 | 2 | 0 |
4 | 17 | 4 | 0 |
3 | 14 | 3 | 0 |
3 | -1989 | 3 | 0 |
9 | -1989 | 9 | 0 |
4 | -1989 | 4 | 0 |
4 | 4 | 4 | 0 |
1 | -1989 | 1 | 0 |
6 | -1989 | 6 | 0 |
# swap columns location
sdata.swap_columns('year','title_length')
sdata
song_id | title | release | artist_name | title_length |
---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 2 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 4 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 3 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 3 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 9 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 4 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 4 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 1 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
6 |
my_rating | year | how_old_was_i | col1 | col2 |
---|---|---|---|---|
0 | 2003 | 14 | 2 | 0 |
0 | 1995 | 6 | 2 | 0 |
0 | 2006 | 17 | 4 | 0 |
0 | 2003 | 14 | 3 | 0 |
0 | 0 | -1989 | 3 | 0 |
0 | 0 | -1989 | 9 | 0 |
0 | 0 | -1989 | 4 | 0 |
0 | 1993 | 4 | 4 | 0 |
0 | 0 | -1989 | 1 | 0 |
0 | 0 | -1989 | 6 | 0 |
# change the column data type
sdata.column_names(),sdata.column_types()
(['song_id', 'title', 'release', 'artist_name', 'title_length', 'my_rating', 'year', 'how_old_was_i', 'col1', 'col2'], [str, str, str, str, int, int, int, int, int, int])
sdata['my_rating'] = sdata['my_rating'].astype(float)
sdata.column_types()
[str, str, str, str, int, float, int, int, int, int]
# swap column location
sdata.swap_columns('title_length','year')
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0.0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0.0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0.0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | 0 | 0.0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | 0 | 0.0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | 0 | 0.0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0.0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | 0 | 0.0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
0 | 0.0 |
title_length | how_old_was_i | col1 | col2 |
---|---|---|---|
2 | 14 | 2 | 0 |
2 | 6 | 2 | 0 |
4 | 17 | 4 | 0 |
3 | 14 | 3 | 0 |
3 | -1989 | 3 | 0 |
9 | -1989 | 9 | 0 |
4 | -1989 | 4 | 0 |
4 | 4 | 4 | 0 |
1 | -1989 | 1 | 0 |
6 | -1989 | 6 | 0 |
# let's try to calculate how many 'a' are there in 'title', 'release' & 'artist_name' column
sdata.add_column(sdata['title', 'release' , 'artist_name'].apply(lambda row:sum(word.count('a') for word in row.values())),'no_of_a')
sdata.head(3)
song_id | title | release | artist_name | year | my_rating | title_length |
---|---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 | 2 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0.0 | 2 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0.0 | 4 |
how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|
14 | 2 | 0 | 5 |
6 | 2 | 0 | 8 |
17 | 4 | 0 | 1 |
# checking the missing year
year_count = sdata.groupby('year', gl.aggregate.COUNT)
year_count.head(6)
year | Count |
---|---|
1987 | 5125 |
2011 | 1 |
1998 | 15858 |
1990 | 7258 |
1991 | 8650 |
1950 | 84 |
# no of unique years
print "No of unique years :", str(len(year_count))
No of unique years : 90
print "no of invalid year count "
year_count.topk('year', reverse=True, k=1)
no of invalid year count
year | Count |
---|---|
0 | 484424 |
# year 0 is invalid value to better convert it to None
sdata['year'] = sdata['year'].apply(lambda x : None if x==0 else x)
sdata.head(5)
song_id | title | release | artist_name | year | my_rating | title_length |
---|---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 | 2 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0.0 | 2 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0.0 | 4 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0.0 | 3 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | None | 0.0 | 3 |
how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|
14 | 2 | 0 | 5 |
6 | 2 | 0 | 8 |
17 | 4 | 0 | 1 |
14 | 3 | 0 | 3 |
-1989 | 3 | 0 | 2 |
# now I have to fix the value of 'How_old_Was_i' column as well
# I can do this two ways, one - substract - year - born_year
# or I can use the same apply function with NONE if x < 0
sdata['how_old_was_i'] = sdata['year'].apply(lambda x : None if x is None else x-my_age)
sdata.head(5)
song_id | title | release | artist_name | year | my_rating | title_length |
---|---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 | 2 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0.0 | 2 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0.0 | 4 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0.0 | 3 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | None | 0.0 | 3 |
how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|
14 | 2 | 0 | 5 |
6 | 2 | 0 | 8 |
17 | 4 | 0 | 1 |
14 | 3 | 0 | 3 |
None | 3 | 0 | 2 |
# check the no of songs which have valid years
print len(sdata[sdata['year']>0])
515576
tmp = sdata['year']>0
print tmp
[1L, 1L, 1L, 1L, None, None, None, 1L, None, None, 1L, 1L, None, 1L, None, 1L, 1L, 1L, 1L, None, None, None, 1L, 1L, None, None, 1L, 1L, None, None, None, None, None, 1L, 1L, 1L, None, 1L, None, 1L, None, 1L, None, 1L, 1L, 1L, None, 1L, None, None, None, None, 1L, 1L, 1L, None, None, 1L, None, None, 1L, 1L, None, 1L, 1L, 1L, None, None, None, 1L, None, None, 1L, None, 1L, 1L, None, None, None, None, 1L, None, None, 1L, 1L, None, 1L, None, 1L, None, None, None, 1L, 1L, 1L, 1L, None, 1L, 1L, 1L, ... ]
# Look at lots of descriptive statistics of title_length
print "mean: " + str(sdata['title_length'].mean())
print "std: " + str(sdata['title_length'].std())
print "var: " + str(sdata['title_length'].var())
print "min: " + str(sdata['title_length'].min())
print "max: " + str(sdata['title_length'].max())
print "sum: " + str(sdata['title_length'].sum())
print "number of non-zero entries: " + str(sdata['title_length'].nnz())
mean: 3.369894 std: 2.17226527587 var: 4.71873642876 min: 0 max: 47 sum: 3369894 number of non-zero entries: 999985
approx_sketch = sdata['title_length'].sketch_summary()
print approx_sketch
+--------------------+---------------+----------+ | item | value | is exact | +--------------------+---------------+----------+ | Length | 1000000 | Yes | | Min | 0.0 | Yes | | Max | 47.0 | Yes | | Mean | 3.369894 | Yes | | Sum | 3369894.0 | Yes | | Variance | 4.71873642876 | Yes | | Standard Deviation | 2.17226527587 | Yes | | # Missing Values | 0 | Yes | | # unique values | 44 | No | +--------------------+---------------+----------+ Most frequent items: +-------+--------+--------+--------+--------+-------+-------+-------+-------+ | value | 2 | 3 | 1 | 4 | 5 | 6 | 7 | 8 | +-------+--------+--------+--------+--------+-------+-------+-------+-------+ | count | 241587 | 217674 | 164124 | 152569 | 92148 | 54674 | 30777 | 17826 | +-------+--------+--------+--------+--------+-------+-------+-------+-------+ +-------+------+ | 9 | 10 | +-------+------+ | 10465 | 6633 | +-------+------+ Quantiles: +-----+-----+-----+-----+-----+-----+-----+------+------+ | 0% | 1% | 5% | 25% | 50% | 75% | 95% | 99% | 100% | +-----+-----+-----+-----+-----+-----+-----+------+------+ | 0.0 | 1.0 | 1.0 | 2.0 | 3.0 | 4.0 | 7.0 | 11.0 | 47.0 | +-----+-----+-----+-----+-----+-----+-----+------+------+
# lets check which songs are having largest and smallest length
top_title_length = sdata.topk('title_length')
print top_title_length
+--------------------+-------------------------------+ | song_id | title | +--------------------+-------------------------------+ | SOAALOO12AC468C4ED | Resolution Island Suite I)... | | SOSEWOR12AB018BDF3 | Guayacan Mix: Amor Traicio... | | SOEBPPZ12AB0183730 | Son Of Scheherazade: Pt.1-... | | SODRVPW12A8C13DDCB | And by our own hand did ev... | | SODEYRT12A8AE47972 | Any Place I Hang My Hat is... | | SOEICJI12AC3DFAAD4 | Throw Away Comedy Medley: ... | | SORHPYP12AB017C661 | Happenings' Medley: Oh! Th... | | SOKNNWV12AB017B473 | If I Had My Way/Irish Rose... | | SOWKFQF12AAA8C85E4 | Manitoba Ne Répond Plus (C... | | SOGFUVA12A58A773AB | They Call Me Rock 'N' Roll... | +--------------------+-------------------------------+ +-------------------------------+--------------------------+------+-----------+ | release | artist_name | year | my_rating | +-------------------------------+--------------------------+------+-----------+ | Allegory Of Hearing | Roy Montgomery | None | 0.0 | | Como en un baile | Guayacan | None | 0.0 | | British Tour '76 | Renaissance | None | 0.0 | | Every Red Heart Shines Tow... | Red Sparowes | 2006 | 0.0 | | Too Marvelous For Words - ... | Lee Lessack | None | 0.0 | | Live At The Sands | Dean Martin | None | 0.0 | | The Cat & The Fiddle - 66 ... | The Mother Goose Singers | None | 0.0 | | 1999 International Barbers... | Swing City | None | 0.0 | | Manitoba Ne Répond Plus | Gérard Manset | 2008 | 0.0 | | Motor City Connection | Brownsville Station | 1975 | 0.0 | +-------------------------------+--------------------------+------+-----------+ +--------------+---------------+------+------+---------+ | title_length | how_old_was_i | col1 | col2 | no_of_a | +--------------+---------------+------+------+---------+ | 47 | None | 47 | 0 | 10 | | 46 | None | 46 | 0 | 22 | | 46 | None | 46 | 0 | 17 | | 45 | 17 | 45 | 0 | 16 | | 44 | None | 44 | 0 | 14 | | 44 | None | 44 | 0 | 14 | | 40 | None | 40 | 0 | 6 | | 40 | None | 40 | 0 | 21 | | 39 | 19 | 39 | 0 | 21 | | 39 | -14 | 39 | 0 | 9 | +--------------+---------------+------+------+---------+ [10 rows x 11 columns]
# what about lowest
lowest_title_lenght = sdata.topk('title_length', reverse=True)
print lowest_title_lenght
+--------------------+-------+-------------------------------+----------------+ | song_id | title | release | artist_name | +--------------------+-------+-------------------------------+----------------+ | SOVICLT12A58A7C4D0 | | Of Flies And Men | Milton | | SOAGRAA12AB018D567 | | Puce de luxe | Sébastien Roch | | SOAUTVB12AB018AFF0 | | Puce de luxe | Sébastien Roch | | SOMPGYE12AB018AFE0 | | Puce de luxe | Sébastien Roch | | SOVHAZH12AB018D5B3 | | Puce de luxe | Sébastien Roch | | SOECGBQ12AB018D582 | | Puce de luxe | Sébastien Roch | | SOFDILP12AC960A4EF | | Recupera tus Clásicos - Fobia | Fobia | | SOSFNKO12AB018D5BB | | Puce de luxe | Sébastien Roch | | SOZDBDL12AB018AFFF | | Puce de luxe | Sébastien Roch | | SOQUGMS12AB018B01D | | Puce de luxe | Sébastien Roch | +--------------------+-------+-------------------------------+----------------+ +------+-----------+--------------+---------------+------+------+---------+ | year | my_rating | title_length | how_old_was_i | col1 | col2 | no_of_a | +------+-----------+--------------+---------------+------+------+---------+ | None | 0.0 | 0 | None | 0 | 0 | 0 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 3 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | | None | 0.0 | 0 | None | 0 | 0 | 1 | +------+-----------+--------------+---------------+------+------+---------+ [10 rows x 11 columns]
# lowest 15
# what about lowest
lowest_title_lenght = sdata.topk('title_length', reverse=True, k =15)
print lowest_title_lenght
+--------------------+-------+---------------------+----------------+------+-----------+ | song_id | title | release | artist_name | year | my_rating | +--------------------+-------+---------------------+----------------+------+-----------+ | SOVICLT12A58A7C4D0 | | Of Flies And Men | Milton | None | 0.0 | | SOAGRAA12AB018D567 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOOAFJX12AB018A028 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOLDTFD12AB018AFE6 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOAUTVB12AB018AFF0 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOMPGYE12AB018AFE0 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOVHAZH12AB018D5B3 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOECGBQ12AB018D582 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOTJWHR12AB018D5A0 | | Puce de luxe | Sébastien Roch | None | 0.0 | | SOBDDHR12AB01888A6 | | Grand Glam Outtakes | Remute | None | 0.0 | +--------------------+-------+---------------------+----------------+------+-----------+ +--------------+---------------+------+------+---------+ | title_length | how_old_was_i | col1 | col2 | no_of_a | +--------------+---------------+------+------+---------+ | 0 | None | 0 | 0 | 0 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 1 | | 0 | None | 0 | 0 | 3 | +--------------+---------------+------+------+---------+ [15 rows x 11 columns] Note: Only the head of the SFrame is printed. You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
before_i_was_born = sdata['how_old_was_i'] < 0
before_i_was_born.all(), before_i_was_born.any()
(False, True)
# get total songs in an album and display the top album by no
sdata.groupby(['artist_name','release'], {'no_of_songs_in_album':gl.aggregate.COUNT} ).topk('no_of_songs_in_album')
artist_name | release | no_of_songs_in_album |
---|---|---|
Fanny | First Time In A Long Time: The Reprise ... |
85 |
Bernard Herrmann | The Twilight Zone | 81 |
Spanky & Our Gang | The Complete Mercury Recordings ... |
75 |
The Smashing Pumpkins | Rarities & B-Sides | 72 |
Big Star | Keep An Eye On The Sky | 71 |
Jacques Dutronc | Intégrale Les Cactus | 69 |
The Stooges | 1970: The Complete Fun House Sessions ... |
67 |
Lull | Moments | 64 |
Willie Clancy | Willie Clancy The Gold Ring ... |
61 |
Jack Dangers | Forbidden Planet Explored / Sci-Fi Sound Effects ... |
60 |
# this will download the 118 MB file
#usage_data = gl.SFrame.read_csv("https://static.turi.com/datasets/millionsong/10000.txt", header=False, delimiter='\t', column_type_hints={'X3':int})
#usage_data.rename({'X1':'user_id', 'X2':'song_id', 'X3':'listen_count'})
# Read the data
usage_data = gl.SFrame.read_csv("C:/learn/ML/ML00caseStudy/week01Intro/10000.txt", header=False, delimiter="\t", column_type_hints={'X3':int})
usage_data.rename({'X1':'user_id', 'X2':'song_id', 'X3':'listen_count'})
Finished parsing file C:\learn\ML\ML00caseStudy\week01Intro\10000.txt
Parsing completed. Parsed 100 lines in 3.04661 secs.
Read 844838 lines. Lines per second: 265452
Finished parsing file C:\learn\ML\ML00caseStudy\week01Intro\10000.txt
Parsing completed. Parsed 2000000 lines in 4.35327 secs.
user_id | song_id | listen_count |
---|---|---|
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOAKIMP12A8C130995 | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBBMDR12A8C13253B | 2 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBXHDL12A81C204C0 | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOBYHAJ12A6701BF1D | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODACBL12A8C13C273 | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODDNQT12A6D4F5F7E | 5 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SODXRTY12AB0180F3B | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOFGUAY12AB017B0A8 | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOFRQTD12A81C233C0 | 1 |
b80344d063b5ccb3212f76538 f3d9e43d87dca9e ... |
SOHQWYZ12A6D4FA701 | 1 |
# saving this data frame
usage_data.save('usage_data')
# loading the data
usage_data = gl.load_sframe('usage_data')
# find out the unique users
print len(usage_data['user_id'].unique())
76353
# let's create two datasets which we can join
ds1 = sdata[((sdata['artist_name'] == 'Relient K')
| (sdata['artist_name'] == 'Streetlight Manifesto'))
& (sdata['how_old_was_i'] >= 14) & (sdata['how_old_was_i'] <= 18)]
ds1
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQNUHJ12A6D4F9E19 | Let It All Out (Album Version) ... |
MMHMM | Relient K | 2004 | 0.0 |
SOADZOM12A8C13E446 | The Blonde Lead The Blind (Album Version) ... |
Somewhere In The Between | Streetlight Manifesto | 2007 | 0.0 |
SOKUTMT12A58A77CFB | Give Until There's Nothing Left ... |
Five Score And Seven Years Ago ... |
Relient K | 2007 | 0.0 |
SOIONAH12A58A76FD1 | We Are The Few (Album Version) ... |
Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 |
SOUUYHK12A6D4F9E16 | I So Hate Consequences (Album Version) ... |
MMHMM | Relient K | 2004 | 0.0 |
SOCBUJT12A6D4F9E1A | Who I Am Hates Who I've Been (mmhmm Album ... |
MMHMM | Relient K | 2004 | 0.0 |
SOBUHJR12A6D4FDC7C | Here's To Life (Album Version) ... |
Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 |
SONKBWG12A6D4FB91D | Giving Up_ Giving In (LP Version) ... |
Keasbey Nights | Streetlight Manifesto | 2006 | 0.0 |
SOSFAVU12A6D4FDC6A | Everything Went Numb (Album Version) ... |
Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 |
SORYDMW12A6D4FB923 | This One Goes Out To.... (LP Version) ... |
Keasbey Nights | Streetlight Manifesto | 2006 | 0.0 |
title_length | how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|---|
6 | 15 | 6 | 0 | 0 |
7 | 18 | 7 | 0 | 2 |
5 | 18 | 5 | 0 | 1 |
6 | 14 | 6 | 0 | 1 |
6 | 15 | 6 | 0 | 1 |
10 | 15 | 10 | 0 | 1 |
5 | 14 | 5 | 0 | 1 |
6 | 17 | 6 | 0 | 2 |
5 | 14 | 5 | 0 | 1 |
7 | 17 | 7 | 0 | 2 |
# Let's join ds1 with the 10000.txt dataset usage_data
dsjoin = ds1.join(usage_data, 'song_id')
dsjoin
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOMMLRO12A6D4F9FD1 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
Relient K | 2007 | 0.0 |
SOCLCYG12A6D4FDC71 | Point/Counterpoint (Album Version) ... |
Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 |
SOINPKF12A6D4FDC75 | A Better Place_ A Better Time (Album Version) ... |
Everything Goes Numb | Streetlight Manifesto | 2003 | 0.0 |
SOMMLRO12A6D4F9FD1 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
Relient K | 2007 | 0.0 |
SOMMLRO12A6D4F9FD1 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
Relient K | 2007 | 0.0 |
SOMMLRO12A6D4F9FD1 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
Relient K | 2007 | 0.0 |
SOPSQOS12A6D4F9E15 | High Of 75 (Album Version) ... |
MMHMM | Relient K | 2004 | 0.0 |
SOSTGAF12A58A7B18B | The Best Thing | Five Score And Seven Years Ago ... |
Relient K | 2007 | 0.0 |
SOUUYHK12A6D4F9E16 | I So Hate Consequences (Album Version) ... |
MMHMM | Relient K | 2004 | 0.0 |
SOMMLRO12A6D4F9FD1 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
Relient K | 2007 | 0.0 |
title_length | how_old_was_i | col1 | col2 | no_of_a | user_id | listen_count |
---|---|---|---|---|---|---|
5 | 18 | 5 | 0 | 2 | 3f9ed694a79835c921ef6d94a cd28f876c1d901e ... |
3 |
3 | 14 | 3 | 0 | 1 | 956044d724390e40c8511b49e 5bf6bc28071de3a ... |
1 |
8 | 14 | 8 | 0 | 2 | 956044d724390e40c8511b49e 5bf6bc28071de3a ... |
1 |
5 | 18 | 5 | 0 | 2 | 354cfdb566f543bb5b810a4d8 959d974a30797fd ... |
1 |
5 | 18 | 5 | 0 | 2 | e4d4e50c99ed5b3dfe740fa1c cbe6be41eeb4f35 ... |
14 |
5 | 18 | 5 | 0 | 2 | ed0f4979ffddc0010244638a3 b956510624dc6a9 ... |
3 |
5 | 15 | 5 | 0 | 0 | ed0f4979ffddc0010244638a3 b956510624dc6a9 ... |
2 |
3 | 18 | 3 | 0 | 1 | ed0f4979ffddc0010244638a3 b956510624dc6a9 ... |
1 |
6 | 15 | 6 | 0 | 1 | ed0f4979ffddc0010244638a3 b956510624dc6a9 ... |
1 |
5 | 18 | 5 | 0 | 2 | 0e741cfe121bb619177be8d8a 135a2d3692d9c90 ... |
4 |
# total row in ds1 and dsjoin datasets
len(ds1), len(dsjoin)
(64, 571)
len(ds1['song_id'].unique()), len(dsjoin['song_id'].unique()), len(ds1['song_id']), len(dsjoin['song_id'])
(64, 7, 64, 571)
# find out most popular songs when I was between 14 n 18
most_popular = dsjoin.groupby(['song_id'], {'total_listen_count':gl.aggregate.SUM('listen_count'),
'num_unique_users':gl.aggregate.COUNT('user_id')})
most_popular
song_id | num_unique_users | total_listen_count |
---|---|---|
SOCVOVH12A6D4FB912 | 107 | 555 |
SOPSQOS12A6D4F9E15 | 79 | 207 |
SOMMLRO12A6D4F9FD1 | 104 | 331 |
SOUUYHK12A6D4F9E16 | 79 | 171 |
SOINPKF12A6D4FDC75 | 63 | 196 |
SOSTGAF12A58A7B18B | 72 | 168 |
SOCLCYG12A6D4FDC71 | 67 | 188 |
# to get artist name we have to join this data
most_popular.join(sdata, 'song_id').topk('total_listen_count',k=20)
song_id | num_unique_users | total_listen_count | title | release |
---|---|---|---|---|
SOCVOVH12A6D4FB912 | 107 | 555 | Keasbey Nights (LP Version) ... |
Keasbey Nights |
SOMMLRO12A6D4F9FD1 | 104 | 331 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
SOPSQOS12A6D4F9E15 | 79 | 207 | High Of 75 (Album Version) ... |
MMHMM |
SOINPKF12A6D4FDC75 | 63 | 196 | A Better Place_ A Better Time (Album Version) ... |
Everything Goes Numb |
SOCLCYG12A6D4FDC71 | 67 | 188 | Point/Counterpoint (Album Version) ... |
Everything Goes Numb |
SOUUYHK12A6D4F9E16 | 79 | 171 | I So Hate Consequences (Album Version) ... |
MMHMM |
SOSTGAF12A58A7B18B | 72 | 168 | The Best Thing | Five Score And Seven Years Ago ... |
artist_name | year | my_rating | title_length | how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|---|---|---|---|
Streetlight Manifesto | 2006 | 0.0 | 4 | 17 | 4 | 0 | 3 |
Relient K | 2007 | 0.0 | 5 | 18 | 5 | 0 | 2 |
Relient K | 2004 | 0.0 | 5 | 15 | 5 | 0 | 0 |
Streetlight Manifesto | 2003 | 0.0 | 8 | 14 | 8 | 0 | 2 |
Streetlight Manifesto | 2003 | 0.0 | 3 | 14 | 3 | 0 | 1 |
Relient K | 2004 | 0.0 | 6 | 15 | 6 | 0 | 1 |
Relient K | 2007 | 0.0 | 3 | 18 | 3 | 0 | 1 |
# let's append a row with max liste count and check whether it comes in above result or not
me = gl.SFrame({'user_id':['evan'],'song_id':['SOSFAVU12A6D4FDC6A'],'listen_count':[4000]})
# adding this data to usage data
usage_data = usage_data.append(me)
# repeating the above join n group by statement
dsjoin = ds1.join(usage_data, 'song_id')
most_popular = dsjoin.groupby(['song_id'], {'total_listen_count':gl.aggregate.SUM('listen_count'),
'num_unique_users':gl.aggregate.COUNT('user_id')})
most_popular.join(sdata, 'song_id').topk('total_listen_count',k=20)
song_id | num_unique_users | total_listen_count | title | release |
---|---|---|---|---|
SOSFAVU12A6D4FDC6A | 1 | 4000 | Everything Went Numb (Album Version) ... |
Everything Goes Numb |
SOCVOVH12A6D4FB912 | 107 | 555 | Keasbey Nights (LP Version) ... |
Keasbey Nights |
SOMMLRO12A6D4F9FD1 | 104 | 331 | Must Have Done Something Right ... |
Must Have Done Something Right ... |
SOPSQOS12A6D4F9E15 | 79 | 207 | High Of 75 (Album Version) ... |
MMHMM |
SOINPKF12A6D4FDC75 | 63 | 196 | A Better Place_ A Better Time (Album Version) ... |
Everything Goes Numb |
SOCLCYG12A6D4FDC71 | 67 | 188 | Point/Counterpoint (Album Version) ... |
Everything Goes Numb |
SOUUYHK12A6D4F9E16 | 79 | 171 | I So Hate Consequences (Album Version) ... |
MMHMM |
SOSTGAF12A58A7B18B | 72 | 168 | The Best Thing | Five Score And Seven Years Ago ... |
artist_name | year | my_rating | title_length | how_old_was_i | col1 | col2 | no_of_a |
---|---|---|---|---|---|---|---|
Streetlight Manifesto | 2003 | 0.0 | 5 | 14 | 5 | 0 | 1 |
Streetlight Manifesto | 2006 | 0.0 | 4 | 17 | 4 | 0 | 3 |
Relient K | 2007 | 0.0 | 5 | 18 | 5 | 0 | 2 |
Relient K | 2004 | 0.0 | 5 | 15 | 5 | 0 | 0 |
Streetlight Manifesto | 2003 | 0.0 | 8 | 14 | 8 | 0 | 2 |
Streetlight Manifesto | 2003 | 0.0 | 3 | 14 | 3 | 0 | 1 |
Relient K | 2004 | 0.0 | 6 | 15 | 6 | 0 | 1 |
Relient K | 2007 | 0.0 | 3 | 18 | 3 | 0 | 1 |
Lets check, how we can randomly split the data for test
# Randomly split data rows into two subsets
first_set, second_set = sdata.random_split(0.8, seed = 1)
first_set.num_rows(), second_set.num_rows()
(800135, 199865)
If you want to split on a predicate though, you'll have to do that manually.
songs_before = sdata[sdata['how_old_was_i'] < 0]
songs_after = sdata[sdata['how_old_was_i'] >= 0]
songs_before.num_rows(), songs_after.num_rows()
(75581, 439995)
# generating sample data
sample = sdata.sample(0.4)
sample.num_rows()
399454
arr = gl.SArray([1,2,3])
arr
dtype: int Rows: 3 [1L, 2L, 3L]
arr2 = 2*arr
arr2
dtype: int Rows: 3 [2L, 4L, 6L]
# add
arr + arr2
dtype: int Rows: 3 [3L, 6L, 9L]
# multiply
arr * arr2
dtype: int Rows: 3 [2L, 8L, 18L]
# divide
arr2 / arr
dtype: float Rows: 3 [2.0, 2.0, 2.0]
# iterating with SFrame
for i in sdata:
if i['title_length'] >= 45:
print "Whoa that's long!"
Whoa that's long! Whoa that's long! Whoa that's long! Whoa that's long!
sdata['title_artist_length'] = sdata['title','artist_name'].apply(lambda row: sum([len(col) for col in row.values()]))
sdata
song_id | title | release | artist_name | year | my_rating |
---|---|---|---|---|---|
SOQMMHC12AB0180CB8 | Silent Night | Monster Ballads X-Mas | Faster Pussy cat | 2003 | 0.0 |
SOVFVAK12A8C1350D9 | Tanssi vaan | Karkuteillä | Karkkiautomaatti | 1995 | 0.0 |
SOGTUKN12AB017F4F1 | No One Could Ever | Butter | Hudson Mohawke | 2006 | 0.0 |
SOBNYVR12A8C13558C | Si Vos Querés | De Culo | Yerba Brava | 2003 | 0.0 |
SOHSBXH12A8C13B0DF | Tangle Of Aspens | Rene Ablaze Presents Winter Sessions ... |
Der Mystic | None | 0.0 |
SOZVAPQ12A8C13B63C | Symphony No. 1 G minor "Sinfonie ... |
Berwald: Symphonies Nos. 1/2/3/4 ... |
David Montgomery | None | 0.0 |
SOQVRHI12A6D4FB2D7 | We Have Got Love | Strictly The Best Vol. 34 | Sasha / Turbulence | None | 0.0 |
SOEYRFT12AB018936C | 2 Da Beat Ch'yall | Da Bomb | Kris Kross | 1993 | 0.0 |
SOPMIYT12A6D4F851E | Goodbye | Danny Boy | Joseph Locke | None | 0.0 |
SOJCFMH12A8C13B0C2 | Mama_ mama can't you see ? ... |
March to cadence with the US marines ... |
The Sun Harbor's Chorus- Documentary Recordings ... |
None | 0.0 |
title_length | how_old_was_i | col1 | col2 | no_of_a | title_artist_length |
---|---|---|---|---|---|
2 | 14 | 2 | 0 | 5 | 28 |
2 | 6 | 2 | 0 | 8 | 27 |
4 | 17 | 4 | 0 | 1 | 31 |
3 | 14 | 3 | 0 | 3 | 25 |
3 | None | 3 | 0 | 2 | 26 |
9 | None | 9 | 0 | 3 | 78 |
4 | None | 4 | 0 | 3 | 34 |
4 | 4 | 4 | 0 | 4 | 27 |
1 | None | 1 | 0 | 1 | 19 |
6 | None | 6 | 0 | 10 | 72 |
# save as csv
sdata.save('sdata_new.csv', format='csv')
sdata.save('sdata_new')