Code
from operator import countOf
import sys
print(sys.executable)/Users/shaunak/.virtualenvs/crooners/bin/python3
Before getting started, lets setup our venv and create a pyrightconfig.json file so that we get dependancy detection and autocompletion
NOTE: using venv “crooners”
from operator import countOf
import sys
print(sys.executable)/Users/shaunak/.virtualenvs/crooners/bin/python3
A few things going on here:
Import required deps
Read csv data into dataframes
Create some common “agrregate” dataframes for easy reference
Define helper functions
from matplotlib import legend
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown
from pprint import PrettyPrinter
plt.close("all")
pp = PrettyPrinter(indent=2, width=80, compact=True)
# populate data frames
competitors = pd.read_csv("data/competitors.csv")
rounds = pd.read_csv("data/rounds.csv")
submissions = pd.read_csv("data/submissions.csv")
votes = pd.read_csv("data/votes.csv")
## submissions with submitter name and total votes for the submission
songsWithVotes = submissions.merge(
competitors[['ID','Name']],
left_on='Submitter ID',
right_on='ID',
how='inner'
)
votesPerSong = votes.groupby('Spotify URI')['Points Assigned'].sum()
# set total votes per song
songsWithVotes['Votes'] = (
songsWithVotes['Spotify URI']
.map(votesPerSong)
.fillna(0)
.astype(int)
)
## Add song title and song submitter name to each vote
votesBySubName = votes.merge(
songsWithVotes[['Spotify URI', 'Name', 'Title']],
left_on='Spotify URI',
right_on='Spotify URI',
how='inner'
)
# Sum points per submitter (Name) & voter
sub_voter_totals = votesBySubName.groupby(
['Name', 'Voter ID'], as_index=False
)['Points Assigned'].sum().rename(
columns={'Points Assigned': 'TotalPoints'}
)
id_name_map = competitors.set_index('ID')['Name']
voterTotals = sub_voter_totals.copy()
voterTotals['VoterName'] = voterTotals['Voter ID'].map(id_name_map)
voterTotals = (
voterTotals
.drop(columns=['Voter ID'])
.rename(columns={'TotalPoints': 'Votes'})
)
# Find each submitter’s biggest opponent (min) and biggest fan (max)
worst = sub_voter_totals.loc[
sub_voter_totals.groupby('Name')['TotalPoints'].idxmin()
].rename(columns={
'Voter ID': 'BiggestOpp',
'TotalPoints': 'BiggestOppCount'
})
best = sub_voter_totals.loc[
sub_voter_totals.groupby('Name')['TotalPoints'].idxmax()
].rename(columns={
'Voter ID': 'BiggestFan',
'TotalPoints': 'BiggestFanCount'
})
# Merge them back together
voterData = worst.merge(
best,
on='Name',
how='outer'
)[['Name', 'BiggestOpp', 'BiggestOppCount', 'BiggestFan', 'BiggestFanCount']]
voterData['BiggestOpp'] = voterData['BiggestOpp'].map(id_name_map)
voterData['BiggestFan'] = voterData['BiggestFan'].map(id_name_map)
# split into a dict of DataFrames keyed by competitor Name
songsByUser = { name: df for name, df in songsWithVotes.groupby('Name') }
songsByArtist = {name: df for name, df in songsWithVotes.groupby('Artist(s)')}
# calculating round placements
round_scores = (
votes
.groupby(["Round ID", "Spotify URI"], as_index=False)
.agg({"Points Assigned": "sum"})
)
# assign a rank within each round
round_scores["rank"] = (
round_scores
.groupby("Round ID")["Points Assigned"]
.rank(ascending=False, method="dense")
)
# mark top-3 and bottom-3 per round
# bottom-3 = rank >= (max rank in that round − 2)
max_rank = round_scores.groupby("Round ID")["rank"].transform("max")
round_scores["is_top3"] = round_scores["rank"] <= 3
round_scores["is_bottom3"] = round_scores["rank"] >= (max_rank - 2)
# join back to submissions → get submitter/competitor
df = (
round_scores
.merge(submissions[["Spotify URI","Submitter ID"]], on="Spotify URI")
.merge(competitors.rename(columns={"ID":"Submitter ID"}), on="Submitter ID")
)
top3 = (
df[df["is_top3"]]
.groupby(["Submitter ID","Name"], as_index=False)
.size()
.rename(columns={"size":"top3_count"})
)
bottom3 = (
df[df["is_bottom3"]]
.groupby(["Submitter ID","Name"], as_index=False)
.size()
.rename(columns={"size":"bottom3_count"})
)
# final dict of DataFrames
placement_counts = {
"top3": top3,
"bottom3": bottom3
}
# submissions: ['Spotify URI', 'Title', 'Album', 'Artist(s)', 'Submitter ID', 'Created','Comment', 'Round ID', 'Visible To Voters']
# competitors: ['ID', 'Name']
# rounds: ['ID', 'Created', 'Name', 'Description', 'Playlist URL']
# votes ['Spotify URI', 'Voter ID', 'Created', 'Points Assigned', 'Comment','Round ID']
## HELPER FUNCTIONS
def truncateTitle(title):
maxChars = 18
return title if len(title) <= maxChars else title[:(maxChars - 3)] + "..."Lets take a second to look back and reflect on this season’s highs and lows!
top3_ops = voterData.sort_values('BiggestOppCount', ascending=True).head(3)
top3_fans = voterData.sort_values('BiggestFanCount', ascending=False).head(3)
top3_ops = top3_ops.reset_index(drop=True)
top3_fans = top3_fans.reset_index(drop=True)
topOpp = top3_ops.iloc[0]
topFan = top3_fans.iloc[0]
fans = voterData[['Name', 'BiggestFan', 'BiggestFanCount']].sort_values('BiggestFanCount', ascending=False)
ops = voterData[['Name', 'BiggestOpp', 'BiggestOppCount']].sort_values('BiggestOppCount', ascending=False)
fans['PercentOfVotes'] = (fans['BiggestFanCount'] / 63) * 100
fans = fans.reset_index(drop=True)
bestFan = fans['BiggestFan'].value_counts()
bestOpp = ops['BiggestOpp'].value_counts()
# get the most frequent name and its count
top_name = bestFan.idxmax()
top_count = bestFan.max()
top_opp_name = bestOpp.idxmax()
top_opp_count = bestOpp.max()
display(Markdown(f"### Top 3 finishers\n In recognizition of some of our best players. Here is a look at the number of podium finishes each member got this season!"))
print(top3[['Name', 'top3_count']].sort_values('top3_count', ascending=False).reset_index(drop=True))
display(Markdown(f"### Bottom 3 finishers\n In recognizition of some of our worst players. Here is a look at the number of bottom of barrel finishes each member got this season!"))
print(bottom3[['Name', 'bottom3_count']].sort_values('bottom3_count', ascending=False).reset_index(drop=True))
display(Markdown(f"### Top Opp\n This season's #1 opp is **{topOpp['BiggestOpp']}**. Over the course of the entire season, they awarded **{topOpp['Name']}** a total of **{topOpp['BiggestOppCount']}** points. \n {topOpp['BiggestOpp']}, however, is not the only hater. Here are the top 3 opps this season"))
print(top3_ops[['Name', 'BiggestOpp', 'BiggestOppCount']])
display(Markdown(f"### Top Fan\n Alright guys. I planned to keep these headers season agnositic for reusability but these results taste like collusion. The season's biggest fan is **{topFan['BiggestFan']}** giving **{topFan['Name']}** a total of **{topFan['BiggestFanCount']}** votes. In a season with 9 rounds each allowing for net 7 points (10 upvotes + -3 downvotes), {topFan['BiggestFan']} awarded **{((topFan['BiggestFanCount'] / (63)) * 100 ):.1f}**% of their votes to {topFan['Name']}."))
display(Markdown(f"\n Now if that wasn't crazy enough. Take a look at the Top 3 biggest fans. Notice a pattern?"))
print(top3_fans[['Name', 'BiggestFan', 'BiggestFanCount']])
display(Markdown(f"\n So these results raised two immediate thoughts... \n\n 1) Julia should be **proud** of me for not being her biggest fan \n 2) How much collusion is going on in this League? "))
print(fans)
display(Markdown(f"The results are better than I thought. Matt/Audra, Pablo/Nico and Brendan/Sierra have some explaining to do, but the rest of us are clean. Good job guys!!"))
display(Markdown(f"### BONUS ROUND \n Lets take a second to recognize this season's Super Fan and Super Hater. These are special individuals who was most people's biggest fan, or biggest opp"))
display(Markdown(f"**{top_name}** was {top_count} people's biggest fan. Good Job!!"))
display(Markdown(f"**{top_opp_name}** was {top_opp_count} people's biggest opp. Oof"))In recognizition of some of our best players. Here is a look at the number of podium finishes each member got this season!
Name top3_count
0 Brendan 4
1 Nicolette 3
2 Emmett 3
3 Sammi 3
4 Shaunak 3
5 Sierra 2
6 Alex 2
7 Matt 2
8 Julia 2
9 Audra 2
10 James 2
11 Angelica 2
12 Dennis 1
13 Nina 1
In recognizition of some of our worst players. Here is a look at the number of bottom of barrel finishes each member got this season!
Name bottom3_count
0 Judy 3
1 James 3
2 Pablo 3
3 Nina 3
4 Shaunak 3
5 Matt 2
6 Emmett 2
7 Audra 2
8 Sammi 2
9 Sierra 1
10 Alex 1
11 Nicolette 1
12 Julia 1
13 Brendan 1
14 Dennis 1
15 Angelica 1
This season’s #1 opp is Sammi. Over the course of the entire season, they awarded Pablo a total of -6 points. Sammi, however, is not the only hater. Here are the top 3 opps this season
Name BiggestOpp BiggestOppCount
0 Pablo Sammi -6
1 Judy Alex -4
2 Brendan Angelica -3
Alright guys. I planned to keep these headers season agnositic for reusability but these results taste like collusion. The season’s biggest fan is Audra giving Matt a total of 23 votes. In a season with 9 rounds each allowing for net 7 points (10 upvotes + -3 downvotes), Audra awarded 36.5% of their votes to Matt.
Now if that wasn’t crazy enough. Take a look at the Top 3 biggest fans. Notice a pattern?
Name BiggestFan BiggestFanCount
0 Matt Audra 23
1 Nicolette Pablo 18
2 Pablo Nicolette 17
So these results raised two immediate thoughts…
Name BiggestFan BiggestFanCount PercentOfVotes
0 Matt Audra 23 36.507937
1 Nicolette Pablo 18 28.571429
2 Pablo Nicolette 17 26.984127
3 Dennis Shaunak 16 25.396825
4 Audra Matt 14 22.222222
5 Brendan Sierra 13 20.634921
6 Sierra Brendan 12 19.047619
7 Alex Matt 11 17.460317
8 James Dennis 11 17.460317
9 Angelica Matt 10 15.873016
10 Emmett Matt 10 15.873016
11 Sammi Dennis 9 14.285714
12 Shaunak Brendan 9 14.285714
13 Julia Nicolette 8 12.698413
14 Judy Matt 7 11.111111
15 Nina Emmett 7 11.111111
The results are better than I thought. Matt/Audra, Pablo/Nico and Brendan/Sierra have some explaining to do, but the rest of us are clean. Good job guys!!
Lets take a second to recognize this season’s Super Fan and Super Hater. These are special individuals who was most people’s biggest fan, or biggest opp
Matt was 5 people’s biggest fan. Good Job!!
Audra was 3 people’s biggest opp. Oof
avg = songsWithVotes['Votes'].mean()
biggestWinner = songsWithVotes.loc[songsWithVotes['Votes'].idxmax()]
biggestLoser = songsWithVotes.loc[songsWithVotes['Votes'].idxmin()]
artist_stats = songsWithVotes.groupby('Artist(s)').agg(
NumSongs=('Artist(s)', 'size'),
SubmittedSongs=('Title', list),
AvgVotes=('Votes', 'mean')
)
# Get top three artists by number of songs
top3_songs = songsWithVotes.sort_values('Votes', ascending=False).head(3).set_index('Title')
bottom3_songs = songsWithVotes.sort_values('Votes', ascending=True).head(3).set_index('Title')
top3_artists = artist_stats.sort_values('NumSongs', ascending=False).head(3)
bottom3_artists = artist_stats.sort_values('AvgVotes', ascending=True).head(3)
top3_ops = voterData.sort_values('BiggestOppCount', ascending=True).head(3)
top3_fans = voterData.sort_values('BiggestFanCount', ascending=False).head(3)
display(Markdown(f"### Top Songs\n Our season's bangers. Here is a look back at the three most loved songs of this season. Good job submitters! Lets see if you have what it takes to find yourself back in this list next season"))
display(Markdown(f"**Season average votes per song:** {avg:.1f}\n"))
pp.pprint(top3_songs[['Votes', 'Name']])
display(Markdown(f"### Worst Songs\n Our season's biggest stinkers. These songs sucked. If you find yourself in this list, I'm sure the people above would be willing to help!"))
pp.pprint(bottom3_songs[['Votes', 'Name']])
display(Markdown(f"### Top Artists\n A look at our favorite stars? Here are this season's top three favorite artists! Artists with an 'AvgVotes' higher than **{avg:.1f}** are winners. These artist's songs earned more votes than the average submission. Artists with an 'AvgVotes' lower than our **{avg:.1f}** are probably not as loved as we think. While we loved submitting their music, these submissions gardnered fewer votes than the average song"))
pp.pprint(top3_artists)
display(Markdown(f"### Worst Artists\n Whose music do we hate? Here 3 artists with the lowest averge votes per song this season. May be best to avoid them next time"))
pp.pprint(bottom3_artists[['NumSongs', 'AvgVotes']])Our season’s bangers. Here is a look back at the three most loved songs of this season. Good job submitters! Lets see if you have what it takes to find yourself back in this list next season
Season average votes per song: 6.8
Votes Name
Title
Ignition - Remix 22 Dennis
The Fool On The Hill - Remastered 2009 20 Matt
Doin' Time 19 Nina
Our season’s biggest stinkers. These songs sucked. If you find yourself in this list, I’m sure the people above would be willing to help!
Votes Name
Title
You Can't Stop The Beat -13 Sammi
Jaws Main Theme (From "Jaws") -8 Shaunak
That Guy -8 Emmett
A look at our favorite stars? Here are this season’s top three favorite artists! Artists with an ‘AvgVotes’ higher than 6.8 are winners. These artist’s songs earned more votes than the average submission. Artists with an ‘AvgVotes’ lower than our 6.8 are probably not as loved as we think. While we loved submitting their music, these submissions gardnered fewer votes than the average song
NumSongs SubmittedSongs AvgVotes
Artist(s)
Miley Cyrus 3 [The Climb, Wrecking Ball, Jolene - Live] 7.0
Kanye West 3 [Stronger, Pt. 2, Black Skinhead] 9.0
Drake 2 [Passionfruit, Sticky] 0.0
Whose music do we hate? Here 3 artists with the lowest averge votes per song this season. May be best to avoid them next time
NumSongs AvgVotes
Artist(s)
Elijah Kelley, John Travolta, Queen Latifah, Ni... 1 -13.0
Geek Music 1 -8.0
etmnx 1 -8.0
How did each person perform? This section breaks down your performance per round, highlighting your highest and lowest scores of this season. As well as providing a few summarizing metrics to help contexualize your performance.
for name, df in songsByUser.items():
# Create third level heading for each member of our league
display(Markdown(f"### {name}\n"))
## Create a round by round graph of votes
fig, ax = plt.subplots(figsize=(12, 6))
df.plot(x="Title", y="Votes", ax=ax, marker="o", legend=False)
# Titles can be very long messing with graph proportions. clip at 12 chars
ax.set_xticklabels([truncateTitle(t.get_text()) for t in ax.get_xticklabels()], rotation=45, ha="right")
plt.title(f"{name}'s votes per song")
plt.xlabel("Song")
plt.ylabel("Votes")
plt.tight_layout()
titles = df['Title'].tolist()
min_idx = df['Votes'].idxmin()
min_pos = titles.index(df.at[min_idx, "Title"])
min_votes = df.at[min_idx, "Votes"]
ax.scatter(min_pos, min_votes,
color="red", s=100, zorder=5)
ax.annotate(f"Lowest Score: {min_votes}",
(min_pos, min_votes),
textcoords="offset points",
xytext=(0,10), ha="center",
color="red")
max_idx = df['Votes'].idxmax()
max_pos = titles.index(df.at[max_idx, "Title"])
max_votes = df.at[max_idx, "Votes"]
ax.scatter(max_pos, max_votes,
color="green", s=100, zorder=5)
ax.annotate(f"Highest Score: {max_votes}",
(max_pos, max_votes),
textcoords="offset points",
xytext=(0,10), ha="center",
color="green")
for pos, (vote, rnd) in enumerate(zip(df['Votes'], df['Round ID'])):
# grab the Name from your rounds df
round_name = rounds.loc[rounds['ID'] == rnd, 'Name'].values[0]
ax.annotate(
f"round: {round_name}",
(pos, vote),
textcoords="offset points",
xytext=(0, -10),
ha="center",
va="top",
fontsize=10,
color="black"
)
plt.show()
# Sets of other interesting metrics for each user
mavg = df['Votes'].mean()
best = df.loc[df['Votes'].idxmax(), 'Title']
userVoterTotals = voterTotals[voterTotals['Name'] == name]
fans = userVoterTotals.loc[userVoterTotals['VoterName'] != name, ['VoterName', 'Votes']].sort_values('Votes', ascending=False).reset_index(drop=True).head(3)
ops = userVoterTotals.loc[userVoterTotals['VoterName'] != name, ['VoterName', 'Votes']].sort_values('Votes', ascending=True).reset_index(drop=True).head(3)
competitor_name = name
top_series = placement_counts["top3"].query("Name == @competitor_name")["top3_count"]
bottom_series= placement_counts['bottom3'].query("Name == @competitor_name")["bottom3_count"]
top3_count = int(top_series.iloc[0]) if not top_series.empty else 0
bottom3_count = int(bottom_series.iloc[0]) if not bottom_series.empty else 0
display(Markdown(f"- **Average points awarded to a song this season:** {avg:.1f}\n- **Average points awarded to a song you submitted:** {mavg:.1f}\n- **Best performing song:** {best}\n- **Number of top 3 finishes:** {top3_count}\n- **Number of bottom 3 finishes:** {bottom3_count}"))
display(Markdown(f"**{name}'s Biggest Fans**"))
pp.pprint(fans)
display(Markdown(f"**{name}'s Biggest Opps**"))
pp.pprint(ops)
Alex’s Biggest Fans
VoterName Votes
0 Matt 11
1 Dennis 8
2 Emmett 7
Alex’s Biggest Opps
VoterName Votes
0 Julia -2
1 Nicolette 0
2 Sierra 1

Angelica’s Biggest Fans
VoterName Votes
0 Matt 10
1 Sammi 9
2 Sierra 7
Angelica’s Biggest Opps
VoterName Votes
0 Emmett 1
1 Dennis 1
2 James 1

Audra’s Biggest Fans
VoterName Votes
0 Matt 14
1 Sammi 11
2 Alex 9
Audra’s Biggest Opps
VoterName Votes
0 Pablo 0
1 Judy 1
2 Brendan 1

Brendan’s Biggest Fans
VoterName Votes
0 Sierra 13
1 Julia 10
2 Alex 9
Brendan’s Biggest Opps
VoterName Votes
0 Angelica -3
1 Pablo 3
2 Matt 4

Dennis’s Biggest Fans
VoterName Votes
0 Shaunak 16
1 Emmett 9
2 Sammi 9
Dennis’s Biggest Opps
VoterName Votes
0 Audra -3
1 Matt 2
2 Sierra 3

Emmett’s Biggest Fans
VoterName Votes
0 Matt 10
1 James 9
2 Nina 8
Emmett’s Biggest Opps
VoterName Votes
0 Audra -1
1 Shaunak 1
2 Dennis 2

James’s Biggest Fans
VoterName Votes
0 Dennis 11
1 Matt 7
2 Nina 6
James’s Biggest Opps
VoterName Votes
0 Sierra -1
1 Nicolette -1
2 Angelica -1

Judy’s Biggest Fans
VoterName Votes
0 Matt 7
1 Julia 5
2 Audra 5
Judy’s Biggest Opps
VoterName Votes
0 Alex -4
1 Nina -3
2 Emmett -2

Julia’s Biggest Fans
VoterName Votes
0 Nicolette 8
1 Brendan 8
2 Emmett 7
Julia’s Biggest Opps
VoterName Votes
0 Audra -3
1 Matt -2
2 James 3

Matt’s Biggest Fans
VoterName Votes
0 Audra 23
1 Emmett 7
2 Alex 6
Matt’s Biggest Opps
VoterName Votes
0 Brendan -2
1 Angelica -2
2 Nicolette 1

Nicolette’s Biggest Fans
VoterName Votes
0 Pablo 18
1 Audra 13
2 Julia 9
Nicolette’s Biggest Opps
VoterName Votes
0 Matt -2
1 Brendan 0
2 Alex 1

Nina’s Biggest Fans
VoterName Votes
0 Emmett 7
1 Pablo 7
2 Nicolette 5
Nina’s Biggest Opps
VoterName Votes
0 Julia -3
1 Sierra -1
2 Dennis -1

Pablo’s Biggest Fans
VoterName Votes
0 Nicolette 17
1 Julia 7
2 James 6
Pablo’s Biggest Opps
VoterName Votes
0 Sammi -6
1 Audra -1
2 Alex 0

Sammi’s Biggest Fans
VoterName Votes
0 Dennis 9
1 Sierra 8
2 Shaunak 7
Sammi’s Biggest Opps
VoterName Votes
0 Nicolette -3
1 Pablo 0
2 Matt 1

Shaunak’s Biggest Fans
VoterName Votes
0 Brendan 9
1 Dennis 9
2 Julia 8
Shaunak’s Biggest Opps
VoterName Votes
0 Judy -2
1 Matt -1
2 Audra -1

Sierra’s Biggest Fans
VoterName Votes
0 Brendan 12
1 Nicolette 11
2 Sammi 10
Sierra’s Biggest Opps
VoterName Votes
0 James -1
1 Shaunak 0
2 Dennis 1