Gapminders#
In his Gapminder example during the 2006 TED Talk, Hans Rosling debunked stereotypes about developed and undeveloped countries using statistics and data visualization, revealing the nuanced reality of global development. We will be recreating this example using four different plotting libraries (Matplotlib, Plotly, Vega-Altair, hvPlot), which will be controlled by widgets from Panel.
We’ll begin by importing the packages needed.
import numpy as np
import pandas as pd
import panel as pn
import altair as alt
import plotly.graph_objs as go
import plotly.io as pio
import matplotlib.pyplot as plt
import matplotlib as mpl
import hvplot.pandas # noqa
import warnings
# Silence all warnings for the rest of the session.
warnings.simplefilter('ignore')
# Load the Vega and Plotly extensions for Panel.
pn.extension('vega', 'plotly', defer_load=True, sizing_mode="stretch_width")
# Select Matplotlib's non-interactive Agg backend.
mpl.use('agg')
Let’s also define some constant variables for our plots.
# Axis labels and y-axis limits shared by the plotting functions.
XLABEL = 'GDP per capita (2000 dollars)'
YLABEL = 'Life expectancy (years)'
YLIM = (20, 90)
HEIGHT=500 # pixels
WIDTH=500 # pixels
ACCENT="#D397F8"
PERIOD = 1000 # milliseconds
Extract the dataset#
First, we’ll get the data into a Pandas dataframe.
# Load the Gapminder data from the local CSV file into a DataFrame.
dataset = pd.read_csv('./data/gapminders.csv')
# Display 10 randomly chosen rows.
dataset.sample(10)
country | year | pop | continent | lifeExp | gdpPercap | |
---|---|---|---|---|---|---|
97 | Bangladesh | 1957 | 51365468.0 | Asia | 39.348 | 661.637458 |
1428 | Sri Lanka | 1952 | 7982342.0 | Asia | 57.593 | 1083.532030 |
1445 | Sudan | 1977 | 17104986.0 | Africa | 47.800 | 2202.988423 |
871 | Lebanon | 1987 | 3089353.0 | Asia | 67.926 | 5377.091329 |
845 | Korea, Rep. | 1977 | 36436000.0 | Asia | 64.766 | 4657.221020 |
811 | Jordan | 1987 | 2820042.0 | Asia | 65.869 | 4448.679912 |
1599 | United Kingdom | 1967 | 54959000.0 | Europe | 71.360 | 14142.850890 |
39 | Angola | 1967 | 5247469.0 | Africa | 35.985 | 5522.776375 |
977 | Mauritius | 1977 | 913025.0 | Africa | 64.930 | 3710.982963 |
1316 | Saudi Arabia | 1992 | 16945857.0 | Asia | 68.768 | 24841.617770 |
We’ll also create a constant variable YEARS
containing all the unique years in our dataset.
# Unique years in the dataset, converted to plain Python ints.
YEARS = list(map(int, dataset['year'].unique()))
YEARS
[1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007]
Transform the dataset to plots#
Now let’s define helper functions and functions to plot this dataset with Matplotlib, Plotly, Altair, and hvPlot (using HoloViews and Bokeh).
def get_data(year):
    """Return the rows of ``dataset`` for *year* with GDP per capita below 10000.

    The result is a copy with two extra marker-size columns:

    * ``size`` -- square root of the scaled population.
    * ``size_hvplot`` -- ``size`` multiplied by 6.
    """
    in_year = dataset.year == year
    below_gdp_cap = dataset.gdpPercap < 10000
    subset = dataset[in_year & below_gdp_cap].copy()
    subset['size'] = np.sqrt(subset['pop'] * 2.666051223553066e-05)
    subset['size_hvplot'] = subset['size'] * 6
    return subset
def get_title(library, year):
    """Return the plot title for the given plotting *library* and *year*."""
    title = f"{library}: Life expectancy vs. GDP, {year}"
    return title
def get_xlim(data):
    """Return the x-axis limits ``(lower, upper)`` for the GDP-per-capita axis.

    The limits are computed from the module-level ``dataset`` rather than
    from *data*, so they are the same for every year. *data* is not used;
    the parameter is kept so existing callers continue to work.

    Returns:
        A 2-tuple: the smallest ``gdpPercap`` in ``dataset`` minus 100, and
        the largest ``gdpPercap`` below 10000 plus 1000.
    """
    gdp = dataset['gdpPercap']
    # Reduce only the GDP column. Calling max() on the whole filtered frame
    # would also reduce the string columns just to discard the results.
    return (gdp.min() - 100, gdp[gdp < 10000].max() + 1000)
Let’s define the Matplotlib plotting function.
def mpl_view(year=1952, show_legend=True):
    """Build the Matplotlib Gapminder scatter plot for *year*.

    One scatter series is drawn per continent on a log-scaled x axis.
    The figure is closed in pyplot before it is returned.

    Args:
        year: Year of data to plot.
        show_legend: Whether to add the continent legend.

    Returns:
        The Matplotlib figure.
    """
    year_data = get_data(year)
    heading = get_title("Matplotlib", year)
    x_limits = get_xlim(year_data)

    fig = plt.figure(figsize=(10, 8), facecolor=(0, 0, 0, 0))
    axes = fig.add_subplot(111)
    axes.set_xscale("log")
    axes.set_title(heading)
    axes.set_xlabel(XLABEL)
    axes.set_ylabel(YLABEL)
    axes.set_ylim(YLIM)
    axes.set_xlim(x_limits)

    for continent, group in year_data.groupby('continent'):
        axes.scatter(
            group['gdpPercap'],
            group['lifeExp'],
            s=group['size'] * 5,
            edgecolor='black',
            label=continent,
        )

    if show_legend:
        # Location code 4 is 'lower right'.
        axes.legend(loc=4)

    plt.close(fig)
    return fig
mpl_view(1952, True)
<Figure size 1000x800 with 1 Axes>
Let’s define the Plotly plotting function.
pio.templates.default = None
def plotly_view(year=1952, show_legend=True):
    """Build the Plotly Gapminder scatter plot for *year*.

    One marker trace is created per continent, with the country name as
    the trace text.

    Args:
        year: Year of data to plot.
        show_legend: Whether the figure shows its legend.

    Returns:
        A ``go.Figure`` holding the traces and layout.
    """
    year_data = get_data(year)
    heading = get_title("Plotly", year)
    x_range = get_xlim(year_data)
    y_range = YLIM

    traces = [
        go.Scatter(
            x=group.gdpPercap,
            y=group.lifeExp,
            mode='markers',
            marker={
                'symbol': 'circle',
                'sizemode': 'area',
                'sizeref': 0.1,
                'size': group['size'],
                'line': {'width': 2},
            },
            name=continent,
            text=group.country,
        )
        for continent, group in year_data.groupby('continent')
    ]

    # Styling shared by both axes.
    shared_axis = {
        'gridcolor': 'rgb(255, 255, 255)',
        'zerolinewidth': 1,
        'ticklen': 5,
        'gridwidth': 2,
    }
    layout = go.Layout(
        title=heading,
        showlegend=show_legend,
        xaxis={'title': XLABEL, 'type': 'linear', 'range': x_range, **shared_axis},
        yaxis={'title': YLABEL, 'range': y_range, **shared_axis},
        autosize=True,
        paper_bgcolor='rgba(0,0,0,0)',
    )
    return go.Figure(data=traces, layout=layout)
plotly_view()