Gapminders#

In his Gapminder example during the 2006 TED Talk, Hans Rosling debunked stereotypes about developed and undeveloped countries using statistics and data visualization, revealing the nuanced reality of global development. We will be recreating this example using four different plotting libraries (Matplotlib, Plotly, Vega-Altair, and hvPlot), which will be controlled by widgets from Panel.

Gapminder app with 4 plots

We’ll begin by importing the packages needed.

import numpy as np 
import pandas as pd
import panel as pn

import altair as alt
import plotly.graph_objs as go
import plotly.io as pio
import matplotlib.pyplot as plt
import matplotlib as mpl
import hvplot.pandas  # noqa
import warnings

# Silence all warnings so they do not clutter the rendered output.
warnings.simplefilter('ignore')
# Load the Panel extensions for the Vega (Altair) and Plotly panes, and set
# deferred loading and stretch-width sizing as the global defaults.
pn.extension('vega', 'plotly', defer_load=True, sizing_mode="stretch_width")
# Use Matplotlib's non-interactive Agg backend.
mpl.use('agg')

Let’s also define some constant variables for our plots.

# Axis labels and the fixed y-axis range shared by the plots.
XLABEL = "GDP per capita (2000 dollars)"
YLABEL = "Life expectancy (years)"
YLIM = (20, 90)

# Plot dimensions, in pixels.
HEIGHT = 500
WIDTH = 500

# Accent colour as a hex string.
ACCENT = "#D397F8"

# Period, in milliseconds.
PERIOD = 1000

Extract the dataset#

First, we’ll get the data into a Pandas dataframe.

# Read the Gapminder records from the local CSV file into a DataFrame.
dataset = pd.read_csv('./data/gapminders.csv')
# Show 10 randomly sampled rows as a quick preview of the data.
dataset.sample(10)
country year pop continent lifeExp gdpPercap
97 Bangladesh 1957 51365468.0 Asia 39.348 661.637458
1428 Sri Lanka 1952 7982342.0 Asia 57.593 1083.532030
1445 Sudan 1977 17104986.0 Africa 47.800 2202.988423
871 Lebanon 1987 3089353.0 Asia 67.926 5377.091329
845 Korea, Rep. 1977 36436000.0 Asia 64.766 4657.221020
811 Jordan 1987 2820042.0 Asia 65.869 4448.679912
1599 United Kingdom 1967 54959000.0 Europe 71.360 14142.850890
39 Angola 1967 5247469.0 Africa 35.985 5522.776375
977 Mauritius 1977 913025.0 Africa 64.930 3710.982963
1316 Saudi Arabia 1992 16945857.0 Asia 68.768 24841.617770

We’ll also create a constant variable YEARS containing all the unique years in our dataset.

# Distinct values of the `year` column, each converted with int().
# NOTE(review): presumably converted so downstream widgets receive plain
# Python ints rather than NumPy integers -- confirm against the widget code.
YEARS = [int(year) for year in dataset.year.unique()]
# Display the resulting list.
YEARS
[1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002, 2007]

Transform the dataset to plots#

Now let’s define helper functions and functions to plot this dataset with Matplotlib, Plotly, Altair, and hvPlot (using HoloViews and Bokeh).

def get_data(year):
    """Return the rows of ``dataset`` for one year, with marker-size columns.

    Only rows whose ``year`` equals ``year`` and whose ``gdpPercap`` is
    below 10000 are kept. Two columns are added to the returned copy:

    * ``size``: square root of the population times a fixed scale factor.
    * ``size_hvplot``: ``size`` multiplied by 6.

    Args:
        year: Year to select from the module-level ``dataset``.

    Returns:
        pandas.DataFrame: An independent copy of the selected rows.
    """
    in_year = dataset.year == year
    below_gdp_cap = dataset.gdpPercap < 10000

    # Copy so the new columns are not written into a view of ``dataset``.
    subset = dataset[in_year & below_gdp_cap].copy()

    marker_size = np.sqrt(subset['pop'] * 2.666051223553066e-05)
    subset['size'] = marker_size
    subset['size_hvplot'] = marker_size * 6
    return subset

def get_title(library, year):
    """Build the plot title for a given plotting library and year.

    Args:
        library: Name of the plotting library shown at the start of the title.
        year: Year shown at the end of the title.

    Returns:
        str: The formatted title.
    """
    title = f"{library}: Life expectancy vs. GDP, {year}"
    return title

def get_xlim(data):
    """Return the x-axis limits ``(lower, upper)`` for GDP per capita.

    The limits are computed from the full module-level ``dataset`` rather
    than from ``data``, so the same x-range is returned for every year.

    Args:
        data: Unused; kept so existing callers can keep passing the
            per-year frame.

    Returns:
        tuple: The smallest ``gdpPercap`` in ``dataset`` minus 100, and the
        largest ``gdpPercap`` below 10000 plus 1000.
    """
    gdp = dataset['gdpPercap']
    # Reduce only the GDP column: calling DataFrame.max() on the whole frame
    # would also reduce the text columns just to throw those results away.
    lower = gdp.min() - 100
    upper = gdp[gdp < 10000].max() + 1000
    return (lower, upper)

Let’s define the Matplotlib plotting function.

def mpl_view(year=1952, show_legend=True):
    """Draw the Gapminder scatter plot for one year with Matplotlib.

    One scatter series is drawn per continent on a log-scaled x-axis, with
    marker areas taken from the ``size`` column scaled by 5.

    Args:
        year: Year of data to plot.
        show_legend: Whether to add a legend to the axes.

    Returns:
        matplotlib.figure.Figure: The figure, already closed in pyplot.
    """
    frame = get_data(year)
    heading = get_title("Matplotlib", year)
    x_range = get_xlim(frame)

    # Fully transparent figure background.
    fig = plt.figure(figsize=(10, 8), facecolor=(0, 0, 0, 0))
    axes = fig.add_subplot(111)

    axes.set_xscale("log")
    axes.set_title(heading)
    axes.set_xlabel(XLABEL)
    axes.set_ylabel(YLABEL)
    axes.set_ylim(YLIM)
    axes.set_xlim(x_range)

    for continent_name, group in frame.groupby('continent'):
        axes.scatter(
            group.gdpPercap,
            y=group.lifeExp,
            s=group['size'] * 5,
            edgecolor='black',
            label=continent_name,
        )

    if show_legend:
        # Location code 4 places the legend in the lower-right corner.
        axes.legend(loc=4)

    # Close the figure in pyplot; the Figure object itself is still returned.
    plt.close(fig)
    return fig

mpl_view(1952, True)
<Figure size 1000x800 with 1 Axes>

Let’s define the Plotly plotting function.

# Unset Plotly's default figure template before building figures.
# NOTE(review): presumably this leaves styling to the layout options passed
# in plotly_view -- confirm None behaves like the "none" template.
pio.templates.default = None

def plotly_view(year=1952, show_legend=True):
    """Draw the Gapminder scatter plot for one year with Plotly.

    One marker trace is created per continent, with marker areas taken from
    the ``size`` column and country names attached as hover text.

    NOTE(review): the x-axis is linear here, whereas ``mpl_view`` uses a log
    scale -- confirm this difference is intended.

    Args:
        year: Year of data to plot.
        show_legend: Whether the figure shows a legend.

    Returns:
        plotly.graph_objs.Figure: The assembled figure.
    """
    frame = get_data(year)
    heading = get_title("Plotly", year)
    x_range = get_xlim(frame)

    def continent_trace(continent_name, group):
        # Build the marker trace for a single continent.
        bubble = dict(
            symbol='circle',
            sizemode='area',
            sizeref=0.1,
            size=group['size'],
            line=dict(width=2),
        )
        return go.Scatter(
            x=group.gdpPercap,
            y=group.lifeExp,
            mode='markers',
            marker=bubble,
            name=continent_name,
            text=group.country,
        )

    traces = [
        continent_trace(continent_name, group)
        for continent_name, group in frame.groupby('continent')
    ]

    # Grid and tick styling applied to both axes.
    shared_axis = dict(gridcolor='rgb(255, 255, 255)', zerolinewidth=1, ticklen=5, gridwidth=2)
    x_axis = dict(title=XLABEL, type='linear', range=x_range, **shared_axis)
    y_axis = dict(title=YLABEL, range=YLIM, **shared_axis)

    layout = go.Layout(
        title=heading,
        showlegend=show_legend,
        xaxis=x_axis,
        yaxis=y_axis,
        autosize=True,
        paper_bgcolor='rgba(0,0,0,0)',
    )
    return go.Figure(data=traces, layout=layout)

plotly_view()