Matplotlib is a commonly used visualization package for Python. In recent years, however, the interface and style of Matplotlib have begun to show their age. Newer tools like ggplot and ggvis in the R language, along with web visualization toolkits based on D3.js and HTML5 canvas, often make Matplotlib feel clunky and old-fashioned. Other data visualization tools, such as Power BI for business data analysis and Bokeh, Seaborn, and ggpy for interactive data visualization, are gradually being adopted by data scientists for daily use.
This work collects some commonly used templates for creating visualizations using Matplotlib and Seaborn.
strength: a well-tested, cross-platform graphics engine
weakness: clunky and old-fashioned compared to newer packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Global plot styling: choose a style sheet, then override the font, figure
# size and line width defaults for every figure drawn afterwards.
print(plt.style.available)  # list all available styles
plt.style.use('classic')

# 'family' must be an installed font name or one of Matplotlib's generic
# families (serif, sans-serif, cursive, fantasy, monospace). 'normal' is
# neither, so it only triggers "findfont: Font family ['normal'] not found"
# warnings before Matplotlib falls back to its default font.
font = {
    'family': 'sans-serif',
    'weight': 'normal',
    'size': 22,
}
mpl.rc('font', **font)
mpl.rcParams['figure.figsize'] = [12, 8]
mpl.rcParams['lines.linewidth'] = 2

# Alternative look, kept for reference:
# plt.style.use('ggplot')
# plt.rcParams.update({'font.size': 18.0,
#                      'xtick.labelsize': 18.0,
#                      'ytick.labelsize': 18.0,
#                      'axes.labelsize': 18.0})
# Line plot: sine and cosine over [0, 10], each with its own line style,
# marker and colour.
x = np.linspace(0, 10, 100)
# The figure size comes from rcParams; call plt.figure(figsize=(12, 8)) here
# to override it for this plot only.
plt.title('Sin(x) and Cos(x) curve')
for curve, fmt, symbol, colour in (
    (np.sin, '-', 'o', 'g'),
    (np.cos, '--', '*', 'b'),
):
    plt.plot(x, curve(x), fmt, marker=symbol, markersize=12, linewidth=2, c=colour)
# To relabel ticks: plt.xticks(old_ticklabel, new_ticklabel, rotation=15)
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.show()
# Marker reference: https://matplotlib.org/api/markers_api.html (o, *, ^, v, <, >, 1-8)
# Line style reference: https://matplotlib.org/gallery/lines_bars_and_markers/line_styles_reference.html (:, -., --, -)
# Scatter plot: the same sine/cosine samples drawn as translucent markers.
x = np.linspace(0, 10, 100)
# The figure size comes from rcParams; call plt.figure(figsize=(12, 8)) here
# to override it for this plot only.
plt.title('Sin(x) and Cos(x) curve')
for curve, symbol, colour in ((np.sin, 'o', 'r'), (np.cos, '*', 'g')):
    # s is the marker area in points^2
    plt.scatter(x, curve(x), s=400, marker=symbol, linewidth=2, c=colour, alpha=0.5)
# To relabel ticks: plt.xticks(old_ticklabel, new_ticklabel, rotation=15)
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.show()
# Marker-size demo: six points on a horizontal line, each marker four times
# the area of the one before it (20, 80, 320, ...).
x = list(range(0, 11, 2))
y = [0 for _ in x]
s = [20 * 4 ** n for n, _ in enumerate(x)]
plt.scatter(x, y, s=s, c='r')
plt.show()
# Simple bar chart: ten bars whose heights grow logarithmically from 1e2 to 1e3.
x = np.linspace(0, 10, num=10)
y = np.logspace(2.0, 3.0, num=10)
# The figure size comes from rcParams; call plt.figure(figsize=(12, 8)) here
# to override it for this plot only.
bar_style = dict(align='center', alpha=0.5, color='g')
plt.bar(x, y, **bar_style)
# To put text labels under the bars: plt.xticks(x, label, rotation=30)
plt.xlabel('Product Style and Color')
plt.ylabel('Overall Sales from Jan 2016 to Oct 2018')
plt.grid(True)
plt.show()
# Colour reference: https://matplotlib.org/api/_as_gen/matplotlib.pyplot.colors.html
# Single-letter colours: b, g, r, c, m, y, k, w
# Grouped bar chart: three quarterly series drawn side by side per person.
# The figure size is passed to subplots() directly. Calling plt.figure()
# after subplots() would open a second, empty figure that plt.show() then
# displays alongside the real one.
fig, ax = plt.subplots(figsize=(12, 8))
bar_width = 0.15
index = np.arange(5)
label = ['Rob', 'Lucy', 'Mary', 'Lili', 'Luna']
Q1 = np.linspace(0, 10, 5)
Q2 = np.linspace(10, 50, 5)
Q3 = np.linspace(50, 100, 5)
opacity = 0.4

# Each series is shifted right by one bar width so the three bars of a group
# sit next to each other instead of overlapping.
q1 = ax.bar(index, Q1, bar_width, alpha=opacity, color='b', label='Q1')
q2 = ax.bar(index + bar_width, Q2, bar_width, alpha=opacity, color='r', label='Q2')
q3 = ax.bar(index + bar_width * 2, Q3, bar_width, alpha=opacity, color='g', label='Q3')

ax.set_xlabel('Year')
ax.set_ylabel('Sales Each Quarter (in 10k)')
ax.set_title('Predicted Sales from Jan 2016 to Sep 2019 By Quarter')
# index + bar_width is the centre of the middle (Q2) bar, i.e. the centre of
# each three-bar group.
ax.set_xticks(index + bar_width)
ax.set_xticklabels(label)
ax.legend()
# Annotate in axes coordinates (0-1). The names are joined into one string so
# the figure shows plain text rather than the repr of a Python list.
ax.text(0.35, 0.93, ', '.join(label), ha='center', va='center', transform=ax.transAxes)
ax.grid(True)
plt.show()
# Pie chart: share of each comment type, one (count, colour) pair per label.
comment_shares = {
    'neutral': (100, 'pink'),
    'positive': (200, 'green'),
    'negative': (500, 'lightskyblue'),
}
labels = list(comment_shares)
sizes = [count for count, _ in comment_shares.values()]
colors = [colour for _, colour in comment_shares.values()]
# autopct prints each wedge's percentage with one decimal place.
plt.pie(sizes, labels=labels, colors=colors, autopct="%1.1f%%", startangle=90)
plt.title('Three Types of Comments on all US airlines')
plt.show()
MATLAB-style interface (plt.plot()): easy to plot with, but difficult to make changes once the data is plotted.
# State-machine (pyplot) interface: select a subplot, then draw into
# whichever subplot is current.
x1 = np.linspace(0, 10, num=10)
y1 = np.logspace(2.0, 3.0, num=10)
x2 = np.linspace(10, 20, num=10)
y2 = np.logspace(20, 30, num=10)
plt.figure(figsize=(12, 8))
# subplot(2, 1, k) is the long form of subplot(21k): two rows, one column,
# k-th panel.
for position, (xs, ys) in enumerate(((x1, y1), (x2, y2)), start=1):
    plt.subplot(2, 1, position)
    plt.plot(xs, ys)
Object-oriented interface (ax.plot()): gives more control over the figure.
# Build the figure and a column of two Axes objects in a single call;
# ax is an array holding both Axes.
fig, ax = plt.subplots(nrows=2)
# Draw each curve by calling plot() on the Axes it belongs to.
for panel, curve in zip(ax, (np.sin, np.cos)):
    panel.plot(x, curve(x))
# Keep a handle on the figure so it can be saved after plotting.
x = np.linspace(0, 10, 100)
fig = plt.figure()
for curve, fmt in ((np.sin, '-'), (np.cos, '--')):
    plt.plot(x, curve(x), fmt)
# To write the figure to disk: fig.savefig('my_figure.png')
# File formats the current backend can write:
fig.canvas.get_supported_filetypes()
import seaborn as sns
import pandas as pd
# numpy goes under its usual alias `np`. Importing it as `pd` would rebind
# the name that the line above just gave to pandas.
import numpy as np

# load_dataset fetches the example data set by name (seaborn downloads it on
# first use, so this needs network access unless it is already cached).
dataset = sns.load_dataset('titanic')
dataset.head()

# Distribution of ticket fares: histogram with KDE, histogram only, and
# histogram with a fixed number of bins.
# NOTE: distplot is deprecated since seaborn 0.11; histplot/displot are the
# documented replacements if these calls start warning or failing.
sns.distplot(dataset['fare'])
sns.distplot(dataset['fare'], kde=False)
sns.distplot(dataset['fare'], kde=False, bins=10)

# Joint distribution of age and fare (scatter plus marginal histograms).
sns.jointplot(x='age', y='fare', data=dataset)
hexagonal plot
# Same joint distribution, binned into hexagons instead of single points.
sns.jointplot(data=dataset, x='age', y='fare', kind='hex')

# dropna() discards every row that has any missing value.
dataset = dataset.dropna()
sns.pairplot(dataset, hue='sex')  # separate by gender
sns.rugplot(dataset['fare'])

# Bar height is the mean age per sex by default, then the standard deviation
# once estimator=np.std is supplied.
age_by_sex = dict(x='sex', y='age', data=dataset)
sns.barplot(**age_by_sex)
import numpy as np
sns.barplot(estimator=np.std, **age_by_sex)

# Number of rows per sex.
sns.countplot(data=dataset, x='sex')
The box plot is used to display the distribution of numerical data within each category in the form of quartiles. The line inside the box shows the median value. The span from the lower whisker to the bottom of the box covers the first quartile. From the bottom of the box to the median line lies the second quartile. From the median line to the top of the box lies the third quartile, and finally from the top of the box to the top whisker lies the last quartile.
# Age distribution per sex as box plots and violin plots, optionally broken
# down further by survival.
age_by_sex = dict(x='sex', y='age', data=dataset)
sns.boxplot(**age_by_sex)
sns.boxplot(hue="survived", **age_by_sex)
sns.violinplot(**age_by_sex)
sns.violinplot(hue='survived', **age_by_sex)
# split=True draws the two hue levels as the two halves of a single violin.
sns.violinplot(hue='survived', split=True, **age_by_sex)
# Strip plots: one dot per row, with and without horizontal jitter, then
# coloured by survival.
sns.stripplot(x='sex', y='age', data=dataset, jitter=True)
sns.stripplot(x='sex', y='age', data=dataset, jitter=False)
sns.stripplot(x='sex', y='age', data=dataset, jitter=True, hue='survived')
# stripplot's `split` parameter was renamed to `dodge` (seaborn 0.8) and is
# no longer accepted by current releases; dodge=True places the hue levels
# side by side within each category.
sns.stripplot(x='sex', y='age', data=dataset, jitter=True, hue='survived', dodge=True)
Interactive visualization using Seaborn to be explored.