Violin Plots in Python/v3

How to make Violin Plots in Python with Plotly. A Violin Plot is a plot of numeric data with probability distributions drawn on both sides of the plotted data.


Note: this page is part of the documentation for version 3 of Plotly.py, which is not the most recent version.
See our Version 4 Migration Guide for information about how to upgrade.

New to Plotly?

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

Version Check

Note: Violin Plots are available in version 1.12.1+
Run pip install plotly --upgrade to update your Plotly version.

In [1]:
import plotly
# Evaluated as the last expression of the cell so the notebook displays the
# installed Plotly version string.
plotly.__version__
Out[1]:
'2.4.1'

One Violin

In [2]:
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go

import numpy as np
from scipy import stats

# Draw 100 samples from a standard normal distribution.
# ndarray.tolist() returns a new list instead of converting in place, so its
# result must be assigned for create_violin to receive a plain Python list.
data_list = np.random.randn(100).tolist()

# `colors` is given as a single hex colour string.
fig = ff.create_violin(data_list, colors='#604d9e')
py.iplot(fig, filename='One Violin')
Out[2]:

Multiple Violins

In [3]:
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go

import numpy as np
import pandas as pd
from scipy import stats

# Fixed seed so the generated sample is reproducible.
np.random.seed(619517)
Nr = 250
y = np.random.randn(Nr)
# Assign each of the Nr samples to one of five groups at random.
gr = np.random.choice(list("ABCDE"), Nr)
# One (shift, scale) pair per group, in the order A..E.
norm_params = [(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

for letter, (shift, scale) in zip("ABCDE", norm_params):
    in_group = gr == letter
    # Scale first, then shift. Writing this as `y[...] *= scale + shift`
    # would multiply by the sum, because the whole right-hand side of an
    # augmented assignment is evaluated before the multiplication.
    y[in_group] = y[in_group] * scale + shift
df = pd.DataFrame(dict(Score=y, Group=gr))

# One violin per distinct value of the 'Group' column.
fig = ff.create_violin(df, data_header='Score', group_header='Group',
                       height=500, width=800)
py.iplot(fig, filename='Multiple Violins')
Out[3]:

Violin Plots with Colorscale

In [4]:
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go

import numpy as np
import pandas as pd
from scipy import stats

# Fixed seed so the generated sample is reproducible.
np.random.seed(619517)
Nr = 250
y = np.random.randn(Nr)
# Assign each of the Nr samples to one of five groups at random.
gr = np.random.choice(list("ABCDE"), Nr)
# One (shift, scale) pair per group, in the order A..E.
norm_params = [(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

for letter, (shift, scale) in zip("ABCDE", norm_params):
    in_group = gr == letter
    # Scale first, then shift. Writing this as `y[...] *= scale + shift`
    # would multiply by the sum, because the whole right-hand side of an
    # augmented assignment is evaluated before the multiplication.
    y[in_group] = y[in_group] * scale + shift
df = pd.DataFrame(dict(Score=y, Group=gr))

data_header = 'Score'
group_header = 'Group'

# Median score per group; these numbers drive the colorscale mapping.
# Grouping by the column name itself (not a one-element list) keeps the
# group keys as plain labels rather than 1-tuples on recent pandas.
group_stats = df.groupby(group_header)[data_header].median().to_dict()

# 'YlOrRd' is a Plotly colorscale name; with use_colorscale=True each violin
# is coloured according to its entry in group_stats.
fig = ff.create_violin(df, data_header=data_header, group_header=group_header,
                       colors='YlOrRd', height=500, width=800,
                       use_colorscale=True, group_stats=group_stats)
py.iplot(fig, filename='Violin Plots with Colorscale')
Out[4]:

Violin Plots with Dictionary Colors

In [5]:
import plotly.plotly as py
import plotly.figure_factory as ff
import plotly.graph_objs as go

import numpy as np
import pandas as pd
from scipy import stats

# Fixed seed so the generated sample is reproducible.
np.random.seed(619517)
Nr = 250
y = np.random.randn(Nr)
# Assign each of the Nr samples to one of five groups at random.
gr = np.random.choice(list("ABCDE"), Nr)
# One (shift, scale) pair per group, in the order A..E.
norm_params = [(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

for letter, (shift, scale) in zip("ABCDE", norm_params):
    in_group = gr == letter
    # Scale first, then shift. Writing this as `y[...] *= scale + shift`
    # would multiply by the sum, because the whole right-hand side of an
    # augmented assignment is evaluated before the multiplication.
    y[in_group] = y[in_group] * scale + shift
df = pd.DataFrame(dict(Score=y, Group=gr))

data_header = 'Score'
group_header = 'Group'

# One colour per group, mixing the accepted formats: 'rgb(...)' strings,
# a hex string and a (r, g, b) tuple with components in [0, 1].
# The hex entry must have exactly six hex digits; a stray seventh digit
# ('#aa6ff60') has been dropped.
colors_dict = dict(A='rgb(25, 200, 120)',
                   B='#aa6ff6',
                   C=(0.3, 0.7, 0.3),
                   D='rgb(175, 25, 122)',
                   E='rgb(255, 150, 226)')

fig = ff.create_violin(df, data_header=data_header, group_header=group_header,
                       colors=colors_dict, height=500, width=800,
                       use_colorscale=False)
py.iplot(fig, filename='Violin Plots with Dictionary Colors')
Out[5]:

Reference

In [7]:
# Print the signature and docstring of the violin figure factory function.
help(ff.create_violin)
Help on function create_violin in module plotly.figure_factory._violin:

create_violin(data, data_header=None, group_header=None, colors=None, use_colorscale=False, group_stats=None, rugplot=True, sort=False, height=450, width=600, title='Violin and Rug Plot')
    Returns figure for a violin plot

    :param (list|array) data: accepts either a list of numerical values,
        a list of dictionaries all with identical keys and at least one
        column of numeric values, or a pandas dataframe with at least one
        column of numbers.
    :param (str) data_header: the header of the data column to be used
        from an inputted pandas dataframe. Not applicable if 'data' is
        a list of numeric values.
    :param (str) group_header: applicable if grouping data by a variable.
        'group_header' must be set to the name of the grouping variable.
    :param (str|tuple|list|dict) colors: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colors is a list, it must contain valid color types as its
        members.
    :param (bool) use_colorscale: only applicable if grouping by another
        variable. Will implement a colorscale based on the first 2 colors
        of param colors. This means colors must be a list with at least 2
        colors in it (Plotly colorscales are accepted since they map to a
        list of two rgb colors). Default = False
    :param (dict) group_stats: a dictionary where each key is a unique
        value from the group_header column in data. Each value must be a
        number and will be used to color the violin plots if a colorscale
        is being used.
    :param (bool) rugplot: determines if a rugplot is drawn on violin plot.
        Default = True
    :param (bool) sort: determines if violins are sorted
        alphabetically (True) or by input order (False). Default = False
    :param (float) height: the height of the violin plot.
    :param (float) width: the width of the violin plot.
    :param (str) title: the title of the violin plot.

    Example 1: Single Violin Plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    from scipy import stats

    # create list of random values
    data_list = np.random.randn(100)
    data_list.tolist()

    # create violin fig
    fig = create_violin(data_list, colors='#604d9e')

    # plot
    py.iplot(fig, filename='Violin Plot')
    ```

    Example 2: Multiple Violin Plots with Qualitative Coloring
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        sort=True, height=600, width=1000)

    # plot
    py.iplot(fig, filename='Violin Plot with Coloring')
    ```

    Example 3: Violin Plots with Colorscale
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_violin
    from plotly.graph_objs import graph_objs

    import numpy as np
    import pandas as pd
    from scipy import stats

    # create dataframe
    np.random.seed(619517)
    Nr=250
    y = np.random.randn(Nr)
    gr = np.random.choice(list("ABCDE"), Nr)
    norm_params=[(0, 1.2), (0.7, 1), (-0.5, 1.4), (0.3, 1), (0.8, 0.9)]

    for i, letter in enumerate("ABCDE"):
        y[gr == letter] *=norm_params[i][1]+ norm_params[i][0]
    df = pd.DataFrame(dict(Score=y, Group=gr))

    # define header params
    data_header = 'Score'
    group_header = 'Group'

    # make groupby object with pandas
    group_stats = {}
    groupby_data = df.groupby([group_header])

    for group in "ABCDE":
        data_from_group = groupby_data.get_group(group)[data_header]
        # take a stat of the grouped data
        stat = np.median(data_from_group)
        # add to dictionary
        group_stats[group] = stat

    # create violin fig
    fig = create_violin(df, data_header='Score', group_header='Group',
                        height=600, width=1000, use_colorscale=True,
                        group_stats=group_stats)

    # plot
    py.iplot(fig, filename='Violin Plot with Colorscale')
    ```