Scatterplot Matrix in Python/v3

How to make scatterplot-matrix plots in Python with Plotly.


Note: this page is part of the documentation for version 3 of Plotly.py, which is not the most recent version.
See our Version 4 Migration Guide for information about how to upgrade.

New to Plotly?¶

Plotly's Python library is free and open source! Get started by downloading the client and reading the primer.
You can set up Plotly to work in online or offline mode, or in Jupyter notebooks.
We also have a quick-reference cheatsheet (new!) to help you get started!

Version Check¶

Note: Scatterplot Matrix is available in version 1.9.11+
Run `pip install plotly --upgrade` to update your Plotly version.

In [1]:
import plotly
# Show the installed Plotly version (Scatterplot Matrix needs 1.9.11+).
plotly.__version__
Out[1]:
'2.4.1'

Basic Scatterplot Matrix¶

In [2]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Ten rows of random samples spread over two labelled columns.
column_names = ['Column A', 'Column B']
samples = np.random.randn(10, 2)
dataframe = pd.DataFrame(samples, columns=column_names)

# Build the scatterplot matrix at 800x800 and send it to Plotly.
fig = ff.create_scatterplotmatrix(
    dataframe,
    height=800,
    width=800,
)
py.iplot(fig, filename='Basic Scatterplot Matrix')
Out[2]:

Index a Column¶

In [3]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Two numeric columns holding ten random samples each.
dataframe = pd.DataFrame(
    np.random.randn(10, 2),
    columns=['Column A', 'Column B'],
)

# One fruit label per row; this column is passed as the index below.
fruit_labels = ['apple', 'apple', 'grape', 'apple', 'apple',
                'grape', 'pear', 'pear', 'apple', 'pear']
dataframe['Fruit'] = pd.Series(fruit_labels)

# index names the index column; size sets the marker size (in px).
fig = ff.create_scatterplotmatrix(
    dataframe,
    index='Fruit',
    size=10,
    height=800,
    width=800,
)
py.iplot(fig, filename='Index a Column')
Out[3]:

Box Plots along Diagonal¶

In [4]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Four numeric columns holding ten random samples each.
numeric_columns = ['Column A', 'Column B', 'Column C', 'Column D']
dataframe = pd.DataFrame(np.random.randn(10, 4), columns=numeric_columns)

# One fruit label per row, used as the index column.
fruit_labels = ['apple', 'apple', 'grape', 'apple', 'apple',
                'grape', 'pear', 'pear', 'apple', 'pear']
dataframe['Fruit'] = pd.Series(fruit_labels)

# diag='box' selects box plots as the chart type on the main diagonal.
fig = ff.create_scatterplotmatrix(
    dataframe,
    diag='box',
    index='Fruit',
    height=800,
    width=800,
)
py.iplot(fig, filename='Box plots along Diagonal Subplots')
Out[4]:

Histograms along Diagonal¶

In [5]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# Four numeric columns holding twenty random samples each.
numeric_columns = ['Column A', 'Column B', 'Column C', 'Column D']
dataframe = pd.DataFrame(np.random.randn(20, 4), columns=numeric_columns)

# The twenty labels are the same ten-label pattern repeated twice.
fruit_pattern = ['apple', 'apple', 'grape', 'apple', 'apple',
                 'grape', 'pear', 'pear', 'apple', 'pear']
dataframe['Fruit'] = pd.Series(fruit_pattern * 2)

# diag='histogram' selects histograms as the chart type on the main diagonal.
fig = ff.create_scatterplotmatrix(
    dataframe,
    diag='histogram',
    index='Fruit',
    height=800,
    width=800,
)
py.iplot(fig, filename='Histograms along Diagonal Subplots')
Out[5]:

Sequential Colormap¶

In [6]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# One hundred random samples in each of three numeric columns.
column_names = ['Column A', 'Column B', 'Column C']
dataframe = pd.DataFrame(np.random.randn(100, 3), columns=column_names)

# The index is the numeric 'Column A'; 'Blues' is passed as the colormap
# by its Plotly scale name.
fig = ff.create_scatterplotmatrix(
    dataframe,
    diag='histogram',
    index='Column A',
    colormap='Blues',
    height=800,
    width=800,
)
py.iplot(fig, filename='Use a Sequential Colormap')
Out[6]:

Custom Sequential Colormap¶

In [7]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# One hundred random samples in each of three numeric columns.
column_names = ['Column A', 'Column B', 'Column C']
dataframe = pd.DataFrame(np.random.randn(100, 3), columns=column_names)

# A hand-picked colormap mixing rgb and hex color strings.
# NOTE(review): the create_scatterplotmatrix docstring says that with
# colormap_type='seq' only the first two colors of a list are considered.
custom_colors = ['rgb(100, 150, 255)', '#F0963C', 'rgb(51, 255, 153)']

fig = ff.create_scatterplotmatrix(
    dataframe,
    diag='histogram',
    index='Column A',
    colormap=custom_colors,
    colormap_type='seq',
    height=800,
    width=800,
)
py.iplot(fig, filename='Custom Sequential Colormap')
Out[7]:

Partition Numeric Data into Intervals¶

In [8]:
import plotly.plotly as py
# Use the same lowercase `ff` alias as every other example on this page.
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

# One hundred random samples in each of three numeric columns.
dataframe = pd.DataFrame(np.random.randn(100, 3),
                         columns=['Column A', 'Column B', 'Column C'])

# endpts takes an increasing sequence of numbers that defines intervals on
# the real line; the numeric entries of the index column ('Column A') are
# grouped into those intervals.
fig = ff.create_scatterplotmatrix(dataframe, diag='box', index='Column A',
                                  colormap='Portland', colormap_type='seq',
                                  endpts=[-1, 0, 1],
                                  height=800, width=800, size=12,
                                  marker=dict(symbol=25))
py.iplot(fig, filename = 'Partition Numeric Data into Intervals')
Out[8]:

Categorical Colormap¶

In [9]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

import numpy as np
import pandas as pd
import random

# One hundred random samples in each of two numeric columns.
dataframe = pd.DataFrame(
    np.random.randn(100, 2),
    columns=['Column A', 'Column B'],
)

# Assign a randomly chosen fruit name to every row.
fruits = ['apple', 'blueberry', 'banana', 'orange', 'raspberry']
new_column = [random.choice(fruits) for _ in range(100)]
dataframe['Fruits'] = pd.Series(new_column, index=dataframe.index)

# Five colors given in mixed forms: hex strings, an rgb string and
# color tuples with components in [0, 1].
fruit_palette = ['#d95f0e', (0.2, 0.6, 1), 'rgb(230,247,188)', '#bcbddc', (0.8, 0.7, 0.65)]

fig = ff.create_scatterplotmatrix(
    dataframe,
    index='Fruits',
    diag='histogram',
    colormap=fruit_palette,
    colormap_type='cat',
    height=800,
    width=800,
    size=15,
    marker=dict(symbol='square-open'),
)
py.iplot(fig, filename='Use a Categorical Colormap')
Out[9]:

Colormap as a Dictionary¶

In [10]:
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

import numpy as np
import pandas as pd
import random

# One hundred random samples in each of three numeric columns.
dataframe = pd.DataFrame(
    np.random.randn(100, 3),
    columns=['Column A', 'Column B', 'Column C'],
)

# Label every row with a randomly chosen color name.
strange_colors = ['turquoise', 'limegreen', 'goldenrod']
new_column = [random.choice(strange_colors) for _ in range(100)]
dataframe['Colors'] = pd.Series(new_column, index=dataframe.index)

# Map each label in the 'Colors' column to a hex color; every string
# entry of the index column has a matching key here.
color_for_label = {
    'turquoise': '#00F5FF',
    'limegreen': '#32CD32',
    'goldenrod': '#DAA520',
}

fig = ff.create_scatterplotmatrix(
    dataframe,
    diag='box',
    index='Colors',
    colormap=color_for_label,
    colormap_type='cat',
    height=800,
    width=800,
)
py.iplot(fig, filename='Colormap as a Dictionary')
Out[10]:

Reference¶

In [11]:
# Print the signature and docstring of the scatterplot-matrix figure factory.
help(ff.create_scatterplotmatrix)
Help on function create_scatterplotmatrix in module plotly.figure_factory._scatterplot:

create_scatterplotmatrix(df, index=None, endpts=None, diag='scatter', height=500, width=500, size=6, title='Scatterplot Matrix', colormap=None, colormap_type='cat', dataframe=None, headers=None, index_vals=None, **kwargs)
    Returns data for a scatterplot matrix.

    :param (array) df: array of the data with column headers
    :param (str) index: name of the index column in data array
    :param (list|tuple) endpts: takes an increasing sequence of numbers
        that defines intervals on the real line. They are used to group
        the entries in an index of numbers into their corresponding
        interval and therefore can be treated as categorical data
    :param (str) diag: sets the chart type for the main diagonal plots.
        The options are 'scatter', 'histogram' and 'box'.
    :param (int|float) height: sets the height of the chart
    :param (int|float) width: sets the width of the chart
    :param (float) size: sets the marker size (in px)
    :param (str) title: the title label of the scatterplot matrix
    :param (str|tuple|list|dict) colormap: either a plotly scale name,
        an rgb or hex color, a color tuple, a list of colors or a
        dictionary. An rgb color is of the form 'rgb(x, y, z)' where
        x, y and z belong to the interval [0, 255] and a color tuple is a
        tuple of the form (a, b, c) where a, b and c belong to [0, 1].
        If colormap is a list, it must contain valid color types as its
        members.
        If colormap is a dictionary, all the string entries in
        the index column must be a key in colormap. In this case, the
        colormap_type is forced to 'cat' or categorical
    :param (str) colormap_type: determines how colormap is interpreted.
        Valid choices are 'seq' (sequential) and 'cat' (categorical). If
        'seq' is selected, only the first two colors in colormap will be
        considered (when colormap is a list) and the index values will be
        linearly interpolated between those two colors. This option is
        forced if all index values are numeric.
        If 'cat' is selected, a color from colormap will be assigned to
        each category from index, including the intervals if endpts is
        being used
    :param (dict) **kwargs: a dictionary of scatterplot arguments
        The only forbidden parameters are 'size', 'color' and
        'colorscale' in 'marker'

    Example 1: Vanilla Scatterplot Matrix
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe
    df = pd.DataFrame(np.random.randn(10, 2),
                    columns=['Column 1', 'Column 2'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df)

    # Plot
    py.iplot(fig, filename='Vanilla Scatterplot Matrix')
    ```

    Example 2: Indexing a Column
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 2),
                       columns=['A', 'B'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, index='Fruit', size=10)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix with Index')
    ```

    Example 3: Styling the Diagonal Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with index
    df = pd.DataFrame(np.random.randn(10, 4),
                       columns=['A', 'B', 'C', 'D'])

    # Add another column of strings to the dataframe
    df['Fruit'] = pd.Series(['apple', 'apple', 'grape', 'apple', 'apple',
                             'grape', 'pear', 'pear', 'apple', 'pear'])

    # Create scatterplot matrix
    fig = create_scatterplotmatrix(df, diag='box', index='Fruit', height=1000,
                                   width=1000)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Diagonal Styling')
    ```

    Example 4: Use a Theme to Style the Subplots
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a built-in
    # Plotly palette scale and indexing column 'A'
    fig = create_scatterplotmatrix(df, diag='histogram', index='A',
                                   colormap='Blues', height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Colormap Theme')
    ```

    Example 5: Example 4 with Interval Factoring
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['A', 'B', 'C'])

    # Create scatterplot matrix using a list of four colors (two rgb
    # strings, a hex string and a color tuple) and endpoints at -1, 0 and 1
    fig = create_scatterplotmatrix(df, diag='histogram', index='A',
                                   colormap=['rgb(140, 255, 50)',
                                             'rgb(170, 60, 115)', '#6c4774',
                                             (0.5, 0.1, 0.8)],
                                   endpts=[-1, 0, 1], height=800, width=800)

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - Intervals')
    ```

    Example 6: Using the colormap as a Dictionary
    ```
    import plotly.plotly as py
    from plotly.graph_objs import graph_objs
    from plotly.figure_factory import create_scatterplotmatrix

    import numpy as np
    import pandas as pd
    import random

    # Create dataframe with random data
    df = pd.DataFrame(np.random.randn(100, 3),
                       columns=['Column A',
                                'Column B',
                                'Column C'])

    # Add new color column to dataframe
    new_column = []
    strange_colors = ['turquoise', 'limegreen', 'goldenrod']

    for j in range(100):
        new_column.append(random.choice(strange_colors))
    df['Colors'] = pd.Series(new_column, index=df.index)

    # Create scatterplot matrix using a dictionary of hex color values
    # which correspond to actual color names in 'Colors' column
    fig = create_scatterplotmatrix(
        df, diag='box', index='Colors',
        colormap= dict(
            turquoise = '#00F5FF',
            limegreen = '#32CD32',
            goldenrod = '#DAA520'
        ),
        colormap_type='cat',
        height=800, width=800
    )

    # Plot
    py.iplot(fig, filename = 'Scatterplot Matrix - colormap dictionary ')
    ```