PlotlyとCufflinks#
Warning
2024/05/09現在,cufflinksは3年前からメンテナンスが止まっているようです.そのため,このノートを参考にcufflinksを勉強し始める必要はありません.
このノートは記録のためだけに公開されています.
可視化プラットフォームPlotlyとpd.DataFrameから簡単にPlotlyを使うためのラッパーcufflinks#
Plotlyを使うことで,HTML, CSS, JavaScriptなどの力を借りたインタラクティブなグラフを作成できます.PlotlyはそのままPythonライブラリとして公開されていますが,PandasのDataFrameから手軽にプロットできるcufflinksというライブラリが別途公開されているので,ここではこれの使い方を確認していきます.
インストール#
インストールは以下のコマンドで行います。
pip install plotly
pip install cufflinks
データの準備#
まずはデータを読み込みます。
import warnings
warnings.filterwarnings('ignore')
from sklearn import datasets
from sklearn.manifold import TSNE
import pandas as pd
import cufflinks as cf
import numpy as np
cf.go_offline()
digits = datasets.load_digits()
digits.data.shape
(1797, 64)
df = pd.DataFrame(digits.data)
df["label"] = digits["target"]
df.head()
 | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | label |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 0.0 | 5.0 | 13.0 | 9.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 6.0 | 13.0 | 10.0 | 0.0 | 0.0 | 0.0 | 0 |
1 | 0.0 | 0.0 | 0.0 | 12.0 | 13.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 11.0 | 16.0 | 10.0 | 0.0 | 0.0 | 1 |
2 | 0.0 | 0.0 | 0.0 | 4.0 | 15.0 | 12.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 11.0 | 16.0 | 9.0 | 0.0 | 2 |
3 | 0.0 | 0.0 | 7.0 | 15.0 | 13.0 | 1.0 | 0.0 | 0.0 | 0.0 | 8.0 | ... | 0.0 | 0.0 | 0.0 | 7.0 | 13.0 | 13.0 | 9.0 | 0.0 | 0.0 | 3 |
4 | 0.0 | 0.0 | 0.0 | 1.0 | 11.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 16.0 | 4.0 | 0.0 | 0.0 | 4 |
5 rows × 65 columns
cufflinksで2d scatter plot#
X2 = TSNE(n_components=2,verbose=True).fit_transform(df.values)
embed2d = pd.DataFrame(X2, columns=list("XY"))
embed2d["label"] = df["label"]
embed2d.head()
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1797 samples in 0.000s...
[t-SNE] Computed neighbors for 1797 samples in 0.319s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1797
[t-SNE] Computed conditional probabilities for sample 1797 / 1797
[t-SNE] Mean sigma: 11.619740
[t-SNE] KL divergence after 250 iterations with early exaggeration: 60.883373
[t-SNE] KL divergence after 1000 iterations: 0.741524
 | X | Y | label |
---|---|---|---|
0 | -2.675452 | 57.514252 | 0 |
1 | 12.662912 | -9.085543 | 1 |
2 | -15.789229 | -18.999388 | 2 |
3 | -36.275799 | 5.813906 | 3 |
4 | 43.600700 | -9.102531 | 4 |
embed2d.iplot(
kind="scatter",
x="X", y="Y",
xTitle="X", yTitle="Y",
categories="label",
title="T-SNE 2D demo",
mode='markers',
)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[6], line 1
----> 1 embed2d.iplot(
2 kind="scatter",
3 x="X", y="Y",
4 xTitle="X", yTitle="Y",
5 categories="label",
6 title="T-SNE 2D demo",
7 mode='markers',
8 )
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/cufflinks/plotlytools.py:798, in _iplot(self, kind, data, layout, filename, sharing, title, xTitle, yTitle, zTitle, theme, colors, colorscale, fill, width, dash, mode, interpolation, symbol, size, barmode, sortbars, bargap, bargroupgap, bins, histnorm, histfunc, orientation, boxpoints, annotations, keys, bestfit, bestfit_colors, mean, mean_colors, categories, x, y, z, text, gridcolor, zerolinecolor, margin, labels, values, secondary_y, secondary_y_title, subplots, shape, error_x, error_y, error_type, locations, lon, lat, asFrame, asDates, asFigure, asImage, dimensions, asPlot, asUrl, online, **kwargs)
796 else:
797 _size=size
--> 798 _data=Scatter3d(x=_x,y=_y,mode=mode,name=_,
799 marker=dict(color=colors[_],symbol=symbol,size=_size,opacity=opacity,
800 line=dict(width=width)),textfont=tools.getLayout(theme=theme)['xaxis']['titlefont'])
801 if '3d' in kind:
802 _data=Scatter3d(x=_x,y=_y,z=_z,mode=mode,name=_,
803 marker=dict(color=colors[_],symbol=symbol,size=_size,opacity=opacity,
804 line=dict(width=width)),textfont=tools.getLayout(theme=theme)['xaxis']['titlefont'])
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/plotly/graph_objs/_scatter3d.py:2694, in Scatter3d.__init__(self, arg, connectgaps, customdata, customdatasrc, error_x, error_y, error_z, hoverinfo, hoverinfosrc, hoverlabel, hovertemplate, hovertemplatesrc, hovertext, hovertextsrc, ids, idssrc, legend, legendgroup, legendgrouptitle, legendrank, legendwidth, line, marker, meta, metasrc, mode, name, opacity, projection, scene, showlegend, stream, surfaceaxis, surfacecolor, text, textfont, textposition, textpositionsrc, textsrc, texttemplate, texttemplatesrc, uid, uirevision, visible, x, xcalendar, xhoverformat, xsrc, y, ycalendar, yhoverformat, ysrc, z, zcalendar, zhoverformat, zsrc, **kwargs)
2692 _v = name if name is not None else _v
2693 if _v is not None:
-> 2694 self["name"] = _v
2695 _v = arg.pop("opacity", None)
2696 _v = opacity if opacity is not None else _v
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/plotly/basedatatypes.py:4874, in BasePlotlyType.__setitem__(self, prop, value)
4870 self._set_array_prop(prop, value)
4872 # ### Handle simple property ###
4873 else:
-> 4874 self._set_prop(prop, value)
4875 else:
4876 # Make sure properties dict is initialized
4877 self._init_props()
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/plotly/basedatatypes.py:5218, in BasePlotlyType._set_prop(self, prop, val)
5216 return
5217 else:
-> 5218 raise err
5220 # val is None
5221 # -----------
5222 if val is None:
5223 # Check if we should send null update
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/plotly/basedatatypes.py:5213, in BasePlotlyType._set_prop(self, prop, val)
5210 validator = self._get_validator(prop)
5212 try:
-> 5213 val = validator.validate_coerce(val)
5214 except ValueError as err:
5215 if self._skip_invalid:
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/_plotly_utils/basevalidators.py:1104, in StringValidator.validate_coerce(self, v)
1102 v = str(v)
1103 else:
-> 1104 self.raise_invalid_val(v)
1106 if self.no_blank and len(v) == 0:
1107 self.raise_invalid_val(v)
File ~/workspace/prpy/.venv/lib/python3.11/site-packages/_plotly_utils/basevalidators.py:296, in BaseValidator.raise_invalid_val(self, v, inds)
293 for i in inds:
294 name += "[" + str(i) + "]"
--> 296 raise ValueError(
297 """
298 Invalid value of type {typ} received for the '{name}' property of {pname}
299 Received value: {v}
300
301 {valid_clr_desc}""".format(
302 name=name,
303 pname=self.parent_name,
304 typ=type_str(v),
305 v=repr(v),
306 valid_clr_desc=self.description(),
307 )
308 )
ValueError:
Invalid value of type 'numpy.int64' received for the 'name' property of scatter3d
Received value: 0
The 'name' property is a string and must be specified as:
- A string
- A number that will be converted to a string
cufflinksで3d scatter plot#
X3 = TSNE(n_components=3,verbose=True).fit_transform(df.values)
embed3d = pd.DataFrame(X3, columns=list("XYZ"))
embed3d["label"] = df["label"]
embed3d.head()
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 1797 samples in 0.007s...
[t-SNE] Computed neighbors for 1797 samples in 0.516s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1797
[t-SNE] Computed conditional probabilities for sample 1797 / 1797
[t-SNE] Mean sigma: 8.132731
[t-SNE] KL divergence after 250 iterations with early exaggeration: 61.700394
[t-SNE] Error after 1000 iterations: 0.611338
 | X | Y | Z | label |
---|---|---|---|---|
0 | 2.644546 | 22.259514 | -1.929261 | 0 |
1 | -10.827566 | -4.184731 | 8.216836 | 1 |
2 | -0.487536 | -3.023013 | 13.081998 | 2 |
3 | 7.301555 | 7.178536 | 3.457083 | 3 |
4 | -17.475632 | 1.376311 | -1.810267 | 4 |
embed3d.iplot(kind="scatter3d", x="X", y="Y", z="Z", mode="markers", categories="label", title="T-SNE 3D demo")
その他、よく使うグラフの作り方#
toy1 = pd.DataFrame(np.random.random((100,6)),
columns=("特徴1","特徴2","特徴3","特徴4","特徴5","特徴6"))
toy1.iplot()
toy1.iplot(subplots=True, subplot_titles=True)
toy1.scatter_matrix()
x = cf.datagen.sinwave(10,.25)
x.iplot(kind="surface")