diff --git a/src/py/CHANGELOG.txt b/CHANGELOG.md similarity index 83% rename from src/py/CHANGELOG.txt rename to CHANGELOG.md index 7f9f1dd9..81742fb9 100644 --- a/src/py/CHANGELOG.txt +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ -v1.3.0rc0 +## Unreleased + +### Fixed +- Fix issue where exporting large figures could cause hang [[#442](https://github.com/plotly/Kaleido/pull/442)], with thanks to @EliasTalcott for the contribution! + +## v1.3.0rc0 - Significant refactor, better organization - `write_fig` and `_from_object` now take an additional argument: `cancel_on_error: bool, default False`. See docs. @@ -6,10 +11,10 @@ v1.3.0rc0 - Fixed race condition where two render tasks would choose the same filename -v1.2.0 +## v1.2.0 - Try to use plotly JSON encoder instead of default -v1.1.0 +## v1.1.0 - Add testing - Fix a variety of type bugs - Change order of browser closer to fix hang @@ -20,51 +25,51 @@ v1.1.0 - Add option to silence warnings in start/stop_sync_server - Fix bug where attribute was inconsistently named -v1.1.0rc0 +## v1.1.0rc0 - Improve verbosity of errors when starting kaleido improperly - Add new api functions start/stop_sync_server -v1.0.0 +## v1.0.0 - Add warning if using incompatible Plotly version -v1.0.0rc15 +## v1.0.0rc15 - BUG: Add regex sanitization for auto-filename generation - Further santiize title to filename conversion -v1.0.0rc14 +## v1.0.0rc14 - Pass `plotlyjs` option through from Kaleido() to PageGenerator() -v1.0.0rc13 +## v1.0.0rc13 - Pass mathjax option through when using default plotly.js template -v1.0.0rc12 +## v1.0.0rc12 - Add `kopts` args to top-level shortcuts to pass args to `Kaleido(**kopts)` -v1.0.0rc11 +## v1.0.0rc11 - Write mocker tool to parameterize opts in tests - Crop page to pdf size - Add type checks to user input for improved error messages - Fix latex strings in PDF bolding - Add some choreographer errors to kaleido.errors -v1.0.0rc10 +## v1.0.0rc10 - Allow user to pass Figure-like dicts - Fix bug by which calc fig 
rejected plotly figures - Improve testing -v1.0.0rc9 +## v1.0.0rc9 - Fix v1.0.0rc7 for logic reversal (was conditional error) -v1.0.0rc8 +## v1.0.0rc8 - Add kaleido.calc_fig to return bytes, not write file automatically - Add calc_fig[_sync], write_fig_sync, and write_fig_from_object_sync to kaleido API -v1.0.0rc7 +## v1.0.0rc7 - Use new choreo is_isolated() to improve platform support (snap /tmp sandboxing) -v1.0.0rc6 +## v1.0.0rc6 - Allow PageGenerator(force_cdn=True) to not use plotly.py's installed js -v1.0.0rc5 +## v1.0.0rc5 - Fix bug by which plotly.py's internal js was always ignored - Adds testing for PageGenerator diff --git a/src/py/kaleido/_kaleido_tab/_tab.py b/src/py/kaleido/_kaleido_tab/_tab.py index 2c1da8df..b3baa24d 100644 --- a/src/py/kaleido/_kaleido_tab/_tab.py +++ b/src/py/kaleido/_kaleido_tab/_tab.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING import logistro +import orjson from . import _devtools_utils as _dtools from . import _js_logger @@ -19,10 +20,18 @@ _TEXT_FORMATS = ("svg", "json") # eps +_CHUNK_SIZE = 10 * 1024 * 1024 # 10 MB _logger = logistro.getLogger(__name__) +def _orjson_default(obj): + """Fallback for types orjson can't handle natively (e.g. 
NumPy string arrays).""" + if hasattr(obj, "tolist"): + return obj.tolist() + raise TypeError(f"Type is not JSON serializable: {type(obj).__name__}") + + def _subscribe_new(tab: choreo.Tab, event: str) -> asyncio.Future: """Create subscription to tab clearing old ones first: helper function.""" new_future = tab.subscribe_once(event) @@ -117,22 +126,38 @@ async def _calc_fig( render_prof, stepper, ) -> bytes: - # js script - kaleido_js_fn = ( - r"function(spec, ...args)" - r"{" - r"return kaleido_scopes.plotly(spec, ...args).then(JSON.stringify);" - r"}" - ) - render_prof.profile_log.tick("sending javascript") - result = await _dtools.exec_js_fn( - self.tab, - self._current_js_id, - kaleido_js_fn, + render_prof.profile_log.tick("serializing spec") + spec_str = orjson.dumps( spec, - topojson, - stepper, - ) + default=_orjson_default, + option=orjson.OPT_SERIALIZE_NUMPY, + ).decode() + render_prof.profile_log.tick("spec serialized") + + render_prof.profile_log.tick("sending javascript") + if len(spec_str) <= _CHUNK_SIZE: + kaleido_js_fn = ( + r"function(specStr, ...args)" + r"{" + r"return kaleido_scopes" + r".plotly(JSON.parse(specStr), ...args)" + r".then(JSON.stringify);" + r"}" + ) + result = await _dtools.exec_js_fn( + self.tab, + self._current_js_id, + kaleido_js_fn, + spec_str, + topojson, + stepper, + ) + else: + result = await self._calc_fig_chunked( + spec_str, + topojson=topojson, + stepper=stepper, + ) _raise_error(result) render_prof.profile_log.tick("javascript sent") @@ -154,3 +179,45 @@ async def _calc_fig( render_prof.data_out_size = len(res) render_prof.js_log = self.js_logger.log return res + + async def _calc_fig_chunked( + self, + spec_str: str, + *, + topojson: str | None, + stepper, + ): + _raise_error( + await _dtools.exec_js_fn( + self.tab, + self._current_js_id, + r"function() { window.__kaleido_chunks = []; }", + ) + ) + + for i in range(0, len(spec_str), _CHUNK_SIZE): + chunk = spec_str[i : i + _CHUNK_SIZE] + _raise_error( + await 
_dtools.exec_js_fn( + self.tab, + self._current_js_id, + r"function(c) { window.__kaleido_chunks.push(c); }", + chunk, + ) + ) + + kaleido_js_fn = ( + r"function(...args)" + r"{" + r"var spec = JSON.parse(window.__kaleido_chunks.join(''));" + r"delete window.__kaleido_chunks;" + r"return kaleido_scopes.plotly(spec, ...args).then(JSON.stringify);" + r"}" + ) + return await _dtools.exec_js_fn( + self.tab, + self._current_js_id, + kaleido_js_fn, + topojson, + stepper, + ) diff --git a/src/py/tests/test_large_fig.py b/src/py/tests/test_large_fig.py new file mode 100644 index 00000000..41b45a52 --- /dev/null +++ b/src/py/tests/test_large_fig.py @@ -0,0 +1,26 @@ +import numpy as np +import plotly.graph_objects as go +import pytest + +import kaleido + +TOTAL_POINTS = 5_000_000 + + +@pytest.mark.parametrize( + ("num_traces", "num_points"), + [ + (1, TOTAL_POINTS), + (1_000, TOTAL_POINTS / 1_000), + ], +) +async def test_large_fig(num_traces, num_points): + fig = go.Figure() + for _ in range(num_traces): + fig.add_trace( + go.Scatter( + x=np.arange(num_points, dtype=float), + y=np.arange(num_points, dtype=float), + ) + ) + assert isinstance(await kaleido.calc_fig(fig), bytes)