etrotta committed on
Commit
5490952
·
1 Parent(s): 97f8c36

Mention lazy execution and multiplexing

Browse files
Files changed (1) hide show
  1. polars/03_loading_data.py +58 -2
polars/03_loading_data.py CHANGED
@@ -2,7 +2,7 @@
2
  # requires-python = ">=3.12"
3
  # dependencies = [
4
  # "adbc-driver-sqlite==1.7.0",
5
- # "duckdb==1.4.0.dev2673",
6
  # "lxml==6.0.0",
7
  # "marimo",
8
  # "pandas==2.3.2",
@@ -14,7 +14,7 @@
14
 
15
  import marimo
16
 
17
- __generated_with = "0.15.0"
18
  app = marimo.App(width="medium")
19
 
20
 
@@ -508,6 +508,62 @@ def _(adlfs, df, os, pl):
508
  return
509
 
510
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
  @app.cell(hide_code=True)
512
  def _(mo):
513
  mo.md(
 
2
  # requires-python = ">=3.12"
3
  # dependencies = [
4
  # "adbc-driver-sqlite==1.7.0",
5
+ # "duckdb>=1.4.0.dev",
6
  # "lxml==6.0.0",
7
  # "marimo",
8
  # "pandas==2.3.2",
 
14
 
15
  import marimo
16
 
17
+ __generated_with = "0.15.2"
18
  app = marimo.App(width="medium")
19
 
20
 
 
508
  return
509
 
510
 
511
+ @app.cell(hide_code=True)
512
+ def _(mo):
513
+ mo.md(
514
+ r"""
515
+ # Multiplexing
516
+
517
+ You can also split a query into multiple sinks via [multiplexing](https://docs.pola.rs/user-guide/lazy/multiplexing/). This avoids reading the source multiple times, repeating the same operations for each sink, or collecting intermediate results into memory.
518
+ """
519
+ )
520
+ return
521
+
522
+
523
+ @app.cell
524
+ def _(folder, lz, pl):
525
+ lz2 = lz.with_columns(pl.col(pl.String).str.to_uppercase())
526
+ lz3 = lz.with_columns(pl.col(pl.String).str.to_lowercase())
527
+
528
+ # Collecting multiple LazyFrames into memory
529
+ _df, _df2, _df3 = pl.collect_all([lz, lz2, lz3])
530
+
531
+ # Sinking multiple LazyFrames into different destinations
532
+ sinks = [
533
+ lz.sink_csv(folder / "data_1.csv", lazy=True),
534
+ lz2.sink_csv(folder / "data_2.csv", lazy=True),
535
+ lz3.sink_csv(folder / "data_3.csv", lazy=True),
536
+ ]
537
+ _ = pl.collect_all(sinks)
538
+ return (sinks,)
539
+
540
+
541
+ @app.cell(hide_code=True)
542
+ def _(mo):
543
+ mo.md(
544
+ r"""
545
+ # Async Execution
546
+
547
+ Polars also has experimental support for running lazy queries in `async` mode, letting you `await` operations inside of async functions.
548
+ """
549
+ )
550
+ return
551
+
552
+
553
+ @app.cell
554
+ async def _(lz):
555
+ await lz.collect_async()
556
+ return
557
+
558
+
559
+ @app.cell
560
+ async def _(folder, lz, pl, sinks):
561
+ # To write to a file instead, call a sink method with `lazy=True` (e.g. `lz.sink_csv(path, lazy=True)`), then `.collect_async()` on the result, or pass several such sinks to `pl.collect_all_async(...)`
562
+ _ = await lz.sink_csv(folder / "data_from_async.csv", lazy=True).collect_async()
563
+ _ = await pl.collect_all_async(sinks)
564
+ return
565
+
566
+
567
  @app.cell(hide_code=True)
568
  def _(mo):
569
  mo.md(