Spaces:
Running
Running
Mention lazy execution and multiplexing
Browse files- polars/03_loading_data.py +58 -2
polars/03_loading_data.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# requires-python = ">=3.12"
|
3 |
# dependencies = [
|
4 |
# "adbc-driver-sqlite==1.7.0",
|
5 |
-
# "duckdb
|
6 |
# "lxml==6.0.0",
|
7 |
# "marimo",
|
8 |
# "pandas==2.3.2",
|
@@ -14,7 +14,7 @@
|
|
14 |
|
15 |
import marimo
|
16 |
|
17 |
-
__generated_with = "0.15.
|
18 |
app = marimo.App(width="medium")
|
19 |
|
20 |
|
@@ -508,6 +508,62 @@ def _(adlfs, df, os, pl):
|
|
508 |
return
|
509 |
|
510 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
@app.cell(hide_code=True)
|
512 |
def _(mo):
|
513 |
mo.md(
|
|
|
2 |
# requires-python = ">=3.12"
|
3 |
# dependencies = [
|
4 |
# "adbc-driver-sqlite==1.7.0",
|
5 |
+
# "duckdb>=1.4.0.dev",
|
6 |
# "lxml==6.0.0",
|
7 |
# "marimo",
|
8 |
# "pandas==2.3.2",
|
|
|
14 |
|
15 |
import marimo
|
16 |
|
17 |
+
__generated_with = "0.15.2"
|
18 |
app = marimo.App(width="medium")
|
19 |
|
20 |
|
|
|
508 |
return
|
509 |
|
510 |
|
511 |
+
@app.cell(hide_code=True)
|
512 |
+
def _(mo):
|
513 |
+
mo.md(
|
514 |
+
r"""
|
515 |
+
# Multiplexing
|
516 |
+
|
517 |
+
You can also split a query into multiple sinks via [multiplexing](https://docs.pola.rs/user-guide/lazy/multiplexing/). This avoids reading the source multiple times, repeating the same operations for each sink, or collecting intermediate results into memory.
|
518 |
+
"""
|
519 |
+
)
|
520 |
+
return
|
521 |
+
|
522 |
+
|
523 |
+
@app.cell
|
524 |
+
def _(folder, lz, pl):
|
525 |
+
lz2 = lz.with_columns(pl.col(pl.String).str.to_uppercase())
|
526 |
+
lz3 = lz.with_columns(pl.col(pl.String).str.to_lowercase())
|
527 |
+
|
528 |
+
# Collecting multiple LazyFrames into memory
|
529 |
+
_df, _df2, _df3 = pl.collect_all([lz, lz2, lz3])
|
530 |
+
|
531 |
+
# Sinking multiple LazyFrames into different destinations
|
532 |
+
sinks = [
|
533 |
+
lz.sink_csv(folder / "data_1.csv", lazy=True),
|
534 |
+
lz2.sink_csv(folder / "data_2.csv", lazy=True),
|
535 |
+
lz3.sink_csv(folder / "data_3.csv", lazy=True),
|
536 |
+
]
|
537 |
+
_ = pl.collect_all(sinks)
|
538 |
+
return (sinks,)
|
539 |
+
|
540 |
+
|
541 |
+
@app.cell(hide_code=True)
|
542 |
+
def _(mo):
|
543 |
+
mo.md(
|
544 |
+
r"""
|
545 |
+
# Async Execution
|
546 |
+
|
547 |
+
Polars also has experimental support for running lazy queries asynchronously, letting you `await` them inside async functions.
|
548 |
+
"""
|
549 |
+
)
|
550 |
+
return
|
551 |
+
|
552 |
+
|
553 |
+
@app.cell
|
554 |
+
async def _(lz):
|
555 |
+
await lz.collect_async()
|
556 |
+
return
|
557 |
+
|
558 |
+
|
559 |
+
@app.cell
|
560 |
+
async def _(folder, lz, pl, sinks):
|
561 |
+
# To write to a file, build a lazy sink with one of the `lz.sink_*` methods (e.g. `lz.sink_csv(..., lazy=True)`), then await `.collect_async()` on it or pass it to `pl.collect_all_async(...)`
|
562 |
+
_ = await lz.sink_csv(folder / "data_from_async.csv", lazy=True).collect_async()
|
563 |
+
_ = await pl.collect_all_async(sinks)
|
564 |
+
return
|
565 |
+
|
566 |
+
|
567 |
@app.cell(hide_code=True)
|
568 |
def _(mo):
|
569 |
mo.md(
|