Construct a DataFrame#
[1]:
import polars as pl
Create DataFrame#
Ref:
Create DataFrame from {"col": [values...]}
[2]:
# Map each column name to the list of values for that column.
data = {"a": [1, 2], "b": [3, 4]}
df = pl.DataFrame(data)
df
[2]:
shape: (2, 2)
| a | b |
|---|---|
| i64 | i64 |
| 1 | 3 |
| 2 | 4 |
[3]:
# Data type of each column, in column order.
df.dtypes
[3]:
[Int64, Int64]
[4]:
# Column names paired with their data types.
df.schema
[4]:
Schema([('a', Int64), ('b', Int64)])
[5]:
# Input shape: {"col1": [...], "col2": [...]}
# This is the most efficient form because polars is column-oriented.
# Here the schema is a dict of column name -> polars dtype.
data = {"col1": [0, 2], "col2": [3, 7]}
df = pl.DataFrame(data, schema={"col1": pl.Float32, "col2": pl.Int64})
df
[5]:
shape: (2, 2)
| col1 | col2 |
|---|---|
| f32 | i64 |
| 0.0 | 3 |
| 2.0 | 7 |
[6]:
# The schema can also be given as a list of (column name, polars dtype) pairs.
data = {"col1": [1, 2], "col2": [3, 4]}
df = pl.DataFrame(data, schema=[("col1", pl.Float32), ("col2", pl.Int64)])
df
[6]:
shape: (2, 2)
| col1 | col2 |
|---|---|
| f32 | i64 |
| 1.0 | 3 |
| 2.0 | 4 |
[7]:
# Built-in Python types are accepted in the schema as well;
# in the output below, float shows up as f64 and int as i64.
data = {"col1": [0, 2], "col2": [3, 7]}
df = pl.DataFrame(data, schema={"col1": float, "col2": int})
df
[7]:
shape: (2, 2)
| col1 | col2 |
|---|---|
| f64 | i64 |
| 0.0 | 3 |
| 2.0 | 7 |
[8]:
# Same as above, with the schema given as a list of (column name, Python type) pairs.
data = {"col1": [1, 2], "col2": [3, 4]}
df = pl.DataFrame(data, schema=[("col1", float), ("col2", int)])
df
[8]:
shape: (2, 2)
| col1 | col2 |
|---|---|
| f64 | i64 |
| 1.0 | 3 |
| 2.0 | 4 |
Create DataFrame from a 2D array (list of lists)
[9]:
# Each inner list is one row; orient="row" makes polars read the data row by row,
# and schema supplies the column names.
data = [[1, "Alice"], [2, "Bob"]]
df = pl.DataFrame(data, schema=["id", "name"], orient="row")
df
[9]:
shape: (2, 2)
| id | name |
|---|---|
| i64 | str |
| 1 | "Alice" |
| 2 | "Bob" |
Create DataFrame from a list of dictionaries (structs)
[10]:
# For [{key: value}, ...], polars tries to infer the data type of each column
data = [
{"id": 1, "name": "Alice"},
{"id": 2, "name": "Bob"},
]
df = pl.DataFrame(data)
df
[10]:
shape: (2, 2)
| id | name |
|---|---|
| i64 | str |
| 1 | "Alice" |
| 2 | "Bob" |
[11]:
# For [{key: value}, ...], I suggest defining the schema explicitly
data = [
{"id": 1, "name": "Alice"},
{"id": 2, "name": "Bob"},
]
df = pl.DataFrame(data, schema={"id": int, "name": str})
df
[11]:
shape: (2, 2)
| id | name |
|---|---|
| i64 | str |
| 1 | "Alice" |
| 2 | "Bob" |
Handle Type Mismatch#
[12]:
# No schema is passed here, so polars has to infer each column's dtype.
# Note that "bank_account" mixes str values with an int in the last row.
data = [
{"id": 1, "name": "Alice", "bank_account": "1111111111"},
{"id": 2, "name": "Bob", "bank_account": "2222222222"},
{"id": 3, "name": "Cathy", "bank_account": 3333333333},
]
df = pl.DataFrame(data)
df
[12]:
shape: (3, 3)
| id | name | bank_account |
|---|---|---|
| i64 | str | str |
| 1 | "Alice" | "1111111111" |
| 2 | "Bob" | "2222222222" |
| 3 | "Cathy" | "3333333333" |
[13]:
# Same mixed-type data, but with an explicit schema that declares
# "bank_account" as str even though the last row holds an int.
data = [
{"id": 1, "name": "Alice", "bank_account": "1111111111"},
{"id": 2, "name": "Bob", "bank_account": "2222222222"},
{"id": 3, "name": "Cathy", "bank_account": 3333333333},
]
df = pl.DataFrame(data, schema={"id": int, "name": str, "bank_account": str})
df
[13]:
shape: (3, 3)
| id | name | bank_account |
|---|---|---|
| i64 | str | str |
| 1 | "Alice" | "1111111111" |
| 2 | "Bob" | "2222222222" |
| 3 | "Cathy" | "3333333333" |
Pretty Print a DataFrame#
[15]:
import polars as pl
# Build nine rows whose "details" value is a nested dict; the output below
# shows polars storing it as a struct column.
# NOTE(review): "id" is hard-coded to 1 for every row while "name" and "SSN"
# vary with i -- presumably `i` was intended; confirm before changing, since
# the rendered outputs show id = 1 throughout.
data = [
{"id": 1, "name": f"Name-{i}", "details": {"Phone": "111-222-3333", "SSN": f"SSN-{i}"}}
for i in range(1, 10)
]
df = pl.DataFrame(data)
df
[15]:
shape: (9, 3)
| id | name | details |
|---|---|---|
| i64 | str | struct[2] |
| 1 | "Name-1" | {"111-222-3333","SSN-1"} |
| 1 | "Name-2" | {"111-222-3333","SSN-2"} |
| 1 | "Name-3" | {"111-222-3333","SSN-3"} |
| 1 | "Name-4" | {"111-222-3333","SSN-4"} |
| 1 | "Name-5" | {"111-222-3333","SSN-5"} |
| 1 | "Name-6" | {"111-222-3333","SSN-6"} |
| 1 | "Name-7" | {"111-222-3333","SSN-7"} |
| 1 | "Name-8" | {"111-222-3333","SSN-8"} |
| 1 | "Name-9" | {"111-222-3333","SSN-9"} |
The tabulate library converts your DataFrame into a nicely formatted ASCII table.
[16]:
from tabulate import tabulate
from pathlib import Path
# Render the DataFrame as an ASCII grid table, using the column names as headers.
dir_here = Path.cwd()
path = dir_here / "dataframe.txt"
text = tabulate(df.to_dict(), headers=list(df.schema), tablefmt="grid")
# Save a copy in the current working directory; pin the encoding so the file
# does not depend on the platform's default locale.
path.write_text(text, encoding="utf-8")
# as_uri() builds a valid file:// link (escapes spaces, also correct on Windows).
print(f"See Dataframe at: {path.as_uri()}")
print(text)  # Also show the table inline
See Dataframe at: file:///Users/sanhehu/Documents/GitHub/learn_polars-project/docs/source/02-DataFrame/01-Construct-A-DataFrame/dataframe.txt
+------+--------+-------------------------------------------+
| id | name | details |
+======+========+===========================================+
| 1 | Name-1 | {'Phone': '111-222-3333', 'SSN': 'SSN-1'} |
+------+--------+-------------------------------------------+
| 1 | Name-2 | {'Phone': '111-222-3333', 'SSN': 'SSN-2'} |
+------+--------+-------------------------------------------+
| 1 | Name-3 | {'Phone': '111-222-3333', 'SSN': 'SSN-3'} |
+------+--------+-------------------------------------------+
| 1 | Name-4 | {'Phone': '111-222-3333', 'SSN': 'SSN-4'} |
+------+--------+-------------------------------------------+
| 1 | Name-5 | {'Phone': '111-222-3333', 'SSN': 'SSN-5'} |
+------+--------+-------------------------------------------+
| 1 | Name-6 | {'Phone': '111-222-3333', 'SSN': 'SSN-6'} |
+------+--------+-------------------------------------------+
| 1 | Name-7 | {'Phone': '111-222-3333', 'SSN': 'SSN-7'} |
+------+--------+-------------------------------------------+
| 1 | Name-8 | {'Phone': '111-222-3333', 'SSN': 'SSN-8'} |
+------+--------+-------------------------------------------+
| 1 | Name-9 | {'Phone': '111-222-3333', 'SSN': 'SSN-9'} |
+------+--------+-------------------------------------------+
[ ]: