Construct a DataFrame#

[1]:
import polars as pl

Create DataFrame#

Ref:

Create DataFrame from {"col": [values...]}

[2]:
# Column-oriented input: each key is a column name and each list holds that column's values.
data = {"a": [1, 2], "b": [3, 4]}
df = pl.DataFrame(data)
df
[2]:
shape: (2, 2)
ab
i64i64
13
24
[3]:
# Data type of every column.
df.dtypes
[3]:
[Int64, Int64]
[4]:
# Column names paired with their data types.
df.schema
[4]:
Schema([('a', Int64), ('b', Int64)])
[5]:
# Column-oriented input: {"col1": [...], "col2": [...]}.
# This is the most efficient form because polars is column oriented.
# The dict schema maps each column name to an explicit polars dtype,
# so the integer values given for col1 are stored as Float32.
data = {"col1": [0, 2], "col2": [3, 7]}
df = pl.DataFrame(data, schema={"col1": pl.Float32, "col2": pl.Int64})
df
[5]:
shape: (2, 2)
col1col2
f32i64
0.03
2.07
[6]:
# The schema can also be given as a list of (column name, dtype) pairs.
data = {"col1": [1, 2], "col2": [3, 4]}
df = pl.DataFrame(data, schema=[("col1", pl.Float32), ("col2", pl.Int64)])
df
[6]:
shape: (2, 2)
col1col2
f32i64
1.03
2.04
[7]:
# Python built-in types are accepted in the schema;
# here float and int produce Float64 and Int64 columns.
data = {"col1": [0, 2], "col2": [3, 7]}
df = pl.DataFrame(data, schema={"col1": float, "col2": int})
df
[7]:
shape: (2, 2)
col1col2
f64i64
0.03
2.07
[8]:
# Built-in Python types also work in the list-of-pairs schema form.
data = {"col1": [1, 2], "col2": [3, 4]}
df = pl.DataFrame(data, schema=[("col1", float), ("col2", int)])
df
[8]:
shape: (2, 2)
col1col2
f64i64
1.03
2.04

Create DataFrame from a 2D array (list of lists)

[9]:
# Row-oriented input: each inner list is one row.
# orient="row" states this explicitly, and schema supplies the column names.
data = [[1, "Alice"], [2, "Bob"]]
df = pl.DataFrame(data, schema=["id", "name"], orient="row")
df
[9]:
shape: (2, 2)
idname
i64str
1"Alice"
2"Bob"

Create DataFrame from a list of dictionaries (structs)

[10]:
# For a list of row dicts [{key: value}, ...], polars tries to infer each column's data type.
data = [
    {"id": 1, "name": "Alice"},
    {"id": 2, "name": "Bob"},
]
df = pl.DataFrame(data)
df
[10]:
shape: (2, 2)
idname
i64str
1"Alice"
2"Bob"
[11]:
# For a list of row dicts [{key: value}, ...], prefer defining the schema explicitly
# instead of relying on inference.
data = [
    {"id": 1, "name": "Alice"},
    {"id": 2, "name": "Bob"},
]
df = pl.DataFrame(data, schema={"id": int, "name": str})
df
[11]:
shape: (2, 2)
idname
i64str
1"Alice"
2"Bob"

Handle Type Mismatch#

[12]:
# No schema is passed here, so polars has to infer the type of "bank_account"
# from mixed values: the first two rows are str, the third row is an int.
data = [
    {"id": 1, "name": "Alice", "bank_account": "1111111111"},
    {"id": 2, "name": "Bob", "bank_account": "2222222222"},
    {"id": 3, "name": "Cathy", "bank_account": 3333333333},
]
df = pl.DataFrame(data)
df
[12]:
shape: (3, 3)
idnamebank_account
i64strstr
1"Alice""1111111111"
2"Bob""2222222222"
3"Cathy""3333333333"
[13]:
# Same mixed str/int input, but the explicit schema declares "bank_account" as str
# instead of leaving the column type to inference.
data = [
    {"id": 1, "name": "Alice", "bank_account": "1111111111"},
    {"id": 2, "name": "Bob", "bank_account": "2222222222"},
    {"id": 3, "name": "Cathy", "bank_account": 3333333333},
]
df = pl.DataFrame(data, schema={"id": int, "name": str, "bank_account": str})
df
[13]:
shape: (3, 3)
idnamebank_account
i64strstr
1"Alice""1111111111"
2"Bob""2222222222"
3"Cathy""3333333333"

Pretty Print a DataFrame#

[15]:
import polars as pl

# Build nine rows whose "details" value is a nested dict (a struct column in polars).
# NOTE(review): "id" is the literal 1 in every row while "name" and "SSN" vary with i;
# confirm whether "id": i was intended.
data = [
    {"id": 1, "name": f"Name-{i}", "details": {"Phone": "111-222-3333", "SSN": f"SSN-{i}"}}
    for i in range(1, 10)
]
df = pl.DataFrame(data)
df
[15]:
shape: (9, 3)
idnamedetails
i64strstruct[2]
1"Name-1"{"111-222-3333","SSN-1"}
1"Name-2"{"111-222-3333","SSN-2"}
1"Name-3"{"111-222-3333","SSN-3"}
1"Name-4"{"111-222-3333","SSN-4"}
1"Name-5"{"111-222-3333","SSN-5"}
1"Name-6"{"111-222-3333","SSN-6"}
1"Name-7"{"111-222-3333","SSN-7"}
1"Name-8"{"111-222-3333","SSN-8"}
1"Name-9"{"111-222-3333","SSN-9"}

The tabulate library converts your DataFrame into a nicely formatted ASCII table.

[16]:
from tabulate import tabulate
from pathlib import Path

# The table is saved as dataframe.txt in the current working directory.
dir_here = Path.cwd()
path = dir_here / "dataframe.txt"

# Render the DataFrame as an ASCII grid, using its column names as the headers.
text = tabulate(df.to_dict(), headers=list(df.schema),  tablefmt="grid")
# Persist the rendered table so it can be opened in an editor via the printed file:// link.
path.write_text(text)
print(f"See Dataframe at: file://{path}")
print(text) # Also show the same table inline
See Dataframe at: file:///Users/sanhehu/Documents/GitHub/learn_polars-project/docs/source/02-DataFrame/01-Construct-A-DataFrame/dataframe.txt
+------+--------+-------------------------------------------+
|   id | name   | details                                   |
+======+========+===========================================+
|    1 | Name-1 | {'Phone': '111-222-3333', 'SSN': 'SSN-1'} |
+------+--------+-------------------------------------------+
|    1 | Name-2 | {'Phone': '111-222-3333', 'SSN': 'SSN-2'} |
+------+--------+-------------------------------------------+
|    1 | Name-3 | {'Phone': '111-222-3333', 'SSN': 'SSN-3'} |
+------+--------+-------------------------------------------+
|    1 | Name-4 | {'Phone': '111-222-3333', 'SSN': 'SSN-4'} |
+------+--------+-------------------------------------------+
|    1 | Name-5 | {'Phone': '111-222-3333', 'SSN': 'SSN-5'} |
+------+--------+-------------------------------------------+
|    1 | Name-6 | {'Phone': '111-222-3333', 'SSN': 'SSN-6'} |
+------+--------+-------------------------------------------+
|    1 | Name-7 | {'Phone': '111-222-3333', 'SSN': 'SSN-7'} |
+------+--------+-------------------------------------------+
|    1 | Name-8 | {'Phone': '111-222-3333', 'SSN': 'SSN-8'} |
+------+--------+-------------------------------------------+
|    1 | Name-9 | {'Phone': '111-222-3333', 'SSN': 'SSN-9'} |
+------+--------+-------------------------------------------+
[ ]: