Creating a SQLite database with parent and child table

create_one_to_many_sql.ipynb

https://mybinder.org/v2/gh/aubreymoore/jb/HEAD?urlpath=https%3A%2F%2Fgithub.com%2Faubreymoore%2Fjb%2Fcreate_one_to_many_sql.ipynb

An example of setting up a SQLite3 database containing a one_to_many relationship between a parent and a child table.

Both tables have a unique id field for each record.

The biggest problem I had was preventing database integrity errors which caused crashes. Solved by checking dataframes prior to exporting with to_sqlite.

import sqlite3
import pandas as pd
from icecream import ic
import os

# Initialize variables for this run

db_path = 'example.sqlite3'

people_data = [
    {'name': 'Aubrey Moore', 'age': 74},
    {'name': 'Jane Ginlo Moore', 'age': 72}
]

notes_data = [
    {'name': 'Aubrey Moore', 'note': 'an OK guy'},
    {'name': 'Jane Ginlo Moore', 'note': 'a beautiful lady with a beautiful voice'},
    {'name': 'Jane Ginlo Moore', 'note': 'She, who must be obeyed'}
]

schema_sql = """
CREATE TABLE IF NOT EXISTS people (
    personid INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT UNIQUE,
    age INTEGER
);

CREATE TABLE IF NOT EXISTS notes (
    noteid INTEGER PRIMARY KEY AUTOINCREMENT,
    personid INTEGER,
    note TEXT,
    FOREIGN KEY(personid) REFERENCES people(personid) ON DELETE CASCADE  
);
"""

# connect to db. A new db is created if db_path does not exist.
con = sqlite3.connect(db_path)

# create new db if one does not exist; otherwise tables are not modified
con.executescript(schema_sql)
    
# store people_data in a dataframe
df = pd.DataFrame(people_data)

# this code prevents database integrity errors
# by removing dataframe rows in which name is already in the 'people' table
results = con.execute('SELECT name FROM people').fetchall()
names_in_people_table = set([name[0] for name in results])
ic(names_in_people_table)
df = df[~df['name'].isin(names_in_people_table)]

# export to db
df.to_sql('people', con, if_exists='append', index=False)

# read and display 'people' db table
df = pd.read_sql('SELECT * FROM people', con)
ic(df)

personid_map = dict(con.execute('SELECT name, personid from people').fetchall())
ic(personid_map)

# store notes_data in a dataframe
df = pd.DataFrame(notes_data)

# replace name column with personid column by using name as a key for personid_map
df['personid'] = df['name'].map(personid_map)

# This code prevents duplicate records in the notes table.
df = df[~df['name'].isin(names_in_people_table)]

# drop the name column to match the note column schema (IMPORTANT)
df.drop('name', inplace=True, axis=1)

# export to db
df.to_sql('notes', con, if_exists='append', index=False)

# read and display 'notes' db table
df = pd.read_sql('SELECT * FROM notes', con)
ic(df)

con.close()

ic| names_in_people_table: {'Jane Ginlo Moore', 'Aubrey Moore'}
ic| df:    personid              name  age
        0         1      Aubrey Moore   74
        1         2  Jane Ginlo Moore   72
ic| personid_map: {'Aubrey Moore': 1, 'Jane Ginlo Moore': 2}
ic| df:    noteid  personid                                     note
        0       1         1                                an OK guy
        1       2         2  a beautiful lady with a beautiful voice
        2       3         2                  She, who must be obeyed