Skip to content

Chapter 4: Working with Databases

Inserting and extracting relational data in Python

Create a table to use

CREATE TABLE users (
    name text,
    id integer,
    street text,
    city text,
    zip text
);

Create a connection to the database


import psycopg2

conn_string="dbname='dataengineering' host='206.189.46.156' user='postgres' password='jdW9IuFFztFJhHdMEuI3'"

conn=psycopg2.connect(conn_string)
cur=conn.cursor()

Insert a row of data


query = "insert into users (id,name,street,city,zip) values({},'{}','{}','{}','{}')".format(1,'Big Bird','Sesame Street','Fakeville','12345')

# see the query sent to the database
cur.mogrify(query)

# Execute the query
cur.execute(query)

# save the changes to the database
conn.commit()

Insert a row of data using tuples

query2 = "insert into users (id,name,street,city,zip) values(%s,%s,%s,%s,%s)"
data=(2,'Grouch','Sesame Street','Fakeville','12345')

# see the query sent to the database
cur.mogrify(query2,data)

# Execute the query
cur.execute(query2,data)

# save the changes to the database
conn.commit()

Inserting multiple records


from faker import Faker

fake = Faker()
data = []
i = 3 # there are already 2 entries in the database

# create data to load to database
for r in range(1000):
    data.append((i,
                fake.name(),
                fake.street_address(),
                fake.city(),
                fake.zipcode()))
    i = i + 1

data_for_db=tuple(data)

query3 = "INSERT INTO users (id, name, street, city, zip) values(%s, %s, %s, %s, %s)"

# see the query for a single row
cur.mogrify(query3, data_for_db[1])

# insert data and save database changes
cur.executemany(query3, data_for_db)
conn.commit()

Extracting data with Python

query4 = "SELECT * FROM users"

cur.execute(query4)

# One option is to iterate over the results
for record in cur:
    print(record)


# another option is to use the "fetch" related methods
cur.fetchall() # returns a list of tuples for all results

cur.fetchmany(10) # returns a list of tuples for the first n specified

cur.fetchone() # returns a single entry as a tuple


# see how many rows are returned
cur.rowcount

# Write query results directly to a csv file 
f=open('data/fromdb.csv','w')
cur.copy_to(f,'users',sep=',')
f.close()

Extracting data with Pandas

import pandas as pd

df = pd.read_sql("SELECT * FROM USERS", conn)

# have a look at the data
df.head()