Chapter 4: Working with Databases
Inserting and extracting relational data in Python
Create a table to use
CREATE TABLE users (
name text,
id integer,
street text,
city text,
zip text
);
Create a connection to the database
import psycopg2
conn_string="dbname='dataengineering' host='206.189.46.156' user='postgres' password='jdW9IuFFztFJhHdMEuI3'"
conn=psycopg2.connect(conn_string)
cur=conn.cursor()
Insert a row of data
query = "insert into users (id,name,street,city,zip) values({},'{}','{}','{}','{}')".format(1,'Big Bird','Sesame Street','Fakeville','12345')
# see the query sent to the database
cur.mogrify(query)
# Execute the query
cur.execute(query)
# save the changes to the database
conn.commit()
Insert a row of data using tuples
query2 = "insert into users (id,name,street,city,zip) values(%s,%s,%s,%s,%s)"
data=(2,'Grouch','Sesame Street','Fakeville','12345')
# see the query sent to the database
cur.mogrify(query2,data)
# Execute the query
cur.execute(query2,data)
# save the changes to the database
conn.commit()
Inserting multiple records
from faker import Faker
fake = Faker()
data = []
i = 3 # there are already 2 entries in the database
# create data to load to database
for r in range(1000):
data.append((i,
fake.name(),
fake.street_address(),
fake.city(),
fake.zipcode()))
i = i + 1
data_for_db=tuple(data)
query3 = "INSERT INTO users (id, name, street, city, zip) values(%s, %s, %s, %s, %s)"
# see the query for a single row
cur.mogrify(query3, data_for_db[1])
# insert data and save database changes
cur.executemany(query3, data_for_db)
conn.commit()
Extracting data with Python
query4 = "SELECT * FROM users"
cur.execute(query4)
# One option is to iterate over the results
for record in cur:
print(record)
# another option is to use the "fetch" related methods
cur.fetchall() # returns a list of tuples for all results
cur.fetchmany(10) # returns a list of tuples for the first n specified
cur.fetchone() # returns a single entry as a tuple
# see how many rows are returned
cur.rowcount
# Write query results directly to a csv file
f=open('data/fromdb.csv','w')
cur.copy_to(f,'users',sep=',')
f.close()
Extracting data with Pandas
import pandas as pd
df = pd.read_sql("SELECT * FROM USERS", conn)
# have a look at the data
df.head()