Building a Crime Report Database with Postgres¶

In this project, we'll be building a database for crime reports using Postgres. We'll begin with a csv file boston.csv, which contains information about crimes occurring in Boston. By the end, we'll have a database crime_db hosted on a Postgres server, along with a table boston_crimes. We'll also create groups with certain privileges, such as readonly and readwrite, corresponding to the typical data roles of data analyst and data scientist, respectively. A diagram showing the end product is shown below.

Creating the Initial Database¶

To begin, we'll create an empty database.

Before doing so, we run the following code cell, which drops any databases, groups, or users left over from a previous run of this notebook, so that everything below can be re-created from scratch.

In [ ]:
import psycopg2
conn = psycopg2.connect(dbname="postgres", user="postgres", password="6288")
conn.autocommit = True
cur = conn.cursor()
cur.execute("DROP DATABASE IF EXISTS crime_db;")
cur.execute("DROP GROUP IF EXISTS readonly;")
cur.execute("DROP GROUP IF EXISTS readwrite;")
cur.execute("DROP USER IF EXISTS data_analyst;")
cur.execute("DROP USER IF EXISTS data_scientist;")
conn.close()

Now we can use the psycopg2 library to connect to PostgreSQL and create a database.

In [ ]:
conn = psycopg2.connect(dbname="postgres", user="postgres", password="6288")
conn.autocommit = True      # allows database to be created.
cur = conn.cursor()
cur.execute("CREATE DATABASE crime_db;")
conn.close()

Let's ensure the database was initialized properly by connecting to it and creating the "crimes" schema.

In [ ]:
conn = psycopg2.connect(dbname="crime_db", user="postgres", password="6288")
cur = conn.cursor()
cur.execute("CREATE SCHEMA crimes;")

This new connection will remain open until the end of the notebook. Note that we don't enable autocommit here, so none of our changes become permanent until we call conn.commit() at the end.

Data Exploration¶

Let's load in the data we'll be using for this notebook.

In [ ]:
import csv 

with open('data/boston.csv') as f:
    reader = csv.reader(f)
    columns = next(reader)
    first_row = next(reader)

print(f"column names:    {columns}\n row example:   {first_row}")
column names:    ['incident_number', 'offense_code', 'description', 'date', 'day_of_the_week', 'lat', 'long']
 row example:   ['1', '619', 'LARCENY ALL OTHERS', '2018-09-02', 'Sunday', '42.35779134', '-71.13937053']

Here are some initial observations:

  • The dataset has 7 columns
  • incident_number and offense_code are integers
  • lat and long are floats giving the latitude and longitude of where the crime was committed
  • the description column contains a short textual description of the crime

To determine appropriate datatypes for each column, it will be useful to know how many unique values each column contains. We compute this in the following code cell.

In [ ]:
def get_col_set(file_path, col_index):
    col_set = set()     #empty set for holding unique values in column
    with open(file_path) as f:
        next(f)
        reader = csv.reader(f)
        for row in reader:
            col_set.add(row[col_index])     #add item to set
    
    return col_set 

# Displaying result of above function for each column
for i in range(7):
    print(f"{columns[i]}:  {len(get_col_set('data/boston.csv',i))}")
incident_number:  298329
offense_code:  219
description:  239
date:  1177
day_of_the_week:  7
lat:  18177
long:  18177

We see that the day_of_the_week column has only 7 unique values. This is expected, and suggests we should use an enumerated datatype for this column.

For the description column, we suspect that VARCHAR(n) is an appropriate datatype. To determine a reasonable choice for n, let's check the current length of the longest description.

In [ ]:
#index of description column is 2

descriptions = get_col_set('data/boston.csv',2)

max_length = 0
for description in descriptions:
    if len(description) > max_length:
        max_length = len(description)

print(f"maximum length of description:  {max_length}")
maximum length of description:  58

This suggests choosing a datatype of VARCHAR(n) with n > 58, allowing room for potentially longer descriptions without reserving far more space than the data needs.

Now, let's take a closer look at the offense_code column. We need to determine the minimum and maximum codes in order to get an idea of how many bytes we'll need to represent each code as an integer.

In [ ]:
codes = get_col_set('data/boston.csv',1)        # set of unique codes

min_code = 10000    # larger than any code in the data
max_code = 0

for code in codes:
    code = int(code)    
    if code > max_code:
        max_code = code                 #update largest code 
    if code < min_code:
        min_code = code                 #update smallest code

print(f"maximum code is:    {max_code}")
print(f"minimum code is:    {min_code}")
maximum code is:    3831
minimum code is:    111

This suggests we can use the smallint datatype for this column, which requires only 2 bytes and covers the range -32768 to 32767.

Creating Table¶

Using our findings from the previous section, we can make an informed decision regarding the datatypes for each column. We summarize our final choices in the following table.

column name        datatype
incident_number    serial
offense_code       smallint
description        varchar(116)  (2 × 58)
date               date (ISO 8601)
day_of_the_week    enumerated
lat                decimal
long               decimal

Here are a couple of notes about our choices above:

  • Since there are only 7 unique values for day of the week, we can save some storage space by using an enumerated datatype for this column
  • Given the minimum and maximum offense_code values, the smallint datatype is sufficient for this column
  • The incident_number column counts from 1 to 298329, which is the number of rows. This suggests a serial datatype, which allows future crimes to be entered into the database without specifying a value for this column, as sketched below.
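
Here's a minimal sketch of what the serial datatype buys us (hypothetical values, runnable only once the table is created and loaded later in this notebook):

In [ ]:
# Hypothetical insert: incident_number is omitted, so the SERIAL column
# assigns the next value in its sequence automatically.
cur.execute("""
    INSERT INTO crimes.boston_crimes
        (offense_code, description, date, day_of_the_week, lat, long)
    VALUES (619, 'LARCENY ALL OTHERS', '2018-09-10', 'Monday', 42.35, -71.13);
""")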

Now, let's create the enumerated datatype.

In [ ]:
# double check format of column
weekdays = get_col_set('data/boston.csv', 4)
weekdays
Out[ ]:
{'Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday', 'Wednesday'}
In [ ]:
# create weekday_enum datatype with days in proper order
query = "CREATE TYPE weekday_enum AS ENUM ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday');"
cur.execute(query)
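
Since the enum values were declared in calendar order, ORDER BY on this column will sort Sunday through Saturday rather than alphabetically. As a sketch (runnable once the table below is populated), counting crimes per weekday in calendar order looks like:

In [ ]:
# Enum ordering follows declaration order, so rows come back Sunday..Saturday.
cur.execute("""
    SELECT day_of_the_week, COUNT(*)
    FROM crimes.boston_crimes
    GROUP BY day_of_the_week
    ORDER BY day_of_the_week;
""")
for day, n in cur.fetchall():
    print(day, n)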

With this datatype created, we're ready to create the table with proper datatypes specified for each column.

In [ ]:
query = """
CREATE TABLE crimes.boston_crimes (
    incident_number SERIAL PRIMARY KEY,
    offense_code SMALLINT,
    description VARCHAR(116),
    date DATE,
    day_of_the_week weekday_enum,
    lat DECIMAL,
    long DECIMAL
);
"""
cur.execute(query)

Now we can use the copy_expert method of psycopg2 to copy the contents of the csv file boston.csv to the boston_crimes table we created above.

In [ ]:
with open("data/boston.csv") as f:
    cur.copy_expert("COPY crimes.boston_crimes FROM STDIN WITH CSV HEADER;", f)
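
One subtlety worth flagging: because the CSV supplies explicit incident_number values, COPY does not advance the sequence behind the SERIAL column, so a later insert that relies on the default would collide with existing keys. A small sketch to realign the sequence:

In [ ]:
# Re-sync the SERIAL sequence with the loaded data, so future inserts that
# omit incident_number continue from the current maximum.
cur.execute("""
    SELECT setval(pg_get_serial_sequence('crimes.boston_crimes', 'incident_number'),
                  (SELECT MAX(incident_number) FROM crimes.boston_crimes));
""")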

Let's take a look at the first few rows to ensure everything above worked properly.

In [ ]:
cur.execute("SELECT * FROM crimes.boston_crimes LIMIT 3;")
results = cur.fetchall()
for result in results:
    print(result)
(1, 619, 'LARCENY ALL OTHERS', datetime.date(2018, 9, 2), 'Sunday', Decimal('42.35779134'), Decimal('-71.13937053'))
(2, 1402, 'VANDALISM', datetime.date(2018, 8, 21), 'Tuesday', Decimal('42.30682138'), Decimal('-71.06030035'))
(3, 3410, 'TOWED MOTOR VEHICLE', datetime.date(2018, 9, 3), 'Monday', Decimal('42.34658879'), Decimal('-71.07242943'))
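
Each row matches the CSV. As one more sanity check, we could confirm that the row count matches the 298329 unique incident numbers we counted earlier:

In [ ]:
# Expect (298329,) -- one row per unique incident_number in the CSV.
cur.execute("SELECT COUNT(*) FROM crimes.boston_crimes;")
print(cur.fetchone())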

Creating Groups¶

Now we're going to create some groups. To begin, we revoke all privileges on the public schema and on the crime_db database from the public role, so that privileges must be granted explicitly.

In [ ]:
cur.execute("REVOKE ALL ON SCHEMA public FROM public;")
cur.execute("REVOKE ALL ON DATABASE crime_db FROM public;")

Now let's create two groups: readonly and readwrite. readonly will only be able to select data from tables in the crimes schema, while readwrite will be able to select, insert, delete, and update. We'll also specify the NOLOGIN option when creating each group to ensure that logins can only be performed by users, not groups.

In [ ]:
# create readonly and readwrite groups
cur.execute("CREATE GROUP readonly NOLOGIN;")
cur.execute("CREATE GROUP readwrite NOLOGIN;")

# grant database connection privileges to both
cur.execute("GRANT CONNECT ON DATABASE crime_db TO readonly;")
cur.execute("GRANT CONNECT ON DATABASE crime_db TO readwrite;")

# grant usage privileges on crimes schema
cur.execute("GRANT USAGE ON SCHEMA crimes TO readonly;")
cur.execute("GRANT USAGE ON SCHEMA crimes TO readwrite;")

# grant group-specific privileges to the corresponding group
cur.execute("GRANT SELECT ON ALL TABLES IN SCHEMA crimes TO readonly;")
cur.execute("GRANT SELECT, INSERT, DELETE, UPDATE ON ALL TABLES IN SCHEMA crimes TO readwrite;")

Creating Users¶

Now that we have groups, let's create a user for each group. We'll create a data analyst user assigned to the readonly group, and a data scientist user assigned to the readwrite group.

In [ ]:
# create data_analyst user and assign to readonly group
cur.execute("CREATE USER data_analyst WITH PASSWORD 'abc';")
cur.execute("GRANT readonly TO data_analyst;")

# create data_scientist user and assign to readwrite group
cur.execute("CREATE USER data_scientist WITH PASSWORD '123';")
cur.execute("GRANT readwrite TO data_scientist;")

Our desired database is now complete. Let's save our work by committing all of the executed statements and closing the connection.

In [ ]:
conn.commit()
conn.close()

In the following section, we'll use PostgreSQL's internal catalog views to take a closer look at the database and ensure everything is working properly.

Testing¶

Let's begin by looking at the schemas present in our database.

In [ ]:
conn = psycopg2.connect(dbname="crime_db", user="postgres", password="6288")
cur = conn.cursor()
cur.execute("""
            SELECT schema_name FROM information_schema.schemata;
            """)

results = cur.fetchall()
for result in results:
    print(result)
('pg_toast',)
('pg_temp_1',)
('pg_toast_temp_1',)
('pg_catalog',)
('information_schema',)
('public',)
('crimes',)

We see that there are a number of internal schemas (prefixed with "pg_"), as well as the public and information_schema schemas. We also see the crimes schema we created.

Now, let's look at all tables within the crimes schema.

In [ ]:
cur.execute("SELECT * FROM information_schema.tables WHERE table_schema = 'crimes';")

results = cur.fetchall()
for result in results:
    print(result)
('crime_db', 'crimes', 'boston_crimes', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None)

There is one table named "boston_crimes" that lives within the crimes schema of the crime_db database. This is the desired structure.

Now, let's take a look at the users assigned to each group which we created.

In [ ]:
cur.execute("""
            SELECT * FROM information_schema.applicable_roles 
            WHERE role_name = 'readonly' OR role_name = 'readwrite';
            """)

results = cur.fetchmany(5)
for result in results:
    print(result)
('data_analyst', 'readonly', 'NO')
('data_scientist', 'readwrite', 'NO')

We see that the users we created are assigned to the proper groups. Finally, let's verify the privileges of each group.

In [ ]:
cur.execute("""
    SELECT grantee, privilege_type
    FROM information_schema.table_privileges
    WHERE grantee = 'readwrite' OR grantee = 'readonly';
    """)
results = cur.fetchall()
for result in results:
    print(result)
('readonly', 'SELECT')
('readwrite', 'INSERT')
('readwrite', 'SELECT')
('readwrite', 'UPDATE')
('readwrite', 'DELETE')

We see that the readonly group has select privileges, while the readwrite group has select, insert, update, and delete privileges. From these checks, we see that the crime_db database has the desired structure, as shown in the initial diagram.
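
As a final end-to-end check, we could connect as data_analyst and confirm that reads succeed while writes are rejected; a minimal sketch, assuming psycopg2 2.8+ (for the errors module) and a local server:

In [ ]:
test_conn = psycopg2.connect(dbname="crime_db", user="data_analyst", password="abc")
test_cur = test_conn.cursor()

# SELECT should succeed for a member of the readonly group...
test_cur.execute("SELECT COUNT(*) FROM crimes.boston_crimes;")
print(test_cur.fetchone())

# ...while a write should be rejected with an insufficient-privilege error.
try:
    test_cur.execute("DELETE FROM crimes.boston_crimes;")
except psycopg2.errors.InsufficientPrivilege as e:
    print(f"write rejected as expected: {e}")

test_conn.close()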

Conclusion¶

Overall, we were able to:

  • Create an empty database crime_db with a single schema crimes
  • Analyze the boston.csv file to determine proper datatypes for each column
  • Use psycopg2's copy_expert method to copy the contents of boston.csv into a table in the crimes schema
  • Create two groups with specific privileges, and assign a user to each group
  • Utilize PostgreSQL's internal catalog views to verify the structure of our database

Now that the data is stored in a PostgreSQL database, relevant data can be retrieved efficiently for various analysis / ML tasks. Moreover, the data is protected from users who don't have data-modification privileges, such as data_analyst.