Completed HW3: Intermediate SQL
This commit is contained in:
235
Intermediate-SQL/hw3-autograder.py
Normal file
235
Intermediate-SQL/hw3-autograder.py
Normal file
@@ -0,0 +1,235 @@
|
|||||||
|
"""Autograder for Postgres queries assignment.
|
||||||
|
|
||||||
|
Author: CS374 Faculty
|
||||||
|
Version: 09/30/2025
|
||||||
|
"""
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
import psycopg
|
||||||
|
import re
|
||||||
|
import socket
|
||||||
|
|
||||||
|
# The expected number of queries on this assignment
|
||||||
|
QUERIES = 8
|
||||||
|
|
||||||
|
# Determine whether connecting from on/off campus
|
||||||
|
try:
|
||||||
|
socket.gethostbyname("data.cs.jmu.edu")
|
||||||
|
HOST = "data.cs.jmu.edu"
|
||||||
|
except socket.gaierror:
|
||||||
|
HOST = "localhost"
|
||||||
|
|
||||||
|
|
||||||
|
def connect(dbname):
|
||||||
|
"""Connect to the database and create a cursor.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
dbname: The name of the database to use.
|
||||||
|
"""
|
||||||
|
global con, cur
|
||||||
|
con = psycopg.connect(host=HOST, user="demo", password="demo", dbname=dbname)
|
||||||
|
cur = con.cursor()
|
||||||
|
|
||||||
|
|
||||||
|
def assert_eq(actual, expect, message=""):
|
||||||
|
"""Assert whether two values are equal (custom feedback).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
actual: The value produced by the code being tested.
|
||||||
|
expect: The expected value to compare with `actual`.
|
||||||
|
message: Text to display if AssertionError is raised.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
AssertionError: If `actual` is not equal to `expect`,
|
||||||
|
with a message displaying both values.
|
||||||
|
"""
|
||||||
|
if type(actual) is str:
|
||||||
|
# Abbreviate output if too long
|
||||||
|
a_str = actual if len(actual) <= 120 else actual[:120] + "..."
|
||||||
|
e_str = expect if len(expect) <= 120 else expect[:120] + "..."
|
||||||
|
else:
|
||||||
|
# Convert to simple strings
|
||||||
|
a_str = str(actual)
|
||||||
|
e_str = str(expect)
|
||||||
|
|
||||||
|
assert actual == expect, f"{message}\n Actual: {a_str}\n Expect: {e_str}"
|
||||||
|
|
||||||
|
|
||||||
|
def res2str(res):
|
||||||
|
"""Convert query results into a multiline string.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
res (list of tuples): Results obtained from fetchall().
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Each line is one row with values separated by tabs.
|
||||||
|
"""
|
||||||
|
return "\n".join(
|
||||||
|
["\t".join(map(lambda x: "" if x is None else str(x), tup)) for tup in res]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def run_query(sql, txt, qno):
|
||||||
|
"""Run the query and compare with expected output.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sql (str): The sql chunk from the HW file.
|
||||||
|
txt (str): The expected output of the sql.
|
||||||
|
qno (int): The query number being tested.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
RuntimeError: If incorrect number of queries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Print status message for autograder feedback
|
||||||
|
if qno:
|
||||||
|
print(f"Running Query #{qno}...")
|
||||||
|
# Reset connection in case of previous error or timeout
|
||||||
|
con.cancel_safe()
|
||||||
|
connect(con.info.dbname)
|
||||||
|
elif "\\c" in sql:
|
||||||
|
beg = sql.find("\\c")
|
||||||
|
end = sql.find("\n", beg)
|
||||||
|
dbname = sql[beg + 3 : end]
|
||||||
|
print("Connecting to", dbname)
|
||||||
|
connect(dbname)
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
print("Running header comment")
|
||||||
|
connect("postgres")
|
||||||
|
|
||||||
|
# Execute the chunk, convert results to text
|
||||||
|
results = []
|
||||||
|
if "\\echo" in sql:
|
||||||
|
sql = sql.replace("\\echo", "--\\echo")
|
||||||
|
beg = sql.find("\\echo")
|
||||||
|
end = sql.find("\n", beg)
|
||||||
|
name = sql[beg + 6 : end]
|
||||||
|
results.append(name)
|
||||||
|
res = cur.execute(sql)
|
||||||
|
if res.rowcount > -1:
|
||||||
|
column_names = [desc[0] for desc in res.description]
|
||||||
|
schema = "\t".join(column_names)
|
||||||
|
output = res2str(res.fetchall())
|
||||||
|
output = output.replace(".0\n", "\n") # integer hack
|
||||||
|
footer = f"({res.rowcount} rows)\n"
|
||||||
|
results.append(schema + "\n" + output + "\n" + footer)
|
||||||
|
|
||||||
|
# Compare with expected output, if applicable
|
||||||
|
if txt:
|
||||||
|
if len(results) == 0:
|
||||||
|
raise RuntimeError(f"Missing output of \\echo Query #{qno}")
|
||||||
|
|
||||||
|
# 1st line blank, 2nd line "Query #"
|
||||||
|
actual = results[0]
|
||||||
|
expect = txt.splitlines()
|
||||||
|
assert_eq(actual, expect[1], "Incorrect query number")
|
||||||
|
|
||||||
|
# Check the number of queries run
|
||||||
|
if len(results) == 1:
|
||||||
|
raise RuntimeError("No results (code is blank)")
|
||||||
|
if len(results) > 2:
|
||||||
|
raise RuntimeError("Extra results (more than one query)")
|
||||||
|
|
||||||
|
# Calculate similarity percentage
|
||||||
|
actual = "\n" + results[0] + "\n" + results[1]
|
||||||
|
seq = difflib.SequenceMatcher(None, actual, txt)
|
||||||
|
sim = int(seq.ratio() * 100)
|
||||||
|
print(f"Output matches {sim}%")
|
||||||
|
|
||||||
|
# Compare schema and row count
|
||||||
|
actual = results[1].rstrip().splitlines()
|
||||||
|
expect = expect[2:]
|
||||||
|
assert_eq(actual[0], expect[0], "Incorrect schema")
|
||||||
|
a_rows = len(actual) - 2
|
||||||
|
e_rows = len(expect) - 2
|
||||||
|
assert_eq(a_rows, e_rows, "Incorrect row count")
|
||||||
|
|
||||||
|
# Compare each row of the results
|
||||||
|
for i in range(1, a_rows):
|
||||||
|
assert_eq(actual[i], expect[i], f"Row {i} does not match")
|
||||||
|
|
||||||
|
# No output expected (not a SELECT)
|
||||||
|
elif results:
|
||||||
|
raise RuntimeError(f"Results should be empty: {results}")
|
||||||
|
|
||||||
|
|
||||||
|
def split_file(path):
|
||||||
|
"""Split a text file into chunks by query number.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path (str): The path of the text file to split.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
list of str: The code or output for each query.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Read the file contents
|
||||||
|
beg = 0
|
||||||
|
chunks = []
|
||||||
|
with open(path) as file:
|
||||||
|
text = file.read()
|
||||||
|
|
||||||
|
# Extract the text before each query
|
||||||
|
pattern = re.compile(r"^-- -+\n-- |^(--)?\n?.*Query #\d+", re.MULTILINE)
|
||||||
|
for match in re.finditer(pattern, text):
|
||||||
|
end = match.start()
|
||||||
|
chunks.append(text[beg:end])
|
||||||
|
beg = end
|
||||||
|
|
||||||
|
# Append the text of the final query
|
||||||
|
chunks.append(text[beg:])
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def main(sql_file, txt_file, g_scope=False):
|
||||||
|
"""Split the given files and execute each query.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sql_file (str): Path to the sql script file.
|
||||||
|
txt_file (str): Path to the expected output.
|
||||||
|
g_scope (bool): True if running on Gradescope.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
tuple or None: queries and outputs (for Gradescope)
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
RuntimeError: If a file doesn't split correctly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Split and validate the given files
|
||||||
|
queries = split_file(sql_file)
|
||||||
|
q_count = sum(1 for s in queries if "Query #" in s)
|
||||||
|
if q_count != QUERIES:
|
||||||
|
raise RuntimeError(f"Expected {QUERIES} queries, but {q_count} were found.")
|
||||||
|
outputs = split_file(txt_file)
|
||||||
|
del outputs[0] # Blank string
|
||||||
|
o_count = sum(1 for s in outputs if "Query #" in s)
|
||||||
|
if o_count != QUERIES:
|
||||||
|
raise RuntimeError(f"Expected {QUERIES} outputs, but {o_count} were found.")
|
||||||
|
|
||||||
|
# Gradescope skips the rest of main()
|
||||||
|
if g_scope:
|
||||||
|
return queries, outputs
|
||||||
|
|
||||||
|
# Execute each chunk of sql in order
|
||||||
|
qno = 0
|
||||||
|
for sql in queries:
|
||||||
|
try:
|
||||||
|
if "Query #" in sql:
|
||||||
|
qno += 1
|
||||||
|
run_query(sql, outputs[qno-1], qno)
|
||||||
|
else:
|
||||||
|
run_query(sql, None, None) # Ex: meta-command
|
||||||
|
except Exception as e:
|
||||||
|
# Assertion or psycopg or Runtime error
|
||||||
|
print(type(e).__name__ + ":", e)
|
||||||
|
print()
|
||||||
|
|
||||||
|
# That's all folks!
|
||||||
|
if qno != QUERIES:
|
||||||
|
print(f"Error: Something went wrong. {qno} of {QUERIES} queries were run.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main("hw3.sql", "hw3-sol.txt")
|
||||||
1139
Intermediate-SQL/hw3-sol.txt
Normal file
1139
Intermediate-SQL/hw3-sol.txt
Normal file
File diff suppressed because it is too large
Load Diff
163
Intermediate-SQL/hw3.sql
Normal file
163
Intermediate-SQL/hw3.sql
Normal file
@@ -0,0 +1,163 @@
|
|||||||
|
--
|
||||||
|
-- Name: Nicholas Tamassia
|
||||||
|
--
|
||||||
|
-- Write your queries below each comment. Please use good style (one clause
|
||||||
|
-- per line, JOIN syntax, indentation) and make sure all queries end with a
|
||||||
|
-- semicolon. When necessary, limit the output to the first 200 results.
|
||||||
|
--
|
||||||
|
-- DO NOT MODIFY OR DELETE ANY OTHER LINES!
|
||||||
|
--
|
||||||
|
|
||||||
|
-- -----------------------------------------------------------------------------
|
||||||
|
-- Connect to tpch database
|
||||||
|
\c tpch
|
||||||
|
-- -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #1
|
||||||
|
--
|
||||||
|
-- Show the customer name, order date, and line items for order number 3.
|
||||||
|
--
|
||||||
|
-- Schema: c_name, o_orderdate, l_partkey, l_suppkey, l_quantity, l_extendedPrice
|
||||||
|
-- Order: l_linenumber
|
||||||
|
|
||||||
|
SELECT c_name, o_orderdate, l_partkey, l_suppkey, l_quantity, l_extendedPrice
|
||||||
|
FROM customer
|
||||||
|
JOIN orders ON c_custkey = o_custkey
|
||||||
|
JOIN lineitem ON o_orderkey = l_orderkey
|
||||||
|
WHERE o_orderkey = 3
|
||||||
|
ORDER BY l_linenumber
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #2
|
||||||
|
--
|
||||||
|
-- Show the part name, supply cost, and retail price of each line item.
|
||||||
|
--
|
||||||
|
-- Schema: l_orderkey, l_partkey, l_suppkey, p_name, ps_supplycost, p_retailprice
|
||||||
|
-- Order: l_orderkey, l_linenumber
|
||||||
|
|
||||||
|
SELECT l_orderkey, l_partkey, l_suppkey, p_name, ps_supplycost, p_retailprice
|
||||||
|
FROM lineitem
|
||||||
|
JOIN partsupp ON (l_partkey = ps_partkey AND l_suppkey = ps_suppkey)
|
||||||
|
JOIN part ON ps_partkey = p_partkey
|
||||||
|
ORDER BY l_orderkey, l_linenumber
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #3
|
||||||
|
--
|
||||||
|
-- Which customers with an account balance over 5000 have no orders?
|
||||||
|
--
|
||||||
|
-- Schema: c_name, c_acctbal
|
||||||
|
-- Order: c_custkey
|
||||||
|
|
||||||
|
SELECT c_name, c_acctbal
|
||||||
|
FROM customer
|
||||||
|
LEFT JOIN orders ON c_custkey = o_custkey
|
||||||
|
WHERE c_acctbal > 5000 AND o_orderkey is NULL
|
||||||
|
ORDER BY c_custkey
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #4
|
||||||
|
--
|
||||||
|
-- Which urgent priority orders have only one line item?
|
||||||
|
--
|
||||||
|
-- Schema: o_orderkey, o_custkey, o_orderstatus
|
||||||
|
-- Order: o_orderkey
|
||||||
|
|
||||||
|
SELECT o_orderkey, o_custkey, o_orderstatus
|
||||||
|
FROM orders
|
||||||
|
JOIN lineitem ON o_orderkey = l_orderkey
|
||||||
|
WHERE o_orderpriority = '1-URGENT'
|
||||||
|
GROUP BY o_orderkey
|
||||||
|
HAVING COUNT(l_orderkey) = 1
|
||||||
|
ORDER BY o_orderkey
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #5
|
||||||
|
--
|
||||||
|
-- What parts containing the word "chocolate" were ordered on or after
|
||||||
|
-- August 1, 1998 and shipped by mail?
|
||||||
|
--
|
||||||
|
-- Schema: p_name, ps_supplycost, l_quantity
|
||||||
|
-- Order: p_partkey
|
||||||
|
|
||||||
|
SELECT p_name, ps_supplycost, l_quantity
|
||||||
|
FROM part
|
||||||
|
JOIN partsupp ON p_partkey = ps_partkey
|
||||||
|
JOIN lineitem ON (ps_partkey = l_partkey AND ps_suppkey = l_suppkey)
|
||||||
|
JOIN orders ON l_orderkey = o_orderkey
|
||||||
|
WHERE p_name LIKE '%chocolate%' AND o_orderdate >= '1998-08-01' AND l_shipmode = 'MAIL'
|
||||||
|
ORDER BY p_partkey
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #6
|
||||||
|
--
|
||||||
|
-- Get the min, max, and average supply cost of each part. The average supply cost
|
||||||
|
-- must be rounded to 2 decimal places.
|
||||||
|
--
|
||||||
|
-- Schema: p_name, p_retailprice, min_supplycost, max_supplycost, avg_supplycost
|
||||||
|
-- Order: p_partkey
|
||||||
|
|
||||||
|
SELECT p_name, p_retailprice, MIN(ps_supplycost) AS "min_supplycost", MAX(ps_supplycost) AS "max_supplycost", ROUND(AVG(ps_supplycost), 2) as "avg_supplycost"
|
||||||
|
FROM part
|
||||||
|
JOIN partsupp on p_partkey = ps_partkey
|
||||||
|
GROUP BY p_partkey
|
||||||
|
ORDER BY p_partkey
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #7
|
||||||
|
--
|
||||||
|
-- What suppliers have an inventory (total available quantity) of over 125,000
|
||||||
|
-- parts with size over 40.
|
||||||
|
--
|
||||||
|
-- Schema: r_name, n_name, s_name, total_qty
|
||||||
|
-- Order: r_name, n_name, s_name
|
||||||
|
|
||||||
|
SELECT r_name, n_name, s_name, SUM(ps_availqty) AS "total_qty"
|
||||||
|
FROM supplier
|
||||||
|
JOIN partsupp ON s_suppkey = ps_suppkey
|
||||||
|
JOIN part ON ps_partkey = p_partkey
|
||||||
|
JOIN nation ON s_nationkey = n_nationkey
|
||||||
|
JOIN region ON n_regionkey = r_regionkey
|
||||||
|
WHERE p_size > 40
|
||||||
|
GROUP BY s_name, n_name, r_name
|
||||||
|
HAVING SUM(ps_availqty) > 125000
|
||||||
|
ORDER BY r_name, n_name, s_name
|
||||||
|
LIMIT 200;
|
||||||
|
|
||||||
|
--
|
||||||
|
\echo
|
||||||
|
\echo Query #8
|
||||||
|
--
|
||||||
|
-- Find orders with a total profit of over $375,000. The profit of a single
|
||||||
|
-- lineitem is defined as quantity * (retail price - supply cost). The profit
|
||||||
|
-- of an order is the sum of its lineitem profits. For each order, show also
|
||||||
|
-- the number of lineitems, the total quantity of items (rounded to 0 decimal
|
||||||
|
-- places), and the total profit of the order (rounded to 2 decimal places).
|
||||||
|
--
|
||||||
|
-- Schema: l_orderkey, num_lineitems, total_quantity, total_profit
|
||||||
|
-- Order: total_profit (descending), total_quantity
|
||||||
|
|
||||||
|
SELECT l_orderkey, COUNT(l_quantity) AS "num_lineitems", ROUND(SUM(l_quantity)) AS "total_quantity", ROUND(SUM(l_quantity * (p_retailprice - ps_supplycost)), 2) AS "total_profit"
|
||||||
|
FROM orders
|
||||||
|
JOIN lineitem ON o_orderkey = l_orderkey
|
||||||
|
JOIN partsupp ON (l_partkey = ps_partkey AND l_suppkey = ps_suppkey)
|
||||||
|
JOIN part ON ps_partkey = p_partkey
|
||||||
|
GROUP BY l_orderkey
|
||||||
|
HAVING SUM(l_quantity * (p_retailprice - ps_supplycost)) > 375000
|
||||||
|
ORDER BY "total_profit" DESC, "total_quantity"
|
||||||
|
LIMIT 200;
|
||||||
Reference in New Issue
Block a user