Complete HW Advanced-SQL
This commit is contained in:
1216
Advanced-SQL/hw5-sol.txt
Normal file
1216
Advanced-SQL/hw5-sol.txt
Normal file
File diff suppressed because it is too large
Load Diff
235
Advanced-SQL/hw5.py
Normal file
235
Advanced-SQL/hw5.py
Normal file
@@ -0,0 +1,235 @@
|
||||
"""Autograder for Postgres queries assignment.
|
||||
|
||||
Author: CS374 Faculty
|
||||
Version: 11/13/2025
|
||||
"""
|
||||
|
||||
import difflib
|
||||
import psycopg
|
||||
import re
|
||||
import socket
|
||||
|
||||
# The expected number of queries on this assignment
|
||||
QUERIES = 8
|
||||
|
||||
# Determine whether connecting from on/off campus
|
||||
try:
|
||||
socket.gethostbyname("data.cs.jmu.edu")
|
||||
HOST = "data.cs.jmu.edu"
|
||||
except socket.gaierror:
|
||||
HOST = "localhost"
|
||||
|
||||
|
||||
def connect(dbname):
|
||||
"""Connect to the database and create a cursor.
|
||||
|
||||
Args:
|
||||
dbname: The name of the database to use.
|
||||
"""
|
||||
global con, cur
|
||||
con = psycopg.connect(host=HOST, user="demo", password="demo", dbname=dbname)
|
||||
cur = con.cursor()
|
||||
|
||||
|
||||
def assert_eq(actual, expect, message=""):
|
||||
"""Assert whether two values are equal (custom feedback).
|
||||
|
||||
Args:
|
||||
actual: The value produced by the code being tested.
|
||||
expect: The expected value to compare with `actual`.
|
||||
message: Text to display if AssertionError is raised.
|
||||
|
||||
Raises:
|
||||
AssertionError: If `actual` is not equal to `expect`,
|
||||
with a message displaying both values.
|
||||
"""
|
||||
if type(actual) is str:
|
||||
# Abbreviate output if too long
|
||||
a_str = actual if len(actual) <= 120 else actual[:120] + "..."
|
||||
e_str = expect if len(expect) <= 120 else expect[:120] + "..."
|
||||
else:
|
||||
# Convert to simple strings
|
||||
a_str = str(actual)
|
||||
e_str = str(expect)
|
||||
|
||||
assert actual == expect, f"{message}\n Actual: {a_str}\n Expect: {e_str}"
|
||||
|
||||
|
||||
def res2str(res):
|
||||
"""Convert query results into a multiline string.
|
||||
|
||||
Args:
|
||||
res (list of tuples): Results obtained from fetchall().
|
||||
|
||||
Returns:
|
||||
str: Each line is one row with values separated by tabs.
|
||||
"""
|
||||
return "\n".join(
|
||||
["\t".join(map(lambda x: "" if x is None else str(x), tup)) for tup in res]
|
||||
)
|
||||
|
||||
|
||||
def run_query(sql, txt, qno):
|
||||
"""Run the query and compare with expected output.
|
||||
|
||||
Args:
|
||||
sql (str): The sql chunk from the HW file.
|
||||
txt (str): The expected output of the sql.
|
||||
qno (int): The query number being tested.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If incorrect number of queries.
|
||||
"""
|
||||
|
||||
# Print status message for autograder feedback
|
||||
if qno:
|
||||
print(f"Running Query #{qno}...")
|
||||
# Reset connection in case of previous error or timeout
|
||||
con.cancel_safe()
|
||||
connect(con.info.dbname)
|
||||
elif "\\c" in sql:
|
||||
beg = sql.find("\\c")
|
||||
end = sql.find("\n", beg)
|
||||
dbname = sql[beg + 3 : end]
|
||||
print("Connecting to", dbname)
|
||||
connect(dbname)
|
||||
return
|
||||
else:
|
||||
print("Running header comment")
|
||||
connect("postgres")
|
||||
|
||||
# Execute the chunk, convert results to text
|
||||
results = []
|
||||
if "\\echo" in sql:
|
||||
sql = sql.replace("\\echo", "--\\echo")
|
||||
beg = sql.find("\\echo")
|
||||
end = sql.find("\n", beg)
|
||||
name = sql[beg + 6 : end]
|
||||
results.append(name)
|
||||
res = cur.execute(sql)
|
||||
if res.rowcount > -1:
|
||||
column_names = [desc[0] for desc in res.description]
|
||||
schema = "\t".join(column_names)
|
||||
output = res2str(res.fetchall())
|
||||
output = output.replace(".0\n", "\n") # integer hack
|
||||
footer = f"({res.rowcount} rows)\n"
|
||||
results.append(schema + "\n" + output + "\n" + footer)
|
||||
|
||||
# Compare with expected output, if applicable
|
||||
if txt:
|
||||
if len(results) == 0:
|
||||
raise RuntimeError(f"Missing output of \\echo Query #{qno}")
|
||||
|
||||
# 1st line blank, 2nd line "Query #"
|
||||
actual = results[0]
|
||||
expect = txt.splitlines()
|
||||
assert_eq(actual, expect[1], "Incorrect query number")
|
||||
|
||||
# Check the number of queries run
|
||||
if len(results) == 1:
|
||||
raise RuntimeError("No results (code is blank)")
|
||||
if len(results) > 2:
|
||||
raise RuntimeError("Extra results (more than one query)")
|
||||
|
||||
# Calculate similarity percentage
|
||||
actual = "\n" + results[0] + "\n" + results[1]
|
||||
seq = difflib.SequenceMatcher(None, actual, txt)
|
||||
sim = int(seq.ratio() * 100)
|
||||
print(f"Output matches {sim}%")
|
||||
|
||||
# Compare schema and row count
|
||||
actual = results[1].rstrip().splitlines()
|
||||
expect = expect[2:]
|
||||
assert_eq(actual[0], expect[0], "Incorrect schema")
|
||||
a_rows = len(actual) - 2
|
||||
e_rows = len(expect) - 2
|
||||
assert_eq(a_rows, e_rows, "Incorrect row count")
|
||||
|
||||
# Compare each row of the results
|
||||
for i in range(1, a_rows):
|
||||
assert_eq(actual[i], expect[i], f"Row {i} does not match")
|
||||
|
||||
# No output expected (not a SELECT)
|
||||
elif results:
|
||||
raise RuntimeError(f"Results should be empty: {results}")
|
||||
|
||||
|
||||
def split_file(path):
|
||||
"""Split a text file into chunks by query number.
|
||||
|
||||
Args:
|
||||
path (str): The path of the text file to split.
|
||||
|
||||
Returns:
|
||||
list of str: The code or output for each query.
|
||||
"""
|
||||
|
||||
# Read the file contents
|
||||
beg = 0
|
||||
chunks = []
|
||||
with open(path) as file:
|
||||
text = file.read()
|
||||
|
||||
# Extract the text before each query
|
||||
pattern = re.compile(r"^-- -+\n-- |^(--)?\n?.*Query #\d+", re.MULTILINE)
|
||||
for match in re.finditer(pattern, text):
|
||||
end = match.start()
|
||||
chunks.append(text[beg:end])
|
||||
beg = end
|
||||
|
||||
# Append the text of the final query
|
||||
chunks.append(text[beg:])
|
||||
return chunks
|
||||
|
||||
|
||||
def main(sql_file, txt_file, g_scope=False):
|
||||
"""Split the given files and execute each query.
|
||||
|
||||
Args:
|
||||
sql_file (str): Path to the sql script file.
|
||||
txt_file (str): Path to the expected output.
|
||||
g_scope (bool): True if running on Gradescope.
|
||||
|
||||
Returns:
|
||||
tuple or None: queries and outputs (for Gradescope)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If a file doesn't split correctly.
|
||||
"""
|
||||
|
||||
# Split and validate the given files
|
||||
queries = split_file(sql_file)
|
||||
q_count = sum(1 for s in queries if "Query #" in s)
|
||||
if q_count != QUERIES:
|
||||
raise RuntimeError(f"Expected {QUERIES} queries, but {q_count} were found.")
|
||||
outputs = split_file(txt_file)
|
||||
del outputs[0] # Blank string
|
||||
o_count = sum(1 for s in outputs if "Query #" in s)
|
||||
if o_count != QUERIES:
|
||||
raise RuntimeError(f"Expected {QUERIES} outputs, but {o_count} were found.")
|
||||
|
||||
# Gradescope skips the rest of main()
|
||||
if g_scope:
|
||||
return queries, outputs
|
||||
|
||||
# Execute each chunk of sql in order
|
||||
qno = 0
|
||||
for sql in queries:
|
||||
try:
|
||||
if "Query #" in sql:
|
||||
qno += 1
|
||||
run_query(sql, outputs[qno-1], qno)
|
||||
else:
|
||||
run_query(sql, None, None) # Ex: meta-command
|
||||
except Exception as e:
|
||||
# Assertion or psycopg or Runtime error
|
||||
print(type(e).__name__ + ":", e)
|
||||
print()
|
||||
|
||||
# That's all folks!
|
||||
if qno != QUERIES:
|
||||
print(f"Error: Something went wrong. {qno} of {QUERIES} queries were run.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main("hw5.sql", "hw5-sol.txt")
|
||||
217
Advanced-SQL/hw5.sql
Normal file
217
Advanced-SQL/hw5.sql
Normal file
@@ -0,0 +1,217 @@
|
||||
--
|
||||
-- Name: Nicholas Tamassia
|
||||
--
|
||||
-- Write your queries below each comment. Please use good style (one clause
|
||||
-- per line, JOIN syntax, indentation) and make sure all queries end with a
|
||||
-- semicolon. When necessary, limit the output to the first 200 results.
|
||||
--
|
||||
-- DO NOT MODIFY OR DELETE ANY OTHER LINES!
|
||||
--
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- Connect to air database
|
||||
\c air
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #1
|
||||
--
|
||||
-- List all last names in the database that start with the letter M. Use the
|
||||
-- lower() function to convert all last names to lowercase. Show how many times
|
||||
-- each last name is used. Note: last names are in more than one table.
|
||||
--
|
||||
-- Schema: count, last_name
|
||||
-- Order: count (descending), last_name
|
||||
|
||||
SELECT COUNT(*) AS count, last_name
|
||||
FROM (
|
||||
SELECT lower(last_name) AS last_name FROM passenger
|
||||
UNION ALL
|
||||
SELECT lower(last_name) FROM account
|
||||
UNION ALL
|
||||
SELECT lower(last_name) FROM frequent_flyer
|
||||
)
|
||||
WHERE last_name LIKE 'm%'
|
||||
GROUP BY last_name
|
||||
ORDER BY count DESC, last_name
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #2
|
||||
--
|
||||
-- Show the passengers who have flown out of Dulles (IAD) but have never flown
|
||||
-- out of Orlando (MCO).
|
||||
--
|
||||
-- Schema: passenger_id, first_name, last_name
|
||||
-- Order: passenger_id
|
||||
|
||||
SELECT passenger_id, first_name, last_name
|
||||
FROM (
|
||||
SELECT passenger_id, first_name, last_name
|
||||
FROM passenger
|
||||
JOIN booking ON passenger.booking_id = booking.booking_id
|
||||
JOIN booking_leg ON booking.booking_id = booking_leg.booking_id
|
||||
JOIN flight ON booking_leg.flight_id = flight.flight_id
|
||||
WHERE departure_airport = 'IAD'
|
||||
EXCEPT
|
||||
SELECT passenger_id, first_name, last_name
|
||||
FROM passenger
|
||||
JOIN booking ON passenger.booking_id = booking.booking_id
|
||||
JOIN booking_leg ON booking.booking_id = booking_leg.booking_id
|
||||
JOIN flight ON booking_leg.flight_id = flight.flight_id
|
||||
WHERE departure_airport = 'MCO'
|
||||
)
|
||||
ORDER BY passenger_id
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #3
|
||||
--
|
||||
-- Find the passengers who have not been issued a boarding pass, but whose
|
||||
-- booked itinerary includes a domestic flight from Dhaka (depart airport
|
||||
-- DAC, arrive in Bangladesh BD).
|
||||
--
|
||||
-- Schema: first_name, last_name, iso_country
|
||||
-- Order: passenger_id
|
||||
|
||||
SELECT p.first_name, p.last_name, a.iso_country
|
||||
FROM passenger AS p
|
||||
JOIN booking AS b ON p.booking_id = b.booking_id
|
||||
JOIN booking_leg AS bl ON b.booking_id = bl.booking_id
|
||||
JOIN flight AS f ON bl.flight_id = f.flight_id
|
||||
JOIN airport AS a ON f.arrival_airport = a.airport_code
|
||||
WHERE f.departure_airport = 'DAC'
|
||||
AND a.iso_country = 'BD'
|
||||
AND NOT EXISTS (
|
||||
SELECT 1
|
||||
FROM boarding_pass
|
||||
WHERE boarding_pass.passenger_id = p.passenger_id
|
||||
)
|
||||
ORDER BY p.passenger_id
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #4
|
||||
--
|
||||
-- For each airport, find the percentage of flights that have departed over 10
|
||||
-- minutes late. Note that in order to avoid integer division, you must cast at
|
||||
-- least one of the values to a float. For example: CAST(late_flights AS float)
|
||||
-- Also, you will need to use the interval data type to represent 10 minutes.
|
||||
--
|
||||
-- Schema: departure_airport, percentage (calculated value)
|
||||
-- Order: departure_airport
|
||||
|
||||
SELECT
|
||||
f.departure_airport,
|
||||
100.0 * CAST(SUM(
|
||||
CASE WHEN actual_departure > scheduled_departure + INTERVAL '10 minutes'
|
||||
THEN 1 ELSE 0 END
|
||||
) AS float)
|
||||
/
|
||||
COUNT(*) AS percentage
|
||||
FROM flight f
|
||||
GROUP BY f.departure_airport
|
||||
ORDER BY f.departure_airport
|
||||
LIMIT 200;
|
||||
|
||||
|
||||
-- -----------------------------------------------------------------------------
|
||||
-- Connect to jmudb database
|
||||
\c jmudb
|
||||
-- -----------------------------------------------------------------------------
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #5
|
||||
--
|
||||
-- For each subject, how many sections in Fall 2024 were taught with more than
|
||||
-- 15 students enrolled? Be careful not to count the same section twice!
|
||||
--
|
||||
-- Schema: subject, count
|
||||
-- Order: count (descending), subject
|
||||
|
||||
SELECT subject,
|
||||
COUNT(DISTINCT nbr) AS count
|
||||
FROM enrollment
|
||||
WHERE term = 1248
|
||||
AND enrolled > 15
|
||||
GROUP BY subject
|
||||
ORDER BY count DESC, subject
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #6
|
||||
--
|
||||
-- For each instructor, count the total number of students they taught in CS
|
||||
-- courses over the past three academic years (Summer 2022 to Spring 2025).
|
||||
--
|
||||
-- Schema: instructor, students (calculated value)
|
||||
-- Order: students (descending), instructor
|
||||
|
||||
SELECT instructor, SUM(students.enrolled) AS students
|
||||
FROM (
|
||||
SELECT DISTINCT instructor, term, nbr, enrolled
|
||||
FROM enrollment
|
||||
WHERE subject = 'CS'
|
||||
AND term BETWEEN 1225 AND 1251
|
||||
) as students
|
||||
GROUP BY instructor
|
||||
ORDER BY students DESC, instructor
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #7
|
||||
-- List all sections in Fall 2024 with more that 100 students enrolled, and
|
||||
-- rank them by course (i.e., the section with the most students is rank #1).
|
||||
--
|
||||
-- Schema: subject, number, nbr, enrolled, rank
|
||||
-- Order: subject, number, nbr
|
||||
|
||||
SELECT
|
||||
subject,
|
||||
number,
|
||||
nbr,
|
||||
enrolled,
|
||||
RANK() OVER (
|
||||
PARTITION BY subject, number
|
||||
ORDER BY enrolled DESC
|
||||
) AS rank
|
||||
FROM (
|
||||
SELECT subject, number, nbr, enrolled
|
||||
FROM enrollment
|
||||
WHERE term = 1248 AND enrolled > 100
|
||||
GROUP BY subject, number, nbr, enrolled
|
||||
)
|
||||
ORDER BY subject, number, nbr
|
||||
LIMIT 200;
|
||||
|
||||
--
|
||||
\echo
|
||||
\echo Query #8
|
||||
-- Rank departments in Spring 2025 by their total enrollment, i.e., who has the
|
||||
-- most students enrolled across all sections. (Hint: two nested subqueries)
|
||||
--
|
||||
-- Schema: subject, total, rank
|
||||
-- Order: subject
|
||||
|
||||
|
||||
SELECT t1.subject,
|
||||
t1.total,
|
||||
RANK() OVER (ORDER BY t1.total DESC) AS rank
|
||||
FROM (
|
||||
SELECT subject, SUM(enrolled) as total
|
||||
FROM (
|
||||
SELECT DISTINCT subject, term, nbr, enrolled
|
||||
FROM enrollment
|
||||
WHERE term = 1251
|
||||
)
|
||||
GROUP BY subject
|
||||
) AS t1
|
||||
ORDER BY subject
|
||||
LIMIT 200;
|
||||
Reference in New Issue
Block a user