Compare commits

..

4 Commits

Author SHA1 Message Date
7010ede218 Updated Project 2025-12-07 16:53:52 -05:00
76cd999a01 Completed Neo4j HW 2025-12-07 16:53:29 -05:00
0542ac241c Complete MongoDB HW 2025-12-07 16:53:02 -05:00
7f780c9e71 Complete HW Advanced-SQL 2025-12-07 16:52:49 -05:00
7 changed files with 1841 additions and 1 deletions

1216
Advanced-SQL/hw5-sol.txt Normal file

File diff suppressed because it is too large Load Diff

235
Advanced-SQL/hw5.py Normal file
View File

@@ -0,0 +1,235 @@
"""Autograder for Postgres queries assignment.
Author: CS374 Faculty
Version: 11/13/2025
"""
import difflib
import psycopg
import re
import socket
# Number of "Query #N" sections the assignment file must contain
QUERIES = 8

# Choose the database host. Start from the campus server name; when that
# name cannot be resolved (connecting from off campus), use localhost.
HOST = "data.cs.jmu.edu"
try:
    socket.gethostbyname(HOST)
except socket.gaierror:
    HOST = "localhost"
def connect(dbname):
    """Connect to the database and create a cursor.

    The connection and cursor are stored in the module-level globals
    ``con`` and ``cur``. A connection left over from an earlier call is
    closed first, so that calling this function repeatedly does not leave
    abandoned sessions open on the server.

    Args:
        dbname: The name of the database to use.
    """
    global con, cur
    # The globals do not exist until the first call, so look them up safely
    previous = globals().get("con")
    if previous is not None and not previous.closed:
        previous.close()
    con = psycopg.connect(host=HOST, user="demo", password="demo", dbname=dbname)
    cur = con.cursor()
def assert_eq(actual, expect, message=""):
    """Assert whether two values are equal (custom feedback).

    The comparison raises AssertionError explicitly instead of using an
    ``assert`` statement, so the check is not removed when Python runs
    with the ``-O`` flag.

    Args:
        actual: The value produced by the code being tested.
        expect: The expected value to compare with `actual`.
        message: Text to display if AssertionError is raised.

    Raises:
        AssertionError: If `actual` is not equal to `expect`,
            with a message displaying both values.
    """
    if actual == expect:
        return

    limit = 120

    def shorten(text):
        # Abbreviate output if too long
        return text if len(text) <= limit else text[:limit] + "..."

    if isinstance(actual, str):
        a_str = shorten(actual)
        # `expect` may not be a string (e.g. None); convert before measuring
        e_str = shorten(expect if isinstance(expect, str) else str(expect))
    else:
        # Convert to simple strings
        a_str = str(actual)
        e_str = str(expect)
    raise AssertionError(f"{message}\n Actual: {a_str}\n Expect: {e_str}")
def res2str(res):
    """Render fetched rows as tab-separated text, one row per line.

    Args:
        res (list of tuples): Results obtained from fetchall().

    Returns:
        str: Rows joined by newlines; within a row the values are joined
            by tabs, and None is shown as an empty string.
    """
    lines = []
    for row in res:
        cells = ["" if value is None else str(value) for value in row]
        lines.append("\t".join(cells))
    return "\n".join(lines)
def run_query(sql, txt, qno):
    """Run the query and compare with expected output.

    Three kinds of chunk are handled:

    * A numbered query (`qno` is truthy): reconnect to the current
      database, execute the chunk, and compare against `txt`.
    * A chunk containing a psql connect meta-command: connect to the
      named database and return without executing anything.
    * Anything else (the header comment): connect to "postgres" and
      execute the chunk, which must not produce results.

    Args:
        sql (str): The sql chunk from the HW file.
        txt (str): The expected output of the sql.
        qno (int): The query number being tested.

    Raises:
        RuntimeError: If incorrect number of queries, or if results
            appear where none are expected.
        AssertionError: Raised by assert_eq when the query number,
            schema, row count, or any row differs from `txt`.
    """
    # Print status message for autograder feedback
    if qno:
        print(f"Running Query #{qno}...")
        # Reset connection in case of previous error or timeout
        con.cancel_safe()
        connect(con.info.dbname)
    elif "\\c" in sql:
        beg = sql.find("\\c")
        end = sql.find("\n", beg)
        if end == -1:
            # Meta-command is on the last line with no trailing newline
            end = len(sql)
        dbname = sql[beg + 3 : end].strip()
        print("Connecting to", dbname)
        connect(dbname)
        return
    else:
        print("Running header comment")
        connect("postgres")

    # Execute the chunk, convert results to text
    results = []
    if "\\echo" in sql:
        # Turn the meta-commands into SQL comments so the server accepts them
        sql = sql.replace("\\echo", "--\\echo")
        beg = sql.find("\\echo")
        end = sql.find("\n", beg)
        name = sql[beg + 6 : end]
        results.append(name)
    res = cur.execute(sql)
    if res.rowcount > -1:
        column_names = [desc[0] for desc in res.description]
        schema = "\t".join(column_names)
        # Integer hack: drop a trailing ".0" at the end of each row. The
        # newline is appended first so the final row is covered as well.
        output = (res2str(res.fetchall()) + "\n").replace(".0\n", "\n")
        footer = f"({res.rowcount} rows)\n"
        results.append(schema + "\n" + output + footer)

    # Compare with expected output, if applicable
    if txt:
        if len(results) == 0:
            raise RuntimeError(f"Missing output of \\echo Query #{qno}")
        # 1st line blank, 2nd line "Query #"
        actual = results[0]
        expect = txt.splitlines()
        assert_eq(actual, expect[1], "Incorrect query number")

        # Check the number of queries run
        if len(results) == 1:
            raise RuntimeError("No results (code is blank)")
        # NOTE: at most two entries are appended above, so this guard
        # cannot currently trigger; kept as a safety net.
        if len(results) > 2:
            raise RuntimeError("Extra results (more than one query)")

        # Calculate similarity percentage
        actual = "\n" + results[0] + "\n" + results[1]
        seq = difflib.SequenceMatcher(None, actual, txt)
        sim = int(seq.ratio() * 100)
        print(f"Output matches {sim}%")

        # Compare schema and row count
        actual = results[1].rstrip().splitlines()
        expect = expect[2:]
        assert_eq(actual[0], expect[0], "Incorrect schema")
        # Each listing is: schema line, data rows, footer line
        a_rows = len(actual) - 2
        e_rows = len(expect) - 2
        assert_eq(a_rows, e_rows, "Incorrect row count")

        # Compare each row of the results (rows are at indexes 1..a_rows,
        # so the upper bound must be inclusive of the last row)
        for i in range(1, a_rows + 1):
            assert_eq(actual[i], expect[i], f"Row {i} does not match")

    # No output expected (not a SELECT)
    elif results:
        raise RuntimeError(f"Results should be empty: {results}")
def split_file(path):
    """Split a text file into chunks by query number.

    A new chunk starts at every separator comment (a dashed comment line
    followed by another comment line) and at every line containing
    "Query #<digits>", optionally including one directly preceding "--" or
    blank line. Concatenating the returned chunks reproduces the file.

    Args:
        path (str): The path of the text file to split.

    Returns:
        list of str: The code or output for each query. The first item is
            the text before the first match (possibly an empty string).
    """
    # Read the file contents; decode as UTF-8 regardless of the locale so
    # the split is the same on every machine
    with open(path, encoding="utf-8") as file:
        text = file.read()

    # Extract the text before each query
    pattern = re.compile(r"^-- -+\n-- |^(--)?\n?.*Query #\d+", re.MULTILINE)
    chunks = []
    beg = 0
    for match in pattern.finditer(text):
        end = match.start()
        chunks.append(text[beg:end])
        beg = end

    # Append the text of the final query
    chunks.append(text[beg:])
    return chunks
def main(sql_file, txt_file, g_scope=False):
    """Validate both files, then run every chunk of the script in order.

    Args:
        sql_file (str): Path to the sql script file.
        txt_file (str): Path to the expected output.
        g_scope (bool): True if running on Gradescope.

    Returns:
        tuple or None: The split queries and outputs when `g_scope` is
            true; otherwise nothing is returned.

    Raises:
        RuntimeError: If either file does not contain exactly QUERIES
            numbered sections.
    """
    marker = "Query #"

    # The script must contain the expected number of numbered queries
    queries = split_file(sql_file)
    q_count = len([chunk for chunk in queries if marker in chunk])
    if q_count != QUERIES:
        raise RuntimeError(f"Expected {QUERIES} queries, but {q_count} were found.")

    # So must the expected output, once its leading blank chunk is dropped
    outputs = split_file(txt_file)
    outputs.pop(0)
    o_count = len([chunk for chunk in outputs if marker in chunk])
    if o_count != QUERIES:
        raise RuntimeError(f"Expected {QUERIES} outputs, but {o_count} were found.")

    # Gradescope only needs the validated chunks
    if g_scope:
        return queries, outputs

    # Run the chunks one by one; a failure is reported and the run continues
    qno = 0
    for sql in queries:
        numbered = marker in sql
        try:
            if not numbered:
                run_query(sql, None, None)  # Ex: meta-command
            else:
                qno += 1
                run_query(sql, outputs[qno - 1], qno)
        except Exception as e:
            # Assertion or psycopg or Runtime error
            print(type(e).__name__ + ":", e)
        print()

    # Final sanity check on how many numbered queries were attempted
    if qno != QUERIES:
        print(f"Error: Something went wrong. {qno} of {QUERIES} queries were run.")


if __name__ == "__main__":
    main("hw5.sql", "hw5-sol.txt")

217
Advanced-SQL/hw5.sql Normal file
View File

@@ -0,0 +1,217 @@
--
-- Name: Nicholas Tamassia
--
-- Write your queries below each comment. Please use good style (one clause
-- per line, JOIN syntax, indentation) and make sure all queries end with a
-- semicolon. When necessary, limit the output to the first 200 results.
--
-- DO NOT MODIFY OR DELETE ANY OTHER LINES!
--
-- -----------------------------------------------------------------------------
-- Connect to air database
\c air
-- -----------------------------------------------------------------------------
--
\echo
\echo Query #1
--
-- List all last names in the database that start with the letter M. Use the
-- lower() function to convert all last names to lowercase. Show how many times
-- each last name is used. Note: last names are in more than one table.
--
-- Schema: count, last_name
-- Order: count (descending), last_name
SELECT COUNT(*) AS count, last_name
FROM (
    -- Gather the lowercased last names from every table that stores one;
    -- UNION ALL keeps duplicates so each use is counted
    SELECT lower(last_name) AS last_name FROM passenger
    UNION ALL
    SELECT lower(last_name) FROM account
    UNION ALL
    SELECT lower(last_name) FROM frequent_flyer
) AS all_names  -- alias is mandatory for a derived table before PostgreSQL 16
WHERE last_name LIKE 'm%'
GROUP BY last_name
ORDER BY count DESC, last_name
LIMIT 200;
--
\echo
\echo Query #2
--
-- Show the passengers who have flown out of Dulles (IAD) but have never flown
-- out of Orlando (MCO).
--
-- Schema: passenger_id, first_name, last_name
-- Order: passenger_id
SELECT passenger_id, first_name, last_name
FROM (
    -- Passengers booked on a flight departing from IAD ...
    SELECT passenger_id, first_name, last_name
    FROM passenger
        JOIN booking ON passenger.booking_id = booking.booking_id
        JOIN booking_leg ON booking.booking_id = booking_leg.booking_id
        JOIN flight ON booking_leg.flight_id = flight.flight_id
    WHERE departure_airport = 'IAD'
    EXCEPT
    -- ... minus those booked on a flight departing from MCO
    SELECT passenger_id, first_name, last_name
    FROM passenger
        JOIN booking ON passenger.booking_id = booking.booking_id
        JOIN booking_leg ON booking.booking_id = booking_leg.booking_id
        JOIN flight ON booking_leg.flight_id = flight.flight_id
    WHERE departure_airport = 'MCO'
) AS iad_not_mco  -- alias is mandatory for a derived table before PostgreSQL 16
ORDER BY passenger_id
LIMIT 200;
--
\echo
\echo Query #3
--
-- Find the passengers who have not been issued a boarding pass, but whose
-- booked itinerary includes a domestic flight from Dhaka (depart airport
-- DAC, arrive in Bangladesh BD).
--
-- Schema: first_name, last_name, iso_country
-- Order: passenger_id
SELECT pax.first_name, pax.last_name, arr.iso_country
FROM passenger AS pax
    INNER JOIN booking AS bk ON bk.booking_id = pax.booking_id
    INNER JOIN booking_leg AS leg ON leg.booking_id = bk.booking_id
    INNER JOIN flight AS fl ON fl.flight_id = leg.flight_id
    INNER JOIN airport AS arr ON arr.airport_code = fl.arrival_airport
WHERE fl.departure_airport = 'DAC'
    AND arr.iso_country = 'BD'
    -- keep only passengers with no boarding pass on record
    AND NOT EXISTS (
        SELECT 1
        FROM boarding_pass AS bp
        WHERE bp.passenger_id = pax.passenger_id
    )
ORDER BY pax.passenger_id
LIMIT 200;
--
\echo
\echo Query #4
--
-- For each airport, find the percentage of flights that have departed over 10
-- minutes late. Note that in order to avoid integer division, you must cast at
-- least one of the values to a float. For example: CAST(late_flights AS float)
-- Also, you will need to use the interval data type to represent 10 minutes.
--
-- Schema: departure_airport, percentage (calculated value)
-- Order: departure_airport
SELECT
    f.departure_airport,
    -- late departures as a share of all flights from the airport;
    -- the cast to float avoids integer division
    100.0 * CAST(
        COUNT(*) FILTER (
            WHERE f.actual_departure > f.scheduled_departure + INTERVAL '10 minutes'
        ) AS float
    ) / COUNT(*) AS percentage
FROM flight AS f
GROUP BY f.departure_airport
ORDER BY f.departure_airport
LIMIT 200;
-- -----------------------------------------------------------------------------
-- Connect to jmudb database
\c jmudb
-- -----------------------------------------------------------------------------
--
\echo
\echo Query #5
--
-- For each subject, how many sections in Fall 2024 were taught with more than
-- 15 students enrolled? Be careful not to count the same section twice!
--
-- Schema: subject, count
-- Order: count (descending), subject
SELECT subject, COUNT(nbr) AS count
FROM (
    -- one row per (subject, section number) so no section is counted twice
    SELECT DISTINCT subject, nbr
    FROM enrollment
    WHERE term = 1248
        AND enrolled > 15
) AS sections
GROUP BY subject
ORDER BY count DESC, subject
LIMIT 200;
--
\echo
\echo Query #6
--
-- For each instructor, count the total number of students they taught in CS
-- courses over the past three academic years (Summer 2022 to Spring 2025).
--
-- Schema: instructor, students (calculated value)
-- Order: students (descending), instructor
WITH cs_sections AS (
    -- one row per instructor and section, so duplicate rows are not summed
    SELECT DISTINCT instructor, term, nbr, enrolled
    FROM enrollment
    WHERE subject = 'CS'
        AND term BETWEEN 1225 AND 1251
)
SELECT instructor, SUM(enrolled) AS students
FROM cs_sections
GROUP BY instructor
ORDER BY students DESC, instructor
LIMIT 200;
--
\echo
\echo Query #7
-- List all sections in Fall 2024 with more that 100 students enrolled, and
-- rank them by course (i.e., the section with the most students is rank #1).
--
-- Schema: subject, number, nbr, enrolled, rank
-- Order: subject, number, nbr
SELECT
    subject,
    number,
    nbr,
    enrolled,
    -- rank sections within each course, largest enrollment first
    RANK() OVER (
        PARTITION BY subject, number
        ORDER BY enrolled DESC
    ) AS rank
FROM (
    -- GROUP BY collapses duplicate rows for the same section
    SELECT subject, number, nbr, enrolled
    FROM enrollment
    WHERE term = 1248 AND enrolled > 100
    GROUP BY subject, number, nbr, enrolled
) AS large_sections  -- alias is mandatory for a derived table before PostgreSQL 16
ORDER BY subject, number, nbr
LIMIT 200;
--
\echo
\echo Query #8
-- Rank departments in Spring 2025 by their total enrollment, i.e., who has the
-- most students enrolled across all sections. (Hint: two nested subqueries)
--
-- Schema: subject, total, rank
-- Order: subject
SELECT t1.subject,
    t1.total,
    -- departments with the largest total enrollment rank first
    RANK() OVER (ORDER BY t1.total DESC) AS rank
FROM (
    SELECT subject, SUM(enrolled) AS total
    FROM (
        -- one row per section so duplicate rows are not summed twice
        SELECT DISTINCT subject, term, nbr, enrolled
        FROM enrollment
        WHERE term = 1251
    ) AS sections  -- alias is mandatory for a derived table before PostgreSQL 16
    GROUP BY subject
) AS t1
ORDER BY subject
LIMIT 200;

93
Mongo/mongo-notes.js Normal file
View File

@@ -0,0 +1,93 @@
/* Nicholas Tamassia */
db.order_lines.find()
{ "_id": ObjectId('692130567e8e9828c389b03d'), "product_order": 1511, "product": 212, "quantity": 2 }
// Find all orders for product 212
db.order_lines.find({ product: 212 });
{ _id: ObjectId('692130567e8e9828c389b03d'), product_order: 1511, product: 212, quantity: 2 }
{ _id: ObjectId('692130567e8e9828c389b055'), product_order: 1538, product: 212, quantity: 15 }
{ _id: ObjectId('692130567e8e9828c389b059'), product_order: 1577, product: 212, quantity: 6 }
// Find all products with less than 20 available quantity
db.products.find({ available_quantity: { $lt: 20 } })
{ _id: 185, name: 'Chateau Petrus, 1975', type: 'red', available_quantity: 5 }
{ _id: 219, name: 'Marques de Caceres, Rioja Crianza, 2010 ', type: 'red', available_quantity: 0 }
{ _id: 265, name: 'Chateau Sociando-Mallet, Haut-Medoc, 1998', type: 'red', available_quantity: 17 }
{ _id: 331, name: 'Chateau La Commanderie, Lalande-de-Pomerol, 1998', type: 'red', available_quantity: 3 }
{ _id: 494, name: 'Veuve-Cliquot, Brut, 2012', type: 'sparkling', available_quantity: 1 }
{ _id: 523, name: 'Chateau Andron Blanquet, Saint Estephe, 1979', type: 'red', available_quantity: 13 }
{ _id: 783, name: "Clos D'Opleeuw, Chardonnay, 2012", type: 'white', available_quantity: 8 }
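// Find all suppliers in New York
// NOTE: the command for this result was not recorded in these notes; presumably
// db.suppliers.find({ city: "New York" }) (collection name to be confirmed)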
{ _id: 21, name: 'Deliwines', address: '240, Avenue of the Americas', city: 'New York', status: 20 }
// What rose and white wines have more than 50 quantity available?
db.products.find({ type: { $in: [ 'rose', 'white' ] }, available_quantity: { $gt: 50 } })
{ _id: 119, name: 'Chateau Miraval, Cotes de Provence Rose, 2015', type: 'rose', available_quantity: 126 }
{ _id: 289, name: 'Chateau Saint Estève de Neri, 2015', type: 'rose', available_quantity: 126 }
{ _id: 300, name: 'Chateau des Rontets, Chardonnay, Birbettes', type: 'white', available_quantity: 64 }
{ _id: 632, name: 'Meneghetti, Chardonnay, 2010', type: 'white', available_quantity: 83 }
{ _id: 668, name: 'Gallo Family Vineyards, Grenache, 2014', type: 'rose', available_quantity: 95 }
{ _id: 899, name: 'Trimbach, Riesling, 1989', type: 'white', available_quantity: 142 }
// Find all restaurants in London. Show only the id, name, and rating. Order by rating descending
db.restaurants.find({ location: "London" }, { name: 1, rating: 1 }).sort({ rating: -1 })
{ _id: 55, name: 'Alasia', rating: 'Not yet rated' }
{ _id: 101, name: 'Anokha Indian Bar & Restaurant', rating: 'Not yet rated' }
{ _id: 15, name: 'Aarthi', rating: 6 }
{ _id: 165, name: 'Bamboo Box', rating: 5.5 }
{ _id: 174, name: 'Barbican Tandoori', rating: 5.5 }
{ _id: 23, name: 'Absolute Caribbean', rating: 5 }
{ _id: 24, name: 'Absolute Caribbean', rating: 5 }
{ _id: 31, name: 'Admiral Pizza', rating: 5 }
{ _id: 40, name: 'AK Chicken Food', rating: 5 }
{ _id: 59, name: 'Alfa Pizza & Chicken', rating: 5 }
{ _id: 67, name: 'All Nations Dalston', rating: 5 }
{ _id: 137, name: 'Azeri Cuisine', rating: 5 }
{ _id: 138, name: 'Azka Turkish Meze', rating: 5 }
{ _id: 166, name: 'Bamboo Garden', rating: 5 }
{ _id: 186, name: 'Bedouin Lounge Grill & Mezza Bar', rating: 5 }
{ _id: 199, name: 'Bengal Berties', rating: 5 }
{ _id: 200, name: 'Bengal Brasserie', rating: 5 }
{ _id: 202, name: 'Bengal Lancer', rating: 5 }
{ _id: 1, name: '@ Thai Restaurant', rating: 4.5 }
{ _id: 35, name: 'Ai Sushi', rating: 4.5 }
// Find people who have given a rating of 5 to any restaurant. Show only the name and restaurant ID. Order by name ascending
db.restaurants.find({ rating: 5 }, { name: 1 }).sort({ name: 1 })
{ _id: 3, name: '23rd Street Pizza' }
{ _id: 4, name: '23rd Street Pizza' }
{ _id: 5, name: '333 Chinese Takeaway' }
{ _id: 6, name: '4 Seasons Pizza & Grill' }
{ _id: 7, name: '5 Star Pizza' }
{ _id: 8, name: '5 Star Pizza' }
{ _id: 9, name: '62 Worksop Fish Bar' }
{ _id: 11, name: '9 Inch CFC' }
{ _id: 40, name: 'AK Chicken Food' }
{ _id: 41, name: 'AK Grill' }
{ _id: 16, name: "Aayan's" }
{ _id: 19, name: 'Abdul Spice' }
{ _id: 21, name: 'Abidap Connection' }
{ _id: 22, name: 'Abo Ali - Lebanese Cuisine' }
{ _id: 23, name: 'Absolute Caribbean' }
{ _id: 24, name: 'Absolute Caribbean' }
{ _id: 27, name: "Adam's Pizzeria" }
{ _id: 26, name: 'Adams Pizza Corner' }
{ _id: 28, name: 'Adana Pizza' }
{ _id: 29, name: 'Adeel Balti & Pizza Bar' }
// Find all restaurants containing "Thai" in the type of food. Hint: Use { $regex: "Thai", $options: "i" }. Return the name and address of the restaurant
db.restaurants.find({ type_of_food: { $regex: "Thai", $options: "i" } }, { name: 1, address: 1, _id: 0 })
{ address: '30 Greyhound Road Hammersmith', name: '@ Thai Restaurant' }
{ address: '235-241 High Street', name: "Anna's Thai Restaurant" }
{ address: '235-241 High Street', name: "Anna's Thai Restaurant" }
{ address: '21 Market Road', name: 'Asian Box' }
{ address: '18 Fortess Road', name: 'Baan Thai' }
{ address: 'Unit 23 55-59 Weir Road', name: 'Bamboo Baboom' }
{ address: 'Unit 23 55-59 Weir Road', name: 'Bamboo Baboom' }
{ address: '194 Shoreditch High Street', name: 'Bamboo Box' }
{ address: '1 Ecclesall Road', name: 'Ban Thai' }
{ address: '55 Southgate Elland', name: 'Bang Thai Dee' }
{ address: '21 Rose Street', name: 'Bhan Thai' }

78
Neo4j/neo4j-notes.md Normal file
View File

@@ -0,0 +1,78 @@
# Writing Queries for Neo4j
## Part 1
Name: Nicholas Tamassia
## Part 2
NEO4J_URI=bolt://44.202.200.48 NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=towers-possession-electrician NEO4J_DATABASE=neo4j
## Part 3
**Find movies released after 2000**
```cypher
MATCH (m:Movie)
WHERE m.released > 2000
RETURN m.title, m.released
ORDER BY m.released DESC;
```
**Top 5 most common Actors**
```cypher
MATCH (p:Person)-[:ACTED_IN]->(m)
RETURN p.name AS actor, COUNT(m) AS movies
ORDER BY movies DESC
LIMIT 5;
```
**All movies Tom Hanks was in**
```cypher
MATCH (p:Person {name: "Tom Hanks"})-[:ACTED_IN]->(m:Movie)
RETURN m.title, m.released;
```
**Movies directed by Lana Wachowski**
```cypher
MATCH (d:Person {name: "Lana Wachowski"})-[:DIRECTED]->(m)
RETURN m.title, m.released;
```
**People who have acted together**
```cypher
MATCH (p1:Person)-[:ACTED_IN]->(m:Movie)<-[:ACTED_IN]-(p2:Person)
WHERE p1 <> p2
RETURN DISTINCT p1.name, p2.name, m.title
ORDER BY m.title;
```
**Actors who have worked with Tom Hanks but not with each other**
```cypher
// Pair up Tom Hanks' co-actors and keep only the pairs that never
// appeared in a movie together. a.name < b.name lists each pair once.
MATCH (tom:Person {name: "Tom Hanks"})-[:ACTED_IN]->(:Movie)<-[:ACTED_IN]-(a:Person)
MATCH (tom)-[:ACTED_IN]->(:Movie)<-[:ACTED_IN]-(b:Person)
WHERE a.name < b.name
  AND NOT (a)-[:ACTED_IN]->(:Movie)<-[:ACTED_IN]-(b)
RETURN DISTINCT a.name AS actor1, b.name AS actor2
ORDER BY actor1, actor2;
```
**Average release year of Tom Cruise movies**
```cypher
MATCH (p:Person {name:"Tom Cruise"})-[:ACTED_IN]->(m)
RETURN p.name, avg(m.released) AS avgReleaseYear;
```
**The shortest Actor/Director path between Tom Hanks and Keanu Reeves**
```cypher
MATCH path = shortestPath(
(a:Person {name:"Keanu Reeves"})-
[:ACTED_IN|DIRECTED*]-
(b:Person {name:"Tom Hanks"})
)
RETURN path;
```

Submodule Project updated: 3f5702f8b5...df9167b4ec

View File

@@ -19,6 +19,7 @@
psycopg==3.2.11 psycopg==3.2.11
psycopg-binary==3.2.11 psycopg-binary==3.2.11
Flask-AppBuilder==5.0.1 Flask-AppBuilder==5.0.1
Markdown==3.10
''; '';
}; };
}; };