Files
CS374-Database-Systems/Models-and-Data/fakedata.py

190 lines
5.4 KiB
Python
Raw Normal View History

2025-10-21 11:35:21 -04:00
"""Generate fake data for the Conference Review System."""
__author__ = "Nicholas Tamassia"
import os
import random

from faker import Faker
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

from models import (
    Affiliation,
    Base,
    Conference,
    History,
    Paper,
    PaperAuthor,
    Person,
    PersonAffiliation,
    Review,
    Reviewer,
    Topic,
    t_expertise,
    t_paper_topic,
)
# Connection URL for the target database. Set the DB_URL environment variable
# to override it; the fallback is the value this script has always used.
# NOTE(review): the fallback embeds a username and password in source control;
# consider dropping it once every environment supplies DB_URL.
DB_URL = os.environ.get(
    "DB_URL", "postgresql+psycopg://tamassno:113880616@localhost/sec2"
)

# Shared Faker instance used by add_data for every generated value.
fake = Faker()
def add_data(session: Session) -> None:
    """Populate the database with a small, randomly generated data set.

    Rows are inserted in dependency order: affiliations, persons and
    reviewers, person affiliations, one conference, topics, papers, paper
    authors, paper/topic links, reviewer expertise, reviews, and paper
    history. Each group is committed before the next one is built, because
    later rows read key values (for example ``paper_id`` and ``topic_id``)
    from the rows committed earlier.

    Args:
        session: Open SQLAlchemy session bound to a database in which the
            model tables already exist.
    """
    # org_name is the value PersonAffiliation rows refer to below, so draw it
    # through Faker's unique proxy (as emails already are) to rule out
    # repeated names.
    affiliations: list[Affiliation] = [
        Affiliation(
            org_name=fake.unique.company(),
            website=fake.url(),
            country=fake.country(),
        )
        for _ in range(4)
    ]
    session.add_all(affiliations)
    session.commit()

    persons: list[Person] = [
        Person(
            email=fake.unique.email(),
            first_name=fake.first_name(),
            last_name=fake.last_name(),
        )
        for _ in range(5)
    ]
    reviewers: list[Reviewer] = [
        Reviewer(
            email=fake.unique.email(),
            first_name=fake.first_name(),
            last_name=fake.last_name(),
            phone=fake.phone_number(),
        )
        for _ in range(5)
    ]
    # Everyone who can be a contact, an author, or hold an affiliation.
    people = persons + reviewers
    session.add_all(people)
    session.commit()

    # Sampling without replacement gives each chosen person exactly one
    # affiliation row.
    person_affiliations: list[PersonAffiliation] = [
        PersonAffiliation(
            email=person.email,
            org_name=random.choice(affiliations).org_name,
            from_date=fake.date_between(start_date="-5y", end_date="-1y"),
            to_date=fake.date_between(start_date="-1y", end_date="today"),
        )
        for person in random.sample(people, 8)
    ]
    session.add_all(person_affiliations)
    session.commit()

    conference = Conference(year=2025, location=fake.city())
    session.add(conference)
    session.commit()

    topics: list[Topic] = [Topic(topic_name=fake.bs().title()) for _ in range(20)]
    session.add_all(topics)
    session.commit()

    papers: list[Paper] = [
        Paper(
            title=fake.sentence(nb_words=6),
            abstract=fake.paragraph(nb_sentences=3),
            filename=f"{fake.word()}.pdf",
            contact_email=random.choice(people).email,
            year=conference.year,
        )
        for _ in range(3)
    ]
    session.add_all(papers)
    session.commit()

    # Draw distinct (paper, author) pairs. Independent random.choice calls
    # could pick the same pair twice, which presumably violates the
    # PaperAuthor key -- TODO confirm against models.
    author_pairs = random.sample(
        [(paper, author) for paper in papers for author in people], 8
    )
    paper_authors: list[PaperAuthor] = [
        PaperAuthor(
            paper_id=paper.paper_id,
            email=author.email,
            rank=random.randint(1, 5),
        )
        for paper, author in author_pairs
    ]
    session.add_all(paper_authors)
    session.commit()

    # Nine distinct paper/topic links.
    topic_pairs = random.sample(
        [(paper, topic) for paper in papers for topic in topics], 9
    )
    paper_topic_data: list[dict[str, int]] = [
        {"paper_id": paper.paper_id, "topic_id": topic.topic_id}
        for paper, topic in topic_pairs
    ]
    _ = session.execute(t_paper_topic.insert(), paper_topic_data)
    session.commit()

    # Ten distinct reviewer/topic expertise links.
    expertise_pairs = random.sample(
        [(reviewer, topic) for reviewer in reviewers for topic in topics], 10
    )
    expertise_data: list[dict[str, str | int]] = [
        {"email": reviewer.email, "topic_id": topic.topic_id}
        for reviewer, topic in expertise_pairs
    ]
    _ = session.execute(t_expertise.insert(), expertise_data)
    session.commit()

    # Every (paper, reviewer) combination; sample three of them without
    # replacement so no reviewer reviews the same paper twice.
    possible_reviews = [
        (paper.paper_id, reviewer.email) for paper in papers for reviewer in reviewers
    ]
    reviews: list[Review] = [
        Review(
            paper_id=paper_id,
            email=email,
            merit=random.randint(1, 5),
            relevance=random.randint(1, 5),
            readability=random.randint(1, 5),
            originality=random.randint(1, 5),
            author_comments=fake.sentence(),
            committee_comments=fake.sentence(),
        )
        for paper_id, email in random.sample(possible_reviews, 3)
    ]
    session.add_all(reviews)
    session.commit()

    statuses = ["SUBMITTED", "UNDER_REVIEW", "REVISION", "ACCEPTED", "PUBLISHED"]
    histories: list[History] = [
        History(
            paper_id=random.choice(papers).paper_id,
            timestamp=fake.date_time_between(start_date="-1y", end_date="now"),
            paper_status=random.choice(statuses),
            notes=fake.sentence(),
        )
        for _ in range(5)
    ]
    session.add_all(histories)
    session.commit()
def main() -> None:
    """Recreate the schema from scratch and fill it with fake data.

    Drops every table registered on ``Base.metadata``, creates them again,
    and then runs ``add_data`` in a fresh session. Any existing data in
    those tables is lost.
    """
    engine = create_engine(DB_URL)
    Base.metadata.drop_all(engine)
    Base.metadata.create_all(engine)
    # Named session_factory so the imported Session class is not shadowed.
    session_factory = sessionmaker(bind=engine)
    # The context manager closes the session even if add_data raises.
    with session_factory() as session:
        add_data(session)


if __name__ == "__main__":
    main()