Add Assignment 4, Prolog stuff

2024-04-30 13:58:22 -07:00 · 2024-04-30 13:58:22 -07:00 · de50816932
parent 1bf67d7ee3
commit de50816932
5 changed files with 1464 additions and 0 deletions
--- a/Assignment4/Assign4.pl
+++ b/Assignment4/Assign4.pl
@ -0,0 +1,103 @@
+:- style_check(-singleton).
+% DO NOT MODIFY 
+
+:- consult('ReadCSV.pl').
+:- use_module(library(date)).
+:- read_csv_and_store('EmployeeData.csv').
+
+% DO NOT MODIFY
+% is_seattle_employee(?Name): true when a person/14 row for Name has 'Seattle' in its City column.
+is_seattle_employee(Name) :- City = 'Seattle', person(_, Name, _, _, _, _, _, _, _, _, _, _, City, _).
+% is_senior_manager_in_IT(?Name): Name has job title 'Sr. Manger' in department 'IT'. NOTE(review): the 'Manger' spelling presumably mirrors the CSV - confirm before "fixing" it.
+is_senior_manager_in_IT(Name) :- Job = 'Sr. Manger', Dept = 'IT', person(_, Name, Job, Dept, _, _, _, _, _, _, _, _, _, _).
+% is_director_finance_miami(?Name): Name is a 'Director' in the 'Finance' department whose City is 'Miami'.
+is_director_finance_miami(Name) :- Job = 'Director', Dept = 'Finance', City = 'Miami', person(_, Name, Job, Dept, _, _, _, _, _, _, _, _, City, _).
+% is_asian_US_manufacturing_40M(?Name, ?Unit, ?Gender, ?Ethnicity, ?Age): Asian male older than 40 in the 'Manufacturing' unit. NOTE(review): despite "US" in the name, the Country column is not checked - confirm intent.
+is_asian_US_manufacturing_40M(Name, Unit, Gender, Ethnicity, Age) :- Unit = 'Manufacturing', Gender = 'Male', Ethnicity = 'Asian', person(_, Name, _, _, Unit, Gender, Ethnicity, Age, _, _, _, _, _, _), Age > 40.
+
+% greet(+EEID): prints "Hello, Name, Job, Dept, Unit!" and a newline (via format/2) for the person/14 row with that employee ID.
+greet(EEID) :- person(EEID, Who, Role, Division, BizUnit, _, _, _, _, _, _, _, _, _), Fields = [Who, Role, Division, BizUnit], format('Hello, ~w, ~w, ~w, ~w!~n', Fields).
+% years_until_retirement(?Name, ?Age, -Until): Until is 65 (the retirement age used here) minus the employee's Age.
+years_until_retirement(Name, Age, Until) :- RetireAt = 65, person(_, Name, _, _, _, _, _, Age, _, _, _, _, _, _), Until is RetireAt - Age.
+% is_rd_black_midAge(?Name, ?Unit, ?Ethnicity, ?Age): Black employee of the 'Research & Development' unit aged 25 to 50, both ends included.
+is_rd_black_midAge(Name, Unit, Ethnicity, Age) :- Unit = 'Research & Development', Ethnicity = 'Black', person(_, Name, _, _, Unit, _, Ethnicity, Age, _, _, _, _, _, _), 25 =< Age, Age =< 50.
+% is_ITorFin_PHXorMIAorAUS(?Name, ?Dept, ?City): Name works in department 'IT' or
+% 'Finance' and is based in 'Phoenix', 'Miami' or 'Austin'; Dept and City report which.
+is_ITorFin_PHXorMIAorAUS(Name, Dept, City) :- person(_, Name, _, Dept, _, _, _, _, _, _, _, _, City, _),
+	memberchk(Dept, ['IT', 'Finance']), memberchk(City, ['Phoenix', 'Miami', 'Austin']).
+% is_female_senior_role(?Name, ?Title): Name is a female employee whose job Title begins with 'Sr.'.
+is_female_senior_role(Name, Title) :- Gender = 'Female', person(_, Name, Title, _, _, Gender, _, _, _, _, _, _, _, _), atom_concat('Sr.', _Rest, Title).
+
+% convert_salary(+In, -Out): Out is the number spelled by the salary text In.
+% In must begin with '$'. The rest is cut at every comma, tabs and spaces are
+% trimmed from both ends of each piece, and the pieces are joined and parsed,
+% so '$141,604 ' gives 141604. Fails when In has no leading '$' or the joined
+% text is not a number.
+convert_salary(In, Out) :-
+	atom_concat('$', Body, In),
+	split_string(Body, ',', '\t\s', Groups),
+	atomic_list_concat(Groups, Digits), atom_number(Digits, Out).
+% is_highly_paid_senior_manager(?Name, ?Salary): Name is a 'Sr. Manger' paid more than 120000; Salary stays the raw salary text of the row.
+is_highly_paid_senior_manager(Name, Salary) :-
+	Job = 'Sr. Manger', person(_, Name, Job, _, _, _, _, _, _, Salary, _, _, _, _),
+	convert_salary(Salary, Amount), Amount > 120000.
+
+% divides(+N, +X): true when the integer N divides X with no remainder.
+divides(N, X) :- 0 is X mod N.
+
+% prime(+X, +N): helper for prime/1; true when no integer in X..N-1 divides N.
+% It walks the candidate divisors upwards from X until it reaches N itself.
+% The X < N guard makes the walk fail, instead of recursing forever, once X
+% has already passed N (as happens for prime(2, 1) or prime(2, -3)).
+prime(N, N) :- !.
+prime(X, N) :- X < N, \+ divides(X, N), Next is X + 1, prime(Next, N).
+
+% prime(+N): true when N is a prime number. Anything that is not an integer
+% of at least 2 is rejected up front, since 0, 1 and the negatives are not prime.
+prime(N) :- integer(N), N >= 2, prime(2, N).
+% is_prime_age(?Name, ?Age): Name is an employee whose Age is a prime number according to prime/1.
+is_prime_age(Name, Age) :- Employee = person(_, Name, _, _, _, _, _, Age, _, _, _, _, _, _), call(Employee), prime(Age).
+
+% total_salary(?Name, -Salary): Salary is the annual salary (parsed with convert_salary/2) plus the bonus, where the Bonus column is a number followed by '%'.
+total_salary(Name, Salary) :- person(_, Name, _, _, _, _, _, _, _, AnnSal, BonusP, _, _, _),
+	atom_concat(PctText, '%', BonusP), atom_number(PctText, Pct), convert_salary(AnnSal, Base),
+	Salary is Base + (Base * (Pct / 100)).
+
+% takehome_salary(?Name, ?Title, -Salary): Salary is the employee's total pay
+% (annual salary plus percentage bonus) minus the tax for that total's bracket:
+% 20% up to 50000, 25% up to 100000, 30% up to 200000 and 35% above that.
+% Title, salary and bonus all come from ONE person/14 row. Looking the title up
+% separately and joining on Name (via total_salary/2) would cross-match the
+% titles and salaries of employees who share a name.
+takehome_salary(Name, Title, Salary) :- person(_, Name, Title, _, _, _, _, _, _, AnnSal, BonusP, _, _, _),
+	convert_salary(AnnSal, Base), atom_concat(PctText, '%', BonusP), atom_number(PctText, Pct),
+	Gross is Base + (Base * (Pct / 100)),
+	(   Gross =< 50000 -> Tax = 20/100 ; Gross =< 100000 -> Tax = 25/100
+	;   Gross =< 200000 -> Tax = 30/100 ; Tax = 35/100 ),
+	Salary is Gross - (Gross * Tax).
+
+% total_years(?Name, -Years): Years is the exit year minus the hire year of the
+% employee, using the current local year when the Exit column is ''.
+% No cut is used, so a query with Name unbound enumerates every employee, and
+% the century choice is an if-then-else, so a 20YY year can never fall through
+% to 19YY on backtracking.
+total_years(Name, Years) :- person(_,Name,_,_,_,_,_,_,Hire,_,_,_,_,Exit),
+	short_date_year(Hire, HireYr),
+	(   Exit = ''
+	->  get_time(T), stamp_date_time(T, DT, local), date_time_value(year, DT, EndYr)
+	;   short_date_year(Exit, EndYr) ),
+	Years is EndYr - HireYr.
+
+% short_date_year(+Date, -Year): Year is the full year of a date made of three
+% '/'-separated fields whose last field is a short year: below 50 means 20YY, else 19YY.
+short_date_year(Date, Year) :- split_string(Date, '/', '', [_,_,YrS]), atom_number(YrS, Yr),
+	(Yr < 50 -> Year is Yr + 2000 ; Year is Yr + 1900).
+
+% title_salary(?Title, -Salary): Salary is the numeric salary of one employee with job Title; backtracking yields one answer per matching row.
+title_salary(Title, Salary) :- person(_, _, Title, _, _, _, _, _, _, RawSalary, _, _, _, _), convert_salary(RawSalary, Salary).
+
+% average_salary(?Title, -AvgSalary): AvgSalary is the mean salary for Title.
+% bagof/3 gathers every salary for the title (it fails when there is none, so
+% the division never sees an empty list); foldl/4 with plus/3 adds them up.
+average_salary(Title, AvgSalary) :- bagof(S, title_salary(Title, S), Salaries), length(Salaries, Count), foldl(plus, Salaries, 0, Sum), AvgSalary is Sum / Count.
+
--- a/Assignment4/EmployeeData.csv
+++ b/Assignment4/EmployeeData.csv
--- a/Assignment4/ReadCSV.pl
+++ b/Assignment4/ReadCSV.pl
@ -0,0 +1,17 @@
+:- use_module(library(csv)).
+
+% read_csv_and_store(+Filename): reads the CSV file, drops its first (title) row and stores the remaining rows as person/14 facts.
+read_csv_and_store(Filename) :-
+    csv_read_file(Filename, Table, []), Table = [_Header|Rows],
+    process_rows(Rows).
+
+% process_rows(+Rows): stores every row of the list, in list order, by calling
+% process_row/1 on it through maplist/2; fails as soon as one row cannot be
+% stored. The empty list succeeds without storing anything.
+process_rows(Rows) :-
+    maplist(process_row, Rows).
+
+% process_row(+Row): adds the 14 columns of a row/14 term, in the same order, as one person/14 fact at the end of the database.
+process_row(Row) :-
+    Row = row(EEID, Name, Job, Department, Unit, Gender, Ethnicity, Age, Hired, Salary, Bonus, Country, City, Exited), assertz(person(EEID, Name, Job, Department, Unit, Gender, Ethnicity, Age, Hired, Salary, Bonus, Country, City, Exited)).
+
--- a/Assignment4/report4.pdf
+++ b/Assignment4/report4.pdf
--- a/Assignment4/report4.tex
+++ b/Assignment4/report4.tex
@ -0,0 +1,343 @@
+%! TeX program = lualatex
+\RequirePackage[l2tabu,orthodox]{nag}
+\DocumentMetadata{lang=en-US}
+\documentclass[a4paper]{scrartcl}
+\usepackage{geometry}
+\usepackage{graphicx}
+%\usepackage{tikz}
+%\usepackage{tikz-uml}
+\usepackage{hyperref}
+\usepackage{caption}
+\usepackage{subcaption}
+\usepackage{newfloat}
+\usepackage{fancyvrb}
+\usepackage[newfloat=true]{minted}
+\usepackage{bookmark}
+\usepackage{fontspec}
+\usepackage{microtype}
+
+% Math packages
+%\usepackage{amsmath}
+%\usepackage{mathtools}
+%\usepackage{amsthm}
+%\usepackage{thmtools}
+%\usepackage{lualatex-math}
+%\usepackage[warnings-off={mathtools-colon,mathtools-overbracket},math-style=ISO,bold-style=ISO]{unicode-math}
+
+% Fonts
+\usepackage{newcomputermodern}
+\setmonofont{0xProto}[Scale=MatchLowercase]
+
+\newcommand*{\figref}[2][]{%
+  \hyperref[{fig:#2}]{%
+    Figure~\ref*{fig:#2}%
+    \ifx\\#1\\%
+    \else
+      \,#1%
+    \fi
+  }%
+}
+
+\newcommand*{\lstref}[2][]{%
+  \hyperref[{lst:#2}]{%
+    Listing~\ref*{lst:#2}%
+    \ifx\\#1\\%
+    \else
+      \,#1%
+    \fi
+  }%
+}
+
+
+\setminted{breaklines=true,frame=single,linenos=true}
+\newenvironment{longlisting}{\captionsetup{type=listing}}{}
+\newenvironment{longfigure}{\captionsetup{type=figure}}{}
+\setlength{\belowcaptionskip}{10pt}
+\setlength{\abovecaptionskip}{8pt}
+%\DeclarePairedDelimiter\ceil{\lceil}{\rceil}
+%\DeclarePairedDelimiter\floor{\lfloor}{\rfloor}
+
+%\declaretheorem[within=chapter]{definition}
+%\declaretheorem[sibling=definition]{theorem}
+%\declaretheorem[sibling=definition]{corollary}
+%\declaretheorem[sibling=definition]{principle}
+
+\usepackage{polyglossia}
+\usepackage[backend=biber]{biblatex}
+
+\setdefaultlanguage[variant=american,ordinalmonthday=true]{english}
+
+\day=3
+\month=5
+\year=2024
+
+\title{Logic Programming in Prolog}
+\subtitle{Assignment 4}
+\author{Juan Pablo Zendejas}
+\date{\today}
+
+\begin{document}
+
+\maketitle
+%\listoftheorems[ignoreall,onlynamed={theorem,corollary,principle}]
+%\listoftheorems[ignoreall,onlynamed={definition},title={List of Definitions}]
+%\tableofcontents
+
+In this assignment, I was tasked with creating Prolog predicates that
+query the database of employees given in a CSV file. I created 15 rules
+as specified in the assignment PDF. Overall, this assignment was very
+interesting to me. Prolog is very different from any other programming
+language I've used, even Haskell. Getting used to the eccentricities of
+SWI-Prolog and reading its documentation gave me a lot of insight. In
+addition, my background of formal logic from Discrete Mathematics taken
+here at SDSU gave me a lot of parallels that helped me understand how to
+write code.
+
+I did not receive any external help except from consulting the SWI
+Prolog handbook/documentation found on their website. This documentation
+was very useful to figure out what predicates to use to transform some
+of the data.
+
+\section{CSV File Importing}
+
+First I had to change the \texttt{ReadCSV.pl} file to import all the
+rows from the CSV, ignoring the first title row. The source code is
+displayed in \lstref{readcsv}.
+
+\begin{listing}
+	\inputminted[label={ReadCSV.pl}]{prolog}{ReadCSV.pl}
+	\caption{CSV reader.}
+	\label{lst:readcsv}
+\end{listing}
+
+This part wasn't too bad. I simply had to ignore the first row, which I
+did by using Pattern Matching on the \texttt{csv\_read\_file} predicate to
+ignore the first entry of the list and simply take the tail as the
+\texttt{Rows} variable.
+
+Then, on \texttt{process\_row}, I simply filled in the \texttt{assert}
+call with all the names of the columns.
+
+\section{Writing Rules}
+
+Now, I was tasked with using the imported facts from the CSV and writing
+rules that would be used as queries to the dataset.
+
+\subsection{Rule One}
+Simple rule, displayed in \lstref{seattleempl}. The \texttt{:-} symbol
+is reminiscent of ``yields'' from mathematics. I use the underscore to
+discard the variables from the \texttt{person} predicate that I don't
+need, and just check if a person exists with that name and is from
+Seattle.
+
+\begin{listing}
+	\inputminted[label={is\_seattle\_employee},firstline=10,lastline=10]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is from Seattle.}
+	\label{lst:seattleempl}
+\end{listing}
+
+\subsection{Rule Two}
+Very similar to rule one, displayed in \lstref{seniormangerit}. Now, we
+just have more filters to apply, which is done by replacing some of the
+underscores with concrete values.
+
+\begin{listing}
+	\inputminted[label={is\_senior\_manager\_in\_it},firstline=12,lastline=12]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is a senior manager in IT.}
+	\label{lst:seniormangerit}
+\end{listing}
+
+\subsection{Rule Three}
+Displayed in \lstref{rule3}. Standard like the previous rules.
+
+\begin{listing}
+	\inputminted[label={is\_director\_finance\_miami},firstline=14,lastline=14]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is a Director of Finance
+	in Miami.}
+	\label{lst:rule3}
+\end{listing}
+
+\subsection{Rule Four}
+This rule is a little more complicated. It's displayed in
+\lstref{rule4}. Since this predicate has more arguments, I have them
+directly wired into the \texttt{person} predicate. Then, I simply have
+conditions that specify the values of those arguments. In addition, I
+had to do this for \texttt{Age} because I need to check if it's greater
+than 40.
+
+\begin{listing}
+	\inputminted[label={is\_asian\_US\_manufacturing\_40M},firstline=16,lastline=16]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is an Asian male older
+	than 40 working in manufacturing in the US.}
+	\label{lst:rule4}
+\end{listing}
+
+\subsection{Rule Five}
+This rule is given an employee ID, and should print a greeting for that
+employee. The source code is displayed in \lstref{rule5}. I looked in
+the SWI-Prolog documentation to figure out how to format a string, and I
+found the \texttt{format/2} predicate. It basically uses the \textasciitilde{} symbol to
+define placeholders. Then, using the \texttt{person} predicate to get
+the data from the employee ID and outputting it with \texttt{format/2}
+was pretty easy.
+
+\begin{listing}
+	\inputminted[label={greet},firstline=19,lastline=19]{prolog}{Assign4.pl}
+	\caption{Rule to greet an employee from their ID.}
+	\label{lst:rule5}
+\end{listing}
+
+\subsection{Rule Six}
+This rule calculates an employee's number of years until retirement. The
+retirement age is given as 65. This was fairly simple, I had to use the
+\texttt{is} operator to calculate a value and unify it with the
+predicate's argument. The source code is displayed in \lstref{rule6}.
+
+\begin{listing}
+	\inputminted[label={years\_until\_retirement},firstline=21,lastline=21]{prolog}{Assign4.pl}
+	\caption{Rule to determine an employee's years until retirement.}
+	\label{lst:rule6}
+\end{listing}
+
+\subsection{Rule Seven}
+This rule is similar to the previous ones, but now I define two
+conditions on the age, using the \texttt{>=} and \texttt{=<} operators.
+Prolog is interesting in this regard because ``less than or equal'' is
+written \texttt{=<} rather than the usual \texttt{<=}. Something to remember for sure.
+The source code is displayed in \lstref{rule7}.
+
+\begin{listing}
+	\inputminted[label={is\_rd\_black\_midAge},firstline=23,lastline=23]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is middle-aged, black, and
+	in research and development.}
+	\label{lst:rule7}
+\end{listing}
+
+\subsection{Rule Eight}
+This rule introduced me to the semicolon operator, and how to use it to
+make more complicated rules. Basically, the semicolon operator tells
+Prolog to backtrack and try other possibilities if one of the dependent
+propositions is false. I can wrap stuff in parentheses and use the
+semicolon to define ``or''-like statements. The source code is displayed in
+\lstref{rule8}.
+
+\begin{listing}
+	\inputminted[label={is\_ITorFin\_PHXorMIAorAUS},firstline=25,lastline=27]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is in IT or finance, and
+	in Phoenix or Miami or Austin.}
+	\label{lst:rule8}
+\end{listing}
+
+\subsection{Rule Nine}
+This rule introduced me to the \texttt{atom\_concat} predicate. It was
+really interesting to learn that in Prolog, predicates like these can
+work in different ways based on which variable needs to be unified.
+Thus, even though the predicate is called \texttt{concat}, it can also
+split. That's what I did here to remove the `Sr.' from the employee's
+title. The predicate is only true (can be unified) when the title starts
+with Sr. The source code is displayed in \lstref{rule9}.
+
+\begin{listing}
+	\inputminted[label={is\_female\_senior\_role},firstline=29,lastline=29]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is both female and in a
+	senior role.}
+	\label{lst:rule9}
+\end{listing}
+
+\subsection{Rule Ten}
+For this rule, I had to create a helper predicate
+\texttt{convert\_salary/2}, which is shown in \lstref{convertsalary}.
+Now, I had to write predicates that reasoned with numbers instead of
+just atom equality. The source code is shown in \lstref{rule10}. This
+can be done with typical symbols like less than, greater than. I also
+had to write code to convert the string representation of the salary
+into a number; this was done by first removing the dollar sign, then
+removing the comma and parsing it into a number. The SWI Prolog
+documentation helped here.
+
+\begin{listing}
+	\inputminted[label={convert\_salary},firstline=33,lastline=39]{prolog}{Assign4.pl}
+	\caption{Rule to convert a salary string to a number.}
+	\label{lst:convertsalary}
+\end{listing}
+
+\begin{listing}
+	\inputminted[label={is\_highly\_paid\_senior\_manager},firstline=41,lastline=43]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee is a highly paid senior
+	manager.}
+	\label{lst:rule10}
+\end{listing}
+
+\subsection{Rule Eleven}
+This rule determines if an employee has an age that's prime. This one
+was difficult. I thought I could use an algorithm similar to Haskell,
+but that was a generator. I ended up using a naive recursive algorithm
+that checks that no number from 2 up to the age minus one divides the age.
+Probably inefficient, but it works. The source code is shown in
+\lstref{rule11}. 
+
+\begin{listing}
+	\inputminted[label={is\_prime\_age},firstline=45,lastline=59]{prolog}{Assign4.pl}
+	\caption{Rule to determine if an employee has a prime age.}
+	\label{lst:rule11}
+\end{listing}
+
+\subsection{Rule Twelve}
+This rule calculates an employee's salary after bonuses, but before
+taxes. Now that I wrote \texttt{convert\_salary/2}, this predicate has a
+fairly simple and easy to read implementation. It is displayed in
+\lstref{rule12}.
+
+\begin{listing}
+	\inputminted[label={total\_salary},firstline=62,lastline=64]{prolog}{Assign4.pl}
+	\caption{Rule to determine an employee's total salary.}
+	\label{lst:rule12}
+\end{listing}
+
+\subsection{Rule Thirteen}
+Now, this rule calculates the take-home salary of an employee. This is
+done by using a tax bracketed system based on the employee's total
+salary. Again, I had to use the semicolon to provide backtracking and
+alternatives for Prolog, which worked as a conditional to set the tax
+level. The code is displayed in \lstref{rule13}.
+
+\begin{listing}
+	\inputminted[label={takehome\_salary},firstline=66,lastline=77]{prolog}{Assign4.pl}
+	\caption{Rule to determine an employee's take-home salary.}
+	\label{lst:rule13}
+\end{listing}
+
+\subsection{Rule Fourteen}
+This was a pretty complicated predicate to write, but it was fun. The
+source code is displayed in \lstref{rule14}. I used the SWI-Prolog
+documentation to figure out how to use the date-time objects and
+predicates. One of the difficult parts is the strange date format in the
+CSV, which I had to parse using \texttt{split\_string/4} and some pattern
+matching. Then, I had to convert the short year number into a year in
+the 20th or 21st century.
+
+\begin{listing}
+	\inputminted[label={total\_years},firstline=79,lastline=94]{prolog}{Assign4.pl}
+	\caption{Rule to determine an employee's years of service.}
+	\label{lst:rule14}
+\end{listing}
+
+\subsection{Rule Fifteen}
+Finally, I had to write a rule to determine the average salary for an
+entire job title. The source code is displayed in \lstref{rule15}. 
+Using the ways of Prolog backtracking, this actually
+wasn't too bad. Essentially, I wrote a predicate that simply gets the
+salary for a specific person with a title. There are many ways to unify
+the Salary variable that make the predicate true. Then, I use the Prolog
+\texttt{bagof/3} predicate to collect every possible value that
+satisfies the predicate into a list. Then, getting the average is easy.
+
+
+\begin{listing}
+	\inputminted[label={average\_salary},firstline=97,lastline=102]{prolog}{Assign4.pl}
+	\caption{Rule to determine the average salary for a job title.}
+	\label{lst:rule15}
+\end{listing}
+
+
+\end{document}