Last Name Match by Year Bar Graph

To support this question here: https://stackoverflow.com/questions/48633906/comparing-two-string-columns-python

The sample code below imports a CSV file with three columns: a list of last names (`LastName1`), the year (`Year`), and a second list of last names (`LastName2`). The code then looks for matches between the two lists of last names and increments the number of matches per year. The resulting last name matches per year are displayed as a bar graph.

import os
import tkinter as tk
from tkinter import filedialog
import csv
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

def findLastNameMatch(file_path=None, show_plot=True):
    """Count last-name matches between two CSV columns and chart them by year.

    Reads a CSV file with the columns ``LastName1``, ``Year`` and
    ``LastName2``. Every row starts with a count of zero; each time a
    ``LastName2`` value equals a row's ``LastName1`` value, that row's count
    goes up by one. The counts are drawn as a bar graph with one bar per CSV
    row, labelled with that row's year (rows sharing a year get separate
    bars).

    Intermediate values are printed to stdout for validation.

    Args:
        file_path: Path of the CSV file. When omitted, a tkinter file dialog
            asks the user to pick one.
        show_plot: When true (the default), display the bar graph with
            matplotlib. Pass ``False`` to only compute the counts.

    Returns:
        A list of ``[year, count]`` pairs in CSV row order, or an empty list
        when no file was selected.

    Raises:
        KeyError: If the CSV file lacks one of the three expected columns.
        OSError: If the file cannot be opened.
    """
    if file_path is None:
        file_path = filedialog.askopenfilename()

    print("\n")

    # print file path for validation
    print(file_path)

    print("\n")

    # a cancelled dialog yields an empty value; there is nothing to open
    if not file_path:
        print("No file selected.")
        return []

    # separate lists for the three csv columns
    lastName1 = []
    year = []
    lastName2 = []

    # newline='' lets the csv module do its own line-ending handling
    with open(file_path, 'r', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            # print columns for validation
            print(row['LastName1'], row['Year'], row['LastName2'])

            lastName1.append(row['LastName1'])
            year.append(row['Year'])
            lastName2.append(row['LastName2'])

    # count elements in each list to validate that they match
    print("\n")

    print("The number of elements in lastName1 is " + str(len(lastName1)) + "\n")
    print("The number of elements in year is " + str(len(year)) + "\n")
    print("The number of elements in lastName2 is " + str(len(lastName2)) + "\n")

    # ordered pairs of [year, count]; every count starts at zero
    yrCountOP = [[yr, 0] for yr in year]

    print("\n")

    for pair in yrCountOP:
        print(pair)

    print("\n")

    # show each year next to its lastName1 entry for validation
    for yr, name in zip(year, lastName1):
        print(f"{yr},{name}")

    print("\n")

    # compare every lastName2 entry against every lastName1 entry;
    # enumerate supplies the index of the row that actually matched, so
    # repeated last names are credited to their own row and not to the
    # first occurrence (which is what list.index would return)
    for name2 in lastName2:
        for index, name1 in enumerate(lastName1):
            if name1 != name2:
                continue

            print(name1 + " is a match")
            print("Last Name List 1 Index of match is: " + str(index) + " : Last Name: " + name1)

            print("\n")

            print("index match found: " + str(index))
            yrCountOP[index][1] += 1

    print("\n")

    # validate change in ordered pairs
    for pair in yrCountOP:
        print(pair)

    print("\n")

    if show_plot:
        xVal = [yr for yr, _ in yrCountOP]
        yVal = [count for _, count in yrCountOP]

        # bar graph: y is the number of matches, x is the year of each row
        plt.xlabel('Years')
        plt.ylabel('Number of Last Name Matches')
        plt.title(r'Last Name Matches per Year')

        # derived from the data itself so it is defined even with no matches
        positions = np.arange(len(yrCountOP))

        plt.bar(positions, height=yVal)
        plt.xticks(positions, xVal)

        plt.show()

    return yrCountOP

#run the matcher: asks for a csv file, prints the validation output and shows the bar graph

findLastNameMatch()
%d bloggers like this: