# Chatbot POC

import sys
import random
import nltk
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer
import numpy as np
import string
import re

#I should create classes and functions of static variables 

# Greetings: one is picked at random to open the chat, and user input that
# exactly matches one of them is treated as a greeting. Each phrase is listed
# in both capitalised and lower-case form.
greetingList = np.array([
    "Hello", "hello",
    "Hi", "hi",
    "Whats Up", "whats up",
    "Hola", "hola",
])

# Words that mark a sentence as a question when they come first.
questionList = np.array([
    "do", "Do",
    "how", "How",
    "where", "Where",
    "why", "Why",
    "what", "What",
    "when", "When",
])

# Known facts, keyed by subject noun.
# Could probably be replaced by a database query.
answersList = {"sky": "blue"}

# word -> part-of-speech tag for the input currently being handled.
currentDiscussion = dict()

# Reserved for earlier turns; nothing in the visible code reads or writes it.
pastDiscussion = dict()

#greetings function
def greetings():
    """Open the chat: print a randomly chosen greeting, then read user input."""
    opening = random.choice(greetingList)
    print(opening)
    userInput()

#take user input
def userInput():
    """Run the conversation loop: read a line, respond to it, repeat.

    A line that exactly matches an entry of ``greetingList`` is answered with
    "What can I do for you today?" and a new line is read in its place. Any
    other line is handed to ``analyzeInput``; afterwards ``currentDiscussion``
    is cleared so the tags of one turn do not carry over into the next.

    The loop ends when ``analyzeInput`` terminates the program (the user
    typed "goodbye") or when standard input is exhausted.
    """
    # Iterate rather than re-calling userInput(): the recursive form added a
    # stack frame per turn and would eventually exceed the recursion limit.
    while True:
        try:
            takeInput = input()
            # Keep prompting for as long as the user only greets. The former
            # single pass over greetingList re-prompted a second greeting only
            # if it happened to sit later in the list than the first one.
            while any(greet == takeInput for greet in greetingList):
                print("What can I do for you today?")
                takeInput = input()
        except EOFError:
            # stdin was closed (e.g. piped input ran out): stop quietly
            # instead of ending with a traceback.
            return

        analyzeInput(takeInput)
        currentDiscussion.clear()

#Tokenize words
def process_content(processInput):
    """Tokenize *processInput*, tag each token and record the result.

    The (word, tag) pairs returned by ``nltk.pos_tag`` are merged into the
    module-level ``currentDiscussion`` dict. Any exception raised along the
    way is printed instead of being propagated.
    """
    try:
        currentDiscussion.update(
            nltk.pos_tag(nltk.word_tokenize(processInput))
        )
    except Exception as err:
        print(err)

#analyze input
def analyzeInput(newInput):
    """Route one line of user input to the matching handler.

    * Empty input is ignored.
    * ``"goodbye"`` (exact match) ends the program via ``end()``.
    * Input whose last character is ``"?"`` is passed to ``isQuestion``.
    * Everything else is passed to ``isStatement``.

    Args:
        newInput: the text the user typed, as a string.
    """
    if not newInput:
        return

    # Handle the exit phrase before classifying, so the bot does not print
    # "input is a statement" right before shutting down.
    if newInput == "goodbye":
        end()
        return

    # Only the final character decides between question and statement.
    if newInput.endswith("?"):
        isQuestion(newInput)
    else:
        isStatement(newInput)

#is question?
def isQuestion(questionInput):
    """Answer a question based on its part-of-speech tags.

    The text is tagged through ``process_content``, which stores word -> tag
    pairs in the module-level ``currentDiscussion`` dict; words and tags are
    then read back in insertion order.

    Two hard-coded patterns are answered:

    * "Do/do ... you <predicate>?" prints "Yes, I <predicate>".
    * "What <NN> <VBZ> <DT> <NN>?" (e.g. "What color is the sky?") prints the
      ``answersList`` entry for the final noun, when one exists.

    An extra blank line is printed before and after the answer.

    Args:
        questionInput: the raw question text typed by the user.
    """
    # Design notes kept from the original author:
    # - more criteria are needed to decide whether the input is a question,
    #   based on its first word
    # - data is needed both to analyse the input and to choose the output;
    #   how should that input/output database be grown?
    # - next step: use the parts of speech (pronouns, verbs) to find meaning
    process_content(questionInput)

    # currentDiscussion is keyed by word, so a repeated word appears once.
    allWords = list(currentDiscussion.keys())
    allPOS = list(currentDiscussion.values())

    # Only the first word of the sentence can be the question word.
    questionWord = ""
    if allWords and allWords[0] in questionList:
        questionWord = allWords[0]

    # A question about "you" is answered in the first person. The predicate
    # is everything after the first "you" except the final token (normally
    # the "?"). Stopping at the first match keeps the predicate a single
    # clean word list even if both "you" and "You" occur.
    subject = ""
    predicateWords = []
    for position, word in enumerate(allWords):
        if word in ("you", "You"):
            subject = "I"
            predicateWords = allWords[position + 1:-1]
            break

    print("\n")

    if questionWord in ("do", "Do"):
        # Join the words directly; formatting via str(list) and stripping
        # brackets/quotes mangled tokens that contain quote characters.
        print("Yes, " + subject + " " + " ".join(predicateWords))

    if questionWord in ("what", "What"):
        # Tag patterns under consideration:
        #   NN + IN + NN        (color of sky)
        #   NN + VBZ + DT + NN  (color is the sky)
        # The first token is the question word itself, so matching starts at
        # index 1. Comparing a slice cannot raise IndexError on short input.
        if allPOS[1:5] == ["NN", "VBZ", "DT", "NN"]:
            # Look the noun up instead of hard-coding "sky".
            answer = answersList.get(allWords[4])
            if answer is not None:
                print(answer)

    print("\n")

#look at the ordering of the sentence to determine the meaning
#It feels like with questions there is a decision tree that I need to follow

#a verb at the beginning of the question emits a response
#do means a binary response
#what is an answer to the question
#where is an answer in the form of a place
#why is an answer in the form of internal motivation

#a verb following a pronoun is describing that pronoun

#what I am doing right now though is not algorithmic. Right now it feels like I am doing a lot of hard coding

def isStatement(statementInput):
    """Handle a statement; for now this only reports that one was received.

    Args:
        statementInput: the statement text (currently unused).
    """
    notice = "input is a statement"
    print(notice)

#end program
def end():
    """Terminate the program by raising ``SystemExit`` with no exit code."""
    raise SystemExit

# Start the interactive session only when run as a script, so that importing
# this module does not block waiting on input().
if __name__ == "__main__":
    greetings()

#High level structure:
#Take input
	#randomized or structured? greeting. There seems to be purpose and logic to specific types of greetings. 
	#These are based on relationships
#Analyze input
#Output
	#randomized or structured? ending. There seems to be purpose and logic to specific types of endings. 
	#These are based on relationships

	#once I know it is a question then what? 
	#I want to then drill down into the question

	#next step is to understand the token syntax

# POS tag list:

# CC	coordinating conjunction
# CD	cardinal digit
# DT	determiner
# EX	existential there (like: "there is" ... think of it like "there exists")
# FW	foreign word
# IN	preposition/subordinating conjunction
# JJ	adjective	'big'
# JJR	adjective, comparative	'bigger'
# JJS	adjective, superlative	'biggest'
# LS	list marker	1)
# MD	modal	could, will
# NN	noun, singular 'desk'
# NNS	noun plural	'desks'
# NNP	proper noun, singular	'Harrison'
# NNPS	proper noun, plural	'Americans'
# PDT	predeterminer	'all the kids'
# POS	possessive ending	parent\'s
# PRP	personal pronoun	I, he, she
# PRP$	possessive pronoun	my, his, hers
# RB	adverb	very, silently,
# RBR	adverb, comparative	better
# RBS	adverb, superlative	best
# RP	particle	give up
# TO	to	go 'to' the store.
# UH	interjection	errrrrrrrm
# VB	verb, base form	take
# VBD	verb, past tense	took
# VBG	verb, gerund/present participle	taking
# VBN	verb, past participle	taken
# VBP	verb, sing. present, non-3d	take
# VBZ	verb, 3rd person sing. present	takes
# WDT	wh-determiner	which
# WP	wh-pronoun	who, what
# WP$	possessive wh-pronoun	whose
# WRB	wh-adverb	where, when

#I'm getting to the point here where I need to start doing some design.
#Also, I need to connect my app to a database.
# Share this:

# Like this: