import sys
import random
import nltk
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer
import numpy as np
import string
import re
# TODO: move these module-level tables into classes/functions.

# Greetings the bot can open with (greetings() picks one at random) and that
# userInput() recognises when typed by the user. Each greeting appears
# capitalised first, then lower-cased.
greetingList = np.array(
    [
        variant
        for greeting in ("Hello", "Hi", "Whats Up", "Hola")
        for variant in (greeting, greeting.lower())
    ]
)

# Words that mark the start of a question; isQuestion() checks the first word
# of the input against this table. Each word appears lower-cased first, then
# capitalised.
questionList = np.array(
    [
        variant
        for word in ("do", "how", "where", "why", "what", "when")
        for variant in (word, word.capitalize())
    ]
)

# Known facts, keyed by the noun asked about.
# Could probably be replaced by a database query.
answersList = dict(sky="blue")

# Word -> part-of-speech tag for the input being handled; filled by
# process_content() and emptied by userInput() after every exchange.
currentDiscussion = dict()

# Not read or written anywhere in this file yet.
pastDiscussion = dict()
#greetings function
def greetings():
    """Open the conversation.

    Prints one entry of ``greetingList`` chosen at random and then hands
    control to ``userInput`` to read what the user types.
    """
    opener = random.choice(greetingList)
    print(opener)
    userInput()
#take user input
def userInput():
    """Run the conversation loop: read a line, respond to it, repeat.

    A line that exactly matches an entry of ``greetingList`` is answered with
    "What can I do for you today?" and the next line is read instead; this
    repeats for as long as the user keeps greeting. Every other line is
    passed to ``analyzeInput``. After each exchange the module-level
    ``currentDiscussion`` table is emptied so the next input starts clean.

    The function does not return normally. It is written as a loop rather
    than calling itself after every exchange, so a long conversation cannot
    exhaust the interpreter's recursion limit. The loop stops only when an
    exception propagates out of it, e.g. the ``SystemExit`` raised via
    ``analyzeInput`` on "goodbye", or ``EOFError`` from ``input()`` when
    standard input is closed.
    """
    while True:
        takeInput = input()

        # Keep prompting while the user only says hello.
        while any(greet == takeInput for greet in greetingList):
            print("What can I do for you today?")
            takeInput = input()

        analyzeInput(takeInput)

        # Drop the tags collected for this input before reading the next one.
        currentDiscussion.clear()
#Tokenize words
def process_content(processInput):
    """Tokenize ``processInput`` and record each word's part-of-speech tag.

    The text is split with ``nltk.word_tokenize`` and tagged with
    ``nltk.pos_tag``; the resulting pairs are merged into the module-level
    ``currentDiscussion`` dict. Because the pairs go into a dict, a word that
    occurs more than once ends up with a single entry.

    Any exception raised while tokenizing, tagging or merging is caught and
    its message printed; nothing is returned.
    """
    try:
        tokens = nltk.word_tokenize(processInput)
        # presumably (word, tag) pairs, per the NLTK docs -- dict.update
        # stores each pair as key -> value
        currentDiscussion.update(nltk.pos_tag(tokens))
    except Exception as err:
        print(str(err))
#analyze input
def analyzeInput(newInput):
    """Route one line of user input to the matching handler.

    * An empty string is ignored.
    * "goodbye" (any letter case, surrounding whitespace ignored) ends the
      program through ``end``. This is checked first so that the farewell is
      not also reported as a statement.
    * Input whose last non-whitespace character is "?" goes to
      ``isQuestion``.
    * Everything else, with or without a closing ".", goes to
      ``isStatement``.

    The handlers receive ``newInput`` exactly as it was typed.
    """
    if not newInput:
        return

    trimmed = newInput.strip()

    if trimmed.lower() == "goodbye":
        end()
        # end() exits the program; the return only guards against fall-through.
        return

    # Only the final character decides question vs. statement.
    if trimmed.endswith("?"):
        isQuestion(newInput)
    else:
        isStatement(newInput)
#is question?
def isQuestion(questionInput):
    """Print a reply to a question based on its part-of-speech pattern.

    ``questionInput`` is tagged through ``process_content``, which fills the
    module-level ``currentDiscussion`` word -> tag dict. A blank spacer is
    printed, then one of two hard-coded patterns may produce a reply:

    * First word "do"/"Do": prints "Yes, <subject> <predicate>". When the
      question contains "you"/"You" the subject is "I" and the predicate is
      every word after the first such "you" except the final token (the
      closing "?"); otherwise both parts are empty.
    * First word "what"/"What" followed by tags NN, VBZ, DT, NN (for example
      "What color is the sky?"): looks the final noun up in ``answersList``
      and, when an entry exists, prints it followed by a blank spacer.

    Any other question prints only the leading spacer. Nothing is returned.

    Design notes:
    - More criteria are needed to decide, from the first word, whether a
      question was really asked.
    - Replies should eventually come from a database of outputs rather than
      hard-coded patterns; different tag combinations (NN IN NN,
      NN VBZ DT NN, ...) will need to be tried to find ones that work.
    """
    process_content(questionInput)

    # Dicts keep insertion order, so these follow sentence order; a word
    # repeated in the sentence appears only once.
    words = list(currentDiscussion)
    tags = list(currentDiscussion.values())

    # Only the very first word can be the question word.
    questionWord = ""
    if words and words[0] in questionList:
        questionWord = words[0]

    # A question addressed to "you" is answered in the first person.
    subject = ""
    predicate_words = []
    for position, word in enumerate(words):
        if word in ("you", "You"):
            subject = "I"
            # Everything after "you", minus the final token.
            predicate_words = words[position + 1:-1]
            break

    print("\n")

    if questionWord in ("do", "Do"):
        print("Yes, " + subject + " " + " ".join(predicate_words))

    # Slice comparison is safe even when fewer than five tokens were tagged.
    if questionWord in ("what", "What") and tags[1:5] == ["NN", "VBZ", "DT", "NN"]:
        # The fifth token is the noun being asked about.
        answer = answersList.get(words[4])
        if answer is not None:
            print(answer)
            print("\n")
#look at the ordering of the sentence to determine the meaning
#It feels like with questions there is a decision tree that I need to follow
#a verb at the beginning of the question elicits a response
#"do" means a binary (yes/no) response
#"what" calls for an answer to the question
#"where" calls for an answer in the form of a place
#"why" calls for an answer in the form of internal motivation
#a verb following a pronoun is describing that pronoun
#what I am doing right now, though, is not algorithmic. Right now it feels like I am doing a lot of hard coding
def isStatement(statementInput):
    """Placeholder handler for statements.

    ``statementInput`` is accepted but not inspected yet; the function only
    reports that the input was classified as a statement.
    """
    notice = "input is a statement"
    print(notice)
#end program
def end():
    """Stop the program by raising ``SystemExit`` with no exit status."""
    raise SystemExit
# Script entry point: start the conversation as soon as this file is executed.
# NOTE: the call is not behind an `if __name__ == "__main__":` guard, so
# importing this module also starts the interactive session.
greetings()
#High level structure:
#Take input
#randomized or structured? greeting. There seems to be purpose and logic to specific types of greetings.
#These are based on relationships
#Analyze input
#Output
#randomized or structured? ending. There seems to be purpose and logic to specific types of endings.
#These are based on relationships
#once I know it is a question then what?
#I want to then drill down into the question
#next step is to understand the token syntax
# POS tag list:
# CC coordinating conjunction
# CD cardinal digit
# DT determiner
# EX existential there (like: "there is" ... think of it like "there exists")
# FW foreign word
# IN preposition/subordinating conjunction
# JJ adjective 'big'
# JJR adjective, comparative 'bigger'
# JJS adjective, superlative 'biggest'
# LS list marker 1)
# MD modal could, will
# NN noun, singular 'desk'
# NNS noun plural 'desks'
# NNP proper noun, singular 'Harrison'
# NNPS proper noun, plural 'Americans'
# PDT predeterminer 'all the kids'
# POS possessive ending parent\'s
# PRP personal pronoun I, he, she
# PRP$ possessive pronoun my, his, hers
# RB adverb very, silently,
# RBR adverb, comparative better
# RBS adverb, superlative best
# RP particle give up
# TO to go 'to' the store.
# UH interjection errrrrrrrm
# VB verb, base form take
# VBD verb, past tense took
# VBG verb, gerund/present participle taking
# VBN verb, past participle taken
# VBP verb, sing. present, non-3d take
# VBZ verb, 3rd person sing. present takes
# WDT wh-determiner which
# WP wh-pronoun who, what
# WP$ possessive wh-pronoun whose
# WRB wh-adverb where, when
#I'm getting to the point here where I need to start doing some design.
#Also, I need to connect my app to a database.
# (Web-page footer residue from the original blog post: "Like this: / Like Loading...")