"""Minimal console chatbot prototype built on NLTK part-of-speech tagging.

Flow: print a random greeting, then repeatedly read a line from stdin,
classify it as a question or a statement, and print a response.  Typing
``goodbye`` (or closing stdin) ends the program.
"""

import random
import re
import string
import sys

import nltk
import numpy as np
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer

# I should create classes and functions of static variables

# define list of greetings
greetingList = np.array(["Hello", "hello", "Hi", "hi", "Whats Up", "whats up", "Hola", "hola"])
questionList = np.array(["do", "Do", "how", "How", "where", "Where", "why", "Why", "what", "What", "when", "When"])
# could probably substitute this for a database query
answersList = {'sky': 'blue'}
# word -> POS tag for the input currently being analyzed; cleared after each input
currentDiscussion = {}
pastDiscussion = {}


def greetings():
    """Print a randomly selected greeting, then start reading user input."""
    print(random.choice(greetingList))
    userInput()


def userInput():
    """Read lines from stdin and analyze each one until the program exits.

    If the line is one of the entries in ``greetingList`` the user is asked
    what they want and a second line is read and analyzed instead.

    The loop only ends when ``end()`` raises ``SystemExit`` (on "goodbye" or
    when stdin is closed).  A loop is used rather than self-recursion so a
    long session cannot hit the interpreter's recursion limit.
    """
    while True:
        try:
            takeInput = input()
            if takeInput in greetingList:
                print("What can I do for you today?")
                takeInput = input()
        except EOFError:
            # stdin was closed: treat it like "goodbye" instead of crashing
            end()
            return
        analyzeInput(takeInput)
        # each input is analyzed on its own, so drop the previous tags
        currentDiscussion.clear()


def process_content(processInput):
    """Tokenize and POS-tag ``processInput`` with NLTK.

    The tags are merged into the module-level ``currentDiscussion`` dict
    (word -> tag) and the full list of ``(word, tag)`` pairs is returned in
    sentence order.  The list, unlike the dict, keeps repeated words, so
    callers that depend on token positions should use the return value.

    If tokenizing or tagging fails (for example because the NLTK data
    files are not installed) the error is printed and an empty list is
    returned.
    """
    try:
        words = nltk.word_tokenize(processInput)
        tagged = nltk.pos_tag(words)
    except Exception as e:  # best effort: report the problem and keep chatting
        print(str(e))
        return []
    currentDiscussion.update(tagged)
    return tagged


def analyzeInput(newInput):
    """Dispatch ``newInput`` to the question or statement handler.

    * ``goodbye`` ends the program.
    * Blank input is ignored.
    * Input whose last non-blank character is ``?`` is a question.
    * Everything else (including input ending in ``.``) is a statement.
    """
    text = newInput.strip()
    if text == "goodbye":
        # checked first so the farewell is not also reported as a statement
        end()
        return
    if not text:
        return
    if text.endswith("?"):
        isQuestion(newInput)
    else:
        isStatement(newInput)


def _subject_and_predicate(words):
    """Return ``("I", predicate)`` if the question addresses "you".

    The predicate is everything after the first "you"/"You" except the
    final token (the closing question mark), joined with spaces.  Returns
    ``("", "")`` when "you" does not occur.
    """
    for index, word in enumerate(words):
        if word in ("you", "You"):
            return "I", " ".join(words[index + 1:-1])
    return "", ""


def isQuestion(questionInput):
    """Print a response to the question ``questionInput``.

    The question is tagged with ``process_content`` and then matched
    against a few hard-coded shapes:

    * "Do you <predicate>?"       -> "Yes, I <predicate>"
    * "What <NN> <VBZ> <DT> <NN>?" -> the ``answersList`` entry for the
      final noun, if there is one (e.g. "What color is the sky?").

    Nothing but blank lines is printed for any other question.
    """
    # I will need to add additional criteria to assess if a question has been
    # asked based on the first word.
    # Use NLTK to tokenize the words that are returned.
    # Once the words are tokenized I will determine what to output.
    # I need data to analyze the input and data to decide what output I need.
    # I will need a database of output.
    # How do I expand my input and output database?
    tagged = process_content(questionInput)
    # Once I have processed the question then what?
    # I want to take the part of speech and then find meaning;
    # once meaning is found then I will figure out what to do next.
    allWords = [word for word, _ in tagged]
    allPOS = [tag for _, tag in tagged]

    # only the first word can be the question word
    questionWord = ""
    if allWords and allWords[0] in questionList:
        questionWord = allWords[0]
    subject, predicate = _subject_and_predicate(allWords)

    print("\n")

    questionKind = questionWord.lower()
    if questionKind == "do":
        print("Yes, " + subject + " " + predicate)
    elif questionKind == "what":
        # color + of + sky
        # NN + IN + NN
        # if NN + VBZ + DT + NN
        # It is almost like I need to try different combinations of words to
        # find a pattern that will work, so I know that it is not going to be
        # the first one so I can rule that out...
        # Slice comparison is False (not an IndexError) for short questions.
        if allPOS[1:5] == ["NN", "VBZ", "DT", "NN"]:
            answer = answersList.get(allWords[4])
            if answer is not None:
                print(answer)
    print("\n")
    # Look at the ordering of the sentence to determine the meaning.
    # It feels like with questions there is a decision tree that I need to follow:
    #   a verb at the beginning of the question emits a response
    #   "do" means a binary response
    #   "what" is an answer to the question
    #   "where" is an answer in the form of a place
    #   "why" is an answer in the form of internal motivation
    #   a verb following a pronoun is describing that pronoun
    # What I am doing right now though is not algorithmic. Right now it feels
    # like I am doing a lot of hard coding.


def isStatement(statementInput):
    """Report that the input was classified as a statement."""
    print("input is a statement")


def end():
    """End the program by raising ``SystemExit``."""
    sys.exit()


# NOTE: this runs whenever the module is executed or imported.
greetings()

# High level structure:
# Take input
# randomized or structured? greeting. There seems to be purpose and logic to
# specific types of greetings.
# These are based on relationships
# Analyze input
# Output
# randomized or structured? ending. There seems to be purpose and logic to
# specific types of endings.
# These are based on relationships

# once I know it is a question then what?
# I want to then drill down into the question
# next step is to understand the token syntax

# POS tag list:
# CC    coordinating conjunction
# CD    cardinal digit
# DT    determiner
# EX    existential there (like: "there is" ... think of it like "there exists")
# FW    foreign word
# IN    preposition/subordinating conjunction
# JJ    adjective 'big'
# JJR   adjective, comparative 'bigger'
# JJS   adjective, superlative 'biggest'
# LS    list marker 1)
# MD    modal could, will
# NN    noun, singular 'desk'
# NNS   noun plural 'desks'
# NNP   proper noun, singular 'Harrison'
# NNPS  proper noun, plural 'Americans'
# PDT   predeterminer 'all the kids'
# POS   possessive ending parent's
# PRP   personal pronoun I, he, she
# PRP$  possessive pronoun my, his, hers
# RB    adverb very, silently,
# RBR   adverb, comparative better
# RBS   adverb, superlative best
# RP    particle give up
# TO    to go 'to' the store.
# UH    interjection errrrrrrrm
# VB    verb, base form take
# VBD   verb, past tense took
# VBG   verb, gerund/present participle taking
# VBN   verb, past participle taken
# VBP   verb, sing. present, non-3d take
# VBZ   verb, 3rd person sing. present takes
# WDT   wh-determiner which
# WP    wh-pronoun who, what
# WP$   possessive wh-pronoun whose
# WRB   wh-adverb where, when

# I'm getting to the point here where I need to start doing some design.
# Also, I need to connect my app to a database.