"""Intro to NLTK tokenization: splitting raw text into sentences and words.

Terminology:
- corpus / corpora: a body of text
- lexicon: words and their meanings
"""
from nltk.tokenize import sent_tokenize, word_tokenize

# Sample text containing two sentences.
example_text = "Hello there, how are you. The sky is pinkish blue yay!"

# Sentence tokenization: produces a list with one element per sentence.
print(sent_tokenize(example_text))

# Word tokenization: each word becomes its own list element;
# punctuation marks are also kept as separate tokens.
print(word_tokenize(example_text))

# This is preprocessing rather than analysis;
# part-of-speech tagging will be discussed later on.