from nltk.tokenize import sent_tokenize, word_tokenize
# tokenizing - word tokenizers...sentence tokenizers
# lexicon and corpora
# corpora - body of text
# lexicon - words and their meaning
# Example: tokenizing a short text into sentences and then into words.
example_text = "Hello there, how are you. The sky is pinkish blue yay!"

# sent_tokenize splits the text into a list of sentences.
print(sent_tokenize(example_text))

# word_tokenize splits the text into individual words; each
# punctuation mark is also taken as its own element in the list.
print(word_tokenize(example_text))

# This is more preprocessing than analysis; later on we will
# discuss part-of-speech tagging.