# Python – NLTK – Tokenizing

from nltk.tokenize import sent_tokenize, word_tokenize

# Tokenizing: word tokenizers and sentence tokenizers.
# Terminology:
#   corpora - a body of text
#   lexicon - words and their meanings

example_text = "Hello there, how are you. The sky is pinkish blue yay!"

# sent_tokenize splits the text into a list of sentences.
print(sent_tokenize(example_text))

# word_tokenize splits each sentence into individual words;
# punctuation is also taken as its own element.
print(word_tokenize(example_text))

# This is more of preprocessing rather than analysis;
# later on we will discuss part-of-speech tagging.

# (WordPress footer artifact from the scraped page: "%d bloggers like this:")