# Stop words
# NLTK will not generate insights for you; it helps you analyze and pull apart text.
# Stop words are filler words (e.g. "is", "an", "the") that carry little meaning for data analysis, so they are usually removed before processing.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# Sample text used to demonstrate stop-word filtering.
example_sentence = "This is an example showing off stop word filtration"

# NLTK ships a precompiled English stop-word list; storing it in a set gives
# fast membership tests, and extra words can be added with stop_words.add(...).
# The list is not comprehensive, but it covers common filler words that are
# usually dropped before analysis.
stop_words = set(stopwords.words("english"))

# Split the sentence into individual word tokens.
words = word_tokenize(example_sentence)

# Keep only the tokens that are not stop words. The stop-word entries are
# lowercase, so each token is lowercased for the comparison; otherwise a
# capitalized stop word such as "This" would slip through. The original
# casing of the tokens that are kept is preserved.
filtered_sentence = [w for w in words if w.lower() not in stop_words]
print(filtered_sentence)

# Result:
# ['example', 'showing', 'stop', 'word', 'filtration']
# Every token found in the stop-word list ("This", "is", "an", "off") has been
# removed from the token list.
# (Blog footer text "Like this: / Like Loading..." was page residue, not part of the script.)