NLTK (Natural Language Toolkit)

NLTK is one of the most widely-used Python libraries for natural language processing. It provides easy-to-use interfaces for over 50 corpora and lexical resources, along with a suite of text processing libraries for classification, tokenization, stemming, tagging, parsing, and semantic reasoning.

Key Features of NLTK

- Text-processing tools for tokenization, stemming, lemmatization, tagging, parsing, classification, and semantic reasoning
- Easy-to-use interfaces to over 50 corpora and lexical resources (e.g. stopword lists, WordNet)

10 Practical Examples

import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

# Example 1: Fetch the NLTK data packages the examples below need (run once).
# 'punkt'/'punkt_tab' back the tokenizers, 'stopwords' the stopword lists,
# and 'wordnet' the lemmatizer.
for resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(resource)

# Example 2: Sentence tokenization.
# NOTE: sent_tokenize raises a LookupError if the 'punkt'/'punkt_tab' data
# from Example 1 has not been downloaded yet.
text = "Hello there! How are you doing today?"
sentences = nltk.sent_tokenize(text)
print(sentences)

# Example 3: Word tokenization — split the same text into word/punctuation tokens.
words = nltk.word_tokenize(text)
print(words)

# Example 4: Drop English stopwords from the token list.
# Membership tests go against a set for O(1) lookups; comparison is
# case-insensitive via .lower().
stop_words = set(stopwords.words('english'))
filtered_words = [token for token in words if token.lower() not in stop_words]
print(filtered_words)

# Example 5: Stemming — chop each surviving token down to its Porter stem.
stemmer = PorterStemmer()
stemmed_words = list(map(stemmer.stem, filtered_words))
print(stemmed_words)

# Example 6: Lemmatization — map each token to its WordNet lemma
# (requires the 'wordnet' data from Example 1).
wnl = WordNetLemmatizer()
lemmatized_words = list(map(wnl.lemmatize, filtered_words))
print(lemmatized_words)

# Example 7: POS tagging
# Download the English averaged-perceptron tagger model (run once).
nltk.download('averaged_perceptron_tagger_eng')
# Tag each token from Example 3; returns a list of (token, tag) pairs,
# reused by the NER step in Example 8.
pos_tags = nltk.pos_tag(words)
print(pos_tags)

# Example 8: Named Entity Recognition (NER)
# Download the chunker model and word list it depends on (run once).
nltk.download('maxent_ne_chunker_tab')
nltk.download('words')
# Chunk the POS-tagged tokens from Example 7 into a tree whose subtrees are
# labelled named entities. Use nltk.ne_chunk via the package imported at the
# top of the file instead of a mid-script `from nltk import ne_chunk`
# (imports belong at the top of the file per PEP 8).
tree = nltk.ne_chunk(pos_tags)
print(tree)

# Example 9: Frequency distribution — count how often each token occurs
# and show the five most common ones.
word_freq = nltk.FreqDist(words)
print(word_freq.most_common(5))

# Example 10: Concordance — print every occurrence of a word with its
# surrounding context, using NLTK's Text wrapper around the token list.
nltk_text = nltk.Text(words)
nltk_text.concordance("today")