Lemmatization is the process of reducing words to their base or dictionary form, called a lemma. Unlike stemming, which simply strips affixes, lemmatization considers the context and part of speech, resulting in meaningful root forms that are real dictionary words.
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
# One lemmatizer instance is shared by every example below.
lemmatizer = WordNetLemmatizer()
words = [
    "running", "runs", "runner", "better", "cats",
    "studies", "wolves", "was", "geese",
]

# Example 1: no POS argument is passed, so each word is handled with the
# lemmatizer's default part of speech (noun).
noun_lemmas = list(map(lemmatizer.lemmatize, words))
print(noun_lemmas)

# Example 2: the same word list, this time with every word tagged as a verb.
verb_lemmas = []
for entry in words:
    verb_lemmas.append(lemmatizer.lemmatize(entry, pos='v'))
print(verb_lemmas)

# Example 3: "better" looked up as an adjective.
better_as_adjective = lemmatizer.lemmatize("better", pos='a')
print(better_as_adjective)

# Example 4: "was" looked up as a verb.
was_as_verb = lemmatizer.lemmatize("was", pos='v')
print(was_as_verb)

# Example 5: plural nouns, relying on the default part of speech.
for plural in ("wolves", "geese"):
    print(lemmatizer.lemmatize(plural))

# Example 6: a tokenized sentence. The part of speech is NOT inferred from the
# sentence here; the two tokens listed in verb_tokens are hard-coded as verbs
# and every other token falls back to the default part of speech.
sentence = ["The", "cats", "are", "running", "fast"]
verb_tokens = ("running", "are")
sentence_lemmas = []
for token in sentence:
    if token in verb_tokens:
        sentence_lemmas.append(lemmatizer.lemmatize(token, pos='v'))
    else:
        sentence_lemmas.append(lemmatizer.lemmatize(token))
print(sentence_lemmas)

# Example 7: irregular verb forms, tagged as verbs.
for irregular in ("went", "gone"):
    print(lemmatizer.lemmatize(irregular, pos='v'))

# Example 8: comparative and superlative adjective forms.
for graded in ("faster", "fastest"):
    print(lemmatizer.lemmatize(graded, pos='a'))

# Example 9: a suffixed word, explicitly tagged as a noun.
suffixed_lemma = lemmatizer.lemmatize("happiness", pos='n')
print(suffixed_lemma)

# Example 10: a compound word; it is expected to come back unchanged.
compound_lemma = lemmatizer.lemmatize("multiuser")
print(compound_lemma)