Text summarization is the process of automatically producing a concise, faithful summary of a longer document, making large volumes of text quick to digest. There are two broad approaches: extractive summarization selects the most important sentences from the source verbatim, while abstractive summarization generates new sentences that paraphrase it. The snippets below demonstrate both, starting with a TF-IDF-based extractive approach.
from sklearn.feature_extraction.text import TfidfVectorizer

text = """Natural language processing (NLP) is a field of artificial intelligence that enables computers to understand, interpret, and generate human language.
It is used in many real-world applications like sentiment analysis, chatbots, machine translation, and summarization.
With the rise of deep learning, NLP has seen rapid advancements in the quality of understanding and generation."""

# Split on sentence-ending periods; splitting on '. ' alone would miss
# the sentences here, which end at a newline rather than a space.
sentences = [s.strip() for s in text.split('.') if s.strip()]

# Represent each sentence as a TF-IDF vector
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(sentences)
print("TF-IDF matrix shape:", X.shape)
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min

# Cluster the sentence vectors, then pick the sentence nearest each centroid.
# (argsort on cluster_centers_ yields *feature* indices, not sentence indices.)
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(X)
closest, _ = pairwise_distances_argmin_min(kmeans.cluster_centers_, X)
summary_sentences = [sentences[i] for i in sorted(closest)]
print("Summary (extractive):", " ".join(summary_sentences))
from transformers import pipeline

# Abstractive summarization with a pretrained seq2seq model
# (the first call downloads the default checkpoint)
summarizer = pipeline("summarization")
summary = summarizer(text, max_length=50, min_length=25, do_sample=False)
print("Summary (abstractive):", summary[0]['summary_text'])
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity

# TextRank: build a sentence-similarity graph and rank nodes with PageRank
similarity_matrix = cosine_similarity(X)
nx_graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(nx_graph)

# Sort sentences by score, highest first
ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)
for score, sentence in ranked_sentences[:2]:
    print("TextRank Sentence:", sentence)
# Multi-document summarization: concatenate the documents, then summarize
docs = [
    "Natural language processing enables computers to understand human language.",
    "Applications include translation, sentiment analysis, and chatbots."
]
combined_text = " ".join(docs)
summary = summarizer(combined_text, max_length=40, min_length=15, do_sample=False)
print("Summary of multiple docs:", summary[0]['summary_text'])
# Domain example: summarizing a short news article
news_article = """The stock market saw a significant dip today due to concerns over rising inflation and interest rates.
Experts believe the Federal Reserve may take measures to control the situation, which could further impact investor sentiment."""
news_summary = summarizer(news_article, max_length=40, min_length=20, do_sample=False)
print("News Summary:", news_summary[0]['summary_text'])
def chunk_text(text, max_tokens=512):
    # Split by word count as a rough proxy for the model's token limit;
    # a tokenizer-based variant follows below for an exact split
    words = text.split()
    return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]

long_text = text * 10  # Simulate an input longer than the model can handle at once
chunks = chunk_text(long_text)
summaries = [summarizer(chunk, max_length=50, min_length=25, do_sample=False)[0]['summary_text'] for chunk in chunks]
print("Chunked Summary:", " ".join(summaries))
from rouge_score import rouge_scorer

# Evaluate a generated summary against a human-written reference
reference = "NLP helps computers understand and process human language."
candidate = summary[0]['summary_text']  # the multi-document summary from above
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores = scorer.score(reference, candidate)
print("ROUGE Scores:", scores)
# Sampling-based decoding trades determinism for variety
custom_summary = summarizer(
    text,
    max_length=60,
    min_length=30,
    do_sample=True,    # sample tokens instead of always picking the most likely one
    temperature=0.7,   # < 1 sharpens the distribution toward likely tokens
    top_k=50,          # restrict sampling to the 50 most likely tokens
    top_p=0.95         # ...and to the smallest set covering 95% of probability mass
)
print("Customized Summary:", custom_summary[0]['summary_text'])
def chatbot_response(user_input):
    # Summarize long messages; echo short ones back unchanged.
    # A 20-word threshold is low enough that the sample message below triggers it.
    if len(user_input.split()) > 20:
        return summarizer(user_input, max_length=40, min_length=20, do_sample=False)[0]['summary_text']
    return user_input

user_message = """Hey assistant, I just read a long article about how AI is transforming healthcare through better diagnostics, predictive analytics, and robotic surgeries."""
print("Bot:", chatbot_response(user_message))