[Python] How to extract and export word counts from a document
Import packages
import csv  # for importing csv format data
import re  # for regular expression
from string import punctuation  # punctuation characters to filter out of the tokens

import nltk  # package for natural language processing
import numpy as np
import openpyxl  # for exporting xlsx format data
import pandas as pd
from nltk import FreqDist, ngrams  # frequency counting and n-gram generation
from nltk.corpus import stopwords  # stop-word lists
# Stop list: English stop words plus every punctuation character. It is built
# once as a set so each membership test below is a constant-time lookup
# instead of re-reading the stop-word corpus for every token.
# NOTE: stopwords.words() needs the NLTK "stopwords" corpus to be installed
# (e.g. via nltk.download('stopwords')).
stoplist = set(stopwords.words('english') + list(punctuation))

# Keep only the tokens that are not in the stop list.
# `tokenized` is the list of tokens produced by an earlier step that is not
# part of this snippet.
tokens_without_sw = [word for word in tokenized if word not in stoplist]

# Count every bigram (n = 2) of the remaining tokens; most_common() returns
# (bigram, count) pairs ordered from most to least frequent.
Ngram = FreqDist(ngrams(tokens_without_sw, 2)).most_common()

# Wrap the (bigram, count) pairs in a pandas Series.
ngramdf = pd.Series(Ngram)
ngramdf  # notebook display

# Inspect how many tokens survived the filter, and the tokens themselves.
len(tokens_without_sw)
tokens_without_sw

# create top 300 keywords (You can change the number of words)
# value_counts() returns absolute counts sorted in descending order.
df = pd.Series(tokens_without_sw).value_counts().head(300)
df  # notebook display

# Export Results into CSV file
# index=True keeps the word (the Series index) as the first CSV column.
df.to_csv('after_freq.csv', index=True)