

# https://www.globalsino.com/ICs/
# Simple Word Cloud applications with text input from a .csv file

import collections
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
from matplotlib import rcParams
from wordcloud import WordCloud, STOPWORDS
from queue import Queue


# Convert a CSV file to a TXT file
# Input a csv file
theCSVfile = r"C:\GlobalSino2\ICs\4304ForWordCloudCSV.csv"
# Generates a text file
theTXTfile = r"C:\GlobalSino2\ICs\4304ForWordCloudCSV.txt"
# Copy the CSV content line by line into the text file.
# - The source is opened read-only: the script never modifies it, and 'r+'
#   would fail on a write-protected CSV.
# - The destination is opened once, in write mode, so running the script again
#   regenerates the text file instead of appending another copy of the same
#   data (which would inflate every word count).
with open(theCSVfile, 'r') as f, open(theTXTfile, 'w') as done:
    done.writelines(f)

# Read a text file
# The context manager closes the file handle as soon as the content has been
# read, instead of leaving it open for the rest of the script.
with open(theTXTfile) as textFile:
    myText = textFile.read()
stopwords = STOPWORDS

# ============
# Word cloud of the whole text

# Keep a private copy of the library's default stopword set
stopwords = set(STOPWORDS)

# Configure the renderer first, then feed it the text
wordcloud = WordCloud(
    width=3000,
    height=2000,
    random_state=1,
    background_color='black',
    colormap='Set2',
    collocations=False,
    stopwords=STOPWORDS,
)
wordcloud.generate(myText)

# Show the rendered cloud without axis ticks or frame
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# ============

# Horizontal bar chart of the most frequent words that are not stopwords.

# Number of top-ranked words to plot (also sizes the colour ramp)
top_n = 10

# Compare the lowercased token against the stopword set so that capitalised
# stopwords ("The", "And", ...) are filtered out as well; the token itself is
# kept unchanged for counting and display.
filtered_words = [
    word for word in myText.split() if word.lower() not in stopwords
]
counted_words = collections.Counter(filtered_words)

# most_common() returns (word, count) pairs ordered from most to least frequent
top_pairs = counted_words.most_common(top_n)
words = [word for word, _ in top_pairs]
counts = [count for _, count in top_pairs]

# One rainbow colour per rank
colors = cm.rainbow(np.linspace(0, 1, top_n))
# Must be set before the first pyplot call below creates the new figure
rcParams['figure.figsize'] = 20, 10
plt.title('Top words in the text vs their count')
plt.xlabel('Counts')
plt.ylabel('Words')
plt.barh(words, counts, color=colors)
plt.show()
