

# https://www.globalsino.com/ICs/
# Simple Word Cloud applications


import collections
import string

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import rcParams
from wordcloud import WordCloud, STOPWORDS
# Sample passage used as input for the word cloud and the word-frequency
# chart. Besides prose it contains numeric tokens (12345.67, 12345).
# NOTE: "Köhler" was previously stored as the mojibake "KÃ¶hler" (UTF-8 bytes
# decoded as Latin-1); it is now the intended character.
myText = """
Material 12345.67 characterizations usually 12345.67 attempt to link the specific aspects 12345.67  of the materials microstructure (e.g., 12345
crystalline phase, 12345.67 particle shape and size, etc.) to its physical and chemical properties (e.g., electrical conductivity, selectivity,
reactivity, etc.). Because the earliest characterizations were only conducted using optical microscopy, minimal structural information was obtained
due to the limitation of its spatial resolution. However, by the mid-1920s de Broglie hypothesized that high-speed electrons exist as waves and that
their wavelengths are shorter than visible light. Because the early TEMs were designed after the basic concept of a transmission light microscope with
Köhler illumination, both systems have an illumination source, a series of lenses and apertures placed within their columns, a specimen stage,
a viewing port, and an image recording system.
"""
# ============
# Word cloud
#
# Copy the library's default stop words into a separate set, so that it can be
# extended here without mutating the shared STOPWORDS object.
stopwords = set(STOPWORDS)

# Build the word cloud image from the sample text.
#   random_state=1      -> fixed seed, so reruns produce the same layout
#   collocations=False  -> do not add two-word phrases (bigrams) as entries
#   stopwords=stopwords -> use the local copy above, so edits to it take effect
wordcloud = WordCloud(
    width=3000,
    height=2000,
    random_state=1,
    background_color='black',
    colormap='Set2',
    collocations=False,
    stopwords=stopwords,
).generate(myText)

# Display the generated image without axis ticks or frame.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

# ============

# Horizontal bar chart of the most frequent non-stop-words in myText.
TOP_N = 10  # maximum number of bars to draw

# Normalise every whitespace-separated token before filtering: surrounding
# punctuation is stripped and the token is lower-cased. The stop-word list is
# lower-case, so without this step tokens such as "Because" or "However,"
# slip past the filter, and "light" / "light." are counted as different words.
# Punctuation inside a token (e.g. "mid-1920s", "12345.67") is left alone.
normalized_tokens = (
    token.strip(string.punctuation).lower() for token in myText.split()
)
filtered_words = [
    token for token in normalized_tokens if token and token not in stopwords
]
counted_words = collections.Counter(filtered_words)

# Split the (word, count) pairs into the two parallel sequences barh() expects.
top_words = counted_words.most_common(TOP_N)
words = [word for word, _ in top_words]
counts = [count for _, count in top_words]

# One colour per bar, so the palette always spans its full range even when
# the text has fewer than TOP_N distinct words.
colors = cm.rainbow(np.linspace(0, 1, len(words)))

# figure.figsize only applies to figures created after it is set, so set it
# first and then open a new figure explicitly. Without the explicit figure,
# a backend whose show() neither blocks nor closes would draw the bars onto
# the existing word-cloud axes.
rcParams['figure.figsize'] = 20, 10
plt.figure()
plt.title('Top words in the text vs their count')
plt.xlabel('Counts')
plt.ylabel('Words')
plt.barh(words, counts, color=colors)
plt.show()
