## Stack Overflow Tags Count Interactive Chart
### Visualizing Stack Overflow Tags Count using Bokeh + Python
Final Implementation: https://amrrs.github.io/Stackoverflow-Tags-Count-Interactive-Chart/index.html
%matplotlib inline
import urllib2
from bs4 import BeautifulSoup as bs
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
# Shared accumulator: extract_tagged() appends one entry per scraped tag to
# each of these two parallel lists.
df = {'Language': [], 'Tag_Count': []}


def extract_tagged(url):
    """Scrape one tags page and append its tag names and counts to ``df``.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``.  The text of every ``<a class="post-tag">`` element is
    appended to ``df['Language']`` and the text of every
    ``<span class="item-multiplier-count">`` element is appended to
    ``df['Tag_Count']`` (still as strings; nothing is converted here).

    Args:
        url: Address of the page to download.

    Returns:
        None.  The module-level ``df`` dict is extended in place.

    Raises:
        ValueError: If the page yields a different number of tag names than
            counts.  ``df`` is left untouched in that case so its two
            columns never become ragged.
    """
    # Local import keeps this cell self-contained; urllib2 responses are not
    # context managers on Python 2, so closing() supplies the cleanup.
    from contextlib import closing

    with closing(urllib2.urlopen(url)) as response:
        content = response.read()

    soup = bs(content, 'html.parser')
    languages = [
        tag.text for tag in soup.find_all('a', attrs={'class': 'post-tag'})
    ]
    counts = [
        count.text
        for count in soup.find_all(
            'span', attrs={'class': 'item-multiplier-count'}
        )
    ]

    # Validate before mutating the shared dict: both lists are matched up
    # purely by position, so a length mismatch means rows cannot be paired.
    if len(languages) != len(counts):
        raise ValueError(
            'Found {0} tag names but {1} counts at {2}'.format(
                len(languages), len(counts), url
            )
        )

    df['Language'].extend(languages)
    df['Tag_Count'].extend(counts)
# Empty both columns first so that re-running this cell rebuilds the data
# instead of appending another copy of every tag to the shared dict.
for column in df.values():
    del column[:]

# Scrape pages 1 and 2 of the "popular" tab.
for page in range(1, 3):
    extract_tagged('http://stackoverflow.com/tags?page='+str(page)+'&tab=popular')

# extract_tagged() stores the counts as text; convert them to integers so
# they can be sorted and plotted numerically.
df['Tag_Count'] = [int(count) for count in df['Tag_Count']]
df2 = pd.DataFrame(df)
df2
Language | Tag_Count | |
---|---|---|
0 | javascript | 1322430 |
1 | java | 1211460 |
2 | c# | 1059649 |
3 | php | 1037622 |
4 | android | 951441 |
5 | jquery | 815753 |
6 | python | 702187 |
7 | html | 624090 |
8 | c++ | 498218 |
9 | ios | 491622 |
10 | css | 449688 |
11 | mysql | 446022 |
12 | sql | 372281 |
13 | asp.net | 309594 |
14 | objective-c | 273279 |
15 | ruby-on-rails | 267520 |
16 | .net | 247565 |
17 | c | 242388 |
18 | angularjs | 220785 |
19 | iphone | 216758 |
20 | arrays | 213383 |
21 | sql-server | 191419 |
22 | json | 187001 |
23 | ruby | 176818 |
24 | r | 170712 |
25 | ajax | 163189 |
26 | regex | 161942 |
27 | node.js | 160166 |
28 | xml | 152779 |
29 | asp.net-mvc | 150274 |
... | ... | ... |
186 | spring | 104353 |
187 | html5 | 99885 |
188 | multithreading | 94466 |
189 | git | 81724 |
190 | oracle | 80351 |
191 | bash | 79683 |
192 | forms | 79080 |
193 | image | 78246 |
194 | mongodb | 77513 |
195 | 77273 | |
196 | vba | 76476 |
197 | twitter-bootstrap | 76445 |
198 | osx | 75235 |
199 | algorithm | 72916 |
200 | winforms | 72327 |
201 | apache | 70032 |
202 | matlab | 68705 |
203 | performance | 67784 |
204 | entity-framework | 65151 |
205 | swing | 65048 |
206 | postgresql | 65032 |
207 | visual-studio | 64854 |
208 | python-2.7 | 63613 |
209 | scala | 62468 |
210 | linq | 62168 |
211 | hibernate | 61346 |
212 | list | 61315 |
213 | css3 | 61139 |
214 | excel-vba | 57296 |
215 | qt | 56533 |
216 rows × 2 columns
# Switch matplotlib to the 'ggplot' style sheet.
style.use('ggplot')

# Order rows by Tag_Count (largest first), keep the first 20 and use the
# Language column as the index.
ranked_df = df2.sort_values(by='Tag_Count', ascending=False)
top_rows = ranked_df.head(20)
sorted_short_df = top_rows.set_index('Language')

# Optional static preview: sorted_short_df.plot(kind='bar')
sorted_short_df
Tag_Count | |
---|---|
Language | |
javascript | 1322436 |
javascript | 1322430 |
javascript | 1322430 |
java | 1211466 |
java | 1211460 |
java | 1211460 |
c# | 1059652 |
c# | 1059649 |
c# | 1059649 |
php | 1037622 |
php | 1037622 |
php | 1037622 |
android | 951441 |
android | 951441 |
android | 951440 |
jquery | 815756 |
jquery | 815753 |
jquery | 815753 |
python | 702196 |
python | 702187 |
from bokeh.charts import Bar, output_file, show
from bokeh.embed import components
from bokeh.models import HoverTool
# NOTE(review): this HoverTool is created but never passed to the chart in
# the code below; the Bar call receives its hover fields through the
# `tooltips` keyword instead.
hover = HoverTool(names=["Language", "Tag_Count"])

# Bar chart of Tag_Count per Language, built from the full df2 frame.
plot = Bar(df2, label='Language', values='Tag_Count', title="Stack Overflow Popular Tags", width=1000,legend=False,tooltips=[('Tag_Count:', '@height'), ('Language:', '@Language')])
#output_file("stack_tag_sorted.html",title='Stack Overflow Popular Tags')

# components() returns the two embed pieces unpacked here as script and div.
script, div = components(plot)

# Write each piece to its own file; `with` closes the handle even if the
# write raises.
with open('chart1.js', 'w') as js:
    js.write(script)
with open('style1.css', 'w') as css:
    css.write(div)
#show(plot)