forked from kapilkchaurasia/Data-mining-python-script
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinkedin_mining.py
More file actions
91 lines (64 loc) · 2.46 KB
/
linkedin_mining.py
File metadata and controls
91 lines (64 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# GIVE YOU THE ID OF ALL CONNECTIONS OF YOURS AND USING ID FEILD TO DATA MINE "EDUCATIONS AND JOB FREQUENCY " OF YOUR CONNECTION
# BY LOOKING "https://developer.linkedin.com/documents/profile-fields#fullprofile" YOU CAN EASILY MANIPULATE BELOW SRC CODE AND DATA MINE ACCORDING TO YOUR NEED
from linkedin import linkedin
import json
import re
# Define CONSUMER_KEY, CONSUMER_SECRET,
# USER_TOKEN, and USER_SECRET from the credentials
# provided in your LinkedIn application
CONSUMER_KEY = 'xxxxxx'
CONSUMER_SECRET = 'xxxxxxxxxxxx'
USER_TOKEN = 'XXXXXXXXXXXX'
USER_SECRET = 'XXXXX'
RETURN_URL = '' # Not required for developer authentication
# Instantiate the developer authentication class
auth = linkedin.LinkedInDeveloperAuthentication(CONSUMER_KEY, CONSUMER_SECRET,
USER_TOKEN, USER_SECRET,
RETURN_URL,
permissions=linkedin.PERMISSIONS.enums.values())
# Pass it in to the app...
app = linkedin.LinkedInApplication(auth)
# Use the app...
connections=app.get_connections()
connections_data = '/home/kapil.ch/linkedin DM/linkedindata'
f = open(connections_data, 'w')
f.write(json.dumps(connections, indent=1))
f.close()
# DATA MINING EDUCATION FEILD
for k,v in connections.items():
if(k=='values' ):
for x in v:
for k1,v1 in x.items():
#print k1
if(k1=="id"):
#print v1
xyz= app.get_profile(member_id=v1,selectors= ['person:(educations)'])
print xyz
# DATA MINING JOBS OF YOUR CONNECTION IN SORTED ORDER
titles = []
for contact in contacts:
titles.extend([t.strip() for t in contact['Job Title'].split('/')
if contact['Job Title'].strip() != ''])
# Replace common/known abbreviations
for i, _ in enumerate(titles):
for transform in transforms:
titles[i] = titles[i].replace(*transform)
# Print out a table of titles sorted by frequency
pt = PrettyTable(field_names=['Title', 'Freq'])
pt.align = 'l'
c = Counter(titles)
[pt.add_row([title, freq])
for (title, freq) in sorted(c.items(), key=itemgetter(1), reverse=True)
if freq > 1]
print pt
# Print out a table of tokens sorted by frequency
tokens = []
for title in titles:
tokens.extend([t.strip(',') for t in title.split()])
pt = PrettyTable(field_names=['Token', 'Freq'])
pt.align = 'l'
c = Counter(tokens)
[pt.add_row([token, freq])
for (token, freq) in sorted(c.items(), key=itemgetter(1), reverse=True)
if freq > 1 and len(token) > 2]
print pt