Skip to content

Commit f9e2e9d

Browse files
Merge branch 'fani-lab:main' into main
2 parents 1020265 + c204ab8 commit f9e2e9d

File tree

7 files changed

+100
-89
lines changed

7 files changed

+100
-89
lines changed

src/main.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,10 @@ def run(cfg):
101101
embcfg.model.gnn.pytorch = cfg.pytorch
102102
OmegaConf.resolve(embcfg)
103103
cfg.data.embedding.config = embcfg
104-
cls, method = cfg.data.embedding.class_method.split('_')
104+
# Split "<class>_<method>" at the first underscore; a bare "<class>" (no underscore) yields method=None.
# str.find() is not usable as a presence test here: it returns -1 (truthy) when '_' is absent,
# which previously sent class-only values down the split() path and broke the two-name unpacking.
cls, _, method = cfg.data.embedding.class_method.partition('_')
method = method or None
105105
cls = get_class(cls)
106-
#t2v = cls(cfg.data.output, cfg.data.acceleration, cfg.data.embedding.config.model[cls.__name__.lower()])
107-
t2v = cls(cfg.data.output, cfg.acceleration, cfg.data.embedding.config.model[cls.__name__.lower()])
108-
t2v.name = method
106+
#t2v = cls(cfg.data.output, cfg.data.acceleration, method, cfg.data.embedding.config.model[cls.__name__.lower()])
107+
t2v = cls(cfg.data.output, cfg.acceleration, method, cfg.data.embedding.config.model[cls.__name__.lower()])
109108
t2v.train(teamsvecs, indexes, splits)
110109

111110
if cfg.cmd and any(c in cfg.cmd for c in ['train', 'test', 'eval']):

src/mdl/emb/__config__.yaml

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ model:
77
ns: 5 # number of negative samples for each positive sample
88
lr: 0.001 #learning rate
99
es: 5 # early stopping patience; to bypass early stopping, set it higher than the number of epochs
10-
save_per_epoch: False
10+
save_per_epoch: True
1111

1212
# Quoc V. Le, Tomás Mikolov: Distributed Representations of Sentences and Documents. ICML 2014: 1188-1196
1313
d2v: # we use gensim
@@ -22,7 +22,7 @@ model:
2222
d: ${dim}
2323
e: ${model.epochs}
2424
lr: ${model.lr}
25-
save_per_epoch: ${model.save_per_epoch}
25+
spe: ${model.save_per_epoch}
2626

2727
gnn:
2828
graph:
@@ -41,13 +41,13 @@ model:
4141
#max
4242
#mul
4343
pre: # if set, use pretrained D2v vectors as initial node features of graph data
44-
#d8.e100.w10.d2v.dm1.skillmember # skill, member, team
45-
#d8.e100.w10.d2v.dm1.skill # skill, team
46-
#d9.e100.w10.d2v.dm1.skill # if not exists, train from scratch!
44+
#d2v.d8.e100.w10.dm1.skillmember # skill, member, team
45+
#d2v.d8.e100.w10.dm1.skill # skill, team
46+
#d2v.d9.e100.w10.dm1.skill # if not exists, train from scratch!
4747

4848
pytorch: #will be overridden dynamically in ./src/main.py from root ./__config__.yaml
4949
seed: ${model.seed}
50-
model: #dummy tag as placeholder. don't worry or touch it.
50+
model: #dummy tag as placeholder. don't worry or touch it.
5151

5252
#Grover, Aditya, and Jure Leskovec. "node2vec: Scalable feature learning for networks." Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining. 2016.
5353
#p: Return parameter, controls likelihood of revisiting the previous node. high p → discourages going back, low p → encourages backtracking
@@ -75,18 +75,33 @@ model:
7575
#Dong, Chawla, Swami. "metapath2vec: Scalable representation learning for heterogeneous networks." SIGKDD 2017.
7676
m2v:
7777
metapath_name: # to have an embedding for a node type, it should be part of the metapath, e.g., for locations
78-
#[[[member, rev_to, skill], [skill, to, member]], ms-sm] # for sm graph structure
79-
[[[member, to, team], [team, rev_to, skill], [skill, to, team], [team, rev_to, member]], 'mts-stm'] #m->t->s, s->t->m for stm. Note: when we build the graph, m->t, s->t, l->t. So, make it undirected, t->m, t->s get 'rev_to' label
80-
#[[[member, to, team],[team, rev_to, loc], [loc, to, team], [team, rev_to, member]], mtl-ltm] #stml
78+
##member → skill → member → skill, recommend skills to members, for sm graph structure
79+
#[[[member, rev_to, skill], [skill, to, member], [member, rev_to, skill]], msm]
80+
81+
##team → skill → team → member for stm. Given a team, rank likely members, recommend members to a team
82+
[[[team, rev_to, skill], [skill, to, team ], [team, rev_to, member]], tstm]
83+
## member → team → skill → team for stm. Given a member, recommend likely teams
84+
#[[[member, to, team], [team, rev_to, skill], [skill, to, team]], mtst]
85+
86+
##stml (member → team → location → team) Members worked on teams in the same location as the target team. Recommending teams to members or ranking likely team assignments for new members
87+
#[[[member, to, team], [team, rev_to, loc], [loc, to, team]], mtlt]
88+
89+
##Combines skill and location similarity (similar domain and location) to find members from teams that share skills and location with the target team. Location is important (e.g., for co-location constraints) and to reflect real-world assignment feasibility
90+
#[[[team, rev_to, skill], [skill, to, team], [team, rev_to, loc], [loc, to, team], [team, rev_to, member]], tstlt]
91+
## (member → team → location → team), Member to similar teams in area, Teams near where member has worked
92+
#[[[member, to, team], [team, rev_to, loc], [loc, to, team]], mtlt]
93+
## (team → location → team → member), Recommend local team members, Members in geographically close teams
94+
#[[[team, rev_to, loc], [loc, to, team], [team, rev_to, member]], tltm]
95+
8196
d: ${dim}
82-
w: ${model.d2v.w}
97+
w: 4 #${model.d2v.w} ## assert walk_length + 1 >= context_size
8398
e: ${model.epochs}
8499
b: ${model.batch_size}
85100
lr: ${model.lr}
86101
es: ${model.es}
87102
ns: ${model.gnn.n2v.ns}
88-
save_per_epoch: ${model.save_per_epoch}
89-
wl: ${model.gnn.n2v.wl} #walk_length
103+
spe: ${model.save_per_epoch}
104+
wl: 3 #${model.gnn.n2v.wl} #walk_length AttributeError: The 'walk_length' is longer than the given 'metapath', but the 'metapath' does not denote a cycle
90105
wn: ${model.gnn.n2v.wn} #walks_per_node
91106

92107
#Thomas N. Kipf, Max Welling: Semi-Supervised Classification with Graph Convolutional Networks. ICLR (Poster) 2017

src/mdl/emb/d2v.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@
1414

1515
class D2v(T2v):
1616
gensim = None
17-
def __init__(self, output, device, cgf):
18-
super().__init__(output, device, cgf)
19-
self.name = 'd2v'
17+
def __init__(self, output, device, cgf):
    """Initialise the D2v embedder by delegating to the parent constructor.

    Forwards ``output``, ``device`` and ``cgf`` unchanged and inserts the literal
    ``'d2v'`` as the third positional argument.

    NOTE(review): the third argument is presumably the model name later read as
    ``self.name`` -- confirm against the parent ``__init__``.
    NOTE(review): src/main.py instantiates embedding classes with four positional
    arguments (output, acceleration, method, config); confirm this three-argument
    signature is not reached from that call path.
    """
    super().__init__(output, device, 'd2v', cgf)
2018

2119
def _prep(self, teamsvecs, indexes, splits):
2220
datafile = self.output + f'/{self.cfg.embtype}.docs.pkl'
@@ -52,7 +50,7 @@ def _prep(self, teamsvecs, indexes, splits):
5250

5351
def train(self, teamsvecs, indexes, splits):
5452
# to select/create correct model file in the output directory
55-
self.modelfilepath = self.output + f'/d{self.cfg.d}.e{self.cfg.e}.{self.name}.w{self.cfg.w}.dm{self.cfg.dm}.{self.cfg.embtype}'
53+
self.modelfilepath = self.output + f'/{self.name}.d{self.cfg.d}.e{self.cfg.e}.w{self.cfg.w}.dm{self.cfg.dm}.{self.cfg.embtype}'
5654
try:
5755
log.info(f"Loading the model {self.modelfilepath} for {(teamsvecs['skill'].shape[0], self.cfg.d)} embeddings ...")
5856
self.__class__.gensim = opentf.install_import('gensim==4.3.3', 'gensim')

0 commit comments

Comments
 (0)