Upload 14 files

- .gitattributes +1 -0
- data/ml_wikipedia.csv +3 -0
- data/ml_wikipedia.npy +3 -0
- data/ml_wikipedia_node.npy +3 -0
- model/temporal_attention.py +90 -0
- model/tgn.py +278 -0
- model/time_encoding.py +25 -0
- modules/embedding_module.py +291 -0
- modules/memory.py +75 -0
- modules/memory_updater.py +68 -0
- modules/message_aggregator.py +90 -0
- modules/message_function.py +40 -0
- modules/tgn.py +278 -0
- utils/data_processing.py +184 -0
- utils/utils.py +186 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/ml_wikipedia.csv filter=lfs diff=lfs merge=lfs -text
data/ml_wikipedia.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85fd8e50e5ffbb1348173b85d0b7b69ee270550f511c505daf062c9bf9db8027
+size 347159369
data/ml_wikipedia.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f601ac36dfaafdd78759d174611204a0660be70c93aa16f30edb56a7bc642b53
+size 216685728
data/ml_wikipedia_node.npy
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f2054b5fe9d76188a5bf014232c81a64b3ffedb5a146ff30e1daa71215278b
+size 12697856
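The three data files above are Git LFS pointers, not the data itself; after a `git lfs pull`, a downloaded object can be checked against the oid and size recorded in its pointer. A minimal sketch of that check (the helper below is ours, not part of the commit):

import hashlib

def verify_lfs_object(path, expected_oid, expected_size):
    # Hash the pulled file and compare with the sha256 oid and byte size
    # from the LFS pointer file.
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest() == expected_oid and size == expected_size

# e.g. for the node-feature file above:
verify_lfs_object("data/ml_wikipedia_node.npy",
                  "85f2054b5fe9d76188a5bf014232c81a64b3ffedb5a146ff30e1daa71215278b",
                  12697856)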
model/temporal_attention.py
ADDED
@@ -0,0 +1,90 @@
+import torch
+from torch import nn
+
+from utils.utils import MergeLayer
+
+
+class TemporalAttentionLayer(torch.nn.Module):
+  """
+  Temporal attention layer. Return the temporal embedding of a node given the node itself,
+  its neighbors and the edge timestamps.
+  """
+
+  def __init__(self, n_node_features, n_neighbors_features, n_edge_features, time_dim,
+               output_dimension, n_head=2,
+               dropout=0.1):
+    super(TemporalAttentionLayer, self).__init__()
+
+    self.n_head = n_head
+
+    self.feat_dim = n_node_features
+    self.time_dim = time_dim
+
+    self.query_dim = n_node_features + time_dim
+    self.key_dim = n_neighbors_features + time_dim + n_edge_features
+
+    self.merger = MergeLayer(self.query_dim, n_node_features, n_node_features, output_dimension)
+
+    self.multi_head_target = nn.MultiheadAttention(embed_dim=self.query_dim,
+                                                   kdim=self.key_dim,
+                                                   vdim=self.key_dim,
+                                                   num_heads=n_head,
+                                                   dropout=dropout)
+
+  def forward(self, src_node_features, src_time_features, neighbors_features,
+              neighbors_time_features, edge_features, neighbors_padding_mask):
+    """
+    Temporal attention model.
+    :param src_node_features: float Tensor of shape [batch_size, n_node_features]
+    :param src_time_features: float Tensor of shape [batch_size, 1, time_dim]
+    :param neighbors_features: float Tensor of shape [batch_size, n_neighbors, n_node_features]
+    :param neighbors_time_features: float Tensor of shape [batch_size, n_neighbors, time_dim]
+    :param edge_features: float Tensor of shape [batch_size, n_neighbors, n_edge_features]
+    :param neighbors_padding_mask: bool Tensor of shape [batch_size, n_neighbors]
+    :return:
+    attn_output: float Tensor of shape [1, batch_size, n_node_features]
+    attn_output_weights: [batch_size, 1, n_neighbors]
+    """
+
+    src_node_features_unrolled = torch.unsqueeze(src_node_features, dim=1)
+
+    query = torch.cat([src_node_features_unrolled, src_time_features], dim=2)
+    key = torch.cat([neighbors_features, edge_features, neighbors_time_features], dim=2)
+
+    # print(neighbors_features.shape, edge_features.shape, neighbors_time_features.shape)
+    # Reshape tensors to the shape expected by the multi-head attention module
+    query = query.permute([1, 0, 2])  # [1, batch_size, num_of_features]
+    key = key.permute([1, 0, 2])  # [n_neighbors, batch_size, num_of_features]
+
+    # Compute mask of which source nodes have no valid neighbors
+    invalid_neighborhood_mask = neighbors_padding_mask.all(dim=1, keepdim=True)
+    # If a source node has no valid neighbor, set its first neighbor to be valid. This will
+    # force the attention to just 'attend' on this neighbor (which has the same features as all
+    # the others since they are fake neighbors) and will produce an equivalent result to the
+    # original tgat paper which was forcing fake neighbors to all have the same attention of 1e-10
+    neighbors_padding_mask[invalid_neighborhood_mask.squeeze(), 0] = False
+
+    # print(query.shape, key.shape)
+
+    attn_output, attn_output_weights = self.multi_head_target(query=query, key=key, value=key,
+                                                              key_padding_mask=neighbors_padding_mask)
+
+    # mask = torch.unsqueeze(neighbors_padding_mask, dim=2)  # mask [B, N, 1]
+    # mask = mask.permute([0, 2, 1])
+    # attn_output, attn_output_weights = self.multi_head_target(q=query, k=key, v=key,
+    #                                                           mask=mask)
+
+    attn_output = attn_output.squeeze()
+    attn_output_weights = attn_output_weights.squeeze()
+
+    # Source nodes with no neighbors have an all-zero attention output. The attention output is
+    # then added or concatenated to the original source node features and then fed into an MLP.
+    # This means that an all-zero vector is not used.
+    attn_output = attn_output.masked_fill(invalid_neighborhood_mask, 0)
+    attn_output_weights = attn_output_weights.masked_fill(invalid_neighborhood_mask, 0)
+
+    # Skip connection with temporal attention over neighborhood and the features of the node itself
+    attn_output = self.merger(attn_output, src_node_features)
+
+    return attn_output, attn_output_weights
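The layer attends from a single query (the node's features concatenated with its time encoding) over keys built from neighbor features, edge features, and neighbor time encodings. A minimal shape-level sketch of calling it, with hypothetical dimensions; MergeLayer lives in utils/utils.py, which is not shown in this diff, and is assumed to map its two inputs to output_dimension:

import torch
from model.temporal_attention import TemporalAttentionLayer

layer = TemporalAttentionLayer(n_node_features=172, n_neighbors_features=172,
                               n_edge_features=172, time_dim=172,
                               output_dimension=172, n_head=2, dropout=0.1)

batch, n_neighbors = 4, 10
out, weights = layer(
    src_node_features=torch.randn(batch, 172),
    src_time_features=torch.randn(batch, 1, 172),
    neighbors_features=torch.randn(batch, n_neighbors, 172),
    neighbors_time_features=torch.randn(batch, n_neighbors, 172),
    edge_features=torch.randn(batch, n_neighbors, 172),
    neighbors_padding_mask=torch.zeros(batch, n_neighbors, dtype=torch.bool))
# out: [batch, output_dimension], weights: [batch, n_neighbors]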
model/tgn.py
ADDED
@@ -0,0 +1,278 @@
+import logging
+import numpy as np
+import torch
+from collections import defaultdict
+
+from utils.utils import MergeLayer
+from modules.memory import Memory
+from modules.message_aggregator import get_message_aggregator
+from modules.message_function import get_message_function
+from modules.memory_updater import get_memory_updater
+from modules.embedding_module import get_embedding_module
+from model.time_encoding import TimeEncode
+
+
+class TGN(torch.nn.Module):
+  def __init__(self, neighbor_finder, node_features, edge_features, device, n_layers=2,
+               n_heads=2, dropout=0.1, use_memory=False,
+               memory_update_at_start=True, message_dimension=100,
+               memory_dimension=500, embedding_module_type="graph_attention",
+               message_function="mlp",
+               mean_time_shift_src=0, std_time_shift_src=1, mean_time_shift_dst=0,
+               std_time_shift_dst=1, n_neighbors=None, aggregator_type="last",
+               memory_updater_type="gru",
+               use_destination_embedding_in_message=False,
+               use_source_embedding_in_message=False,
+               dyrep=False):
+    super(TGN, self).__init__()
+
+    self.n_layers = n_layers
+    self.neighbor_finder = neighbor_finder
+    self.device = device
+    self.logger = logging.getLogger(__name__)
+
+    self.node_raw_features = torch.from_numpy(node_features.astype(np.float32)).to(device)
+    self.edge_raw_features = torch.from_numpy(edge_features.astype(np.float32)).to(device)
+
+    self.n_node_features = self.node_raw_features.shape[1]
+    self.n_nodes = self.node_raw_features.shape[0]
+    self.n_edge_features = self.edge_raw_features.shape[1]
+    self.embedding_dimension = self.n_node_features
+    self.n_neighbors = n_neighbors
+    self.embedding_module_type = embedding_module_type
+    self.use_destination_embedding_in_message = use_destination_embedding_in_message
+    self.use_source_embedding_in_message = use_source_embedding_in_message
+    self.dyrep = dyrep
+
+    self.use_memory = use_memory
+    self.time_encoder = TimeEncode(dimension=self.n_node_features)
+    self.memory = None
+
+    self.mean_time_shift_src = mean_time_shift_src
+    self.std_time_shift_src = std_time_shift_src
+    self.mean_time_shift_dst = mean_time_shift_dst
+    self.std_time_shift_dst = std_time_shift_dst
+
+    if self.use_memory:
+      self.memory_dimension = memory_dimension
+      self.memory_update_at_start = memory_update_at_start
+      raw_message_dimension = 2 * self.memory_dimension + self.n_edge_features + \
+                              self.time_encoder.dimension
+      message_dimension = message_dimension if message_function != "identity" else raw_message_dimension
+      self.memory = Memory(n_nodes=self.n_nodes,
+                           memory_dimension=self.memory_dimension,
+                           input_dimension=message_dimension,
+                           message_dimension=message_dimension,
+                           device=device)
+      self.message_aggregator = get_message_aggregator(aggregator_type=aggregator_type,
+                                                       device=device)
+      self.message_function = get_message_function(module_type=message_function,
+                                                   raw_message_dimension=raw_message_dimension,
+                                                   message_dimension=message_dimension)
+      self.memory_updater = get_memory_updater(module_type=memory_updater_type,
+                                               memory=self.memory,
+                                               message_dimension=message_dimension,
+                                               memory_dimension=self.memory_dimension,
+                                               device=device)
+
+    self.embedding_module_type = embedding_module_type
+
+    self.embedding_module = get_embedding_module(module_type=embedding_module_type,
+                                                 node_features=self.node_raw_features,
+                                                 edge_features=self.edge_raw_features,
+                                                 memory=self.memory,
+                                                 neighbor_finder=self.neighbor_finder,
+                                                 time_encoder=self.time_encoder,
+                                                 n_layers=self.n_layers,
+                                                 n_node_features=self.n_node_features,
+                                                 n_edge_features=self.n_edge_features,
+                                                 n_time_features=self.n_node_features,
+                                                 embedding_dimension=self.embedding_dimension,
+                                                 device=self.device,
+                                                 n_heads=n_heads, dropout=dropout,
+                                                 use_memory=use_memory,
+                                                 n_neighbors=self.n_neighbors)
+
+    # MLP to compute probability on an edge given two node embeddings
+    self.affinity_score = MergeLayer(self.n_node_features, self.n_node_features,
+                                     self.n_node_features,
+                                     1)
+
+  def compute_temporal_embeddings(self, source_nodes, destination_nodes, negative_nodes, edge_times,
+                                  edge_idxs, n_neighbors=20):
+    """
+    Compute temporal embeddings for sources, destinations, and negatively sampled destinations.
+
+    :param source_nodes [batch_size]: source ids
+    :param destination_nodes [batch_size]: destination ids
+    :param negative_nodes [batch_size]: ids of negative sampled destinations
+    :param edge_times [batch_size]: timestamp of interaction
+    :param edge_idxs [batch_size]: index of interaction
+    :param n_neighbors [scalar]: number of temporal neighbors to consider in each convolutional
+    layer
+    :return: Temporal embeddings for sources, destinations and negatives
+    """
+
+    n_samples = len(source_nodes)
+    nodes = np.concatenate([source_nodes, destination_nodes, negative_nodes])
+    positives = np.concatenate([source_nodes, destination_nodes])
+    timestamps = np.concatenate([edge_times, edge_times, edge_times])
+
+    memory = None
+    time_diffs = None
+    if self.use_memory:
+      if self.memory_update_at_start:
+        # Update memory for all nodes with messages stored in previous batches
+        memory, last_update = self.get_updated_memory(list(range(self.n_nodes)),
+                                                      self.memory.messages)
+      else:
+        memory = self.memory.get_memory(list(range(self.n_nodes)))
+        last_update = self.memory.last_update
+
+      ### Compute differences between the time the memory of a node was last updated,
+      ### and the time for which we want to compute the embedding of a node
+      source_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
+        source_nodes].long()
+      source_time_diffs = (source_time_diffs - self.mean_time_shift_src) / self.std_time_shift_src
+      destination_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
+        destination_nodes].long()
+      destination_time_diffs = (destination_time_diffs - self.mean_time_shift_dst) / self.std_time_shift_dst
+      negative_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
+        negative_nodes].long()
+      negative_time_diffs = (negative_time_diffs - self.mean_time_shift_dst) / self.std_time_shift_dst
+
+      time_diffs = torch.cat([source_time_diffs, destination_time_diffs, negative_time_diffs],
+                             dim=0)
+
+    # Compute the embeddings using the embedding module
+    node_embedding = self.embedding_module.compute_embedding(memory=memory,
+                                                             source_nodes=nodes,
+                                                             timestamps=timestamps,
+                                                             n_layers=self.n_layers,
+                                                             n_neighbors=n_neighbors,
+                                                             time_diffs=time_diffs)
+
+    source_node_embedding = node_embedding[:n_samples]
+    destination_node_embedding = node_embedding[n_samples: 2 * n_samples]
+    negative_node_embedding = node_embedding[2 * n_samples:]
+
+    if self.use_memory:
+      if self.memory_update_at_start:
+        # Persist the updates to the memory only for sources and destinations (since now we have
+        # new messages for them)
+        self.update_memory(positives, self.memory.messages)
+
+        assert torch.allclose(memory[positives], self.memory.get_memory(positives), atol=1e-5), \
+          "Something wrong in how the memory was updated"
+
+        # Remove messages for the positives since we have already updated the memory using them
+        self.memory.clear_messages(positives)
+
+      unique_sources, source_id_to_messages = self.get_raw_messages(source_nodes,
+                                                                    source_node_embedding,
+                                                                    destination_nodes,
+                                                                    destination_node_embedding,
+                                                                    edge_times, edge_idxs)
+      unique_destinations, destination_id_to_messages = self.get_raw_messages(destination_nodes,
+                                                                              destination_node_embedding,
+                                                                              source_nodes,
+                                                                              source_node_embedding,
+                                                                              edge_times, edge_idxs)
+      if self.memory_update_at_start:
+        self.memory.store_raw_messages(unique_sources, source_id_to_messages)
+        self.memory.store_raw_messages(unique_destinations, destination_id_to_messages)
+      else:
+        self.update_memory(unique_sources, source_id_to_messages)
+        self.update_memory(unique_destinations, destination_id_to_messages)
+
+      if self.dyrep:
+        source_node_embedding = memory[source_nodes]
+        destination_node_embedding = memory[destination_nodes]
+        negative_node_embedding = memory[negative_nodes]
+
+    return source_node_embedding, destination_node_embedding, negative_node_embedding
+
+  def compute_edge_probabilities(self, source_nodes, destination_nodes, negative_nodes, edge_times,
+                                 edge_idxs, n_neighbors=20):
+    """
+    Compute probabilities for edges between sources and destinations and between sources and
+    negatives by first computing temporal embeddings using the TGN encoder and then feeding them
+    into the MLP decoder.
+    :param source_nodes [batch_size]: source ids
+    :param destination_nodes [batch_size]: destination ids
+    :param negative_nodes [batch_size]: ids of negative sampled destinations
+    :param edge_times [batch_size]: timestamp of interaction
+    :param edge_idxs [batch_size]: index of interaction
+    :param n_neighbors [scalar]: number of temporal neighbors to consider in each convolutional
+    layer
+    :return: Probabilities for both the positive and negative edges
+    """
+    n_samples = len(source_nodes)
+    source_node_embedding, destination_node_embedding, negative_node_embedding = self.compute_temporal_embeddings(
+      source_nodes, destination_nodes, negative_nodes, edge_times, edge_idxs, n_neighbors)
+
+    score = self.affinity_score(torch.cat([source_node_embedding, source_node_embedding], dim=0),
+                                torch.cat([destination_node_embedding,
+                                           negative_node_embedding])).squeeze(dim=0)
+    pos_score = score[:n_samples]
+    neg_score = score[n_samples:]
+
+    return pos_score.sigmoid(), neg_score.sigmoid()
+
+  def update_memory(self, nodes, messages):
+    # Aggregate messages for the same nodes
+    unique_nodes, unique_messages, unique_timestamps = \
+      self.message_aggregator.aggregate(
+        nodes,
+        messages)
+
+    if len(unique_nodes) > 0:
+      unique_messages = self.message_function.compute_message(unique_messages)
+
+    # Update the memory with the aggregated messages
+    self.memory_updater.update_memory(unique_nodes, unique_messages,
+                                      timestamps=unique_timestamps)
+
+  def get_updated_memory(self, nodes, messages):
+    # Aggregate messages for the same nodes
+    unique_nodes, unique_messages, unique_timestamps = \
+      self.message_aggregator.aggregate(
+        nodes,
+        messages)
+
+    if len(unique_nodes) > 0:
+      unique_messages = self.message_function.compute_message(unique_messages)
+
+    updated_memory, updated_last_update = self.memory_updater.get_updated_memory(unique_nodes,
+                                                                                 unique_messages,
+                                                                                 timestamps=unique_timestamps)
+
+    return updated_memory, updated_last_update
+
+  def get_raw_messages(self, source_nodes, source_node_embedding, destination_nodes,
+                       destination_node_embedding, edge_times, edge_idxs):
+    edge_times = torch.from_numpy(edge_times).float().to(self.device)
+    edge_features = self.edge_raw_features[edge_idxs]
+
+    source_memory = self.memory.get_memory(source_nodes) if not \
+      self.use_source_embedding_in_message else source_node_embedding
+    destination_memory = self.memory.get_memory(destination_nodes) if \
+      not self.use_destination_embedding_in_message else destination_node_embedding
+
+    source_time_delta = edge_times - self.memory.last_update[source_nodes]
+    source_time_delta_encoding = self.time_encoder(source_time_delta.unsqueeze(dim=1)).view(len(
+      source_nodes), -1)
+
+    source_message = torch.cat([source_memory, destination_memory, edge_features,
+                                source_time_delta_encoding],
+                               dim=1)
+    messages = defaultdict(list)
+    unique_sources = np.unique(source_nodes)
+
+    for i in range(len(source_nodes)):
+      messages[source_nodes[i]].append((source_message[i], edge_times[i]))
+
+    return unique_sources, messages
+
+  def set_neighbor_finder(self, neighbor_finder):
+    self.neighbor_finder = neighbor_finder
+    self.embedding_module.neighbor_finder = neighbor_finder
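A minimal sketch of driving this encoder/decoder pair on one batch. The neighbor finder comes from utils/utils.py and the batch arrays (sources, destinations, negatives, timestamps, edge_idxs) from utils/data_processing.py, neither of which is shown in this diff, so their construction here is assumed; the batch arrays are NumPy arrays:

import numpy as np
import torch
from model.tgn import TGN

node_features = np.load("data/ml_wikipedia_node.npy")
edge_features = np.load("data/ml_wikipedia.npy")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tgn = TGN(neighbor_finder=neighbor_finder, node_features=node_features,
          edge_features=edge_features, device=device, n_layers=1,
          use_memory=True, embedding_module_type="graph_attention",
          message_function="identity", aggregator_type="last",
          memory_updater_type="gru").to(device)

criterion = torch.nn.BCELoss()
tgn.memory.__init_memory__()  # reset memory at the start of each epoch
pos_prob, neg_prob = tgn.compute_edge_probabilities(
    sources, destinations, negatives, timestamps, edge_idxs, n_neighbors=10)
loss = criterion(pos_prob.squeeze(), torch.ones(len(sources), device=device)) + \
       criterion(neg_prob.squeeze(), torch.zeros(len(sources), device=device))
loss.backward()
tgn.memory.detach_memory()  # stop gradients from flowing across batches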
model/time_encoding.py
ADDED
@@ -0,0 +1,25 @@
+import torch
+import numpy as np
+
+
+class TimeEncode(torch.nn.Module):
+  # Time Encoding proposed by TGAT
+  def __init__(self, dimension):
+    super(TimeEncode, self).__init__()
+
+    self.dimension = dimension
+    self.w = torch.nn.Linear(1, dimension)
+
+    self.w.weight = torch.nn.Parameter((torch.from_numpy(1 / 10 ** np.linspace(0, 9, dimension)))
+                                       .float().reshape(dimension, -1))
+    self.w.bias = torch.nn.Parameter(torch.zeros(dimension).float())
+
+  def forward(self, t):
+    # t has shape [batch_size, seq_len]
+    # Add dimension at the end to apply linear layer --> [batch_size, seq_len, 1]
+    t = t.unsqueeze(dim=2)
+
+    # output has shape [batch_size, seq_len, dimension]
+    output = torch.cos(self.w(t))
+
+    return output
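The encoding is output = cos(w * t + b): w is initialized to the geometric frequency grid 1 / 10 ** linspace(0, 9, dimension) and b to zero (both stay trainable, since they are registered as parameters), so a zero time delta initially maps to the all-ones vector. A quick sketch:

import torch
from model.time_encoding import TimeEncode

enc = TimeEncode(dimension=8)
t = torch.tensor([[0.0, 1.0, 100.0]])  # [batch_size=1, seq_len=3]
print(enc(t).shape)                    # torch.Size([1, 3, 8])
print(enc(torch.zeros(1, 1)))          # cos(0) = 1 in every component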
modules/embedding_module.py
ADDED
@@ -0,0 +1,291 @@
+import torch
+from torch import nn
+import numpy as np
+import math
+
+from model.temporal_attention import TemporalAttentionLayer
+
+
+class EmbeddingModule(nn.Module):
+  def __init__(self, node_features, edge_features, memory, neighbor_finder, time_encoder, n_layers,
+               n_node_features, n_edge_features, n_time_features, embedding_dimension, device,
+               dropout):
+    super(EmbeddingModule, self).__init__()
+    self.node_features = node_features
+    self.edge_features = edge_features
+    # self.memory = memory
+    self.neighbor_finder = neighbor_finder
+    self.time_encoder = time_encoder
+    self.n_layers = n_layers
+    self.n_node_features = n_node_features
+    self.n_edge_features = n_edge_features
+    self.n_time_features = n_time_features
+    self.dropout = dropout
+    self.embedding_dimension = embedding_dimension
+    self.device = device
+
+  def compute_embedding(self, memory, source_nodes, timestamps, n_layers, n_neighbors=20, time_diffs=None,
+                        use_time_proj=True):
+    return NotImplemented
+
+
+class IdentityEmbedding(EmbeddingModule):
+  def compute_embedding(self, memory, source_nodes, timestamps, n_layers, n_neighbors=20, time_diffs=None,
+                        use_time_proj=True):
+    return memory[source_nodes, :]
+
+
+class TimeEmbedding(EmbeddingModule):
+  def __init__(self, node_features, edge_features, memory, neighbor_finder, time_encoder, n_layers,
+               n_node_features, n_edge_features, n_time_features, embedding_dimension, device,
+               n_heads=2, dropout=0.1, use_memory=True, n_neighbors=1):
+    super(TimeEmbedding, self).__init__(node_features, edge_features, memory,
+                                        neighbor_finder, time_encoder, n_layers,
+                                        n_node_features, n_edge_features, n_time_features,
+                                        embedding_dimension, device, dropout)
+
+    class NormalLinear(nn.Linear):
+      # From Jodie code
+      def reset_parameters(self):
+        stdv = 1. / math.sqrt(self.weight.size(1))
+        self.weight.data.normal_(0, stdv)
+        if self.bias is not None:
+          self.bias.data.normal_(0, stdv)
+
+    self.embedding_layer = NormalLinear(1, self.n_node_features)
+
+  def compute_embedding(self, memory, source_nodes, timestamps, n_layers, n_neighbors=20, time_diffs=None,
+                        use_time_proj=True):
+    source_embeddings = memory[source_nodes, :] * (1 + self.embedding_layer(time_diffs.unsqueeze(1)))
+
+    return source_embeddings
+
+
+class GraphEmbedding(EmbeddingModule):
+  def __init__(self, node_features, edge_features, memory, neighbor_finder, time_encoder, n_layers,
+               n_node_features, n_edge_features, n_time_features, embedding_dimension, device,
+               n_heads=2, dropout=0.1, use_memory=True):
+    super(GraphEmbedding, self).__init__(node_features, edge_features, memory,
+                                         neighbor_finder, time_encoder, n_layers,
+                                         n_node_features, n_edge_features, n_time_features,
+                                         embedding_dimension, device, dropout)
+
+    self.use_memory = use_memory
+    self.device = device
+
+  def compute_embedding(self, memory, source_nodes, timestamps, n_layers, n_neighbors=20, time_diffs=None,
+                        use_time_proj=True):
+    """Recursive implementation of curr_layers temporal graph attention layers.
+
+    src_idx_l [batch_size]: users / items input ids.
+    cut_time_l [batch_size]: scalar representing the instant of time at which we want to extract the user / item representation.
+    curr_layers [scalar]: number of temporal convolutional layers to stack.
+    num_neighbors [scalar]: number of temporal neighbors to consider in each convolutional layer.
+    """
+
+    assert (n_layers >= 0)
+
+    source_nodes_torch = torch.from_numpy(source_nodes).long().to(self.device)
+    timestamps_torch = torch.unsqueeze(torch.from_numpy(timestamps).float().to(self.device), dim=1)
+
+    # query node always has the start time -> time span == 0
+    source_nodes_time_embedding = self.time_encoder(torch.zeros_like(
+      timestamps_torch))
+
+    source_node_features = self.node_features[source_nodes_torch, :]
+
+    if self.use_memory:
+      source_node_features = memory[source_nodes, :] + source_node_features
+
+    if n_layers == 0:
+      return source_node_features
+    else:
+
+      source_node_conv_embeddings = self.compute_embedding(memory,
+                                                           source_nodes,
+                                                           timestamps,
+                                                           n_layers=n_layers - 1,
+                                                           n_neighbors=n_neighbors)
+
+      neighbors, edge_idxs, edge_times = self.neighbor_finder.get_temporal_neighbor(
+        source_nodes,
+        timestamps,
+        n_neighbors=n_neighbors)
+
+      neighbors_torch = torch.from_numpy(neighbors).long().to(self.device)
+
+      edge_idxs = torch.from_numpy(edge_idxs).long().to(self.device)
+
+      edge_deltas = timestamps[:, np.newaxis] - edge_times
+
+      edge_deltas_torch = torch.from_numpy(edge_deltas).float().to(self.device)
+
+      neighbors = neighbors.flatten()
+      neighbor_embeddings = self.compute_embedding(memory,
+                                                   neighbors,
+                                                   np.repeat(timestamps, n_neighbors),
+                                                   n_layers=n_layers - 1,
+                                                   n_neighbors=n_neighbors)
+
+      effective_n_neighbors = n_neighbors if n_neighbors > 0 else 1
+      neighbor_embeddings = neighbor_embeddings.view(len(source_nodes), effective_n_neighbors, -1)
+      edge_time_embeddings = self.time_encoder(edge_deltas_torch)
+
+      edge_features = self.edge_features[edge_idxs, :]
+
+      mask = neighbors_torch == 0
+
+      source_embedding = self.aggregate(n_layers, source_node_conv_embeddings,
+                                        source_nodes_time_embedding,
+                                        neighbor_embeddings,
+                                        edge_time_embeddings,
+                                        edge_features,
+                                        mask)
+
+      return source_embedding
+
+  def aggregate(self, n_layers, source_node_features, source_nodes_time_embedding,
+                neighbor_embeddings,
+                edge_time_embeddings, edge_features, mask):
+    return NotImplemented
+
+
+class GraphSumEmbedding(GraphEmbedding):
+  def __init__(self, node_features, edge_features, memory, neighbor_finder, time_encoder, n_layers,
+               n_node_features, n_edge_features, n_time_features, embedding_dimension, device,
+               n_heads=2, dropout=0.1, use_memory=True):
+    super(GraphSumEmbedding, self).__init__(node_features=node_features,
+                                            edge_features=edge_features,
+                                            memory=memory,
+                                            neighbor_finder=neighbor_finder,
+                                            time_encoder=time_encoder, n_layers=n_layers,
+                                            n_node_features=n_node_features,
+                                            n_edge_features=n_edge_features,
+                                            n_time_features=n_time_features,
+                                            embedding_dimension=embedding_dimension,
+                                            device=device,
+                                            n_heads=n_heads, dropout=dropout,
+                                            use_memory=use_memory)
+    self.linear_1 = torch.nn.ModuleList([torch.nn.Linear(embedding_dimension + n_time_features +
+                                                         n_edge_features, embedding_dimension)
+                                         for _ in range(n_layers)])
+    self.linear_2 = torch.nn.ModuleList(
+      [torch.nn.Linear(embedding_dimension + n_node_features + n_time_features,
+                       embedding_dimension) for _ in range(n_layers)])
+
+  def aggregate(self, n_layer, source_node_features, source_nodes_time_embedding,
+                neighbor_embeddings,
+                edge_time_embeddings, edge_features, mask):
+    neighbors_features = torch.cat([neighbor_embeddings, edge_time_embeddings, edge_features],
+                                   dim=2)
+    neighbor_embeddings = self.linear_1[n_layer - 1](neighbors_features)
+    neighbors_sum = torch.nn.functional.relu(torch.sum(neighbor_embeddings, dim=1))
+
+    source_features = torch.cat([source_node_features,
+                                 source_nodes_time_embedding.squeeze()], dim=1)
+    source_embedding = torch.cat([neighbors_sum, source_features], dim=1)
+    source_embedding = self.linear_2[n_layer - 1](source_embedding)
+
+    return source_embedding
+
+
+class GraphAttentionEmbedding(GraphEmbedding):
+  def __init__(self, node_features, edge_features, memory, neighbor_finder, time_encoder, n_layers,
+               n_node_features, n_edge_features, n_time_features, embedding_dimension, device,
+               n_heads=2, dropout=0.1, use_memory=True):
+    super(GraphAttentionEmbedding, self).__init__(node_features, edge_features, memory,
+                                                  neighbor_finder, time_encoder, n_layers,
+                                                  n_node_features, n_edge_features,
+                                                  n_time_features,
+                                                  embedding_dimension, device,
+                                                  n_heads, dropout,
+                                                  use_memory)
+
+    self.attention_models = torch.nn.ModuleList([TemporalAttentionLayer(
+      n_node_features=n_node_features,
+      n_neighbors_features=n_node_features,
+      n_edge_features=n_edge_features,
+      time_dim=n_time_features,
+      n_head=n_heads,
+      dropout=dropout,
+      output_dimension=n_node_features)
+      for _ in range(n_layers)])
+
+  def aggregate(self, n_layer, source_node_features, source_nodes_time_embedding,
+                neighbor_embeddings,
+                edge_time_embeddings, edge_features, mask):
+    attention_model = self.attention_models[n_layer - 1]
+
+    source_embedding, _ = attention_model(source_node_features,
+                                          source_nodes_time_embedding,
+                                          neighbor_embeddings,
+                                          edge_time_embeddings,
+                                          edge_features,
+                                          mask)
+
+    return source_embedding
+
+
+def get_embedding_module(module_type, node_features, edge_features, memory, neighbor_finder,
+                         time_encoder, n_layers, n_node_features, n_edge_features, n_time_features,
+                         embedding_dimension, device,
+                         n_heads=2, dropout=0.1, n_neighbors=None,
+                         use_memory=True):
+  if module_type == "graph_attention":
+    return GraphAttentionEmbedding(node_features=node_features,
+                                   edge_features=edge_features,
+                                   memory=memory,
+                                   neighbor_finder=neighbor_finder,
+                                   time_encoder=time_encoder,
+                                   n_layers=n_layers,
+                                   n_node_features=n_node_features,
+                                   n_edge_features=n_edge_features,
+                                   n_time_features=n_time_features,
+                                   embedding_dimension=embedding_dimension,
+                                   device=device,
+                                   n_heads=n_heads, dropout=dropout, use_memory=use_memory)
+  elif module_type == "graph_sum":
+    return GraphSumEmbedding(node_features=node_features,
+                             edge_features=edge_features,
+                             memory=memory,
+                             neighbor_finder=neighbor_finder,
+                             time_encoder=time_encoder,
+                             n_layers=n_layers,
+                             n_node_features=n_node_features,
+                             n_edge_features=n_edge_features,
+                             n_time_features=n_time_features,
+                             embedding_dimension=embedding_dimension,
+                             device=device,
+                             n_heads=n_heads, dropout=dropout, use_memory=use_memory)
+
+  elif module_type == "identity":
+    return IdentityEmbedding(node_features=node_features,
+                             edge_features=edge_features,
+                             memory=memory,
+                             neighbor_finder=neighbor_finder,
+                             time_encoder=time_encoder,
+                             n_layers=n_layers,
+                             n_node_features=n_node_features,
+                             n_edge_features=n_edge_features,
+                             n_time_features=n_time_features,
+                             embedding_dimension=embedding_dimension,
+                             device=device,
+                             dropout=dropout)
+  elif module_type == "time":
+    return TimeEmbedding(node_features=node_features,
+                         edge_features=edge_features,
+                         memory=memory,
+                         neighbor_finder=neighbor_finder,
+                         time_encoder=time_encoder,
+                         n_layers=n_layers,
+                         n_node_features=n_node_features,
+                         n_edge_features=n_edge_features,
+                         n_time_features=n_time_features,
+                         embedding_dimension=embedding_dimension,
+                         device=device,
+                         dropout=dropout,
+                         n_neighbors=n_neighbors)
+  else:
+    raise ValueError("Embedding Module {} not supported".format(module_type))
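The graph modules are recursive: an L-layer embedding aggregates over the neighbors' (L-1)-layer embeddings, bottoming out at the raw node features (plus memory, when enabled). A minimal sketch of the factory call that the TGN constructor issues, with hypothetical inputs (the feature tensors, memory, neighbor finder, and time encoder are assumed to exist as in model/tgn.py):

from modules.embedding_module import get_embedding_module

emb_module = get_embedding_module(module_type="graph_attention",
                                  node_features=node_raw_features,
                                  edge_features=edge_raw_features,
                                  memory=memory, neighbor_finder=neighbor_finder,
                                  time_encoder=time_encoder, n_layers=2,
                                  n_node_features=172, n_edge_features=172,
                                  n_time_features=172, embedding_dimension=172,
                                  device="cpu", n_heads=2, dropout=0.1,
                                  use_memory=True, n_neighbors=10)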
modules/memory.py
ADDED
@@ -0,0 +1,75 @@
+import torch
+from torch import nn
+
+from collections import defaultdict
+from copy import deepcopy
+
+
+class Memory(nn.Module):
+
+  def __init__(self, n_nodes, memory_dimension, input_dimension, message_dimension=None,
+               device="cpu", combination_method='sum'):
+    super(Memory, self).__init__()
+    self.n_nodes = n_nodes
+    self.memory_dimension = memory_dimension
+    self.input_dimension = input_dimension
+    self.message_dimension = message_dimension
+    self.device = device
+
+    self.combination_method = combination_method
+
+    self.__init_memory__()
+
+  def __init_memory__(self):
+    """
+    Initializes the memory to all zeros. It should be called at the start of each epoch.
+    """
+    # Treat memory as a parameter so that it is saved and loaded together with the model
+    self.memory = nn.Parameter(torch.zeros((self.n_nodes, self.memory_dimension)).to(self.device),
+                               requires_grad=False)
+    self.last_update = nn.Parameter(torch.zeros(self.n_nodes).to(self.device),
+                                    requires_grad=False)
+
+    self.messages = defaultdict(list)
+
+  def store_raw_messages(self, nodes, node_id_to_messages):
+    for node in nodes:
+      self.messages[node].extend(node_id_to_messages[node])
+
+  def get_memory(self, node_idxs):
+    return self.memory[node_idxs, :]
+
+  def set_memory(self, node_idxs, values):
+    self.memory[node_idxs, :] = values
+
+  def get_last_update(self, node_idxs):
+    return self.last_update[node_idxs]
+
+  def backup_memory(self):
+    messages_clone = {}
+    for k, v in self.messages.items():
+      messages_clone[k] = [(x[0].clone(), x[1].clone()) for x in v]
+
+    return self.memory.data.clone(), self.last_update.data.clone(), messages_clone
+
+  def restore_memory(self, memory_backup):
+    self.memory.data, self.last_update.data = memory_backup[0].clone(), memory_backup[1].clone()
+
+    self.messages = defaultdict(list)
+    for k, v in memory_backup[2].items():
+      self.messages[k] = [(x[0].clone(), x[1].clone()) for x in v]
+
+  def detach_memory(self):
+    self.memory.detach_()
+
+    # Detach all stored messages
+    for k, v in self.messages.items():
+      new_node_messages = []
+      for message in v:
+        new_node_messages.append((message[0].detach(), message[1]))
+
+      self.messages[k] = new_node_messages
+
+  def clear_messages(self, nodes):
+    for node in nodes:
+      self.messages[node] = []
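Because the memory is mutated in place during both training and evaluation, a training loop typically snapshots and restores it around validation. A minimal sketch of that cycle (sizes are hypothetical):

from modules.memory import Memory

mem = Memory(n_nodes=100, memory_dimension=16, input_dimension=32)
snapshot = mem.backup_memory()  # clones of (memory, last_update, messages)
# ... run validation, which mutates the memory in place ...
mem.restore_memory(snapshot)    # roll back to the pre-validation state
mem.detach_memory()             # cut the autograd graph between batches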
modules/memory_updater.py
ADDED
@@ -0,0 +1,68 @@
+from torch import nn
+import torch
+
+
+class MemoryUpdater(nn.Module):
+  def update_memory(self, unique_node_ids, unique_messages, timestamps):
+    pass
+
+
+class SequenceMemoryUpdater(MemoryUpdater):
+  def __init__(self, memory, message_dimension, memory_dimension, device):
+    super(SequenceMemoryUpdater, self).__init__()
+    self.memory = memory
+    self.layer_norm = torch.nn.LayerNorm(memory_dimension)
+    self.message_dimension = message_dimension
+    self.device = device
+
+  def update_memory(self, unique_node_ids, unique_messages, timestamps):
+    if len(unique_node_ids) <= 0:
+      return
+
+    assert (self.memory.get_last_update(unique_node_ids) <= timestamps).all().item(), "Trying to " \
+      "update memory to time in the past"
+
+    memory = self.memory.get_memory(unique_node_ids)
+    self.memory.last_update[unique_node_ids] = timestamps
+
+    updated_memory = self.memory_updater(unique_messages, memory)
+
+    self.memory.set_memory(unique_node_ids, updated_memory)
+
+  def get_updated_memory(self, unique_node_ids, unique_messages, timestamps):
+    if len(unique_node_ids) <= 0:
+      return self.memory.memory.data.clone(), self.memory.last_update.data.clone()
+
+    assert (self.memory.get_last_update(unique_node_ids) <= timestamps).all().item(), "Trying to " \
+      "update memory to time in the past"
+
+    updated_memory = self.memory.memory.data.clone()
+    updated_memory[unique_node_ids] = self.memory_updater(unique_messages, updated_memory[unique_node_ids])
+
+    updated_last_update = self.memory.last_update.data.clone()
+    updated_last_update[unique_node_ids] = timestamps
+
+    return updated_memory, updated_last_update
+
+
+class GRUMemoryUpdater(SequenceMemoryUpdater):
+  def __init__(self, memory, message_dimension, memory_dimension, device):
+    super(GRUMemoryUpdater, self).__init__(memory, message_dimension, memory_dimension, device)
+
+    self.memory_updater = nn.GRUCell(input_size=message_dimension,
+                                     hidden_size=memory_dimension)
+
+
+class RNNMemoryUpdater(SequenceMemoryUpdater):
+  def __init__(self, memory, message_dimension, memory_dimension, device):
+    super(RNNMemoryUpdater, self).__init__(memory, message_dimension, memory_dimension, device)
+
+    self.memory_updater = nn.RNNCell(input_size=message_dimension,
+                                     hidden_size=memory_dimension)
+
+
+def get_memory_updater(module_type, memory, message_dimension, memory_dimension, device):
+  if module_type == "gru":
+    return GRUMemoryUpdater(memory, message_dimension, memory_dimension, device)
+  elif module_type == "rnn":
+    return RNNMemoryUpdater(memory, message_dimension, memory_dimension, device)
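Each node's memory row is treated as the hidden state of a recurrent cell and its aggregated message as the input, so one memory update is a single cell step over the batch of updated nodes. A standalone illustration with hypothetical dimensions, not tied to the classes above:

import torch
from torch import nn

cell = nn.GRUCell(input_size=32, hidden_size=16)
messages = torch.randn(5, 32)     # one aggregated message per updated node
memory_rows = torch.zeros(5, 16)  # current memory of those nodes
new_rows = cell(messages, memory_rows)  # updated memory, shape [5, 16]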
modules/message_aggregator.py
ADDED
@@ -0,0 +1,90 @@
+from collections import defaultdict
+import torch
+import numpy as np
+
+
+class MessageAggregator(torch.nn.Module):
+  """
+  Abstract class for the message aggregator module, which given a batch of node ids and
+  corresponding messages, aggregates messages with the same node id.
+  """
+  def __init__(self, device):
+    super(MessageAggregator, self).__init__()
+    self.device = device
+
+  def aggregate(self, node_ids, messages):
+    """
+    Given a list of node ids, and a list of messages of the same length, aggregate different
+    messages for the same id using one of the possible strategies.
+    :param node_ids: a list of node ids of length batch_size
+    :param messages: a tensor of shape [batch_size, message_length]
+    :param timestamps: a tensor of shape [batch_size]
+    :return: a tensor of shape [n_unique_node_ids, message_length] with the aggregated messages
+    """
+
+  def group_by_id(self, node_ids, messages, timestamps):
+    node_id_to_messages = defaultdict(list)
+
+    for i, node_id in enumerate(node_ids):
+      node_id_to_messages[node_id].append((messages[i], timestamps[i]))
+
+    return node_id_to_messages
+
+
+class LastMessageAggregator(MessageAggregator):
+  def __init__(self, device):
+    super(LastMessageAggregator, self).__init__(device)
+
+  def aggregate(self, node_ids, messages):
+    """Only keep the last message for each node"""
+    unique_node_ids = np.unique(node_ids)
+    unique_messages = []
+    unique_timestamps = []
+
+    to_update_node_ids = []
+
+    for node_id in unique_node_ids:
+      if len(messages[node_id]) > 0:
+        to_update_node_ids.append(node_id)
+        unique_messages.append(messages[node_id][-1][0])
+        unique_timestamps.append(messages[node_id][-1][1])
+
+    unique_messages = torch.stack(unique_messages) if len(to_update_node_ids) > 0 else []
+    unique_timestamps = torch.stack(unique_timestamps) if len(to_update_node_ids) > 0 else []
+
+    return to_update_node_ids, unique_messages, unique_timestamps
+
+
+class MeanMessageAggregator(MessageAggregator):
+  def __init__(self, device):
+    super(MeanMessageAggregator, self).__init__(device)
+
+  def aggregate(self, node_ids, messages):
+    """Average all messages for each node"""
+    unique_node_ids = np.unique(node_ids)
+    unique_messages = []
+    unique_timestamps = []
+
+    to_update_node_ids = []
+    n_messages = 0
+
+    for node_id in unique_node_ids:
+      if len(messages[node_id]) > 0:
+        n_messages += len(messages[node_id])
+        to_update_node_ids.append(node_id)
+        unique_messages.append(torch.mean(torch.stack([m[0] for m in messages[node_id]]), dim=0))
+        unique_timestamps.append(messages[node_id][-1][1])
+
+    unique_messages = torch.stack(unique_messages) if len(to_update_node_ids) > 0 else []
+    unique_timestamps = torch.stack(unique_timestamps) if len(to_update_node_ids) > 0 else []
+
+    return to_update_node_ids, unique_messages, unique_timestamps
+
+
+def get_message_aggregator(aggregator_type, device):
+  if aggregator_type == "last":
+    return LastMessageAggregator(device=device)
+  elif aggregator_type == "mean":
+    return MeanMessageAggregator(device=device)
+  else:
+    raise ValueError("Message aggregator {} not implemented".format(aggregator_type))
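A small sketch of the "last" strategy: when a node accumulated several messages within a batch, only the most recent one survives aggregation (the toy message dict below is ours):

import torch
from modules.message_aggregator import get_message_aggregator

agg = get_message_aggregator("last", device="cpu")
messages = {3: [(torch.ones(4), torch.tensor(1.0)),
                (torch.full((4,), 2.0), torch.tensor(5.0))]}
ids, msgs, ts = agg.aggregate([3], messages)
# ids == [3]; msgs is the single row [2., 2., 2., 2.]; ts == tensor([5.])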
modules/message_function.py
ADDED
@@ -0,0 +1,40 @@
+from torch import nn
+
+
+class MessageFunction(nn.Module):
+  """
+  Module which computes the message for a given interaction.
+  """
+
+  def compute_message(self, raw_messages):
+    return None
+
+
+class MLPMessageFunction(MessageFunction):
+  def __init__(self, raw_message_dimension, message_dimension):
+    super(MLPMessageFunction, self).__init__()
+
+    self.mlp = self.layers = nn.Sequential(
+      nn.Linear(raw_message_dimension, raw_message_dimension // 2),
+      nn.ReLU(),
+      nn.Linear(raw_message_dimension // 2, message_dimension),
+    )
+
+  def compute_message(self, raw_messages):
+    messages = self.mlp(raw_messages)
+
+    return messages
+
+
+class IdentityMessageFunction(MessageFunction):
+
+  def compute_message(self, raw_messages):
+    return raw_messages
+
+
+def get_message_function(module_type, raw_message_dimension, message_dimension):
+  if module_type == "mlp":
+    return MLPMessageFunction(raw_message_dimension, message_dimension)
+  elif module_type == "identity":
+    return IdentityMessageFunction()
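A quick sketch of the "mlp" option, which halves the raw dimension in a hidden layer before projecting to the message dimension. The numbers below are hypothetical; in model/tgn.py the raw dimension is 2 * memory_dimension + n_edge_features + the time encoder dimension:

import torch
from modules.message_function import get_message_function

fn = get_message_function("mlp", raw_message_dimension=1344, message_dimension=100)
raw = torch.randn(8, 1344)
print(fn.compute_message(raw).shape)  # torch.Size([8, 100])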
modules/tgn.py
ADDED
@@ -0,0 +1,278 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
from collections import defaultdict
|
5 |
+
|
6 |
+
from utils.utils import MergeLayer
|
7 |
+
from modules.memory import Memory
|
8 |
+
from modules.message_aggregator import get_message_aggregator
|
9 |
+
from modules.message_function import get_message_function
|
10 |
+
from modules.memory_updater import get_memory_updater
|
11 |
+
from modules.embedding_module import get_embedding_module
|
12 |
+
from model.time_encoding import TimeEncode
|
13 |
+
|
14 |
+
|
15 |
+
class TGN(torch.nn.Module):
|
16 |
+
def __init__(self, neighbor_finder, node_features, edge_features, device, n_layers=2,
|
17 |
+
n_heads=2, dropout=0.1, use_memory=False,
|
18 |
+
memory_update_at_start=True, message_dimension=100,
|
19 |
+
memory_dimension=500, embedding_module_type="graph_attention",
|
20 |
+
message_function="mlp",
|
21 |
+
mean_time_shift_src=0, std_time_shift_src=1, mean_time_shift_dst=0,
|
22 |
+
std_time_shift_dst=1, n_neighbors=None, aggregator_type="last",
|
23 |
+
memory_updater_type="gru",
|
24 |
+
use_destination_embedding_in_message=False,
|
25 |
+
use_source_embedding_in_message=False,
|
26 |
+
dyrep=False):
|
27 |
+
super(TGN, self).__init__()
|
28 |
+
|
29 |
+
self.n_layers = n_layers
|
30 |
+
self.neighbor_finder = neighbor_finder
|
31 |
+
self.device = device
|
32 |
+
self.logger = logging.getLogger(__name__)
|
33 |
+
|
34 |
+
self.node_raw_features = torch.from_numpy(node_features.astype(np.float32)).to(device)
|
35 |
+
self.edge_raw_features = torch.from_numpy(edge_features.astype(np.float32)).to(device)
|
36 |
+
|
37 |
+
self.n_node_features = self.node_raw_features.shape[1]
|
38 |
+
self.n_nodes = self.node_raw_features.shape[0]
|
39 |
+
self.n_edge_features = self.edge_raw_features.shape[1]
|
40 |
+
self.embedding_dimension = self.n_node_features
|
41 |
+
self.n_neighbors = n_neighbors
|
42 |
+
self.embedding_module_type = embedding_module_type
|
43 |
+
self.use_destination_embedding_in_message = use_destination_embedding_in_message
|
44 |
+
self.use_source_embedding_in_message = use_source_embedding_in_message
|
45 |
+
self.dyrep = dyrep
|
46 |
+
|
47 |
+
self.use_memory = use_memory
|
48 |
+
self.time_encoder = TimeEncode(dimension=self.n_node_features)
|
49 |
+
self.memory = None
|
50 |
+
|
51 |
+
self.mean_time_shift_src = mean_time_shift_src
|
52 |
+
self.std_time_shift_src = std_time_shift_src
|
53 |
+
self.mean_time_shift_dst = mean_time_shift_dst
|
54 |
+
self.std_time_shift_dst = std_time_shift_dst
|
55 |
+
|
56 |
+
if self.use_memory:
|
57 |
+
self.memory_dimension = memory_dimension
|
58 |
+
self.memory_update_at_start = memory_update_at_start
|
59 |
+
raw_message_dimension = 2 * self.memory_dimension + self.n_edge_features + \
|
60 |
+
self.time_encoder.dimension
|
61 |
+
message_dimension = message_dimension if message_function != "identity" else raw_message_dimension
|
62 |
+
self.memory = Memory(n_nodes=self.n_nodes,
|
63 |
+
memory_dimension=self.memory_dimension,
|
64 |
+
input_dimension=message_dimension,
|
65 |
+
message_dimension=message_dimension,
|
66 |
+
device=device)
|
67 |
+
self.message_aggregator = get_message_aggregator(aggregator_type=aggregator_type,
|
68 |
+
device=device)
|
69 |
+
self.message_function = get_message_function(module_type=message_function,
|
70 |
+
raw_message_dimension=raw_message_dimension,
|
71 |
+
message_dimension=message_dimension)
|
72 |
+
self.memory_updater = get_memory_updater(module_type=memory_updater_type,
|
73 |
+
memory=self.memory,
|
74 |
+
message_dimension=message_dimension,
|
75 |
+
memory_dimension=self.memory_dimension,
|
76 |
+
device=device)
|
77 |
+
|
78 |
+
self.embedding_module_type = embedding_module_type
|
79 |
+
|
80 |
+
self.embedding_module = get_embedding_module(module_type=embedding_module_type,
|
81 |
+
node_features=self.node_raw_features,
|
82 |
+
edge_features=self.edge_raw_features,
|
83 |
+
memory=self.memory,
|
84 |
+
neighbor_finder=self.neighbor_finder,
|
85 |
+
time_encoder=self.time_encoder,
|
86 |
+
n_layers=self.n_layers,
|
87 |
+
n_node_features=self.n_node_features,
|
88 |
+
n_edge_features=self.n_edge_features,
|
89 |
+
n_time_features=self.n_node_features,
|
90 |
+
embedding_dimension=self.embedding_dimension,
|
91 |
+
device=self.device,
|
92 |
+
n_heads=n_heads, dropout=dropout,
|
93 |
+
use_memory=use_memory,
|
94 |
+
n_neighbors=self.n_neighbors)
|
95 |
+
|
96 |
+
# MLP to compute probability on an edge given two node embeddings
|
97 |
+
self.affinity_score = MergeLayer(self.n_node_features, self.n_node_features,
|
98 |
+
self.n_node_features,
|
99 |
+
1)
|
100 |
+
|
101 |
+
  def compute_temporal_embeddings(self, source_nodes, destination_nodes, negative_nodes, edge_times,
                                  edge_idxs, n_neighbors=20):
    """
    Compute temporal embeddings for sources, destinations, and negatively sampled destinations.

    :param source_nodes [batch_size]: source ids
    :param destination_nodes [batch_size]: destination ids
    :param negative_nodes [batch_size]: ids of negative sampled destinations
    :param edge_times [batch_size]: timestamp of interaction
    :param edge_idxs [batch_size]: index of interaction
    :param n_neighbors [scalar]: number of temporal neighbors to consider in each convolutional
    layer
    :return: Temporal embeddings for sources, destinations and negatives
    """

    n_samples = len(source_nodes)
    nodes = np.concatenate([source_nodes, destination_nodes, negative_nodes])
    positives = np.concatenate([source_nodes, destination_nodes])
    timestamps = np.concatenate([edge_times, edge_times, edge_times])

    memory = None
    time_diffs = None
    if self.use_memory:
      if self.memory_update_at_start:
        # Update memory for all nodes with messages stored in previous batches
        memory, last_update = self.get_updated_memory(list(range(self.n_nodes)),
                                                      self.memory.messages)
      else:
        memory = self.memory.get_memory(list(range(self.n_nodes)))
        last_update = self.memory.last_update

      ### Compute differences between the time the memory of a node was last updated,
      ### and the time for which we want to compute the embedding of a node
      source_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
        source_nodes].long()
      source_time_diffs = (source_time_diffs - self.mean_time_shift_src) / self.std_time_shift_src
      destination_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
        destination_nodes].long()
      destination_time_diffs = (destination_time_diffs - self.mean_time_shift_dst) / self.std_time_shift_dst
      negative_time_diffs = torch.LongTensor(edge_times).to(self.device) - last_update[
        negative_nodes].long()
      negative_time_diffs = (negative_time_diffs - self.mean_time_shift_dst) / self.std_time_shift_dst

      time_diffs = torch.cat([source_time_diffs, destination_time_diffs, negative_time_diffs],
                             dim=0)

    # Compute the embeddings using the embedding module
    node_embedding = self.embedding_module.compute_embedding(memory=memory,
                                                             source_nodes=nodes,
                                                             timestamps=timestamps,
                                                             n_layers=self.n_layers,
                                                             n_neighbors=n_neighbors,
                                                             time_diffs=time_diffs)

    source_node_embedding = node_embedding[:n_samples]
    destination_node_embedding = node_embedding[n_samples: 2 * n_samples]
    negative_node_embedding = node_embedding[2 * n_samples:]

    if self.use_memory:
      if self.memory_update_at_start:
        # Persist the updates to the memory only for sources and destinations (since now we have
        # new messages for them)
        self.update_memory(positives, self.memory.messages)

        assert torch.allclose(memory[positives], self.memory.get_memory(positives), atol=1e-5), \
          "Something wrong in how the memory was updated"

        # Remove messages for the positives since we have already updated the memory using them
        self.memory.clear_messages(positives)

      unique_sources, source_id_to_messages = self.get_raw_messages(source_nodes,
                                                                    source_node_embedding,
                                                                    destination_nodes,
                                                                    destination_node_embedding,
                                                                    edge_times, edge_idxs)
      unique_destinations, destination_id_to_messages = self.get_raw_messages(destination_nodes,
                                                                              destination_node_embedding,
                                                                              source_nodes,
                                                                              source_node_embedding,
                                                                              edge_times, edge_idxs)
      if self.memory_update_at_start:
        self.memory.store_raw_messages(unique_sources, source_id_to_messages)
        self.memory.store_raw_messages(unique_destinations, destination_id_to_messages)
      else:
        self.update_memory(unique_sources, source_id_to_messages)
        self.update_memory(unique_destinations, destination_id_to_messages)

      if self.dyrep:
        source_node_embedding = memory[source_nodes]
        destination_node_embedding = memory[destination_nodes]
        negative_node_embedding = memory[negative_nodes]

    return source_node_embedding, destination_node_embedding, negative_node_embedding

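  # Note on memory_update_at_start (editorial annotation, not upstream code): when this
  # flag is set, each batch (1) refreshes the memory by applying the messages stored by
  # *previous* batches, (2) computes embeddings from the refreshed memory, and
  # (3) stores the current batch's raw messages for the next batch. Updating the memory
  # with a batch's own interactions only after its predictions avoids information leakage.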
  def compute_edge_probabilities(self, source_nodes, destination_nodes, negative_nodes, edge_times,
                                 edge_idxs, n_neighbors=20):
    """
    Compute probabilities for edges between sources and destinations and between sources and
    negatives by first computing temporal embeddings using the TGN encoder and then feeding them
    into the MLP decoder.
    :param source_nodes [batch_size]: source ids
    :param destination_nodes [batch_size]: destination ids
    :param negative_nodes [batch_size]: ids of negative sampled destinations
    :param edge_times [batch_size]: timestamp of interaction
    :param edge_idxs [batch_size]: index of interaction
    :param n_neighbors [scalar]: number of temporal neighbors to consider in each convolutional
    layer
    :return: Probabilities for both the positive and negative edges
    """
    n_samples = len(source_nodes)
    source_node_embedding, destination_node_embedding, negative_node_embedding = self.compute_temporal_embeddings(
      source_nodes, destination_nodes, negative_nodes, edge_times, edge_idxs, n_neighbors)

    # The source embeddings are repeated so that positive and negative pairs are scored in one pass
    score = self.affinity_score(torch.cat([source_node_embedding, source_node_embedding], dim=0),
                                torch.cat([destination_node_embedding,
                                           negative_node_embedding])).squeeze(dim=0)
    pos_score = score[:n_samples]
    neg_score = score[n_samples:]

    return pos_score.sigmoid(), neg_score.sigmoid()

  def update_memory(self, nodes, messages):
    # Aggregate messages for the same nodes
    unique_nodes, unique_messages, unique_timestamps = \
      self.message_aggregator.aggregate(
        nodes,
        messages)

    if len(unique_nodes) > 0:
      unique_messages = self.message_function.compute_message(unique_messages)

    # Update the memory with the aggregated messages
    self.memory_updater.update_memory(unique_nodes, unique_messages,
                                      timestamps=unique_timestamps)

  def get_updated_memory(self, nodes, messages):
    # Aggregate messages for the same nodes
    unique_nodes, unique_messages, unique_timestamps = \
      self.message_aggregator.aggregate(
        nodes,
        messages)

    if len(unique_nodes) > 0:
      unique_messages = self.message_function.compute_message(unique_messages)

    updated_memory, updated_last_update = self.memory_updater.get_updated_memory(unique_nodes,
                                                                                 unique_messages,
                                                                                 timestamps=unique_timestamps)

    return updated_memory, updated_last_update

  def get_raw_messages(self, source_nodes, source_node_embedding, destination_nodes,
                       destination_node_embedding, edge_times, edge_idxs):
    edge_times = torch.from_numpy(edge_times).float().to(self.device)
    edge_features = self.edge_raw_features[edge_idxs]

    source_memory = self.memory.get_memory(source_nodes) if not \
      self.use_source_embedding_in_message else source_node_embedding
    destination_memory = self.memory.get_memory(destination_nodes) if \
      not self.use_destination_embedding_in_message else destination_node_embedding

    source_time_delta = edge_times - self.memory.last_update[source_nodes]
    source_time_delta_encoding = self.time_encoder(source_time_delta.unsqueeze(dim=1)).view(len(
      source_nodes), -1)

    source_message = torch.cat([source_memory, destination_memory, edge_features,
                                source_time_delta_encoding],
                               dim=1)
    messages = defaultdict(list)
    unique_sources = np.unique(source_nodes)

    for i in range(len(source_nodes)):
      messages[source_nodes[i]].append((source_message[i], edge_times[i]))

    return unique_sources, messages

  def set_neighbor_finder(self, neighbor_finder):
    self.neighbor_finder = neighbor_finder
    self.embedding_module.neighbor_finder = neighbor_finder

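For orientation, a minimal training-step sketch of how compute_edge_probabilities is typically driven. The names tgn, batch, and train_rand_sampler are illustrative (a TGN instance built as above, a chronological slice of the training Data, and the RandEdgeSampler defined in utils/utils.py); only compute_edge_probabilities itself comes from this file.

import torch

criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(tgn.parameters(), lr=1e-4)  # learning rate is illustrative

# One randomly sampled negative destination per positive edge
_, negatives = train_rand_sampler.sample(len(batch.sources))

optimizer.zero_grad()
pos_prob, neg_prob = tgn.compute_edge_probabilities(batch.sources, batch.destinations,
                                                    negatives, batch.timestamps,
                                                    batch.edge_idxs, n_neighbors=20)

# Positive edges are labelled 1, sampled negatives 0
loss = criterion(pos_prob.squeeze(), torch.ones_like(pos_prob.squeeze())) + \
       criterion(neg_prob.squeeze(), torch.zeros_like(neg_prob.squeeze()))
loss.backward()
optimizer.step()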
utils/data_processing.py
ADDED
@@ -0,0 +1,184 @@
import numpy as np
import random
import pandas as pd


class Data:
  def __init__(self, sources, destinations, timestamps, edge_idxs, labels):
    self.sources = sources
    self.destinations = destinations
    self.timestamps = timestamps
    self.edge_idxs = edge_idxs
    self.labels = labels
    self.n_interactions = len(sources)
    self.unique_nodes = set(sources) | set(destinations)
    self.n_unique_nodes = len(self.unique_nodes)


def get_data_node_classification(dataset_name, use_validation=False):
  ### Load data and train val test split
  graph_df = pd.read_csv('./data/ml_{}.csv'.format(dataset_name))
  edge_features = np.load('./data/ml_{}.npy'.format(dataset_name))
  node_features = np.load('./data/ml_{}_node.npy'.format(dataset_name))

  val_time, test_time = list(np.quantile(graph_df.ts, [0.70, 0.85]))

  sources = graph_df.u.values
  destinations = graph_df.i.values
  edge_idxs = graph_df.idx.values
  labels = graph_df.label.values
  timestamps = graph_df.ts.values

  random.seed(2020)

  train_mask = timestamps <= val_time if use_validation else timestamps <= test_time
  test_mask = timestamps > test_time
  val_mask = np.logical_and(timestamps <= test_time, timestamps > val_time) if use_validation else test_mask

  full_data = Data(sources, destinations, timestamps, edge_idxs, labels)

  train_data = Data(sources[train_mask], destinations[train_mask], timestamps[train_mask],
                    edge_idxs[train_mask], labels[train_mask])

  val_data = Data(sources[val_mask], destinations[val_mask], timestamps[val_mask],
                  edge_idxs[val_mask], labels[val_mask])

  test_data = Data(sources[test_mask], destinations[test_mask], timestamps[test_mask],
                   edge_idxs[test_mask], labels[test_mask])

  return full_data, node_features, edge_features, train_data, val_data, test_data

def get_data(dataset_name, different_new_nodes_between_val_and_test=False, randomize_features=False):
  ### Load data and train val test split
  graph_df = pd.read_csv('./data/ml_{}.csv'.format(dataset_name))
  edge_features = np.load('./data/ml_{}.npy'.format(dataset_name))
  node_features = np.load('./data/ml_{}_node.npy'.format(dataset_name))

  if randomize_features:
    node_features = np.random.rand(node_features.shape[0], node_features.shape[1])

  val_time, test_time = list(np.quantile(graph_df.ts, [0.70, 0.85]))

  sources = graph_df.u.values
  destinations = graph_df.i.values
  edge_idxs = graph_df.idx.values
  labels = graph_df.label.values
  timestamps = graph_df.ts.values

  full_data = Data(sources, destinations, timestamps, edge_idxs, labels)

  random.seed(2020)

  node_set = set(sources) | set(destinations)
  n_total_unique_nodes = len(node_set)

  # Compute nodes which appear at test time
  test_node_set = set(sources[timestamps > val_time]).union(
    set(destinations[timestamps > val_time]))
  # Sample nodes which we keep as new nodes (to test inductiveness), so that we have to remove all
  # their edges from training. Sampling from a sorted list keeps the draw deterministic under the
  # seed and works on Python 3.11+, where random.sample no longer accepts a set.
  new_test_node_set = set(random.sample(sorted(test_node_set), int(0.1 * n_total_unique_nodes)))

  # Mask saying for each source and destination whether they are new test nodes
  new_test_source_mask = graph_df.u.map(lambda x: x in new_test_node_set).values
  new_test_destination_mask = graph_df.i.map(lambda x: x in new_test_node_set).values

  # Mask which is true for edges with both destination and source not being new test nodes (because
  # we want to remove all edges involving any new test node)
  observed_edges_mask = np.logical_and(~new_test_source_mask, ~new_test_destination_mask)

  # For train we keep edges happening before the validation time which do not involve any new node
  # used for inductiveness
  train_mask = np.logical_and(timestamps <= val_time, observed_edges_mask)

  train_data = Data(sources[train_mask], destinations[train_mask], timestamps[train_mask],
                    edge_idxs[train_mask], labels[train_mask])

  # Define the new node sets for testing inductiveness of the model
  train_node_set = set(train_data.sources).union(train_data.destinations)
  assert len(train_node_set & new_test_node_set) == 0
  new_node_set = node_set - train_node_set

  val_mask = np.logical_and(timestamps <= test_time, timestamps > val_time)
  test_mask = timestamps > test_time

  if different_new_nodes_between_val_and_test:
    n_new_nodes = len(new_test_node_set) // 2
    val_new_node_set = set(list(new_test_node_set)[:n_new_nodes])
    test_new_node_set = set(list(new_test_node_set)[n_new_nodes:])

    edge_contains_new_val_node_mask = np.array(
      [(a in val_new_node_set or b in val_new_node_set) for a, b in zip(sources, destinations)])
    edge_contains_new_test_node_mask = np.array(
      [(a in test_new_node_set or b in test_new_node_set) for a, b in zip(sources, destinations)])
    new_node_val_mask = np.logical_and(val_mask, edge_contains_new_val_node_mask)
    new_node_test_mask = np.logical_and(test_mask, edge_contains_new_test_node_mask)

  else:
    edge_contains_new_node_mask = np.array(
      [(a in new_node_set or b in new_node_set) for a, b in zip(sources, destinations)])
    new_node_val_mask = np.logical_and(val_mask, edge_contains_new_node_mask)
    new_node_test_mask = np.logical_and(test_mask, edge_contains_new_node_mask)

  # Validation and test with all edges
  val_data = Data(sources[val_mask], destinations[val_mask], timestamps[val_mask],
                  edge_idxs[val_mask], labels[val_mask])

  test_data = Data(sources[test_mask], destinations[test_mask], timestamps[test_mask],
                   edge_idxs[test_mask], labels[test_mask])

  # Validation and test with edges that have at least one new node (not in the training set)
  new_node_val_data = Data(sources[new_node_val_mask], destinations[new_node_val_mask],
                           timestamps[new_node_val_mask],
                           edge_idxs[new_node_val_mask], labels[new_node_val_mask])

  new_node_test_data = Data(sources[new_node_test_mask], destinations[new_node_test_mask],
                            timestamps[new_node_test_mask], edge_idxs[new_node_test_mask],
                            labels[new_node_test_mask])

  print("The dataset has {} interactions, involving {} different nodes".format(full_data.n_interactions,
                                                                               full_data.n_unique_nodes))
  print("The training dataset has {} interactions, involving {} different nodes".format(
    train_data.n_interactions, train_data.n_unique_nodes))
  print("The validation dataset has {} interactions, involving {} different nodes".format(
    val_data.n_interactions, val_data.n_unique_nodes))
  print("The test dataset has {} interactions, involving {} different nodes".format(
    test_data.n_interactions, test_data.n_unique_nodes))
  print("The new node validation dataset has {} interactions, involving {} different nodes".format(
    new_node_val_data.n_interactions, new_node_val_data.n_unique_nodes))
  print("The new node test dataset has {} interactions, involving {} different nodes".format(
    new_node_test_data.n_interactions, new_node_test_data.n_unique_nodes))
  print("{} nodes were used for the inductive testing, i.e. are never seen during training".format(
    len(new_test_node_set)))

  return node_features, edge_features, full_data, train_data, val_data, test_data, \
         new_node_val_data, new_node_test_data

def compute_time_statistics(sources, destinations, timestamps):
  last_timestamp_sources = dict()
  last_timestamp_dst = dict()
  all_timediffs_src = []
  all_timediffs_dst = []
  for k in range(len(sources)):
    source_id = sources[k]
    dest_id = destinations[k]
    c_timestamp = timestamps[k]
    if source_id not in last_timestamp_sources.keys():
      last_timestamp_sources[source_id] = 0
    if dest_id not in last_timestamp_dst.keys():
      last_timestamp_dst[dest_id] = 0
    all_timediffs_src.append(c_timestamp - last_timestamp_sources[source_id])
    all_timediffs_dst.append(c_timestamp - last_timestamp_dst[dest_id])
    last_timestamp_sources[source_id] = c_timestamp
    last_timestamp_dst[dest_id] = c_timestamp
  assert len(all_timediffs_src) == len(sources)
  assert len(all_timediffs_dst) == len(sources)
  mean_time_shift_src = np.mean(all_timediffs_src)
  std_time_shift_src = np.std(all_timediffs_src)
  mean_time_shift_dst = np.mean(all_timediffs_dst)
  std_time_shift_dst = np.std(all_timediffs_dst)

  return mean_time_shift_src, std_time_shift_src, mean_time_shift_dst, std_time_shift_dst
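A short usage sketch for this module; the 'wikipedia' name matches the ml_wikipedia files shipped under data/, and everything else is defined above:

from utils.data_processing import get_data, compute_time_statistics

# Chronological 70/15/15 split (np.quantile at 0.70 and 0.85), plus the
# inductive new-node validation/test sets built in get_data
node_features, edge_features, full_data, train_data, val_data, test_data, \
  new_node_val_data, new_node_test_data = get_data('wikipedia')

# Mean/std of per-node inter-event times; TGN uses these to normalise the
# time deltas computed in compute_temporal_embeddings
mean_time_shift_src, std_time_shift_src, mean_time_shift_dst, std_time_shift_dst = \
  compute_time_statistics(full_data.sources, full_data.destinations, full_data.timestamps)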
utils/utils.py
ADDED
@@ -0,0 +1,186 @@
import numpy as np
import torch


class MergeLayer(torch.nn.Module):
  def __init__(self, dim1, dim2, dim3, dim4):
    super().__init__()
    self.fc1 = torch.nn.Linear(dim1 + dim2, dim3)
    self.fc2 = torch.nn.Linear(dim3, dim4)
    self.act = torch.nn.ReLU()

    torch.nn.init.xavier_normal_(self.fc1.weight)
    torch.nn.init.xavier_normal_(self.fc2.weight)

  def forward(self, x1, x2):
    x = torch.cat([x1, x2], dim=1)
    h = self.act(self.fc1(x))
    return self.fc2(h)


class MLP(torch.nn.Module):
  def __init__(self, dim, drop=0.3):
    super().__init__()
    self.fc_1 = torch.nn.Linear(dim, 80)
    self.fc_2 = torch.nn.Linear(80, 10)
    self.fc_3 = torch.nn.Linear(10, 1)
    self.act = torch.nn.ReLU()
    self.dropout = torch.nn.Dropout(p=drop, inplace=False)

  def forward(self, x):
    x = self.act(self.fc_1(x))
    x = self.dropout(x)
    x = self.act(self.fc_2(x))
    x = self.dropout(x)
    return self.fc_3(x).squeeze(dim=1)


class EarlyStopMonitor(object):
  def __init__(self, max_round=3, higher_better=True, tolerance=1e-10):
    self.max_round = max_round
    self.num_round = 0

    self.epoch_count = 0
    self.best_epoch = 0

    self.last_best = None
    self.higher_better = higher_better
    self.tolerance = tolerance

  def early_stop_check(self, curr_val):
    if not self.higher_better:
      curr_val *= -1
    if self.last_best is None:
      self.last_best = curr_val
    elif (curr_val - self.last_best) / np.abs(self.last_best) > self.tolerance:
      self.last_best = curr_val
      self.num_round = 0
      self.best_epoch = self.epoch_count
    else:
      self.num_round += 1

    self.epoch_count += 1

    return self.num_round >= self.max_round

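# Usage sketch (editorial annotation, not part of the upstream file):
# EarlyStopMonitor is fed one validation metric per epoch and reports True once
# the metric has failed to improve for max_round consecutive epochs.
#
#   early_stopper = EarlyStopMonitor(max_round=3, higher_better=True)
#   for epoch in range(num_epochs):           # num_epochs, run_validation are hypothetical
#       val_ap = run_validation(model)
#       if early_stopper.early_stop_check(val_ap):
#           break                             # best epoch is early_stopper.best_epoch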
class RandEdgeSampler(object):
  def __init__(self, src_list, dst_list, seed=None):
    self.seed = None
    self.src_list = np.unique(src_list)
    self.dst_list = np.unique(dst_list)

    if seed is not None:
      self.seed = seed
      self.random_state = np.random.RandomState(self.seed)

  def sample(self, size):
    if self.seed is None:
      src_index = np.random.randint(0, len(self.src_list), size)
      dst_index = np.random.randint(0, len(self.dst_list), size)
    else:
      src_index = self.random_state.randint(0, len(self.src_list), size)
      dst_index = self.random_state.randint(0, len(self.dst_list), size)
    return self.src_list[src_index], self.dst_list[dst_index]

  def reset_random_state(self):
    self.random_state = np.random.RandomState(self.seed)

def get_neighbor_finder(data, uniform, max_node_idx=None):
  max_node_idx = max(data.sources.max(), data.destinations.max()) if max_node_idx is None else max_node_idx
  adj_list = [[] for _ in range(max_node_idx + 1)]
  for source, destination, edge_idx, timestamp in zip(data.sources, data.destinations,
                                                      data.edge_idxs,
                                                      data.timestamps):
    adj_list[source].append((destination, edge_idx, timestamp))
    adj_list[destination].append((source, edge_idx, timestamp))

  return NeighborFinder(adj_list, uniform=uniform)

class NeighborFinder:
  def __init__(self, adj_list, uniform=False, seed=None):
    self.node_to_neighbors = []
    self.node_to_edge_idxs = []
    self.node_to_edge_timestamps = []

    for neighbors in adj_list:
      # neighbors is a list of tuples (neighbor, edge_idx, timestamp);
      # sort it by timestamp so temporal lookups can use binary search
      sorted_neighbors = sorted(neighbors, key=lambda x: x[2])
      self.node_to_neighbors.append(np.array([x[0] for x in sorted_neighbors]))
      self.node_to_edge_idxs.append(np.array([x[1] for x in sorted_neighbors]))
      self.node_to_edge_timestamps.append(np.array([x[2] for x in sorted_neighbors]))

    self.uniform = uniform

    if seed is not None:
      self.seed = seed
      self.random_state = np.random.RandomState(self.seed)

  def find_before(self, src_idx, cut_time):
    """
    Extracts all the interactions of node src_idx happening strictly before cut_time in the
    overall interaction graph. The returned interactions are sorted by time.

    Returns 3 arrays: neighbors, edge_idxs, timestamps
    """
    i = np.searchsorted(self.node_to_edge_timestamps[src_idx], cut_time)

    return self.node_to_neighbors[src_idx][:i], self.node_to_edge_idxs[src_idx][:i], self.node_to_edge_timestamps[src_idx][:i]

  def get_temporal_neighbor(self, source_nodes, timestamps, n_neighbors=20):
    """
    Given a list of node ids and corresponding cut times, extracts a sampled temporal
    neighborhood for each node in the list.

    Params
    ------
    source_nodes: List[int]
    timestamps: List[float]
    n_neighbors: int
    """
    assert (len(source_nodes) == len(timestamps))

    tmp_n_neighbors = n_neighbors if n_neighbors > 0 else 1
    # NB! All interactions described in these matrices are sorted in each row by time
    neighbors = np.zeros((len(source_nodes), tmp_n_neighbors)).astype(
      np.int32)  # entry (i,j) is the id of a node that source_nodes[i] interacted with before timestamps[i]
    edge_times = np.zeros((len(source_nodes), tmp_n_neighbors)).astype(
      np.float32)  # entry (i,j) is the timestamp of the interaction between source_nodes[i] and neighbors[i,j]
    edge_idxs = np.zeros((len(source_nodes), tmp_n_neighbors)).astype(
      np.int32)  # entry (i,j) is the interaction index of the interaction between source_nodes[i] and neighbors[i,j]

    for i, (source_node, timestamp) in enumerate(zip(source_nodes, timestamps)):
      # Extract all neighbors, interaction indexes and timestamps of interactions of
      # source_node happening before the cut time
      source_neighbors, source_edge_idxs, source_edge_times = self.find_before(source_node,
                                                                               timestamp)

      if len(source_neighbors) > 0 and n_neighbors > 0:
        if self.uniform:
          # Uniform sampling: draw n_neighbors indices uniformly at random (with replacement)
          sampled_idx = np.random.randint(0, len(source_neighbors), n_neighbors)

          neighbors[i, :] = source_neighbors[sampled_idx]
          edge_times[i, :] = source_edge_times[sampled_idx]
          edge_idxs[i, :] = source_edge_idxs[sampled_idx]

          # Re-sort based on time
          pos = edge_times[i, :].argsort()
          neighbors[i, :] = neighbors[i, :][pos]
          edge_times[i, :] = edge_times[i, :][pos]
          edge_idxs[i, :] = edge_idxs[i, :][pos]
        else:
          # Take the n_neighbors most recent interactions
          source_edge_times = source_edge_times[-n_neighbors:]
          source_neighbors = source_neighbors[-n_neighbors:]
          source_edge_idxs = source_edge_idxs[-n_neighbors:]

          assert (len(source_neighbors) <= n_neighbors)
          assert (len(source_edge_times) <= n_neighbors)
          assert (len(source_edge_idxs) <= n_neighbors)

          # Left-pad with zeros so the most recent interactions sit at the end of the row
          neighbors[i, n_neighbors - len(source_neighbors):] = source_neighbors
          edge_times[i, n_neighbors - len(source_edge_times):] = source_edge_times
          edge_idxs[i, n_neighbors - len(source_edge_idxs):] = source_edge_idxs

    return neighbors, edge_idxs, edge_times
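Finally, a minimal sketch of building a temporal neighbor finder over a training split and querying it. train_data is assumed to be the training Data returned by get_data in utils/data_processing.py; everything else comes from this file.

from utils.utils import get_neighbor_finder

# The train-time finder only sees training edges; uniform=False keeps the most
# recent interactions instead of sampling uniformly from the history
train_ngh_finder = get_neighbor_finder(train_data, uniform=False)

# For each queried node, return up to 20 neighbors it interacted with strictly
# before the given timestamp (rows are zero-padded on the left when shorter)
neighbors, edge_idxs, edge_times = train_ngh_finder.get_temporal_neighbor(
  train_data.sources[:5], train_data.timestamps[:5], n_neighbors=20)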