我试图了解如何修改Tensorflow Recommenders教程中的一些代码。特别想了解如何更改对Movie模型调用函数中嵌入变量的引用
电影模型
class MovieModel(tf.keras.Model): def __init__(self): super().__init__() max_tokens = 10_000 self.title_embedding = tf.keras.Sequential([ tf.keras.layers.StringLookup( vocabulary=unique_movie_titles, mask_token=None), tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32) ]) self.title_vectorizer = tf.keras.layers.TextVectorization( max_tokens=max_tokens) self.title_text_embedding = tf.keras.Sequential([ self.title_vectorizer, tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True), tf.keras.layers.GlobalAveragePooling1D(), ]) self.title_vectorizer.adapt(movies) def call(self, titles): return tf.concat([ self.title_embedding(titles), self.title_text_embedding(titles), ], axis=1)
如果我改变:
def call(self, titles): return tf.concat([ self.title_embedding(titles), self.title_text_embedding(titles), ], axis=1)
至:
def call(self, inputs): return tf.concat([ self.title_embedding(inputs["movie_title"]), self.title_text_embedding(inputs["movie_title"]), ], axis=1)
我得到了错误:
TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'movie_title' Call arguments received: • inputs=tf.Tensor(shape=(None,), dtype=string)
当我运行组合的用户和电影模型时:
class MovielensModel(tfrs.models.Model): def __init__(self, use_timestamps): super().__init__() self.query_model = tf.keras.Sequential([ UserModel(use_timestamps), tf.keras.layers.Dense(32) ]) self.candidate_model = tf.keras.Sequential([ MovieModel(), tf.keras.layers.Dense(32) ]) self.task = tfrs.tasks.Retrieval( metrics=tfrs.metrics.FactorizedTopK( candidates=movies.batch(128).map(self.candidate_model), ), ) def compute_loss(self, features, training=False): # We only pass the user id and timestamp features into the query model. This # is to ensure that the training inputs would have the same keys as the # query inputs. Otherwise the discrepancy in input structure would cause an # error when loading the query model after saving it. query_embeddings = self.query_model({ "user_id": features["user_id"], "timestamp": features["timestamp"], }) movie_embeddings = self.candidate_model({"movie_title":features["movie_title"]}) return self.task(query_embeddings, movie_embeddings)
我假设将调用函数中的输入变量更改为更具体的变量不会更改输出,但出于某种原因,情况并非如此。我将其镜像为具有与UserModel相似的结构,但不确定为什么它不能以相同的方式工作
用户模型:
class UserModel(tf.keras.Model): def __init__(self, use_timestamps): super().__init__() self._use_timestamps = use_timestamps self.user_embedding = tf.keras.Sequential([ tf.keras.layers.StringLookup( vocabulary=unique_user_ids, mask_token=None), tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32), ]) if use_timestamps: self.timestamp_embedding = tf.keras.Sequential([ tf.keras.layers.Discretization(timestamp_buckets.tolist()), tf.keras.layers.Embedding(len(timestamp_buckets) + 1, 32), ]) self.normalized_timestamp = tf.keras.layers.Normalization( axis=None ) self.normalized_timestamp.adapt(timestamps) def call(self, inputs): if not self._use_timestamps: return self.user_embedding(inputs["user_id"]) return tf.concat([ self.user_embedding(inputs["user_id"]), self.timestamp_embedding(inputs["timestamp"]), tf.reshape(self.normalized_timestamp(inputs["timestamp"]), (-1, 1)), ], axis=1)