我试图了解如何修改Tensorflow Recommenders教程中的一些代码。特别想了解如何更改对Movie模型调用函数中嵌入变量的引用
电影模型
class MovieModel(tf.keras.Model):
def __init__(self):
super().__init__()
max_tokens = 10_000
self.title_embedding = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_movie_titles, mask_token=None),
tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32)
])
self.title_vectorizer = tf.keras.layers.TextVectorization(
max_tokens=max_tokens)
self.title_text_embedding = tf.keras.Sequential([
self.title_vectorizer,
tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
tf.keras.layers.GlobalAveragePooling1D(),
])
self.title_vectorizer.adapt(movies)
def call(self, titles):
return tf.concat([
self.title_embedding(titles),
self.title_text_embedding(titles),
], axis=1)
如果我改变:
def call(self, titles):
return tf.concat([
self.title_embedding(titles),
self.title_text_embedding(titles),
], axis=1)
至:
def call(self, inputs):
return tf.concat([
self.title_embedding(inputs["movie_title"]),
self.title_text_embedding(inputs["movie_title"]),
], axis=1)
我得到了错误:
TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got 'movie_title' Call arguments received: • inputs=tf.Tensor(shape=(None,), dtype=string)
当我运行组合的用户和电影模型时:
class MovielensModel(tfrs.models.Model):
def __init__(self, use_timestamps):
super().__init__()
self.query_model = tf.keras.Sequential([
UserModel(use_timestamps),
tf.keras.layers.Dense(32)
])
self.candidate_model = tf.keras.Sequential([
MovieModel(),
tf.keras.layers.Dense(32)
])
self.task = tfrs.tasks.Retrieval(
metrics=tfrs.metrics.FactorizedTopK(
candidates=movies.batch(128).map(self.candidate_model),
),
)
def compute_loss(self, features, training=False):
# We only pass the user id and timestamp features into the query model. This
# is to ensure that the training inputs would have the same keys as the
# query inputs. Otherwise the discrepancy in input structure would cause an
# error when loading the query model after saving it.
query_embeddings = self.query_model({
"user_id": features["user_id"],
"timestamp": features["timestamp"],
})
movie_embeddings = self.candidate_model({"movie_title":features["movie_title"]})
return self.task(query_embeddings, movie_embeddings)
我假设将调用函数中的输入变量更改为更具体的变量不会更改输出,但出于某种原因,情况并非如此。我将其镜像为具有与UserModel相似的结构,但不确定为什么它不能以相同的方式工作
用户模型:
class UserModel(tf.keras.Model):
def __init__(self, use_timestamps):
super().__init__()
self._use_timestamps = use_timestamps
self.user_embedding = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_user_ids, mask_token=None),
tf.keras.layers.Embedding(len(unique_user_ids) + 1, 32),
])
if use_timestamps:
self.timestamp_embedding = tf.keras.Sequential([
tf.keras.layers.Discretization(timestamp_buckets.tolist()),
tf.keras.layers.Embedding(len(timestamp_buckets) + 1, 32),
])
self.normalized_timestamp = tf.keras.layers.Normalization(
axis=None
)
self.normalized_timestamp.adapt(timestamps)
def call(self, inputs):
if not self._use_timestamps:
return self.user_embedding(inputs["user_id"])
return tf.concat([
self.user_embedding(inputs["user_id"]),
self.timestamp_embedding(inputs["timestamp"]),
tf.reshape(self.normalized_timestamp(inputs["timestamp"]), (-1, 1)),
], axis=1)