TP_WordEmbedding/wordvec.py

18 lines
535 B
Python
Raw Normal View History

2022-09-25 23:30:57 +02:00
import gensim.downloader as api
# Pretrained embeddings, fetched by name through gensim's downloader.
wv = api.load('word2vec-google-news-300')

# Optional: preview the first ten vocabulary entries.
# for index, word in enumerate(wv.index_to_key):
#     if index == 10:
#         break
#     print(f"word #{index}/{len(wv.index_to_key)} is {word}")

# Five nearest neighbours of the English word 'car'.
car_neighbours = wv.most_similar(positive=['car'], topn=5)
print(car_neighbours)
2022-09-26 13:27:41 +02:00
# Same nearest-neighbour query, this time with a French word.
# NOTE(review): most_similar raises KeyError when the key is missing from the
# vocabulary -- confirm 'voiture' is present in this model.
print(wv.most_similar(positive=['voiture'], topn=5))

# Analogy query: father - man + woman.
# The words are passed as keys rather than as a precomputed vector. When
# given keys, most_similar leaves the query words out of the ranking; a raw
# vector gets no such filtering, so with topn=1 the single hit could simply
# be one of the input words.
# NOTE: the key-based form combines unit-normalised vectors, so the score
# can differ slightly from the raw-vector arithmetic.
result = wv.most_similar(positive=['father', 'woman'], negative=['man'], topn=1)
print(result)