# TP_WordEmbedding/wordvec.py

import gensim.downloader as api
# Load the pretrained 'word2vec-google-news-300' vectors through gensim's
# downloader API; `wv` is the lookup object used by every query below.
# NOTE(review): per the gensim downloader docs, the first call fetches the
# dataset over the network and later calls reuse the local cache -- expect a
# long first run.
wv = api.load('word2vec-google-news-300')

# Display a few words from the vocabulary
# for index, word in enumerate(wv.index_to_key):
#     if index == 10:
#         break
#     print(f"word #{index}/{len(wv.index_to_key)} is {word}")

# Nearest neighbours of an English word and of a French one.  The model is
# trained on English news text, so the French query may be missing from the
# vocabulary; gensim signals that with a KeyError, which would otherwise stop
# the script before the analogy below is reached.
for query in ('car', 'voiture'):
    try:
        neighbours = wv.most_similar(positive=[query], topn=5)
    except KeyError:
        print(f"'{query}' is not in the model vocabulary")
    else:
        print(neighbours)

# Word analogy by vector arithmetic: father - man + woman.
vec_father = wv['father']
vec_man = wv['man']
vec_woman = wv['woman']

analogy_vector = vec_father - vec_man + vec_woman

# The query vector is passed inside a list, which is the documented shape of
# `positive` (a list of keys and/or vectors).
# NOTE(review): with a raw vector as the query, gensim has no keys to exclude
# from the ranking, so the top hit may be 'father' itself.  The key-based form
# most_similar(positive=['father', 'woman'], negative=['man']) excludes the
# query words -- confirm which behaviour the exercise expects.
result = wv.most_similar(positive=[analogy_vector], topn=1)
print(result)