TP_WordEmbedding/wordvec.py

import gensim.downloader as api
# Load the pretrained "word2vec-google-news-300" vectors through gensim's
# downloader; every query below goes through `wv`.
wv = api.load('word2vec-google-news-300')

# Print a few words from the vocabulary (debug aid, disabled)
# for index, word in enumerate(wv.index_to_key):
#     if index == 10:
#         break
#     print(f"word #{index}/{len(wv.index_to_key)} is {word}")


def _print_most_similar(word, topn=5):
    """Print the `topn` nearest neighbours of `word`.

    If `word` is not a key of `wv`, print a short notice instead of querying.
    """
    # most_similar raises KeyError for a key missing from the vocabulary,
    # which would abort the script before the later queries run.
    if word not in wv:
        print(f"'{word}' is not in the model vocabulary")
        return
    print(wv.most_similar(positive=[word], topn=topn))


_print_most_similar('car')
# Same query with the French word for "car", against the same model.
_print_most_similar('voiture')

# Word-analogy experiment by vector arithmetic: father - man + woman.
vec_father = wv['father']
vec_man = wv['man']
vec_woman = wv['woman']

# The query vector is wrapped in a list so it is passed explicitly as one
# vector rather than relying on the single-value shorthand.
# NOTE(review): querying by a raw vector presumably does not filter the input
# words out of the results, unlike the key-based form
# most_similar(positive=[...], negative=[...]) - confirm which is intended.
result = wv.most_similar(positive=[vec_father - vec_man + vec_woman], topn=1)
print(result)
word2vec sim 2022-09-25 23:30:57 +02:00			`import gensim.downloader as api`
			`wv = api.load('word2vec-google-news-300')`

			`# Affichage de quelques mots du vocabulaire`
			`# for index, word in enumerate(wv.index_to_key):`
			`# if index == 10:`
			`# break`
			`# print(f"word #{index}/{len(wv.index_to_key)} is {word}")`

			`print(wv.most_similar(positive=['car'], topn=5))`
w2v article 2022-09-26 13:27:41 +02:00			`print(wv.most_similar(positive=['voiture'], topn=5))`

			`vec_father = wv['father']`
			`vec_man = wv['man']`
			`vec_woman = wv['woman']`

			`result = wv.most_similar(positive=(vec_father - vec_man + vec_woman), topn=1)`
			`print(result)`