-
# Total number of documents (search queries).
N = len(docs)


def tf(t, d):
    """Return the term frequency of ``t`` in document ``d``.

    This is simply ``d.count(t)``.
    NOTE(review): the exact meaning depends on the type of ``d``, which is
    not visible here -- for a ``str`` this counts non-overlapping substring
    occurrences, for a list of tokens it counts equal elements. Confirm
    against how ``docs`` is built.
    """
    return d.count(t)


def idf(t):
    """Return the inverse document frequency of ``t`` over ``docs``.

    Computed as ``log(N / (df + 1))`` where ``df`` is the number of documents
    for which ``t in doc`` is true. The ``+ 1`` keeps the denominator non-zero
    when no document contains ``t``; as a consequence the value is negative
    for a term that occurs in every document.

    Raises:
        ValueError: from ``log`` when ``docs`` is empty (``N == 0``).
    """
    # Each membership test is a bool, so summing them counts the matches.
    df = sum(t in doc for doc in docs)
    return log(N / (df + 1))


def tfidf(t, d):
    """Return the TF-IDF weight of term ``t`` in document ``d``."""
    return tf(t, d) * idf(t)


# Term-frequency table: one row per document, one column per vocabulary term.
result = [[tf(t, d) for t in vocab] for d in docs]
tf_ = pd.DataFrame(result, columns=vocab)

# IDF table: one row per vocabulary term.
idf_values = [idf(t) for t in vocab]
result = idf_values
idf_ = pd.DataFrame(result, index=vocab, columns=["IDF"])
# Bare expression: shows the table when it is the last statement of an
# interactive cell; it has no effect when run as a plain script.
idf_

# TF-IDF table. idf(t) rescans every document and does not depend on the
# current document, so the values computed above are reused instead of calling
# tfidf(t, d) for every (document, term) pair; the products are identical.
result = [
    [tf(t, d) * weight for t, weight in zip(vocab, idf_values)]
    for d in docs
]
tfidf_ = pd.DataFrame(result, columns=vocab)
# Bare expression, same interactive-display purpose as above.
tfidf_