import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
# Download the NLTK data this script requests.
# NOTE(review): nothing in the visible code uses the 'punkt' tokenizer
# (TfidfVectorizer does its own tokenization) -- confirm whether this
# download is still needed.
nltk.download('punkt')

# Sample data: each sentence is paired with the age at the same index.
texts = [
    "I love playing with my toys.",
    "I am studying for my exams.",
    "Retirement is just around the corner.",
    "I just got my first job.",
    "I'm planning my vacation.",
]
ages = [10, 20, 65, 25, 35]  # sample mental ages
y = np.array(ages)

# Split the raw sentences BEFORE vectorizing, so the TF-IDF vocabulary and
# IDF weights are learned from the training sentences only. Fitting the
# vectorizer on every sentence first would leak information from the
# held-out sentences into the training features.
texts_train, texts_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.4, random_state=42
)

# Vectorize: fit on the training sentences, then reuse the fitted vocabulary.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(texts_train)
X_test = vectorizer.transform(texts_test)
# Feature matrix for all samples, kept for code that still expects `X`.
X = vectorizer.transform(texts)

# Train the logistic regression model. LogisticRegression is a classifier,
# so each distinct age in y_train is treated as a separate class label and
# predictions are always one of those ages.
model = LogisticRegression()
model.fit(X_train, y_train)
# Predict the mental age for a new piece of text.
def predict_age(text):
    """Return the age label the trained model predicts for ``text``.

    The text is vectorized with the module-level ``vectorizer`` and then
    classified with the module-level ``model``.

    Args:
        text: The single piece of text to classify.

    Returns:
        The first (and only) element of ``model.predict`` for this
        one-sample input.
    """
    # transform() is given a one-element list so the single text is
    # handled as one document.
    text_vec = vectorizer.transform([text])
    predicted_age = model.predict(text_vec)
    return predicted_age[0]
# Test: predict the mental age for a new sentence and print the result.
new_text = "I just started my first job."
predicted_age = predict_age(new_text)
print(f"The predicted mental age is: {predicted_age}")
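# Install the dependencies before running this script (in a notebook, prefix
# each command with "!"). The PyPI package is named "scikit-learn"; "sklearn"
# is a deprecated alias.
#   pip install nltk
#   pip install scikit-learn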
# "Leave a comment" (leftover web-page text, not part of the script)