試行錯誤

テキスト精神年齢分析(試作)

import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np

# Download the NLTK "punkt" tokenizer data.
# NOTE(review): nothing else in this snippet calls NLTK, so this download
# looks unnecessary — confirm before removing.
nltk.download('punkt')

# Toy sample data: each text is paired with a hand-assigned "mental age".
texts = [
    "I love playing with my toys.",
    "I am studying for my exams.",
    "Retirement is just around the corner.",
    "I just got my first job.",
    "I'm planning my vacation.",
]
ages = [10, 20, 65, 25, 35]  # sample mental-age label for each text
y = np.array(ages)

# Split the raw texts BEFORE vectorizing so the TF-IDF vocabulary and IDF
# weights are learned from the training portion only. Fitting the vectorizer
# on every text first would leak held-out data into the features.
texts_train, texts_test, y_train, y_test = train_test_split(
    texts, y, test_size=0.4, random_state=42
)

# Fit TF-IDF on the training texts; only transform the held-out texts.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(texts_train)
X_test = vectorizer.transform(texts_test)

# LogisticRegression is a classifier, so every distinct age is treated as a
# class label: predictions are always one of the ages present in y_train.
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict the mental age for a new piece of text.
def predict_age(text):
    """Return the model's predicted age label for a single text.

    The text is vectorized with the module-level ``vectorizer`` and passed to
    the module-level ``model``; the first (only) prediction is returned.
    """
    features = vectorizer.transform([text])
    return model.predict(features)[0]

# Smoke test: predict the mental age for an unseen sentence and print it.
new_text = "I just started my first job."
predicted_age = predict_age(new_text)
print(f"The predicted mental age is: {predicted_age}")

!pip install nltk
!pip install sklearn

2025年3月11日

テキスト感情分析(サンプル)
Text
“If you remember the feelings you had with the person who gave you the guardian spirit, the guardian spirit that person gave you will be relocated. Some people knowingly have relationships with men from a distance, so if you switch partners one after another, you will notice that there is someone with ill intentions. If you shake a negative guardian spirit off of someone by instinct, it will leave along with the guardian spirit. If you focus your attention and write the word “written apology,” the childlike being will disappear.”
2025年3月8日

テキスト感情分析(試作2)

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import rcParams
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
from typing import List, Tuple

# Set matplotlib's default font to the DejaVu Sans sans-serif face
# (chosen to sidestep Japanese-font problems).
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['DejaVu Sans'],
})

# Download the VADER lexicon needed for sentiment analysis.
nltk.download('vader_lexicon')

# Lazily created VADER analyzer shared by every call to
# calculate_sentiment_scores().
_sentiment_analyzer = None


def calculate_sentiment_scores(text: str) -> dict:
    """Compute the VADER sentiment scores for a piece of text.

    The analyzer is created on first use and then reused. This function is
    applied once per CSV row, so building a new SentimentIntensityAnalyzer on
    every call would repeat its setup work for each row.

    Args:
        text (str): Text to analyze.

    Returns:
        dict: Sentiment scores (pos, neg, neu, compound).
    """
    global _sentiment_analyzer
    if _sentiment_analyzer is None:
        _sentiment_analyzer = SentimentIntensityAnalyzer()
    return _sentiment_analyzer.polarity_scores(text)

def create_radar_chart(labels: List[str], values: List[float], title: str):
    """Draw and display a radar (polar) chart.

    One axis is placed per label at evenly spaced angles, and the values are
    drawn as a filled, outlined polygon. The caller's ``values`` sequence is
    left unmodified.

    Args:
        labels (List[str]): Axis labels, one per value.
        values (List[float]): Values to plot, in the same order as ``labels``.
        title (str): Chart title.
    """
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()

    # Repeat the first point at the end so the polygon closes. New lists are
    # built instead of using ``+=`` so the caller's ``values`` list is not
    # mutated (it would otherwise grow by one element on every call).
    closed_angles = angles + angles[:1]
    closed_values = list(values) + list(values[:1])

    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
    ax.fill(closed_angles, closed_values, color='blue', alpha=0.25)
    ax.plot(closed_angles, closed_values, color='blue', linewidth=2)

    # Hide the radial tick labels; put one angular tick per label.
    ax.set_yticklabels([])
    ax.set_xticks(angles)
    ax.set_xticklabels(labels)

    ax.set_title(title, size=20, color='blue', y=1.1)
    plt.show()

def analyze_sentiment_from_csv(file_path: str) -> None:
    """Score the rows of a CSV file and show the mean scores as a radar chart.

    Reads the CSV, scores each value of its ``Text`` column with
    calculate_sentiment_scores, averages each score, and hands the averages
    to create_radar_chart. If the file does not exist, an error message is
    printed and the function returns without plotting.

    Args:
        file_path (str): Path to the CSV file.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"エラー：ファイル '{file_path}' が見つかりません。")
        return

    # One score dict per row, expanded into one column per score key.
    frame['Scores'] = frame['Text'].apply(calculate_sentiment_scores)
    score_columns = frame['Scores'].apply(pd.Series)
    frame = pd.concat([frame.drop(columns=['Scores']), score_columns], axis=1)

    # Score column -> chart axis label, in plotting order.
    axis_names = {
        'pos': 'Positive',
        'neg': 'Negative',
        'neu': 'Neutral',
        'compound': 'Compound',
    }
    labels = list(axis_names.values())
    values = [frame[column].mean() for column in axis_names]
    create_radar_chart(labels, values, 'Emotion Analysis Radar Chart')

# Script entry point: analyze the sample CSV only when this code is run
# directly, not when it is imported.
if __name__ == "__main__":
    analyze_sentiment_from_csv('Untitled1.csv')

ライブラリのインポート

pandas: データ分析ライブラリ
matplotlib.pyplot: グラフ描画ライブラリ
rcParams: matplotlibの設定をカスタマイズするためのモジュール
SentimentIntensityAnalyzer: NLTKの感情分析ツール
Counter: 要素の出現回数をカウントするツール（このコードではインポートも使用もされていません）

CSVファイル名を入力する場所は、コードの最後の行にあるanalyze_sentiment_from_csv()関数の引数です。

Python

# 実行例
analyze_sentiment_from_csv('Untitled1.csv')

この例では、'Untitled1.csv'がCSVファイル名です。この部分を、分析したいCSVファイルの実際のファイル名に置き換えてください。

例:

data.csvというファイルを使用する場合:

analyze_sentiment_from_csv('data.csv')

my_data.csvというファイルを使用する場合:

analyze_sentiment_from_csv('my_data.csv')

⚠️英語Text用

2025年3月8日

テキスト感情分析(試作1)

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk

# Set matplotlib's default font to the DejaVu Sans sans-serif face
# (chosen to sidestep Japanese-font problems).
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['DejaVu Sans'],
})

# Download the VADER lexicon needed for sentiment analysis.
nltk.download('vader_lexicon')

def analyze_sentiment(file_path):
    """Run VADER sentiment analysis on a CSV and report the mean scores.

    Reads the CSV, scores each value of its ``Text`` column, averages the
    pos / neg / neu / compound scores, shows them as a bar chart with a
    legend-style description, and prints a text summary. The "Fear" category
    is a placeholder that is always reported as 0. If the file does not
    exist, an error message is printed and the function returns early.

    Args:
        file_path (str): Path to the CSV file.
    """
    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"エラー：指定されたファイルが見つかりませんでした。ファイルパス: {file_path}")
        return

    # Score every row with a single VADER analyzer instance.
    analyzer = SentimentIntensityAnalyzer()
    frame['scores'] = frame['Text'].apply(lambda text: analyzer.polarity_scores(text))

    # Expand the per-row score dicts into one column per score key.
    score_columns = frame['scores'].apply(pd.Series)
    frame = pd.concat([frame.drop(['scores'], axis=1), score_columns], axis=1)

    # Average each score across all rows.
    pos_mean, neg_mean, neu_mean, compound_mean = (
        frame[column].mean() for column in ('pos', 'neg', 'neu', 'compound')
    )

    # Bar chart: one bar per category; Fear is fixed at zero.
    labels = ['Positive', 'Negative', 'Neutral', 'Compound', 'Fear']
    values = [pos_mean, neg_mean, neu_mean, compound_mean, 0]
    colors = ['gold', 'red', 'grey', 'blue', 'purple']

    _, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(labels, values, color=colors)

    # Write each value just above the top edge of its bar.
    for bar, value in zip(bars, values):
        x_center = bar.get_x() + bar.get_width() / 2
        y_text = bar.get_height() + 0.01
        ax.text(x_center, y_text, f'{value:.2f}', ha='center', fontsize=12)

    # Title and axis labels.
    ax.set_title('Emotion Analysis Results', fontsize=15)
    ax.set_xlabel('Emotion Category', fontsize=12)
    ax.set_ylabel('Score', fontsize=12)

    # Explanatory text placed below the axes.
    description_text = "\n".join([
        f"Positive (gold): Optimistic or positive emotion - {pos_mean:.2f}",
        f"Negative (red): Pessimistic or negative emotion - {neg_mean:.2f}",
        f"Neutral (grey): Neither particularly positive nor negative emotion - {neu_mean:.2f}",
        f"Compound (blue): Overall balance of emotions - {compound_mean:.2f}",
        "Fear (purple): Fear or anxiety - 0.00",
    ])
    plt.figtext(0.5, -0.3, description_text, ha='center', fontsize=12, wrap=True)
    plt.show()

    # Print a text summary of the averaged scores.
    summary = f"""
    感情分析結果の概要:
    ポジティブ: {pos_mean:.2f}
    ネガティブ: {neg_mean:.2f}
    中立: {neu_mean:.2f}
    複合: {compound_mean:.2f}
    恐怖: 0.00
    """
    print(summary)

# Usage example: this call sits at module level, so it runs whenever this
# code is executed.
analyze_sentiment('Untitled4.csv')