ローカルで自作Shazam(Python)

PythonでShazamのようなものを作ってみました。(Windows 11)
比較に使用したのは、以下の4曲です。
David Guetta & Sia – Beautiful People (Official Video)

Rihanna ft Sia – Say Yes (Lyrics)

Beautiful People (Workout Version 138 BPM)

ROSE – number one girl (official music video)


import librosa
import numpy as np
import os
import sklearn.metrics.pairwise

# --- Settings ---
# Folder that is listed for audio files to compare against the query.
TARGET_DIR = r"C:\shared\shazamize\wav"
# File name of the query track; it is looked up inside TARGET_DIR.
QUERY_FILE = "youtube_beautiful_people_official.wav"
# Full path of the query track, built from the two values above.
QUERY_PATH = os.path.join(TARGET_DIR, QUERY_FILE)

def get_super_chroma(path, duration=30, sr=22050, hop_length=256):
    """Extract a CENS chromagram from the start of an audio file.

    Args:
        path: Path of the audio file to analyze.
        duration: Number of seconds to load from the start of the file.
        sr: Sample rate requested from ``librosa.load``.
        hop_length: Hop size in samples between chroma frames; a smaller
            value gives a finer time resolution.

    Returns:
        The array returned by ``librosa.feature.chroma_cens``, or ``None``
        when loading or feature extraction fails for any reason.
    """
    try:
        y, sr = librosa.load(path, duration=duration, sr=sr, mono=True)
        # CENS features, computed with a small hop for finer time granularity.
        return librosa.feature.chroma_cens(y=y, sr=sr, hop_length=hop_length)
    except Exception as exc:
        # Best-effort on purpose: one unreadable file must not abort a whole
        # scan. Report the cause, though, so that a decoding problem is not
        # mistaken for a wrong path.
        print(f"解析に失敗しました: {path} ({exc!r})")
        return None

def calculate_precision_dtw(chroma_query, chroma_target, decay=2.0):
    """Score how similar two chroma sequences are using DTW.

    A pairwise cosine-distance matrix is built between the columns of the two
    inputs (each input is transposed before being handed to
    ``cosine_distances``), DTW is run on that matrix, and the accumulated cost
    at the end of the alignment is divided by the warping-path length. The
    resulting average cost is mapped to a score with ``exp(-decay * cost)``.

    Args:
        chroma_query: Feature matrix of the query track.
        chroma_target: Feature matrix of the candidate track.
        decay: Positive coefficient of the exponential mapping. The default
            of 2.0 is the value the author tuned empirically so that
            recordings of the same song score close to 0.9.

    Returns:
        ``exp(-decay * average_cost)``; an average cost of zero gives 1.0 and
        larger costs give smaller scores.
    """
    # Cost matrix: cosine distance between every query/target frame pair.
    dist_matrix = sklearn.metrics.pairwise.cosine_distances(chroma_query.T, chroma_target.T)
    # Run DTW on the precomputed cost matrix.
    D, wp = librosa.sequence.dtw(C=dist_matrix)
    # Normalize by path length so the value is an average cost per step.
    avg_dist = D[-1, -1] / len(wp)

    return np.exp(-decay * avg_dist)

# --- Main ---
# Analyze the query track, score every other audio file in TARGET_DIR against
# it, and print the ten best matches with a verdict per file.
print(f"検索元を解析中: {QUERY_FILE}")
q_chroma = get_super_chroma(QUERY_PATH)

if q_chroma is None:
    print("検索元の解析に失敗しました。パスを確認してください。")
else:
    results = []
    # Collect the audio files in the folder (matched by extension,
    # case-insensitively).
    all_files = [
        f for f in os.listdir(TARGET_DIR)
        if f.lower().endswith(('.wav', '.mp3', '.flac', '.m4a'))
    ]

    print(f"全 {len(all_files)} ファイルをスキャン中...")

    for filename in all_files:
        # Never compare the query file with itself.
        if filename == QUERY_FILE:
            continue

        path = os.path.join(TARGET_DIR, filename)
        t_chroma = get_super_chroma(path)

        # Files that could not be analyzed are left out of the ranking.
        if t_chroma is not None:
            score = calculate_precision_dtw(q_chroma, t_chroma)
            results.append((score, filename))

    # Highest score first.
    results.sort(key=lambda x: x[0], reverse=True)

    # Report
    print("\n=== 超精密判定ランキング (Top 10) ===")
    print(f"{'順位':<4} | {'スコア':<8} | {'判定':<12} | {'ファイル名'}")
    print("-" * 80)

    # Thresholds were set by the author from measured scores, aiming for the
    # lowest same-arrangement score to sit in the high 0.8s to around 0.9.
    for rank, (score, name) in enumerate(results[:10], 1):
        if score > 0.82:
            status = "★同一アレンジ"
        elif score > 0.75:
            status = "△酷似/Edit版"
        else:
            status = "×別曲"

        print(f"{rank:>4} | {score:.4f} | {status:<12} | {name}")

結果は、以下のようになりました。


検索元を解析中: youtube_beautiful_people_official.wav
全 5 ファイルをスキャン中...

=== 超精密判定ランキング (Top 10) ===
順位 | スコア | 判定 | ファイル名
--------------------------------------------------------------------------------
1 | 0.8793 | ★同一アレンジ | youtube_beautiful_people_workout.wav
2 | 0.8714 | ★同一アレンジ | youtube_beautiful_people_official_2.wav
3 | 0.8424 | ★同一アレンジ | youtube_say_yes.wav
4 | 0.8154 | △酷似/Edit版 | youtube_number_one_girl.wav

Number One Girl 以外は同一曲のアレンジです。その3ファイルはいずれもスコアが 0.82 を超えて「★同一アレンジ」と判定され、別曲の Number One Girl だけが 0.82 を下回った(0.8154)ので、よい結果が出ました。