Initial commit of PhoneCoach project

2026-05-16 19:35:26 +00:00 · 2025-08-16 03:37:24 +05:30
parent f14485a833
commit 5326b855dd
8 changed files with 436 additions and 0 deletions
@@ -0,0 +1,39 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Python virtual environments and local environment files
+venv/
+.venv/
+.env
+.envrc
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+*.egg
+*.manifest
+*.spec
+
+# Logs
+*.log
+
+# Runtime data: audio recordings written by the backend's /upload endpoint
+uploads/
+
+# IDE / Editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# OS-specific
+.DS_Store
+Thumbs.db
+
+# Chrome extension build outputs
+*.zip
+
+# Other temporary files
+*.tmp
@@ -0,0 +1,91 @@
+import difflib
+import os
+import string
+import uuid
+
+import pronouncing
+import uvicorn
+import whisper
+from fastapi import FastAPI, File, Form, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+UPLOAD_DIR = "uploads"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+
+model = whisper.load_model("small")
+
+def calculate_similarity(expected: str, actual: str) -> float:
+    seq = difflib.SequenceMatcher(None, expected.split(), actual.split())
+    return seq.ratio()
+
+
+def text_to_phonemes(text: str) -> str:
+    words = text.lower().split()
+    phoneme_list = []
+    for word in words:
+        phones = pronouncing.phones_for_word(word)
+        if phones:
+            phoneme_list.append(phones[0])
+        else:
+            phoneme_list.append("[UNK]")  # mark as unknown pronunciation
+    return " ".join(phoneme_list)
+
+
+def phoneme_diff(expected: str, actual: str):
+    expected_list = expected.split()
+    actual_list = actual.split()
+    diff_result = []
+    matcher = difflib.SequenceMatcher(None, expected_list, actual_list)
+
+    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+        if opcode == "equal":
+            diff_result.extend([(p, "match") for p in expected_list[i1:i2]])
+        elif opcode == "replace":
+            diff_result.extend([(p, "mismatch") for p in expected_list[i1:i2]])
+        elif opcode == "delete":
+            diff_result.extend([(p, "missing") for p in expected_list[i1:i2]])
+        elif opcode == "insert":
+            diff_result.extend([(p, "extra") for p in actual_list[j1:j2]])
+    return diff_result
+
+@app.post("/upload")
+async def upload_audio(
+    file: UploadFile = File(...),
+    expected_text: str = Form(...)
+):
+
+    file_path = os.path.join(UPLOAD_DIR, file.filename)
+    with open(file_path, "wb") as f:
+        f.write(await file.read())
+
+
+    result = model.transcribe(file_path)
+    transcript = result["text"]
+
+    expected_phonemes = text_to_phonemes(expected_text)
+    actual_phonemes = text_to_phonemes(transcript)
+
+
+    similarity = calculate_similarity(expected_phonemes, actual_phonemes)
+    differences = phoneme_diff(expected_phonemes, actual_phonemes)
+
+    return {
+        "message": "Audio processed successfully",
+        "transcript": transcript,
+        "expected_text": expected_text,
+        "expected_phonemes": expected_phonemes,
+        "actual_phonemes": actual_phonemes,
+        "similarity_score": round(similarity, 3),
+        "phoneme_diff": differences
+    }
+
+if __name__ == "__main__":  # start the server only when run as a script, not on import
+    uvicorn.run(app, host="0.0.0.0", port=8000)  # 0.0.0.0 listens on all interfaces; NOTE(review): 127.0.0.1 may suffice if only the local extension connects
@@ -0,0 +1,8 @@
+let selectedText = "";
+document.addEventListener("mouseup", () => {
+  const selection = window.getSelection().toString().trim();
+  if (selection) {
+    selectedText = selection;
+    chrome.storage.local.set({ selectedText });
+  }
+});
@@ -0,0 +1,17 @@
+{
+  "manifest_version": 3,
+  "name": "PhonoCoach",
+  "version": "1.0",
+  "description": "Lets you practise pronunciation on any webpage you're on",
+  "permissions": ["storage", "activeTab", "scripting"],
+  "host_permissions": ["http://localhost:8000/*", "http://127.0.0.1:8000/*"],
+  "action": {
+    "default_popup": "popup.html"
+  },
+  "content_scripts": [
+    {
+      "matches": ["<all_urls>"],
+      "js": ["content.js"]
+    }
+  ]
+}
@@ -0,0 +1,120 @@
+body { /* popup page: fixed 350px content width on a light gradient background */
+    font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
+    margin: 10px;
+    width: 350px;
+    background: linear-gradient(135deg, #f9f9f9, #e0f7fa);
+    color: #333;
+}
+
+h3 { /* centred heading style; applies to every h3 rendered in the popup */
+    text-align: center;
+    color: #2c3e50;
+    margin-bottom: 12px;
+}
+
+/* Box showing the selected text; scrolls vertically once it exceeds max-height. */
+#displayText {
+    display: block;
+    /* Keep padding and border inside the 100% width; with the default
+       content-box sizing the box would be 22px wider than the body. */
+    box-sizing: border-box;
+    width: 100%;
+    white-space: normal;
+    word-wrap: break-word;
+    border: 1px solid #ccc;
+    padding: 10px;
+    background-color: #ffffffcc;
+    min-height: 50px;
+    max-height: 150px;
+    overflow-y: auto;
+    margin-bottom: 10px;
+    font-size: 1.4em;
+    border-radius: 8px;
+    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
+}
+
+#recordBtn { /* full-width Record/Stop button */
+    display: block;
+    width: 100%;
+    padding: 10px;
+    background: #03a9f4;
+    color: white;
+    border: none;
+    border-radius: 8px;
+    font-size: 1.2em;
+    cursor: pointer;
+    transition: background 0.3s ease;
+}
+
+#recordBtn:hover { /* darker blue on hover, animated by the transition above */
+    background: #0288d1;
+}
+
+#status { /* centred status line (recording / uploading / result messages) */
+    text-align: center;
+    font-weight: 500;
+    margin-top: 8px;
+    color: #555;
+}
+
+#similarityScore { /* accuracy percentage readout */
+    font-weight: bold;
+    color: #2a9d8f;
+    margin-top: 10px;
+    text-align: center;
+    font-size: 1.3em;
+}
+
+#phonemeDiff { /* wrapping flex row of per-phoneme spans; scrolls past 150px */
+    margin-top: 10px;
+    display: flex;
+    flex-wrap: wrap;
+    gap: 8px;
+    max-height: 150px;
+    overflow-y: auto;
+}
+
+.phoneme { /* NOTE(review): popup.js in this commit creates spans without this class - confirm it is still needed */
+    padding: 10px 16px;
+    border-radius: 8px;
+    font-size: 1.6em; /* bigger font size for phonemes */
+    font-weight: 700;
+    transition: transform 0.2s;
+}
+
+.phoneme:hover { /* slight zoom on hover */
+    transform: scale(1.1);
+    cursor: pointer;
+}
+
+#pronunciationTips { /* italic feedback sentence chosen from the score */
+    margin-top: 10px;
+    font-style: italic;
+    color: #555;
+    text-align: center;
+}
+
+#generalTips { /* card-style tips panel; NOTE(review): popup.html in this commit has no #generalTips element (the legend uses #colorLegend) - confirm the selector */
+    margin-top: 12px;
+    padding: 10px;
+    background: #f0f4f8;
+    border-radius: 8px;
+    box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1);
+    color: #333;
+}
+
+#generalTips h4 { /* panel heading */
+    margin-top: 0;
+    margin-bottom: 8px;
+    font-weight: 600;
+    color: #2c3e50;
+    font-size: 1.1em;
+}
+
+#generalTips ul { /* list indented 20px from the panel edge */
+    padding-left: 20px;
+    margin: 0;
+}
+
+#generalTips ul li { /* individual entries with disc bullets */
+    margin-bottom: 6px;
+    line-height: 1.4;
+    font-size: 1.1em;
+    list-style-type: disc;
+}
@@ -0,0 +1,26 @@
+<!doctype html>
+<html lang="en">
+    <head>
+        <meta charset="UTF-8" />
+        <meta name="viewport" content="width=device-width, initial-scale=1" />
+        <title>PhonoCoach</title>
+        <link rel="stylesheet" href="popup.css" />
+    </head>
+    <body>
+        <h3>Selected Text</h3>
+        <!-- Placeholder wording matches the text popup.js shows when nothing is stored -->
+        <div id="displayText">No text selected</div>
+
+        <button id="recordBtn" type="button">🎙 Record</button>
+        <!-- Status line rewritten by popup.js; aria-live lets screen readers announce each change -->
+        <p id="status" aria-live="polite"></p>
+
+        <div id="similarityScore"></div>
+        <p id="pronunciationTips"></p>
+
+        <div id="phonemeDiff"></div>
+
+        <!-- Color legend (shown after accuracy) -->
+        <div id="colorLegend"></div>
+
+        <script src="popup.js"></script>
+    </body>
+</html>
@@ -0,0 +1,135 @@
+chrome.storage.local.get("selectedText", (data) => {
+  document.getElementById("displayText").textContent =
+    data.selectedText || "No text selected";
+});
+
+let mediaRecorder; // MediaRecorder for the current recording (assigned in the click handler)
+let audioChunks = []; // data chunks collected from the recorder's dataavailable events
+let isRecording = false; // true between a Record click and the following Stop click
+
+const recordBtn = document.getElementById("recordBtn"); // Record/Stop toggle button
+const status = document.getElementById("status"); // status message line
+const similarityScoreElem = document.getElementById("similarityScore"); // accuracy readout
+
+async function requestMicPermission() {
+  try {
+    const permissionStatus = await navigator.permissions.query({
+      name: "microphone",
+    });
+    if (permissionStatus.state === "granted") {
+      return true;
+    } else if (permissionStatus.state === "prompt") {
+      await navigator.mediaDevices.getUserMedia({ audio: true });
+      return true;
+    } else {
+      return false;
+    }
+  } catch (err) {
+    console.error("Permission API error:", err);
+    return false;
+  }
+}
+
+recordBtn.addEventListener("click", async () => {
+  if (!isRecording) {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      mediaRecorder = new MediaRecorder(stream);
+      audioChunks = [];
+
+      mediaRecorder.ondataavailable = (event) => {
+        if (event.data.size > 0) {
+          audioChunks.push(event.data);
+        }
+      };
+
+      mediaRecorder.onstop = async () => {
+        recordBtn.disabled = true;
+
+        const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
+        const formData = new FormData();
+        formData.append("file", audioBlob, "recording.wav");
+
+        const expectedText =
+          document.getElementById("displayText").textContent || "";
+        formData.append("expected_text", expectedText);
+
+        status.textContent = "Uploading...";
+        similarityScoreElem.textContent = "";
+
+        try {
+          const response = await fetch("http://127.0.0.1:8000/upload", {
+            method: "POST",
+            body: formData,
+          });
+
+          if (response.ok) {
+            const result = await response.json();
+
+            similarityScoreElem.textContent = `Pronunciation Accuracy: ${(result.similarity_score * 100).toFixed(1)}%`;
+
+            const tipsElem = document.getElementById("pronunciationTips");
+            const score = result.similarity_score;
+
+            if (score > 0.9) {
+              tipsElem.textContent =
+                "Great job! Keep practicing to maintain your clear pronunciation.";
+            } else if (score > 0.7) {
+              tipsElem.textContent =
+                "Good effort! Try slowing down and emphasizing each word.";
+            } else {
+              tipsElem.textContent =
+                "Keep practicing! Focus on vowel sounds and word stress.";
+            }
+
+            const legendElem = document.getElementById("colorLegend");
+            legendElem.innerHTML = `
+                            <h4>Color Coding:</h4>
+                            <ul>
+                                <li style="color: green;">Green: Correct phoneme</li>
+                                <li style="color: red;">Red: Incorrect phoneme</li>
+                                <li style="color: orange;">Orange: Missing phoneme</li>
+                                <li style="color: blue;">Blue: Extra phoneme</li>
+                                <li style="color: gray;">[UNK]: Unknown/Unrecognized phoneme</li>
+                            </ul>
+                                <h3>Note: This extension leverages OpenAI’s Whisper ASR model for automatic speech recognition, enabling accurate transcription and pronunciation analysis.</h3>`;
+
+            const phonemeDiffElem = document.getElementById("phonemeDiff");
+            phonemeDiffElem.innerHTML = "";
+            result.phoneme_diff.forEach(([phoneme, status]) => {
+              const span = document.createElement("span");
+              span.textContent = phoneme + " ";
+              if (status === "match") span.style.color = "green";
+              else if (status === "mismatch") span.style.color = "red";
+              else if (status === "missing") span.style.color = "orange";
+              else if (status === "extra") span.style.color = "blue";
+              phonemeDiffElem.appendChild(span);
+            });
+
+            status.textContent = "✅ Uploaded successfully!";
+          } else {
+            status.textContent = "❌ Upload failed.";
+          }
+        } catch (err) {
+          console.error(err);
+          status.textContent = "⚠️ Error uploading.";
+        } finally {
+          recordBtn.disabled = false;
+        }
+      };
+
+      mediaRecorder.start();
+      isRecording = true;
+      recordBtn.textContent = "⏹ Stop";
+      status.textContent = "🎙 Recording...";
+      similarityScoreElem.textContent = "";
+    } catch (err) {
+      console.error(err);
+      status.textContent = "⚠️ Microphone access denied.";
+    }
+  } else {
+    mediaRecorder.stop();
+    isRecording = false;
+    recordBtn.textContent = "🎙 Record";
+  }
+});