diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..09fa76c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,39 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Python virtual environments
+venv/
+.env
+.envrc
+
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+*.egg
+*.manifest
+*.spec
+
+# Logs
+*.log
+
+# IDE / Editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# OS-specific
+.DS_Store
+Thumbs.db
+
+# Chrome extension build outputs
+*.zip
+
+# Other temporary files
+*.tmp
diff --git a/backend/server.py b/backend/server.py
new file mode 100644
index 0000000..7a16b3e
--- /dev/null
+++ b/backend/server.py
@@ -0,0 +1,142 @@
+"""FastAPI backend for PhonoCoach: transcribe speech and score phonemes."""
+
+import difflib
+import os
+import string
+import uuid
+
+import pronouncing
+import uvicorn
+import whisper
+from fastapi import FastAPI, File, Form, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+
+app = FastAPI()
+# The extension popup calls this API cross-origin, so any origin is allowed.
+# Credentials stay off: no cookies or auth headers are used here, and a
+# wildcard origin should not be combined with credentialed requests.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+UPLOAD_DIR = "uploads"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+
+# Loaded once at import time and shared by every request.
+model = whisper.load_model("small")
+
+# Stripped from both ends of a word before the dictionary lookup.
+_EDGE_PUNCTUATION = string.punctuation + "\u2018\u2019\u201c\u201d\u2026\u2013\u2014"
+
+# Status reported for expected phonemes, keyed by SequenceMatcher opcode.
+_OPCODE_STATUS = {"equal": "match", "replace": "mismatch", "delete": "missing"}
+
+
+def calculate_similarity(expected: str, actual: str) -> float:
+    """Return the SequenceMatcher ratio of two phoneme strings.
+
+    Both arguments are whitespace-separated phoneme sequences; they are
+    compared token by token, not character by character.
+    """
+    matcher = difflib.SequenceMatcher(None, expected.split(), actual.split())
+    return matcher.ratio()
+
+
+def text_to_phonemes(text: str) -> str:
+    """Convert text to a space-separated phoneme string.
+
+    Each word is looked up with ``pronouncing.phones_for_word`` and the first
+    pronunciation returned is used. Punctuation around a word is stripped
+    first, so a token such as "hello," is not reported as unknown. Words
+    with no pronunciation become "[UNK]".
+    """
+    phoneme_list = []
+    for token in text.lower().split():
+        word = token.strip(_EDGE_PUNCTUATION)
+        if not word:
+            # The token was punctuation only (for example a lone dash).
+            continue
+        phones = pronouncing.phones_for_word(word)
+        phoneme_list.append(phones[0] if phones else "[UNK]")
+    return " ".join(phoneme_list)
+
+
+def phoneme_diff(expected: str, actual: str):
+    """Label every phoneme as match, mismatch, missing or extra.
+
+    Args:
+        expected: Space-separated phonemes of the target text.
+        actual: Space-separated phonemes of the transcript.
+
+    Returns:
+        A list of ``(phoneme, status)`` tuples in alignment order. Expected
+        phonemes are labelled "match", "mismatch" or "missing"; phonemes
+        found only in ``actual`` are labelled "extra".
+    """
+    expected_list = expected.split()
+    actual_list = actual.split()
+    diff_result = []
+    matcher = difflib.SequenceMatcher(None, expected_list, actual_list)
+
+    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
+        if opcode == "insert":
+            diff_result.extend((p, "extra") for p in actual_list[j1:j2])
+        else:
+            diff_result.extend(
+                (p, _OPCODE_STATUS[opcode]) for p in expected_list[i1:i2]
+            )
+    return diff_result
+
+
+@app.post("/upload")
+async def upload_audio(
+    file: UploadFile = File(...),
+    expected_text: str = Form(...),
+):
+    """Transcribe an uploaded recording and score it against expected_text.
+
+    Returns a JSON object with the transcript, both phoneme strings, the
+    similarity score rounded to three decimals and the per-phoneme diff.
+    """
+    # Do not build the path from the client-supplied filename: it can carry
+    # path components such as "../", and concurrent uploads with the same
+    # name would overwrite each other. Only its extension is kept.
+    extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
+    file_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}{extension}")
+
+    try:
+        with open(file_path, "wb") as f:
+            f.write(await file.read())
+        # NOTE: transcribe() is a blocking call inside an async handler, so
+        # the event loop is busy until it returns.
+        result = model.transcribe(file_path)
+    finally:
+        # The recording is only needed for transcription; uniquely named
+        # uploads would otherwise accumulate on disk.
+        if os.path.exists(file_path):
+            os.remove(file_path)
+    transcript = result["text"]
+
+    expected_phonemes = text_to_phonemes(expected_text)
+    actual_phonemes = text_to_phonemes(transcript)
+
+    similarity = calculate_similarity(expected_phonemes, actual_phonemes)
+    differences = phoneme_diff(expected_phonemes, actual_phonemes)
+
+    return {
+        "message": "Audio processed successfully",
+        "transcript": transcript,
+        "expected_text": expected_text,
+        "expected_phonemes": expected_phonemes,
+        "actual_phonemes": actual_phonemes,
+        "similarity_score": round(similarity, 3),
+        "phoneme_diff": differences,
+    }
+
+
+if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/backend/uploads/recording.wav b/backend/uploads/recording.wav
new file mode 100644
index 0000000..fb3526d
Binary files /dev/null and b/backend/uploads/recording.wav differ
diff --git a/frontend/content.js b/frontend/content.js
new file mode 100644
index 0000000..bc87e6c
--- /dev/null
+++ b/frontend/content.js
@@ -0,0 +1,8 @@
+let selectedText = "";
+document.addEventListener("mouseup", () => {
+  const selection = window.getSelection().toString().trim();
+  if (selection) {
+    selectedText = selection;
+    chrome.storage.local.set({ selectedText });
+  }
+});
diff --git a/frontend/manifest.json b/frontend/manifest.json
new file mode 100644
index 0000000..391b867
--- /dev/null
+++ b/frontend/manifest.json
@@ -0,0 +1,17 @@
+{
+  "manifest_version": 3,
+  "name":
"PhonoCoach", + "version": "1.0", + "description": "Lets you practise pronunciation on any webpage you're on", + "permissions": ["storage", "activeTab", "scripting"], + "host_permissions": ["http://localhost:8000/*"], + "action": { + "default_popup": "popup.html" + }, + "content_scripts": [ + { + "matches": [""], + "js": ["content.js"] + } + ] +} diff --git a/frontend/popup.css b/frontend/popup.css new file mode 100644 index 0000000..7ba8b4f --- /dev/null +++ b/frontend/popup.css @@ -0,0 +1,120 @@ +body { + font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif; + margin: 10px; + width: 350px; + background: linear-gradient(135deg, #f9f9f9, #e0f7fa); + color: #333; +} + +h3 { + text-align: center; + color: #2c3e50; + margin-bottom: 12px; +} + +#displayText { + display: block; + width: 100%; + white-space: normal; + word-wrap: break-word; + border: 1px solid #ccc; + padding: 10px; + background-color: #ffffffcc; + min-height: 50px; + max-height: 150px; + overflow-y: auto; + margin-bottom: 10px; + font-size: 1.4em; + border-radius: 8px; + box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1); +} + +#recordBtn { + display: block; + width: 100%; + padding: 10px; + background: #03a9f4; + color: white; + border: none; + border-radius: 8px; + font-size: 1.2em; + cursor: pointer; + transition: background 0.3s ease; +} + +#recordBtn:hover { + background: #0288d1; +} + +#status { + text-align: center; + font-weight: 500; + margin-top: 8px; + color: #555; +} + +#similarityScore { + font-weight: bold; + color: #2a9d8f; + margin-top: 10px; + text-align: center; + font-size: 1.3em; +} + +#phonemeDiff { + margin-top: 10px; + display: flex; + flex-wrap: wrap; + gap: 8px; + max-height: 150px; + overflow-y: auto; +} + +.phoneme { + padding: 10px 16px; + border-radius: 8px; + font-size: 1.6em; /* bigger font size for phonemes */ + font-weight: 700; + transition: transform 0.2s; +} + +.phoneme:hover { + transform: scale(1.1); + cursor: pointer; +} + +#pronunciationTips { + margin-top: 10px; + 
font-style: italic; + color: #555; + text-align: center; +} + +#generalTips { + margin-top: 12px; + padding: 10px; + background: #f0f4f8; + border-radius: 8px; + box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1); + color: #333; +} + +#generalTips h4 { + margin-top: 0; + margin-bottom: 8px; + font-weight: 600; + color: #2c3e50; + font-size: 1.1em; +} + +#generalTips ul { + padding-left: 20px; + margin: 0; +} + +#generalTips ul li { + margin-bottom: 6px; + line-height: 1.4; + font-size: 1.1em; + list-style-type: disc; +} diff --git a/frontend/popup.html b/frontend/popup.html new file mode 100644 index 0000000..f56cd81 --- /dev/null +++ b/frontend/popup.html @@ -0,0 +1,26 @@ + + + + + + PhonoCoach + + + +

Selected Text

+
No selected text
+ + +

+ +
+

+ +
+ + +
+ + + + diff --git a/frontend/popup.js b/frontend/popup.js new file mode 100644 index 0000000..15bc4d2 --- /dev/null +++ b/frontend/popup.js @@ -0,0 +1,135 @@ +chrome.storage.local.get("selectedText", (data) => { + document.getElementById("displayText").textContent = + data.selectedText || "No text selected"; +}); + +let mediaRecorder; +let audioChunks = []; +let isRecording = false; + +const recordBtn = document.getElementById("recordBtn"); +const status = document.getElementById("status"); +const similarityScoreElem = document.getElementById("similarityScore"); + +async function requestMicPermission() { + try { + const permissionStatus = await navigator.permissions.query({ + name: "microphone", + }); + if (permissionStatus.state === "granted") { + return true; + } else if (permissionStatus.state === "prompt") { + await navigator.mediaDevices.getUserMedia({ audio: true }); + return true; + } else { + return false; + } + } catch (err) { + console.error("Permission API error:", err); + return false; + } +} + +recordBtn.addEventListener("click", async () => { + if (!isRecording) { + try { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + mediaRecorder = new MediaRecorder(stream); + audioChunks = []; + + mediaRecorder.ondataavailable = (event) => { + if (event.data.size > 0) { + audioChunks.push(event.data); + } + }; + + mediaRecorder.onstop = async () => { + recordBtn.disabled = true; + + const audioBlob = new Blob(audioChunks, { type: "audio/wav" }); + const formData = new FormData(); + formData.append("file", audioBlob, "recording.wav"); + + const expectedText = + document.getElementById("displayText").textContent || ""; + formData.append("expected_text", expectedText); + + status.textContent = "Uploading..."; + similarityScoreElem.textContent = ""; + + try { + const response = await fetch("http://127.0.0.1:8000/upload", { + method: "POST", + body: formData, + }); + + if (response.ok) { + const result = await response.json(); + + 
similarityScoreElem.textContent = `Pronunciation Accuracy: ${(result.similarity_score * 100).toFixed(1)}%`; + + const tipsElem = document.getElementById("pronunciationTips"); + const score = result.similarity_score; + + if (score > 0.9) { + tipsElem.textContent = + "Great job! Keep practicing to maintain your clear pronunciation."; + } else if (score > 0.7) { + tipsElem.textContent = + "Good effort! Try slowing down and emphasizing each word."; + } else { + tipsElem.textContent = + "Keep practicing! Focus on vowel sounds and word stress."; + } + + const legendElem = document.getElementById("colorLegend"); + legendElem.innerHTML = ` +

Color Coding:

+ +

Note: This extension leverages OpenAI’s Whisper ASR model for automatic speech recognition, enabling accurate transcription and pronunciation analysis.

`; + + const phonemeDiffElem = document.getElementById("phonemeDiff"); + phonemeDiffElem.innerHTML = ""; + result.phoneme_diff.forEach(([phoneme, status]) => { + const span = document.createElement("span"); + span.textContent = phoneme + " "; + if (status === "match") span.style.color = "green"; + else if (status === "mismatch") span.style.color = "red"; + else if (status === "missing") span.style.color = "orange"; + else if (status === "extra") span.style.color = "blue"; + phonemeDiffElem.appendChild(span); + }); + + status.textContent = "✅ Uploaded successfully!"; + } else { + status.textContent = "❌ Upload failed."; + } + } catch (err) { + console.error(err); + status.textContent = "⚠️ Error uploading."; + } finally { + recordBtn.disabled = false; + } + }; + + mediaRecorder.start(); + isRecording = true; + recordBtn.textContent = "⏹ Stop"; + status.textContent = "🎙 Recording..."; + similarityScoreElem.textContent = ""; + } catch (err) { + console.error(err); + status.textContent = "⚠️ Microphone access denied."; + } + } else { + mediaRecorder.stop(); + isRecording = false; + recordBtn.textContent = "🎙 Record"; + } +});