Initial commit of PhonoCoach project

This commit is contained in:
2025-08-16 03:37:24 +05:30
parent f14485a833
commit 5326b855dd
8 changed files with 436 additions and 0 deletions
+39
View File
@@ -0,0 +1,39 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# Python virtual environments
venv/
.env
.envrc
# Distribution / packaging
build/
dist/
*.egg-info/
*.egg
*.manifest
*.spec
# Logs
*.log
# IDE / Editor files
.vscode/
.idea/
*.swp
*.swo
# Jupyter Notebook
.ipynb_checkpoints
# OS-specific
.DS_Store
Thumbs.db
# Chrome extension build outputs
*.zip
# Other temporary files
*.tmp
+91
View File
@@ -0,0 +1,91 @@
from fastapi import FastAPI, File, UploadFile, Form
import uvicorn
import os
from fastapi.middleware.cors import CORSMiddleware
import whisper
import difflib
import pronouncing
app = FastAPI()

# Every origin is allowed so the browser-extension popup can call this API.
# Credentials are disabled: a wildcard origin must not be combined with
# credentialed requests, and the popup's fetch() sends no cookies or
# Authorization header.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directory where uploaded recordings are written before transcription.
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# Loaded once at import time and reused by every request.
model = whisper.load_model("small")
def calculate_similarity(expected: str, actual: str) -> float:
    """Return the similarity ratio of two whitespace-tokenised strings.

    Both strings are split on whitespace and the resulting token lists are
    compared with ``difflib.SequenceMatcher``; the result is its ``ratio()``,
    a float in ``[0.0, 1.0]``.
    """
    expected_tokens = expected.split()
    actual_tokens = actual.split()
    return difflib.SequenceMatcher(a=expected_tokens, b=actual_tokens).ratio()
def text_to_phonemes(text: str) -> str:
    """Convert *text* into a space-separated phoneme string.

    The text is lower-cased and split on whitespace. Each token has leading
    and trailing punctuation stripped before it is looked up with
    ``pronouncing.phones_for_word``; without this, tokens such as ``"hello,"``
    or ``"world."`` never match and are reported as unknown. Apostrophes and
    hyphens inside a word are kept.

    Args:
        text: Free-form text, e.g. a transcript or the user's selection.

    Returns:
        The first pronunciation returned for each word, joined by single
        spaces. Words with no pronunciation contribute the token ``"[UNK]"``.
        Tokens that consist only of punctuation are skipped.
    """
    import string  # local import: ``string`` is not imported at file level

    # ASCII punctuation plus the typographic quotes/dashes common in web text.
    strip_chars = string.punctuation + "“”‘’…–—"

    phoneme_list = []
    for raw_word in text.lower().split():
        word = raw_word.strip(strip_chars)
        if not word:
            # Nothing pronounceable is left (e.g. a lone "-" or "...").
            continue
        phones = pronouncing.phones_for_word(word)
        # Mark words without a known pronunciation instead of dropping them.
        phoneme_list.append(phones[0] if phones else "[UNK]")
    return " ".join(phoneme_list)
def phoneme_diff(expected: str, actual: str):
    """Label each phoneme by comparing *expected* against *actual*.

    Both strings are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. The result is a list of
    ``(phoneme, label)`` tuples in alignment order, where the label is:

    * ``"match"``    - the expected phoneme is present in both,
    * ``"mismatch"`` - the expected phoneme was replaced by something else,
    * ``"missing"``  - the expected phoneme is absent from *actual*,
    * ``"extra"``    - the phoneme appears only in *actual*.

    For replaced spans only the expected phonemes are reported.
    """
    expected_tokens = expected.split()
    actual_tokens = actual.split()

    # Opcodes whose reported phonemes come from the expected side.
    expected_side_labels = {
        "equal": "match",
        "replace": "mismatch",
        "delete": "missing",
    }

    labelled = []
    opcodes = difflib.SequenceMatcher(
        None, expected_tokens, actual_tokens
    ).get_opcodes()
    for tag, exp_lo, exp_hi, act_lo, act_hi in opcodes:
        if tag == "insert":
            labelled += [(tok, "extra") for tok in actual_tokens[act_lo:act_hi]]
        elif tag in expected_side_labels:
            label = expected_side_labels[tag]
            labelled += [(tok, label) for tok in expected_tokens[exp_lo:exp_hi]]
    return labelled
@app.post("/upload")
async def upload_audio(
    file: UploadFile = File(...),
    expected_text: str = Form(...),
):
    """Transcribe an uploaded recording and score it against *expected_text*.

    The upload is written to a uniquely named temporary file inside
    ``UPLOAD_DIR``, transcribed with the module-level Whisper ``model``, and
    removed again. Expected text and transcript are both converted to
    phonemes and compared.

    Args:
        file: The uploaded audio recording.
        expected_text: The text the speaker was supposed to read.

    Returns:
        A dict with the transcript, both phoneme strings, the similarity
        score rounded to three decimals, and the per-phoneme diff.
    """
    import tempfile  # local import: ``tempfile`` is not imported at file level

    # Only the extension of the client-supplied name is kept. Joining the raw
    # filename into a path would let a request write outside UPLOAD_DIR, and
    # a fixed name would let concurrent uploads overwrite each other.
    suffix = os.path.splitext(os.path.basename(file.filename or ""))[1]
    tmp = tempfile.NamedTemporaryFile(
        dir=UPLOAD_DIR, suffix=suffix, delete=False
    )
    try:
        with tmp:
            tmp.write(await file.read())
        # The file is closed before transcription so it can be reopened by path.
        result = model.transcribe(tmp.name)
    finally:
        # The recording is only needed for transcription; do not let it pile up.
        os.remove(tmp.name)

    transcript = result["text"]
    expected_phonemes = text_to_phonemes(expected_text)
    actual_phonemes = text_to_phonemes(transcript)
    similarity = calculate_similarity(expected_phonemes, actual_phonemes)
    differences = phoneme_diff(expected_phonemes, actual_phonemes)

    return {
        "message": "Audio processed successfully",
        "transcript": transcript,
        "expected_text": expected_text,
        "expected_phonemes": expected_phonemes,
        "actual_phonemes": actual_phonemes,
        "similarity_score": round(similarity, 3),
        "phoneme_diff": differences,
    }
if __name__ == "__main__":
    # Serve the API directly when this file is run as a script.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
Binary file not shown.
+8
View File
@@ -0,0 +1,8 @@
// Most recent non-empty selection made on this page.
let selectedText = "";

// After every mouse release, remember the current selection (if any) and
// store it under the "selectedText" key in extension storage.
document.addEventListener("mouseup", function () {
  const picked = window.getSelection().toString().trim();
  if (!picked) {
    return; // Nothing selected: keep the previously stored text.
  }
  selectedText = picked;
  chrome.storage.local.set({ selectedText: selectedText });
});
+17
View File
@@ -0,0 +1,17 @@
{
  "manifest_version": 3,
  "name": "PhonoCoach",
  "version": "1.0",
  "description": "Lets you practise pronunciation on any webpage you're on",
  "permissions": ["storage", "activeTab", "scripting"],
  "host_permissions": ["http://localhost:8000/*", "http://127.0.0.1:8000/*"],
  "action": {
    "default_popup": "popup.html"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js"]
    }
  ]
}
+120
View File
@@ -0,0 +1,120 @@
/* Popup page: fixed 350px-wide body with a light gradient background. */
body {
font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
margin: 10px;
width: 350px;
background: linear-gradient(135deg, #f9f9f9, #e0f7fa);
color: #333;
}
/* Centered heading style for every h3 in the popup. */
h3 {
text-align: center;
color: #2c3e50;
margin-bottom: 12px;
}
/* Box that shows the text to practise; scrolls once it exceeds 150px. */
#displayText {
  display: block;
  /* Count padding and border inside the 100% width; otherwise the box is
     wider than the fixed-width body and the popup scrolls horizontally. */
  box-sizing: border-box;
  width: 100%;
  white-space: normal;
  overflow-wrap: break-word;
  word-wrap: break-word; /* legacy alias of overflow-wrap */
  border: 1px solid #ccc;
  padding: 10px;
  background-color: #ffffffcc;
  min-height: 50px;
  max-height: 150px;
  overflow-y: auto;
  margin-bottom: 10px;
  font-size: 1.4em;
  border-radius: 8px;
  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
}
/* Full-width record/stop button. */
#recordBtn {
display: block;
width: 100%;
padding: 10px;
background: #03a9f4;
color: white;
border: none;
border-radius: 8px;
font-size: 1.2em;
cursor: pointer;
transition: background 0.3s ease;
}
/* Darker blue while hovered; the transition above animates the change. */
#recordBtn:hover {
background: #0288d1;
}
/* Centered status line under the record button. */
#status {
text-align: center;
font-weight: 500;
margin-top: 8px;
color: #555;
}
/* Centered, bold accuracy line. */
#similarityScore {
font-weight: bold;
color: #2a9d8f;
margin-top: 10px;
text-align: center;
font-size: 1.3em;
}
/* Wrapping flex container for the phoneme diff; scrolls past 150px. */
#phonemeDiff {
margin-top: 10px;
display: flex;
flex-wrap: wrap;
gap: 8px;
max-height: 150px;
overflow-y: auto;
}
/* Chip styling for elements carrying the "phoneme" class. */
.phoneme {
padding: 10px 16px;
border-radius: 8px;
font-size: 1.6em; /* bigger font size for phonemes */
font-weight: 700;
transition: transform 0.2s;
}
/* Slight zoom on hover. */
.phoneme:hover {
transform: scale(1.1);
cursor: pointer;
}
/* Centered, italic feedback sentence. */
#pronunciationTips {
margin-top: 10px;
font-style: italic;
color: #555;
text-align: center;
}
/* Tips / legend panel. popup.html provides a #colorLegend container and
   popup.js fills it with an <h4> and a <ul>, so #colorLegend shares these
   rules alongside the original #generalTips id. */
#generalTips,
#colorLegend {
  margin-top: 12px;
  padding: 10px;
  background: #f0f4f8;
  border-radius: 8px;
  box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1);
  color: #333;
}

/* Do not draw an empty padded box before the legend has been filled in. */
#colorLegend:empty {
  display: none;
}

#generalTips h4,
#colorLegend h4 {
  margin-top: 0;
  margin-bottom: 8px;
  font-weight: 600;
  color: #2c3e50;
  font-size: 1.1em;
}

#generalTips ul,
#colorLegend ul {
  padding-left: 20px;
  margin: 0;
}

#generalTips ul li,
#colorLegend ul li {
  margin-bottom: 6px;
  line-height: 1.4;
  font-size: 1.1em;
  list-style-type: disc;
}
+26
View File
@@ -0,0 +1,26 @@
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>PhonoCoach</title>
    <link rel="stylesheet" href="popup.css" />
  </head>
  <body>
    <h3>Selected Text</h3>
    <!-- Same placeholder wording that popup.js uses when nothing is stored. -->
    <div id="displayText">No text selected</div>
    <button id="recordBtn" type="button">🎙 Record</button>
    <!-- Live region so status changes are announced by assistive technology. -->
    <p id="status" role="status" aria-live="polite"></p>
    <div id="similarityScore"></div>
    <p id="pronunciationTips"></p>
    <div id="phonemeDiff"></div>
    <!-- Color legend (shown after accuracy) -->
    <div id="colorLegend"></div>
    <script src="popup.js"></script>
  </body>
</html>
+135
View File
@@ -0,0 +1,135 @@
// On popup load, show the text stored under "selectedText", or a placeholder
// when nothing has been stored.
chrome.storage.local.get("selectedText", function (data) {
  const displayElem = document.getElementById("displayText");
  const stored = data.selectedText;
  displayElem.textContent = stored ? stored : "No text selected";
});
// Recording state shared by the record-button handler.
// Active MediaRecorder instance; assigned when a recording starts.
let mediaRecorder;
// Data chunks collected from the recorder's dataavailable events.
let audioChunks = [];
// True between starting and stopping a recording.
let isRecording = false;
// Elements that are updated repeatedly while recording and uploading.
const recordBtn = document.getElementById("recordBtn");
const status = document.getElementById("status");
const similarityScoreElem = document.getElementById("similarityScore");
/**
 * Check whether microphone access is available, prompting if necessary.
 *
 * Queries the Permissions API for "microphone". When the state is "prompt",
 * a short-lived audio stream is requested to trigger the browser prompt and
 * is released again immediately so the microphone is not left captured.
 *
 * @returns {Promise<boolean>} true when access is (or was just) granted,
 *   false when it is denied or the query/prompt throws.
 */
async function requestMicPermission() {
  try {
    const permissionStatus = await navigator.permissions.query({
      name: "microphone",
    });
    if (permissionStatus.state === "granted") {
      return true;
    }
    if (permissionStatus.state !== "prompt") {
      return false;
    }
    // The stream is opened only to trigger the prompt; stop its tracks so
    // the microphone does not stay in use afterwards.
    const probeStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
    });
    probeStream.getTracks().forEach((track) => track.stop());
    return true;
  } catch (err) {
    console.error("Permission API error:", err);
    return false;
  }
}
// Backend endpoint that transcribes and scores a recording.
const UPLOAD_URL = "http://127.0.0.1:8000/upload";

// Text colour for each label found in the response's `phoneme_diff`.
const DIFF_COLORS = {
  match: "green",
  mismatch: "red",
  missing: "orange",
  extra: "blue",
};

// Resolve with the text stored under "selectedText", or "" when none is stored.
function loadExpectedText() {
  return new Promise((resolve) => {
    chrome.storage.local.get("selectedText", (data) => {
      resolve((data && data.selectedText) || "");
    });
  });
}

// Show the accuracy percentage and a feedback sentence chosen by score band.
function renderScore(score) {
  similarityScoreElem.textContent = `Pronunciation Accuracy: ${(score * 100).toFixed(1)}%`;
  const tipsElem = document.getElementById("pronunciationTips");
  if (score > 0.9) {
    tipsElem.textContent =
      "Great job! Keep practicing to maintain your clear pronunciation.";
  } else if (score > 0.7) {
    tipsElem.textContent =
      "Good effort! Try slowing down and emphasizing each word.";
  } else {
    tipsElem.textContent =
      "Keep practicing! Focus on vowel sounds and word stress.";
  }
}

// Fill the legend container with the static colour key and attribution note.
function renderLegend() {
  const legendElem = document.getElementById("colorLegend");
  // Static markup only: no response data is interpolated into this HTML.
  legendElem.innerHTML = `
<h4>Color Coding:</h4>
<ul>
<li style="color: green;">Green: Correct phoneme</li>
<li style="color: red;">Red: Incorrect phoneme</li>
<li style="color: orange;">Orange: Missing phoneme</li>
<li style="color: blue;">Blue: Extra phoneme</li>
<li style="color: gray;">[UNK]: Unknown/Unrecognized phoneme</li>
</ul>
<h3>Note: This extension leverages OpenAI's Whisper ASR model for automatic speech recognition, enabling accurate transcription and pronunciation analysis.</h3>`;
}

// Render one coloured <span> per [phoneme, label] pair.
function renderPhonemeDiff(diff) {
  const phonemeDiffElem = document.getElementById("phonemeDiff");
  phonemeDiffElem.innerHTML = "";
  // A response without `phoneme_diff` renders as an empty list.
  (diff || []).forEach(([phoneme, label]) => {
    const span = document.createElement("span");
    span.className = "phoneme"; // picks up the .phoneme rules in popup.css
    span.textContent = phoneme + " ";
    if (DIFF_COLORS[label]) {
      span.style.color = DIFF_COLORS[label];
    }
    phonemeDiffElem.appendChild(span);
  });
}

// POST the recording with the expected text, then render the result or an
// error status. The record button is disabled for the duration.
async function uploadRecording(audioBlob, fileName, expectedText) {
  const formData = new FormData();
  formData.append("file", audioBlob, fileName);
  formData.append("expected_text", expectedText);

  recordBtn.disabled = true;
  status.textContent = "Uploading...";
  similarityScoreElem.textContent = "";
  try {
    const response = await fetch(UPLOAD_URL, {
      method: "POST",
      body: formData,
    });
    if (!response.ok) {
      status.textContent = "❌ Upload failed.";
      return;
    }
    const result = await response.json();
    renderScore(result.similarity_score);
    renderLegend();
    renderPhonemeDiff(result.phoneme_diff);
    status.textContent = "✅ Uploaded successfully!";
  } catch (err) {
    console.error(err);
    status.textContent = "⚠️ Error uploading.";
  } finally {
    recordBtn.disabled = false;
  }
}

// Record button: the first click starts a recording, the second click stops
// it, which triggers the upload from the recorder's onstop handler.
recordBtn.addEventListener("click", async () => {
  if (isRecording) {
    mediaRecorder.stop();
    isRecording = false;
    recordBtn.textContent = "🎙 Record";
    return;
  }

  // Read the stored selection rather than the display element, whose text is
  // a placeholder sentence when nothing has been selected.
  const expectedText = await loadExpectedText();
  if (!expectedText) {
    status.textContent = "⚠️ Select some text on the page first.";
    return;
  }

  try {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const recorder = new MediaRecorder(stream);
    mediaRecorder = recorder;
    audioChunks = [];

    recorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data);
      }
    };

    recorder.onstop = async () => {
      // Release the microphone as soon as the recording is finished.
      stream.getTracks().forEach((track) => track.stop());

      // Label the blob with the container the recorder actually produced
      // instead of claiming WAV.
      const mimeType = recorder.mimeType || "audio/webm";
      const extension = mimeType.split(";")[0].split("/")[1] || "webm";
      const audioBlob = new Blob(audioChunks, { type: mimeType });

      await uploadRecording(audioBlob, `recording.${extension}`, expectedText);
    };

    recorder.start();
    isRecording = true;
    recordBtn.textContent = "⏹ Stop";
    status.textContent = "🎙 Recording...";
    similarityScoreElem.textContent = "";
  } catch (err) {
    console.error(err);
    status.textContent = "⚠️ Microphone access denied.";
  }
});