mirror of
https://github.com/Manoj-HV30/PhonoCoach.git
synced 2026-05-16 19:35:26 +00:00
Initial commit of PhonoCoach project
This commit is contained in:
+39
@@ -0,0 +1,39 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# Python virtual environments
|
||||
venv/
|
||||
.env
|
||||
.envrc
|
||||
|
||||
# Distribution / packaging
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
*.egg
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
# IDE / Editor files
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# OS-specific
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Chrome extension build outputs
|
||||
*.zip
|
||||
|
||||
# Other temporary files
|
||||
*.tmp
|
||||
@@ -0,0 +1,91 @@
|
||||
from fastapi import FastAPI, File, UploadFile, Form
|
||||
import uvicorn
|
||||
import os
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
import whisper
|
||||
import difflib
|
||||
import pronouncing
|
||||
|
||||
# FastAPI application that serves the pronunciation-scoring endpoint.
app = FastAPI()

# The browser extension calls this API from an extension origin, so every
# origin, method and header is accepted. Credentials are switched off:
# FastAPI's CORS documentation states that allow_origins/allow_methods/
# allow_headers must not be ["*"] when allow_credentials is True, and the
# extension's upload request does not send cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directory (relative to the working directory) where uploaded audio is
# written before transcription. Created at import time if it is missing.
UPLOAD_DIR = "uploads"
os.makedirs(UPLOAD_DIR, exist_ok=True)

# The Whisper model named "small" is loaded once, at import time, and shared
# by every request handled by this process.
model = whisper.load_model("small")
|
||||
|
||||
def calculate_similarity(expected: str, actual: str) -> float:
    """Return the similarity of two whitespace-separated token strings.

    Both strings are split on whitespace and compared token by token with
    ``difflib.SequenceMatcher``. The result is ``SequenceMatcher.ratio()``:
    ``2 * matches / (len(expected_tokens) + len(actual_tokens))``, i.e. a
    float in ``[0.0, 1.0]`` (``1.0`` when both inputs are empty).

    Args:
        expected: Reference token string (e.g. space-separated phonemes).
        actual: Token string to score against the reference.

    Returns:
        The similarity ratio between the two token sequences.
    """
    # autojunk must be disabled: with the default heuristic, once the second
    # sequence has 200+ tokens every token that occurs in more than ~1% of
    # positions is ignored as "junk". Phonemes repeat constantly, so long
    # passages would otherwise score close to 0 even when nearly identical.
    matcher = difflib.SequenceMatcher(
        None, expected.split(), actual.split(), autojunk=False
    )
    return matcher.ratio()
|
||||
|
||||
|
||||
def text_to_phonemes(text: str) -> str:
    """Convert text into a space-separated string of dictionary phonemes.

    The text is lower-cased and split on whitespace. Each word is looked up
    with ``pronouncing.phones_for_word`` and the first pronunciation returned
    is used. Words with no entry are represented by the marker ``"[UNK]"``.

    Leading and trailing punctuation is removed from every word before the
    lookup, because transcripts and page selections contain tokens such as
    ``"hello,"`` or ``"world."`` that would otherwise never match a
    dictionary entry. Tokens that consist only of punctuation are skipped.

    Args:
        text: Free-form text (expected sentence or transcript).

    Returns:
        The pronunciations of all words joined by single spaces; an empty
        string when the text contains no words.
    """
    import string  # local import keeps this block self-contained

    # ASCII punctuation plus the typographic marks commonly found in web text.
    edge_punctuation = string.punctuation + "“”‘’«»…—–¿¡"

    phoneme_list = []
    for raw_word in text.lower().split():
        word = raw_word.strip(edge_punctuation)
        if not word:
            continue  # token was punctuation only (e.g. a lone dash)
        phones = pronouncing.phones_for_word(word)
        # mark as unknown pronunciation when the dictionary has no entry
        phoneme_list.append(phones[0] if phones else "[UNK]")
    return " ".join(phoneme_list)
|
||||
|
||||
|
||||
def phoneme_diff(expected: str, actual: str):
    """Label each phoneme as matching, mismatched, missing or extra.

    Both inputs are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Each alignment opcode is translated as:

    * ``equal``   -> expected phonemes labelled ``"match"``
    * ``replace`` -> expected phonemes labelled ``"mismatch"`` (the phonemes
      that replaced them in ``actual`` are not reported)
    * ``delete``  -> expected phonemes labelled ``"missing"``
    * ``insert``  -> actual phonemes labelled ``"extra"``

    Args:
        expected: Space-separated reference phonemes.
        actual: Space-separated phonemes derived from the transcript.

    Returns:
        A list of ``(phoneme, label)`` tuples in alignment order.
    """
    expected_list = expected.split()
    actual_list = actual.split()

    # autojunk=False: the default heuristic discards frequently repeated
    # tokens once the second sequence reaches 200 items, which turns long
    # passages into a single large "replace" block.
    matcher = difflib.SequenceMatcher(
        None, expected_list, actual_list, autojunk=False
    )

    # Opcodes whose reported phonemes come from the expected side.
    expected_side_labels = {
        "equal": "match",
        "replace": "mismatch",
        "delete": "missing",
    }

    diff_result = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode in expected_side_labels:
            label = expected_side_labels[opcode]
            diff_result.extend((p, label) for p in expected_list[i1:i2])
        elif opcode == "insert":
            diff_result.extend((p, "extra") for p in actual_list[j1:j2])
    return diff_result
|
||||
|
||||
@app.post("/upload")
async def upload_audio(
    file: UploadFile = File(...),
    expected_text: str = Form(...),
):
    """Transcribe an uploaded recording and score it against the expected text.

    The upload is written to ``UPLOAD_DIR``, transcribed with the module-level
    Whisper ``model``, and both the transcript and ``expected_text`` are
    converted to phonemes. The response contains the transcript, both phoneme
    strings, a similarity score rounded to three decimals and a per-phoneme
    diff.

    Args:
        file: The recorded audio, sent as multipart form data.
        expected_text: The text the speaker was supposed to read.

    Returns:
        A JSON-serialisable dict with the keys ``message``, ``transcript``,
        ``expected_text``, ``expected_phonemes``, ``actual_phonemes``,
        ``similarity_score`` and ``phoneme_diff``.
    """
    import uuid  # local import keeps this block self-contained

    # The client-supplied filename is untrusted: joining it into the path
    # directly allows "../" traversal and lets concurrent uploads with the
    # same name overwrite each other. Only its extension is kept (and only
    # when it is plain alphanumeric); the name itself is server-generated.
    # `file.filename` may also be None.
    extension = os.path.splitext(os.path.basename(file.filename or ""))[1]
    if not extension[1:].isalnum():
        extension = ""
    file_path = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}{extension}")

    try:
        with open(file_path, "wb") as f:
            f.write(await file.read())

        # NOTE(review): transcribe() is a blocking call inside an async
        # handler, so other requests wait while it runs.
        result = model.transcribe(file_path)
    finally:
        # The recording is only needed for transcription; remove it so the
        # upload directory does not grow without bound.
        if os.path.exists(file_path):
            os.remove(file_path)

    transcript = result["text"]

    expected_phonemes = text_to_phonemes(expected_text)
    actual_phonemes = text_to_phonemes(transcript)

    similarity = calculate_similarity(expected_phonemes, actual_phonemes)
    differences = phoneme_diff(expected_phonemes, actual_phonemes)

    return {
        "message": "Audio processed successfully",
        "transcript": transcript,
        "expected_text": expected_text,
        "expected_phonemes": expected_phonemes,
        "actual_phonemes": actual_phonemes,
        "similarity_score": round(similarity, 3),
        "phoneme_diff": differences,
    }
|
||||
|
||||
if __name__ == "__main__":
    # Running this file directly starts the API with uvicorn on port 8000,
    # listening on all network interfaces.
    uvicorn.run(app, port=8000, host="0.0.0.0")
|
||||
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
// Most recent non-empty text selection made on this page.
let selectedText = "";

// After every mouse release, store the trimmed selection (if any) in
// chrome.storage.local under the key "selectedText".
document.addEventListener("mouseup", () => {
  const picked = window.getSelection().toString().trim();
  if (!picked) {
    return; // nothing selected: keep the previously stored text
  }
  selectedText = picked;
  chrome.storage.local.set({ selectedText: selectedText });
});
|
||||
@@ -0,0 +1,17 @@
|
||||
{
  "manifest_version": 3,
  "name": "PhonoCoach",
  "version": "1.0",
  "description": "Lets you practise pronunciation on any webpage you're on",
  "permissions": ["storage", "activeTab", "scripting"],
  "host_permissions": ["http://localhost:8000/*", "http://127.0.0.1:8000/*"],
  "action": {
    "default_popup": "popup.html"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js"]
    }
  ]
}
|
||||
@@ -0,0 +1,120 @@
|
||||
/* Popup shell: fixed 350px width with a light gradient background. */
body {
  font-family: "Segoe UI", Tahoma, Geneva, Verdana, sans-serif;
  margin: 10px;
  width: 350px;
  background: linear-gradient(135deg, #f9f9f9, #e0f7fa);
  color: #333;
}

/* Centered section heading. */
h3 {
  text-align: center;
  color: #2c3e50;
  margin-bottom: 12px;
}

/* Box showing the text to be read; wraps long words and scrolls
   vertically once it grows past 150px. */
#displayText {
  display: block;
  width: 100%;
  white-space: normal;
  word-wrap: break-word;
  border: 1px solid #ccc;
  padding: 10px;
  background-color: #ffffffcc;
  min-height: 50px;
  max-height: 150px;
  overflow-y: auto;
  margin-bottom: 10px;
  font-size: 1.4em;
  border-radius: 8px;
  box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
}

/* Full-width record/stop button. */
#recordBtn {
  display: block;
  width: 100%;
  padding: 10px;
  background: #03a9f4;
  color: white;
  border: none;
  border-radius: 8px;
  font-size: 1.2em;
  cursor: pointer;
  transition: background 0.3s ease;
}

/* Darker blue while the pointer is over the button. */
#recordBtn:hover {
  background: #0288d1;
}

/* Status line under the button (recording / uploading / result messages). */
#status {
  text-align: center;
  font-weight: 500;
  margin-top: 8px;
  color: #555;
}

/* Accuracy percentage line. */
#similarityScore {
  font-weight: bold;
  color: #2a9d8f;
  margin-top: 10px;
  text-align: center;
  font-size: 1.3em;
}

/* Container for the per-phoneme result; items wrap and the box scrolls
   vertically past 150px. */
#phonemeDiff {
  margin-top: 10px;
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
  max-height: 150px;
  overflow-y: auto;
}

/* Chip style for a single phoneme.
   NOTE(review): popup.js in this commit creates the phoneme <span>s without
   assigning this class, so these rules do not appear to be applied. */
.phoneme {
  padding: 10px 16px;
  border-radius: 8px;
  font-size: 1.6em; /* larger than surrounding text so phonemes stand out */
  font-weight: 700;
  transition: transform 0.2s;
}

/* Slight zoom on hover. */
.phoneme:hover {
  transform: scale(1.1);
  cursor: pointer;
}

/* Italic feedback sentence shown with the score. */
#pronunciationTips {
  margin-top: 10px;
  font-style: italic;
  color: #555;
  text-align: center;
}

/* Card style for a "general tips" panel.
   NOTE(review): popup.html in this commit contains no #generalTips element
   (the legend is rendered into #colorLegend), so this rule and the three
   below it currently match nothing. */
#generalTips {
  margin-top: 12px;
  padding: 10px;
  background: #f0f4f8;
  border-radius: 8px;
  box-shadow: 0 1px 4px rgba(0, 0, 0, 0.1);
  color: #333;
}

/* Heading inside the tips panel. */
#generalTips h4 {
  margin-top: 0;
  margin-bottom: 8px;
  font-weight: 600;
  color: #2c3e50;
  font-size: 1.1em;
}

/* List inside the tips panel. */
#generalTips ul {
  padding-left: 20px;
  margin: 0;
}

/* Individual tip entries. */
#generalTips ul li {
  margin-bottom: 6px;
  line-height: 1.4;
  font-size: 1.1em;
  list-style-type: disc;
}
|
||||
@@ -0,0 +1,26 @@
|
||||
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <title>PhonoCoach</title>
    <link rel="stylesheet" href="popup.css" />
  </head>
  <body>
    <!-- Text to practise; popup.js replaces this placeholder with the
         selection stored in chrome.storage.local. -->
    <h3>Selected Text</h3>
    <div id="displayText">No selected text</div>

    <!-- Toggles recording; popup.js writes progress messages to #status. -->
    <button id="recordBtn">🎙 Record</button>
    <p id="status"></p>

    <!-- Filled by popup.js after a successful upload: accuracy percentage
         and a feedback sentence chosen from the score. -->
    <div id="similarityScore"></div>
    <p id="pronunciationTips"></p>

    <!-- One colored <span> per phoneme is appended here by popup.js. -->
    <div id="phonemeDiff"></div>

    <!-- Color legend (shown after accuracy); populated by popup.js. -->
    <div id="colorLegend"></div>

    <script src="popup.js"></script>
  </body>
</html>
|
||||
@@ -0,0 +1,135 @@
|
||||
// Popup script: shows the text selected on the page, records the user
// reading it, uploads the recording to the local API and renders the
// pronunciation feedback that comes back.

// Text the user selected on the page. Kept separately from the DOM so the
// "No text selected" placeholder is never sent to the server as the
// expected text.
let expectedText = "";

chrome.storage.local.get("selectedText", (data) => {
  expectedText = (data && data.selectedText) || "";
  document.getElementById("displayText").textContent =
    expectedText || "No text selected";
});

let mediaRecorder;
let audioChunks = [];
let isRecording = false;

const recordBtn = document.getElementById("recordBtn");
const status = document.getElementById("status");
const similarityScoreElem = document.getElementById("similarityScore");

// Text color used for each diff label returned by the server.
const PHONEME_COLORS = {
  match: "green",
  mismatch: "red",
  missing: "orange",
  extra: "blue",
};

// Stops every track of a stream so the microphone is released.
function releaseStream(stream) {
  stream.getTracks().forEach((track) => track.stop());
}

// Resolves to true when microphone access is (or becomes) granted and to
// false when it is denied or the Permissions API call fails.
async function requestMicPermission() {
  try {
    const permissionStatus = await navigator.permissions.query({
      name: "microphone",
    });
    if (permissionStatus.state === "granted") {
      return true;
    }
    if (permissionStatus.state === "prompt") {
      // Opening a stream triggers the browser prompt; the stream itself is
      // not needed, so release it immediately.
      const probeStream = await navigator.mediaDevices.getUserMedia({
        audio: true,
      });
      releaseStream(probeStream);
      return true;
    }
    return false;
  } catch (err) {
    console.error("Permission API error:", err);
    return false;
  }
}

// Renders the score, feedback sentence, color legend and per-phoneme diff
// from the server's JSON response.
function renderResult(result) {
  const score = result.similarity_score;
  similarityScoreElem.textContent = `Pronunciation Accuracy: ${(score * 100).toFixed(1)}%`;

  const tipsElem = document.getElementById("pronunciationTips");
  if (score > 0.9) {
    tipsElem.textContent =
      "Great job! Keep practicing to maintain your clear pronunciation.";
  } else if (score > 0.7) {
    tipsElem.textContent =
      "Good effort! Try slowing down and emphasizing each word.";
  } else {
    tipsElem.textContent =
      "Keep practicing! Focus on vowel sounds and word stress.";
  }

  const legendElem = document.getElementById("colorLegend");
  legendElem.innerHTML = `
    <h4>Color Coding:</h4>
    <ul>
      <li style="color: green;">Green: Correct phoneme</li>
      <li style="color: red;">Red: Incorrect phoneme</li>
      <li style="color: orange;">Orange: Missing phoneme</li>
      <li style="color: blue;">Blue: Extra phoneme</li>
      <li style="color: gray;">[UNK]: Unknown/Unrecognized phoneme</li>
    </ul>
    <h3>Note: This extension leverages OpenAI’s Whisper ASR model for automatic speech recognition, enabling accurate transcription and pronunciation analysis.</h3>`;

  const phonemeDiffElem = document.getElementById("phonemeDiff");
  phonemeDiffElem.innerHTML = "";
  // textContent (not innerHTML) is used for the phoneme so server data is
  // never interpreted as markup.
  result.phoneme_diff.forEach(([phoneme, label]) => {
    const span = document.createElement("span");
    span.textContent = phoneme + " ";
    const color = PHONEME_COLORS[label];
    if (color) {
      span.style.color = color;
    }
    phonemeDiffElem.appendChild(span);
  });
}

// Sends the recording plus the expected text to the API and shows the
// outcome. The record button is disabled for the duration of the request.
async function uploadRecording(audioBlob, fileName) {
  recordBtn.disabled = true;

  const formData = new FormData();
  formData.append("file", audioBlob, fileName);
  formData.append("expected_text", expectedText);

  status.textContent = "Uploading...";
  similarityScoreElem.textContent = "";

  try {
    const response = await fetch("http://127.0.0.1:8000/upload", {
      method: "POST",
      body: formData,
    });

    if (response.ok) {
      renderResult(await response.json());
      status.textContent = "✅ Uploaded successfully!";
    } else {
      status.textContent = "❌ Upload failed.";
    }
  } catch (err) {
    console.error(err);
    status.textContent = "⚠️ Error uploading.";
  } finally {
    recordBtn.disabled = false;
  }
}

recordBtn.addEventListener("click", async () => {
  // Second click: stop the recording; the upload starts in onstop.
  if (isRecording) {
    mediaRecorder.stop();
    isRecording = false;
    recordBtn.textContent = "🎙 Record";
    return;
  }

  // Without a stored selection there is nothing to compare against.
  if (!expectedText) {
    status.textContent = "⚠️ Select some text on the page first.";
    return;
  }

  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorder = new MediaRecorder(stream);
    audioChunks = [];

    mediaRecorder.ondataavailable = (event) => {
      if (event.data.size > 0) {
        audioChunks.push(event.data);
      }
    };

    const recorder = mediaRecorder;
    mediaRecorder.onstop = async () => {
      // Release the microphone as soon as recording ends.
      releaseStream(stream);

      // MediaRecorder does not produce WAV; label the blob with the
      // container it actually recorded (e.g. "audio/webm;codecs=opus").
      const mimeType = recorder.mimeType || "audio/webm";
      const extension = mimeType.split(";")[0].split("/")[1] || "webm";
      const audioBlob = new Blob(audioChunks, { type: mimeType });

      await uploadRecording(audioBlob, `recording.${extension}`);
    };

    mediaRecorder.start();
    isRecording = true;
    recordBtn.textContent = "⏹ Stop";
    status.textContent = "🎙 Recording...";
    similarityScoreElem.textContent = "";
  } catch (err) {
    // If the stream was opened but the recorder could not start, do not
    // leave the microphone active.
    if (stream) {
      releaseStream(stream);
    }
    console.error(err);
    status.textContent = "⚠️ Microphone access denied.";
  }
});
|
||||
Reference in New Issue
Block a user