Initial commit

This commit is contained in:
Sosokker 2025-01-14 13:48:54 +07:00
commit f7205ada20
6 changed files with 158 additions and 0 deletions

30
.gitignore vendored Normal file
View File

@ -0,0 +1,30 @@
.idea
.ipynb_checkpoints
.mypy_cache
.vscode
__pycache__
.pytest_cache
htmlcov
dist
site
.coverage*
coverage.xml
.netlify
test.db
log.txt
Pipfile.lock
env3.*
env
docs_build
site_build
venv
docs.zip
archive.zip
# vim temporary files
*~
.*.sw?
.cache
# macOS
.DS_Store

17
README.md Normal file
View File

@ -0,0 +1,17 @@
# Image Caption Generator
### Installation
```bash
python -m venv venv
# Windows:
.\venv\Scripts\activate
# macOS/Linux:
# source venv/bin/activate
pip install -r requirements.txt
```
### Usage
```bash
uvicorn app:app --reload
```
Once the server is running, open [http://127.0.0.1:8000](http://127.0.0.1:8000) in your browser.

41
app.py Normal file
View File

@ -0,0 +1,41 @@
import io

import torch
from fastapi import FastAPI, HTTPException, Request, UploadFile
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from PIL import Image, UnidentifiedImageError
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
# Hugging Face checkpoint shared by the captioning model, its image
# processor and its tokenizer.
_CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

# Web application: static assets are served under /static and HTML pages are
# rendered from the "templates" directory.
app = FastAPI()
templates = Jinja2Templates(directory="templates")
app.mount("/static", StaticFiles(directory="static"), name="static")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the preprocessing, decoding and model components once at import time
# so every request reuses the same instances.
processor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
model.to(device)
def generate_caption(image: Image.Image) -> str:
    """Return a text caption for *image*.

    The image is converted to model inputs by the module-level ``processor``
    and moved to ``device``; ``model.generate`` produces token ids, and the
    first generated sequence is decoded by ``tokenizer`` with special tokens
    removed.
    """
    model_inputs = processor(images=image, return_tensors="pt").to(device)
    token_ids = model.generate(**model_inputs)
    return tokenizer.decode(token_ids[0], skip_special_tokens=True)
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Render the ``index.html`` template for the site root."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
@app.post("/upload/")
async def upload_image(file: UploadFile):
    """Generate a caption for an uploaded image.

    Args:
        file: The multipart upload; its bytes are expected to be an image
            that Pillow can decode.

    Returns:
        A dict of the form ``{"caption": <caption text>}``.

    Raises:
        HTTPException: With status 400 when the upload is empty, corrupt, or
            not an image Pillow can open.
    """
    payload = await file.read()
    try:
        # Decoding is where untrusted input can fail; convert() forces the
        # pixel data to be read, so truncated files are caught here too.
        image = Image.open(io.BytesIO(payload)).convert("RGB")
    except (UnidentifiedImageError, OSError, Image.DecompressionBombError) as exc:
        # Report a client error instead of letting the exception surface as
        # an opaque 500 response.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image.",
        ) from exc
    caption = generate_caption(image)
    return {"caption": caption}

6
requirements.txt Normal file
View File

@ -0,0 +1,6 @@
fastapi
uvicorn
transformers
torch
pillow
python-multipart

BIN
static/placeholder.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1015 KiB

64
templates/index.html Normal file
View File

@ -0,0 +1,64 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Image Caption Generator</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 600px;
margin: 0 auto;
padding: 20px;
text-align: center;
}
img {
max-width: 100%;
margin: 20px 0;
}
</style>
</head>
<body>
<h1>Image Caption Generator</h1>
<form id="uploadForm">
<label for="file">Upload an image:</label>
<input type="file" id="file" name="file" accept="image/*" required />
<button type="submit">Generate Caption</button>
</form>
<h2>Generated Caption:</h2>
<p id="caption">No caption generated yet.</p>
<img id="uploadedImage" src="/static/placeholder.jpg" alt="Uploaded Image" />
<script>
// Elements the upload flow reads from and writes to.
const form = document.getElementById("uploadForm");
const captionElement = document.getElementById("caption");
const uploadedImage = document.getElementById("uploadedImage");

// On submit: preview the chosen file locally, POST it to /upload/, and show
// the returned caption (or an error message if the request fails).
form.addEventListener("submit", async (event) => {
  // Keep the browser from doing a full-page form submission.
  event.preventDefault();

  const fileInput = document.getElementById("file");
  const file = fileInput.files[0];
  if (!file) return;

  const formData = new FormData();
  formData.append("file", file);

  // Update the displayed image
  const reader = new FileReader();
  reader.onload = (e) => {
    uploadedImage.src = e.target.result;
  };
  reader.readAsDataURL(file);

  // Replace any previous caption so a stale result is not shown while the
  // new request is in flight.
  captionElement.textContent = "Generating caption...";

  try {
    // Send the file to the server
    const response = await fetch("/upload/", {
      method: "POST",
      body: formData,
    });
    // fetch() only rejects on network failure; HTTP error statuses must be
    // checked explicitly before trusting the body.
    if (!response.ok) {
      throw new Error(`Upload failed with status ${response.status}`);
    }
    const result = await response.json();
    captionElement.textContent = result.caption;
  } catch (error) {
    console.error(error);
    captionElement.textContent =
      "Could not generate a caption. Please try another image.";
  }
});
</script>
</body>
</html>