mirror of
https://github.com/Sosokker/image-caption-generator.git
synced 2025-12-18 18:14:05 +01:00
Initial commit
This commit is contained in:
commit
f7205ada20
30
.gitignore
vendored
Normal file
30
.gitignore
vendored
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
.idea
|
||||||
|
.ipynb_checkpoints
|
||||||
|
.mypy_cache
|
||||||
|
.vscode
|
||||||
|
__pycache__
|
||||||
|
.pytest_cache
|
||||||
|
htmlcov
|
||||||
|
dist
|
||||||
|
site
|
||||||
|
.coverage*
|
||||||
|
coverage.xml
|
||||||
|
.netlify
|
||||||
|
test.db
|
||||||
|
log.txt
|
||||||
|
Pipfile.lock
|
||||||
|
env3.*
|
||||||
|
env
|
||||||
|
docs_build
|
||||||
|
site_build
|
||||||
|
venv
|
||||||
|
docs.zip
|
||||||
|
archive.zip
|
||||||
|
|
||||||
|
# vim temporary files
|
||||||
|
*~
|
||||||
|
.*.sw?
|
||||||
|
.cache
|
||||||
|
|
||||||
|
# macOS
|
||||||
|
.DS_Store
|
||||||
17
README.md
Normal file
17
README.md
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Image Caption Generator
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python -m venv venv
|
||||||
|
.\venv\Scripts\activate
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
### Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uvicorn app:app --reload
|
||||||
|
```
|
||||||
|
|
||||||
|
Access through [http://127.0.0.1:8000](http://127.0.0.1:8000)
|
||||||
41
app.py
Normal file
41
app.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
from fastapi import FastAPI, UploadFile, Request
|
||||||
|
from fastapi.responses import HTMLResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
|
||||||
|
from PIL import Image
|
||||||
|
import torch
|
||||||
|
import io
|
||||||
|
|
||||||
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Expose the local "static" directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")

# Jinja2 environment rooted at the local "templates" directory.
templates = Jinja2Templates(directory="templates")

# Captioning model, its image pre-processor and its tokenizer, all loaded
# once at import time from the same pretrained checkpoint.
model = VisionEncoderDecoderModel.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning"
)
processor = ViTImageProcessor.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning"
)
tokenizer = AutoTokenizer.from_pretrained(
    "nlpconnect/vit-gpt2-image-captioning"
)

# Run on the GPU when CUDA is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_caption(image: Image.Image) -> str:
    """Return a text caption for ``image``.

    The image is passed through the module-level ``processor`` to obtain
    PyTorch tensors, which are moved to ``device``. ``model.generate`` then
    produces token ids, and the first generated sequence is decoded with
    ``tokenizer`` with special tokens skipped.
    """
    model_inputs = processor(images=image, return_tensors="pt")
    model_inputs = model_inputs.to(device)
    generated_ids = model.generate(**model_inputs)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    """Serve the landing page rendered from the ``index.html`` template."""
    # The request object is passed to the template under the "request" key.
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload/")
async def upload_image(file: UploadFile):
    """Generate a caption for an uploaded image file.

    The upload is read fully into memory, decoded with Pillow, converted to
    RGB and handed to ``generate_caption``.

    Returns:
        A dict of the form ``{"caption": <str>}``.

    Raises:
        HTTPException: with status 400 when the uploaded bytes cannot be
            decoded as an image (empty, corrupt, or not an image at all).
    """
    # Local import so this handler does not depend on the module's import
    # block being edited as well.
    from fastapi import HTTPException

    payload = await file.read()
    try:
        image = Image.open(io.BytesIO(payload)).convert("RGB")
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError subclass, so this
        # covers both unrecognised formats and truncated/corrupt files.
        # Report a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a valid image.",
        ) from exc

    caption = generate_caption(image)
    return {"caption": caption}
|
||||||
6
requirements.txt
Normal file
6
requirements.txt
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn
|
||||||
|
transformers
|
||||||
|
torch
|
||||||
|
pillow
|
||||||
|
python-multipart
|
||||||
BIN
static/placeholder.jpg
Normal file
BIN
static/placeholder.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1015 KiB |
64
templates/index.html
Normal file
64
templates/index.html
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||||
|
<title>Image Caption Generator</title>
|
||||||
|
<style>
|
||||||
|
body {
|
||||||
|
font-family: Arial, sans-serif;
|
||||||
|
max-width: 600px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 20px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
img {
|
||||||
|
max-width: 100%;
|
||||||
|
margin: 20px 0;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Image Caption Generator</h1>
|
||||||
|
<form id="uploadForm">
|
||||||
|
<label for="file">Upload an image:</label>
|
||||||
|
<input type="file" id="file" name="file" accept="image/*" required />
|
||||||
|
<button type="submit">Generate Caption</button>
|
||||||
|
</form>
|
||||||
|
<h2>Generated Caption:</h2>
|
||||||
|
<p id="caption">No caption generated yet.</p>
|
||||||
|
<img id="uploadedImage" src="/static/placeholder.jpg" alt="Uploaded Image" />
|
||||||
|
|
||||||
|
<script>
|
||||||
|
const form = document.getElementById("uploadForm");
const captionElement = document.getElementById("caption");
const uploadedImage = document.getElementById("uploadedImage");

// On submit: preview the chosen file locally, POST it to /upload/, and
// show the caption returned by the server (or an error message).
form.addEventListener("submit", async (event) => {
  // Keep the browser from performing a normal form navigation.
  event.preventDefault();
  const fileInput = document.getElementById("file");
  const file = fileInput.files[0];
  if (!file) return;

  const formData = new FormData();
  formData.append("file", file);

  // Update the displayed image
  const reader = new FileReader();
  reader.onload = (e) => {
    uploadedImage.src = e.target.result;
  };
  reader.readAsDataURL(file);

  // Send the file to the server
  try {
    const response = await fetch("/upload/", {
      method: "POST",
      body: formData,
    });

    // fetch() only rejects on network failure; HTTP error statuses must be
    // checked explicitly, otherwise an error body would be read as a caption.
    if (!response.ok) {
      throw new Error(`Upload failed with status ${response.status}`);
    }

    const result = await response.json();
    captionElement.textContent = result.caption;
  } catch (error) {
    console.error(error);
    captionElement.textContent =
      "Could not generate a caption. Please try again.";
  }
});
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
Loading…
Reference in New Issue
Block a user