Initial commit

2025-12-18 01:54:05 +01:00 · 2025-01-14 13:48:54 +07:00 · 2025-01-14 13:48:54 +07:00 · f7205ada20
commit f7205ada20
6 changed files with 158 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,30 @@
+.idea
+.ipynb_checkpoints
+.mypy_cache
+.vscode
+__pycache__
+.pytest_cache
+htmlcov
+dist
+site
+.coverage*
+coverage.xml
+.netlify
+test.db
+log.txt
+Pipfile.lock
+env3.*
+env
+docs_build
+site_build
+venv
+docs.zip
+archive.zip
+
+# vim temporary files
+*~
+.*.sw?
+.cache
+
+# macOS
+.DS_Store
--- a/README.md
+++ b/README.md
@ -0,0 +1,17 @@
+# Image Caption Generator
+
+### Installation
+
+```bash
+python -m venv venv
+.\venv\Scripts\activate  # Windows; on macOS/Linux run: source venv/bin/activate
+pip install -r requirements.txt
+```
+
+### Usage
+
+```bash
+uvicorn app:app --reload
+```
+
+Then open [http://127.0.0.1:8000](http://127.0.0.1:8000) in your browser.
--- a/app.py
+++ b/app.py
@ -0,0 +1,41 @@
+from fastapi import FastAPI, UploadFile, Request
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+from PIL import Image
+import torch
+import io
+
+app = FastAPI()
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+
+MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"  # shared by all three loaders
+model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+# Run inference on the GPU when CUDA is available, otherwise on the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+
+
+def generate_caption(image: Image.Image) -> str:
+    """Return the caption text the model generates for ``image``."""
+    pixel_inputs = processor(images=image, return_tensors="pt").to(device)
+    token_ids = model.generate(**pixel_inputs)
+    # Only the first generated sequence is decoded, without special tokens.
+    return tokenizer.decode(token_ids[0], skip_special_tokens=True)
+
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    return templates.TemplateResponse(request, "index.html")  # name-first form is deprecated
+
+
+@app.post("/upload/")
+async def upload_image(file: UploadFile):
+    # Decode the uploaded bytes as an RGB image, then reply with its caption.
+    rgb_image = Image.open(io.BytesIO(await file.read())).convert("RGB")
+    return {"caption": generate_caption(rgb_image)}
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
+fastapi
+uvicorn
+transformers
+torch
+pillow
+python-multipart
--- a/static/placeholder.jpg
+++ b/static/placeholder.jpg
--- a/templates/index.html
+++ b/templates/index.html
@ -0,0 +1,64 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Image Caption Generator</title>
+    <style>
+      body {
+        font-family: Arial, sans-serif;
+        max-width: 600px;
+        margin: 0 auto;
+        padding: 20px;
+        text-align: center;
+      }
+      img {
+        max-width: 100%;
+        margin: 20px 0;
+      }
+    </style>
+  </head>
+  <body>
+    <h1>Image Caption Generator</h1>
+    <form id="uploadForm">
+      <label for="file">Upload an image:</label>
+      <input type="file" id="file" name="file" accept="image/*" required />
+      <button type="submit">Generate Caption</button>
+    </form>
+    <h2>Generated Caption:</h2>
+    <p id="caption">No caption generated yet.</p>
+    <img id="uploadedImage" src="/static/placeholder.jpg" alt="Uploaded Image" />
+
+    <script>
+      const form = document.getElementById("uploadForm");
+      const captionElement = document.getElementById("caption");
+      const uploadedImage = document.getElementById("uploadedImage");
+
+      form.addEventListener("submit", async (event) => {
+        event.preventDefault();
+        const file = document.getElementById("file").files[0];
+        if (!file) return;
+
+        const formData = new FormData();
+        formData.append("file", file);
+
+        // Preview the chosen image right away, without waiting for the server.
+        const reader = new FileReader();
+        reader.onload = (e) => {
+          uploadedImage.src = e.target.result;
+        };
+        reader.readAsDataURL(file);
+
+        // A network failure or non-2xx reply must not leave "undefined" on screen.
+        try {
+          const response = await fetch("/upload/", { method: "POST", body: formData });
+          if (!response.ok) throw new Error(`HTTP ${response.status}`);
+          const result = await response.json();
+          captionElement.textContent = result.caption;
+        } catch (err) {
+          captionElement.textContent = "Could not generate a caption.";
+        }
+      });
+    </script>
+  </body>
+</html>