Initial commit

2025-12-18 18:14:05 +01:00 · 2025-01-14 13:48:54 +07:00 · 2025-01-14 13:48:54 +07:00 · f7205ada20
commit f7205ada20
6 changed files with 158 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,30 @@
 .idea
 .ipynb_checkpoints
 .mypy_cache
 .vscode
 __pycache__
 .pytest_cache
 htmlcov
 dist
 site
 .coverage*
 coverage.xml
 .netlify
 test.db
 log.txt
 Pipfile.lock
 env3.*
 env
 docs_build
 site_build
 venv
 docs.zip
 archive.zip
 # vim temporary files
 *~
 .*.sw?
 .cache
 # macOS
 .DS_Store
--- a/README.md
+++ b/README.md
@ -0,0 +1,17 @@
 # Image Caption Generator
 ### Installation
 ```bash
 python -m venv venv
 # Windows:
 .\venv\Scripts\activate
 # macOS/Linux:
 # source venv/bin/activate
 pip install -r requirements.txt
 ```
 ### Usage
 ```bash
 uvicorn app:app --reload
 ```
 Then open [http://127.0.0.1:8000](http://127.0.0.1:8000) in your browser.
--- a/app.py
+++ b/app.py
@ -0,0 +1,41 @@
 from fastapi import FastAPI, UploadFile, Request
 from fastapi.responses import HTMLResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
 from PIL import Image
 import torch
 import io
 # Hugging Face checkpoint id shared by the model, image processor and tokenizer.
 MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"

 # Web application: static assets are served under /static, HTML pages are
 # rendered from the "templates" directory.
 app = FastAPI()
 app.mount("/static", StaticFiles(directory="static"), name="static")
 templates = Jinja2Templates(directory="templates")

 # Captioning pipeline pieces, loaded once at import time.
 model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
 processor = ViTImageProcessor.from_pretrained(MODEL_NAME)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

 # Use the GPU when torch reports one, otherwise fall back to the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 def generate_caption(image: Image.Image) -> str:
    """Return a text caption for ``image``.

    The image is run through the module-level ``processor`` to build
    PyTorch tensors, which are moved to ``device`` and passed to
    ``model.generate``. The first generated sequence is decoded with
    ``tokenizer``, dropping special tokens.
    """
    model_inputs = processor(images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(**model_inputs)
    # Only the first generated sequence is decoded.
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
@app.get("/", response_class=HTMLResponse)
 async def home(request: Request):
    return templates.TemplateResponse("index.html", {"request": request})
@app.post("/upload/")
 async def upload_image(file: UploadFile):
    image = Image.open(io.BytesIO(await file.read())).convert("RGB")
    caption = generate_caption(image)
    return {"caption": caption}
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,6 @@
 fastapi
 uvicorn
 transformers
 torch
 pillow
 python-multipart
--- a/static/placeholder.jpg
+++ b/static/placeholder.jpg
--- a/templates/index.html
+++ b/templates/index.html
@ -0,0 +1,64 @@
 <!DOCTYPE html>
 <html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Image Caption Generator</title>
    <style>
      body {
        font-family: Arial, sans-serif;
        max-width: 600px;
        margin: 0 auto;
        padding: 20px;
        text-align: center;
      }
      img {
        max-width: 100%;
        margin: 20px 0;
      }
    </style>
  </head>
  <body>
    <h1>Image Caption Generator</h1>
    <form id="uploadForm">
      <label for="file">Upload an image:</label>
      <input type="file" id="file" name="file" accept="image/*" required />
      <button type="submit">Generate Caption</button>
    </form>
    <h2>Generated Caption:</h2>
    <p id="caption">No caption generated yet.</p>
    <img id="uploadedImage" src="/static/placeholder.jpg" alt="Uploaded Image" />
    <script>
      // Elements the submit handler reads from and writes to.
      const form = document.getElementById("uploadForm");
      const captionElement = document.getElementById("caption");
      const uploadedImage = document.getElementById("uploadedImage");
      // Intercept the form submit: preview the chosen file locally, POST it
      // to /upload/ and show the returned caption (or an error message).
      form.addEventListener("submit", async (event) => {
        event.preventDefault();
        const fileInput = document.getElementById("file");
        const file = fileInput.files[0];
        if (!file) return;
        const formData = new FormData();
        formData.append("file", file);
        // Update the displayed image
        const reader = new FileReader();
        reader.onload = (e) => {
          uploadedImage.src = e.target.result;
        };
        reader.readAsDataURL(file);
        // Replace any previous caption while the request is in flight
        captionElement.textContent = "Generating caption...";
        try {
          // Send the file to the server
          const response = await fetch("/upload/", {
            method: "POST",
            body: formData,
          });
          // fetch() only rejects on network failure, so HTTP errors must be
          // detected explicitly before trusting the body.
          if (!response.ok) {
            throw new Error("Upload failed with status " + response.status);
          }
          const result = await response.json();
          captionElement.textContent = result.caption;
        } catch (error) {
          console.error(error);
          captionElement.textContent =
            "Could not generate a caption. Please try another image.";
        }
      });
    </script>
  </body>
 </html>