commit f7205ada2011b38417c2d90db6102e87601acf9a
Author: Sosokker
Date:   Tue Jan 14 13:48:54 2025 +0700

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1fc5630
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,30 @@
+.idea
+.ipynb_checkpoints
+.mypy_cache
+.vscode
+__pycache__
+.pytest_cache
+htmlcov
+dist
+site
+.coverage*
+coverage.xml
+.netlify
+test.db
+log.txt
+Pipfile.lock
+env3.*
+env
+docs_build
+site_build
+venv
+docs.zip
+archive.zip
+
+# vim temporary files
+*~
+.*.sw?
+.cache
+
+# macOS
+.DS_Store
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5997bac
--- /dev/null
+++ b/README.md
@@ -0,0 +1,17 @@
+# Image Caption Generator
+
+### Installation
+
+```bash
+python -m venv venv
+.\venv\Scripts\activate
+pip install -r requirements.txt
+```
+
+### Usage
+
+```bash
+uvicorn app:app --reload
+```
+
+Access through [http://127.0.0.1:8000](http://127.0.0.1:8000)
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..8899a80
--- /dev/null
+++ b/app.py
@@ -0,0 +1,66 @@
+"""FastAPI app that serves an upload page and captions uploaded images."""
+
+from fastapi import FastAPI, HTTPException, UploadFile, Request
+from fastapi.concurrency import run_in_threadpool
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+from PIL import Image
+import torch
+import io
+
+app = FastAPI()
+
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+templates = Jinja2Templates(directory="templates")
+
+# Loaded once at import time so every request reuses the same objects.
+model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
+
+
+def generate_caption(image: Image.Image) -> str:
+    """Generate a caption for the uploaded image.
+
+    The image is converted to tensors on ``device``, passed to
+    ``model.generate``, and the first output sequence is decoded with
+    special tokens skipped.
+    """
+    inputs = processor(images=image, return_tensors="pt").to(device)
+    outputs = model.generate(**inputs)
+    caption = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return caption
+
+
+@app.get("/", response_class=HTMLResponse)
+async def home(request: Request):
+    """Render the ``index.html`` template."""
+    return templates.TemplateResponse("index.html", {"request": request})
+
+
+@app.post("/upload/")
+async def upload_image(file: UploadFile):
+    """Caption the uploaded image and return it under the ``caption`` key.
+
+    Responds with HTTP 400 when the upload cannot be decoded as an image.
+    """
+    data = await file.read()
+    try:
+        with Image.open(io.BytesIO(data)) as uploaded:
+            image = uploaded.convert("RGB")
+    except OSError as exc:
+        # PIL.UnidentifiedImageError subclasses OSError, so this covers both
+        # unrecognised and truncated files.
+        raise HTTPException(
+            status_code=400, detail="Uploaded file is not a valid image."
+        ) from exc
+    # Inference is blocking; run it in a worker thread so the event loop
+    # keeps serving other requests while the caption is generated.
+    caption = await run_in_threadpool(generate_caption, image)
+    return {"caption": caption}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3de1c66
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+fastapi
+uvicorn
+transformers
+torch
+pillow
+python-multipart
\ No newline at end of file
diff --git a/static/placeholder.jpg b/static/placeholder.jpg
new file mode 100644
index 0000000..03bebc4
Binary files /dev/null and b/static/placeholder.jpg differ
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..c7a9897
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,64 @@
+
+
+
+
+
+ Image Caption Generator
+
+
+
+

Image Caption Generator

+
+ + + +
+

Generated Caption:

+

No caption generated yet.

+ Uploaded Image + + + +