Save the following as drive_pdf_search.py (or integrate the functions into your own project).
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Google‑Drive PDF‑search feature
--------------------------------
Searches public/shared PDF files on a user’s Google Drive (or a service‑account drive)
matching a free‑text query, then optionally downloads the chosen file.
Author: ChatGPT (2024‑06)
License: MIT (you may adapt it for your own product)
"""
import os
import io
import sys
import json
import pathlib
from typing import List, Dict, Optional
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
# ----------------------------------------------------------------------
# 1️⃣ OAuth / credentials handling
# ----------------------------------------------------------------------
# OAuth scope requested for the Drive credentials (the "drive.readonly" scope).
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]
# Path of the cached user token; read on start-up and written after a
# successful authorisation.
TOKEN_FILE = "token.json" # cached token for desktop flow
# Path of the OAuth client-secret file used to start the interactive flow.
CRED_FILE = "credentials.json" # client-secret downloaded from GCP
def get_drive_service() -> "googleapiclient.discovery.Resource":
    """
    Return an authenticated Drive v3 service object.

    The cached token in ``TOKEN_FILE`` is used when present. If the cached
    credentials are expired and carry a refresh token they are refreshed;
    otherwise the interactive installed-app OAuth flow is run using
    ``CRED_FILE``. Whenever credentials are refreshed or newly obtained they
    are written back to ``TOKEN_FILE``.

    Raises
    ------
    FileNotFoundError
        If the interactive flow is required but ``CRED_FILE`` does not exist.
    """
    creds: Optional[Credentials] = None

    # Load the cached token if it exists.
    if os.path.exists(TOKEN_FILE):
        creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)

    # Token missing or no longer valid: refresh it, or fall back to the
    # interactive flow.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            if not os.path.exists(CRED_FILE):
                raise FileNotFoundError(
                    f"OAuth client file '{CRED_FILE}' not found. "
                    "Download it from Google Cloud Console."
                )
            flow = InstalledAppFlow.from_client_secrets_file(CRED_FILE, SCOPES)
            creds = flow.run_local_server(port=0)

        # Persist the (new or refreshed) token for the next run.
        with open(TOKEN_FILE, "w", encoding="utf-8") as token_file:
            token_file.write(creds.to_json())

    # Build the Drive service.
    return build("drive", "v3", credentials=creds, cache_discovery=False)
# ----------------------------------------------------------------------
# 2️⃣ Search helper
# ----------------------------------------------------------------------
def search_pdfs(service, query: str, page_size: int = 20) -> List[Dict]:
    """
    Search Drive for non-trashed PDFs whose name contains ``query``.

    Only the first page of results is fetched.

    Parameters
    ----------
    service
        Authenticated Drive service; must expose ``files().list(...).execute()``.
    query : str
        Free-text search term (e.g. "annual report 2023").
    page_size : int
        Maximum number of results requested; clamped to the 1..1000 range.

    Returns
    -------
    list[dict]
        One dict per file with the keys ``id``, ``name``, ``size`` (int,
        0 when the API omits it), ``modifiedTime`` and ``downloadUrl``
        (the file's ``webContentLink``, or ``None`` when absent).
    """
    # Drive's query language uses single-quoted string literals in which both
    # the backslash and the quote must be backslash-escaped. Escape the
    # backslash first so the quote escapes are not doubled.
    escaped = query.replace("\\", "\\\\").replace("'", "\\'")
    q = (
        f"name contains '{escaped}' "
        "and mimeType = 'application/pdf' and trashed = false"
    )
    fields = "files(id,name,size,modifiedTime,webContentLink),nextPageToken"

    result = service.files().list(
        q=q,
        spaces="drive",
        fields=fields,
        pageSize=max(1, min(page_size, 1000)),
    ).execute()

    # Normalise output: keep only what the UI needs.
    return [
        {
            "id": f["id"],
            "name": f["name"],
            "size": int(f.get("size", 0)),
            "modifiedTime": f["modifiedTime"],
            "downloadUrl": f.get("webContentLink"),
        }
        for f in result.get("files", [])
    ]
# ----------------------------------------------------------------------
# 3️⃣ Download helper
# ----------------------------------------------------------------------
def download_pdf(service, file_id: str, destination: pathlib.Path) -> pathlib.Path:
    """
    Stream a Drive file to ``destination`` and return its resolved path.

    Parent directories of ``destination`` are created if needed. Progress is
    printed to stdout as a percentage while chunks arrive.

    Parameters
    ----------
    service
        Authenticated Drive service; must expose ``files().get_media(...)``.
    file_id : str
        ID of the Drive file to download.
    destination : pathlib.Path
        Target file path (a plain string path is accepted as well).
    """
    destination = pathlib.Path(destination)
    request = service.files().get_media(fileId=file_id)
    destination.parent.mkdir(parents=True, exist_ok=True)

    # The context manager closes the file even if a chunk request raises.
    with io.FileIO(destination, mode="wb") as fh:
        downloader = MediaIoBaseDownload(fh, request, chunksize=1024 * 256)  # 256 KB chunks
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if status:
                print(
                    f"\rDownloading… {int(status.progress() * 100)}% ",
                    end="",
                    flush=True,
                )

    resolved = destination.resolve()
    print("\n✅ Download finished:", resolved)
    return resolved
# ----------------------------------------------------------------------
# 4️⃣ Small CLI demo (feel free to drop this into Flask/FastAPI later)
# ----------------------------------------------------------------------
def main_cli():
    """
    Command-line demo: search Drive for PDFs and optionally download one.

    The search term is taken from ``sys.argv[1:]`` joined with spaces. Matches
    are listed with 1-based numbers; the user may enter a number to download
    that file into ``./downloads`` or press Enter to skip. Exits with status 1
    when no search term is given.
    """
    if len(sys.argv) < 2:
        print("Usage: python drive_pdf_search.py \"search term\"")
        sys.exit(1)

    query = " ".join(sys.argv[1:])
    print(f"🔎 Searching Drive for PDFs matching: {query!r}")
    service = get_drive_service()
    hits = search_pdfs(service, query)
    if not hits:
        print("❌ No matching PDFs found (or none are shared publicly).")
        return

    # Pretty-print results
    print("\nFound files:")
    for idx, f in enumerate(hits, start=1):
        size_mb = f["size"] / (1024 * 1024)
        print(
            f"[{idx}] {f['name']} • {size_mb:.2f} MiB • Modified: {f['modifiedTime']}"
        )

    # Ask the user which file to download (or skip)
    choice = input("\nEnter the number to download, or press Enter to skip: ").strip()
    if not choice:
        print("🛑 Skipping download.")
        return

    try:
        idx = int(choice) - 1
    except ValueError:
        idx = -1
    # Explicit range check: a bare hits[idx] would accept 0 and negative
    # numbers and silently pick a file from the end of the list.
    if not 0 <= idx < len(hits):
        print("⚠️ Invalid selection.")
        return
    selected = hits[idx]

    # Destination folder – change as you like
    out_dir = pathlib.Path.cwd() / "downloads"
    # The name comes from Drive and may contain path separators or be "..";
    # keep only the final component so the file cannot land outside out_dir.
    safe_name = pathlib.Path(selected["name"]).name
    if safe_name in ("", ".", ".."):
        safe_name = f"{selected['id']}.pdf"
    download_pdf(service, selected["id"], out_dir / safe_name)
if __name__ == "__main__":
    # When run directly, act as a tiny CLI demo.
    # In a real app you would import the functions above and call them from your own UI.
    main_cli()
Since PDF Drive no longer reliably hosts fresh content, the Malayalam Kambi community has moved to other platforms for "new" drafts.
Disclaimer: This section is for informational purposes only. Downloading copyrighted material may be illegal in your jurisdiction. Reader discretion is advised.
If a user insists on searching for "kambi kathakal amma makan malayalam pdf drive new", here is what the typical process looks like:
Below is a minimal FastAPI wrapper that exposes two endpoints:
# fastapi_app.py
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from typing import List
import pathlib
import io
# Re‑use the helpers from drive_pdf_search.py
from drive_pdf_search import get_drive_service, search_pdfs, download_pdf
# FastAPI application instance that the route decorators below attach to.
app = FastAPI(title="Kambi‑Kathakal PDF Finder")
# A single, global Drive service shared by all requests.
# NOTE: this call runs when the module is imported, not on the first request.
drive_service = get_drive_service()
@app.get("/search", response_model=List[dict])
def api_search(q: str = Query(..., description="Search term, e.g. 'kambi kathakal amma makan'")):
    """
    Return metadata for PDFs whose name contains ``q``.

    Delegates to ``search_pdfs`` using the module-level ``drive_service``.

    Raises
    ------
    HTTPException
        With status 404 when the search yields no results.
    """
    results = search_pdfs(drive_service, q)
    if not results:
        raise HTTPException(status_code=404, detail="No PDFs found")
    return results
@app.get("/download/{file_id}")
def api_download(file_id: str):
    """
    Send the Drive file identified by ``file_id`` to the client as a PDF.

    The whole file is first downloaded from Drive into an in-memory buffer,
    then returned as an ``application/pdf`` attachment named
    ``<file_id>.pdf``.
    """
    # Imported locally because this module's import block does not bring
    # MediaIoBaseDownload into scope.
    from googleapiclient.http import MediaIoBaseDownload

    request = drive_service.files().get_media(fileId=file_id)
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        _status, done = downloader.next_chunk()
        # (optional) you could log progress here
    fh.seek(0)  # rewind so the response streams from the first byte

    # Guess a filename – the Drive API gives the real name via a separate
    # call if you need it. For brevity we just use the ID.
    filename = f"{file_id}.pdf"
    return StreamingResponse(
        fh,
        media_type="application/pdf",
        headers={"Content-Disposition": f"attachment; filename={filename}"},
    )
Run it:
uvicorn fastapi_app:app --reload
Now: