Question:

What is the name of the colony shown?
A. Virginia
B. Connecticut
C. Georgia
D. North Carolina

# @title Fetch IIIF manifest for [Photographs](https://eap.bl.uk/archive-file/EAP699-23-1)
import requests

manifest_url = "https://eap.bl.uk/archive-file/EAP699-23-1/manifest"

# Browser-like User-Agent sent along with the manifest request.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11"
}

manifest = requests.get(manifest_url, headers=header)
if manifest.status_code != 200:
    # Anything other than HTTP 200 aborts the cell.
    raise Exception(f"Error downloading manifest: {manifest.status_code}")

# From here on `manifest` is the parsed JSON document, not the HTTP response.
manifest = manifest.json()

# For every top-level manifest item, take the `id` of the body of the first
# entry of its first nested `items` list; these ids are used as image URLs below.
images = [
    canvas['items'][0]['items'][0]['body']['id']
    for canvas in manifest['items']
]
images

from google.colab import userdata

# Endpoint handed to the OpenAI client further down (DashScope "compatible-mode" URL).
base_url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"

# We will provide you with an API key for the workshop
api_key = userdata.get('DASHSCOPE_API_KEY')

# Alternatives: "qwen3-vl-flash" (faster and cheaper), "qwen-vl-ocr" (for typewritten texts).
# For a full list of available models, see https://www.alibabacloud.com/help/en/model-studio/models
model = "qwen3-vl-plus"

# Instruction text that accompanies the images in the multi-image request.
prompt = """
This is a series of photographs. Think step-by-step to identify common elements and connections between the images. Give a detailed explanation of your reasoning and a summary of your findings.
"""

import os
import asyncio
import aiohttp
from openai import OpenAI
from tqdm.asyncio import tqdm
import requests
import base64
from mimetypes import guess_type
from IPython.display import Markdown as md

# Function to download an image from a URL and return its base64 data URL representation
def url_to_base64_data_url(image_url):
    """Download an image and return it as a base64 ``data:`` URL.

    Args:
        image_url: HTTP(S) address of the image to download.

    Returns:
        A string of the form ``data:<mime type>;base64,<payload>``, or
        ``None`` when the download fails (the error is printed).
    """
    # Send a browser-like User-Agent, matching the header used for the
    # manifest request earlier in this file; some servers refuse the default
    # python-requests agent.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/11"
    }

    try:
        # A timeout keeps a stalled server from hanging the notebook forever;
        # requests.exceptions.Timeout is a RequestException and is handled below.
        response = requests.get(image_url, headers=request_headers, timeout=60)
        response.raise_for_status()  # Raise an exception for bad status codes

        image_data = response.content

        # Prefer the server-declared type, dropping any parameters such as
        # "; charset=..." so only the bare media type ends up in the data URL.
        mime_type = response.headers.get("Content-Type", "").split(";", 1)[0].strip()
        if not mime_type:
            # Header missing: fall back to a basic guess from the magic bytes.
            # This only recognises PNG, JPEG and GIF.
            if image_data.startswith(b'\x89PNG'):
                mime_type = 'image/png'
            elif image_data.startswith(b'\xff\xd8'):
                mime_type = 'image/jpeg'
            elif image_data.startswith((b'GIF89a', b'GIF87a')):
                mime_type = 'image/gif'
            else:
                mime_type = 'application/octet-stream'  # Default fallback

        base64_encoded_data = base64.b64encode(image_data).decode("utf-8")

        return f"data:{mime_type};base64,{base64_encoded_data}"
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image from {image_url}: {e}")
        return None

# Synchronous OpenAI client pointed at the base_url / api_key configured above
client = OpenAI(
    api_key=api_key,
    base_url=base_url,
)

# Prepare the image content with base64 encoded images
image_contents = []
for img_url in images:
    base64_image_data_url = url_to_base64_data_url(img_url)
    if base64_image_data_url:
        image_contents.append({"type": "image_url", "image_url": {"url": base64_image_data_url}})
    else:
        # The helper returned None (download failed): report it and carry on
        # with the remaining images rather than aborting the whole request.
        print(f"Skipping image {img_url} due to download error.")

# Add the text prompt at the end
image_contents.append({"type": "text", "text": prompt})

response = client.chat.completions.create(
    # Use the model selected in the configuration cell instead of a second,
    # hard-coded copy of its name, so changing `model` actually takes effect.
    model=model,
    messages=[
        {
            "role": "user",
            "content": image_contents
        }
    ]
)
# Render the reply as Markdown (displayed when this is the last expression of a notebook cell).
md(response.choices[0].message.content)

%pip install -q portkey-ai

from google.colab import userdata
from portkey_ai import Portkey


# Key for the Portkey client, read with userdata.get under the name 'SANDBOX_KEY'.
AI_SANDBOX_KEY = userdata.get('SANDBOX_KEY')

# NOTE: this rebinds `client`, which earlier in the file held the OpenAI client.
client = Portkey(api_key=AI_SANDBOX_KEY)

# @title
model_name = "gemini-3-pro-preview" # @param ["gpt-5","gpt-4o","gemini-3-pro-preview"]

# Install the Portkey AI Gateway package with pip:
#   pip install portkey-ai
# (mimetypes is part of the Python standard library and needs no install.)
#
# For more information on the SDK see https://portkey.ai/docs/api-reference/sdk/python
#
from portkey_ai import Portkey
import os
import base64
from mimetypes import guess_type
from google.colab import userdata
from IPython.display import Markdown as md

# NOTE: the key below is read with userdata.get('SANDBOX_KEY'), not from an OS
# environment variable. Outside Colab it would have to be supplied another way,
# e.g. (Linux BASH) export PORTKEY_API_KEY=<key provided to you> -- TODO confirm
# the variable name before relying on it.

# Read the API key (same lookup as in the cell above; repeated here, presumably
# so this cell can be run on its own).
AI_SANDBOX_KEY = userdata.get('SANDBOX_KEY')

client = Portkey(api_key=AI_SANDBOX_KEY)


# Base 64 encode local image and return text to be included in AI prompt
def local_image_to_data_url(image_path):
    """
    Read a local file and return its contents as a base64 ``data:`` URL.

    The media type is guessed from the file name; when it cannot be guessed,
    "application/octet-stream" is used. The result has the form
    ``data:<media type>;base64,<payload>``.
    """
    guessed_type = guess_type(image_path)[0]
    media_type = "application/octet-stream" if guessed_type is None else guessed_type

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode("utf-8")
    return "data:" + media_type + ";base64," + payload


# This function will submit a simple prompt and image file to the chosen model
def image_prompt_example(model_to_be_used, image_file, prompt):
    """Send a text prompt plus one local image to a chat model.

    Args:
        model_to_be_used: Name of the model passed to the chat-completions call.
        image_file: Path of a local image; it is embedded in the request as a
            base64 data URL via ``local_image_to_data_url``.
        prompt: Text of the user message.

    Returns:
        The first choice's message content wrapped in an IPython ``Markdown``
        object, or ``None`` if anything went wrong (the error is printed).
    """
    try:
        # Uses the module-level `client` created before this function is called.
        response = client.chat.completions.create(
            model=model_to_be_used,
            # Prompt parameters may also be defined, depending on model capabilities
            #temperature=0.5, # temperature = how creative/random the model is in generating response - 0 to 1 with 1 being most creative
            #max_tokens=1000, # max_tokens = upper limit on the number of tokens the model may generate in its response
            #top_p=0.5, # top_p = diversity of generated text by the model considering probability attached to token - 0 to 1 - ex. top_p of 0.1 = only tokens within the top 10% probability are considered
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": local_image_to_data_url(image_file)}},
                    ],
                },  # user prompt
            ],
        )
        return md(response.choices[0].message.content)

    except Exception as e:
        # Most exceptions (FileNotFoundError, NameError, ...) have no `.message`
        # attribute; reading it unconditionally would raise AttributeError here
        # and hide the real problem. Prefer `.message` when present, otherwise
        # print the exception itself.
        print(getattr(e, "message", e))
        return None

!wget https://blog.gdeltproject.org/wp-content/uploads/16187-scaled.jpg

from PIL import Image
# Open the downloaded scan; as a bare expression this is presumably meant to
# display the image as notebook cell output.
Image.open("16187-scaled.jpg")

# NOTE: this rebinds `prompt`, replacing the photograph prompt defined earlier in the file.
# NOTE(review): the text mentions two pages and separate zoomed-in images, but the
# call below sends only the single file "16187-scaled.jpg" -- confirm this is intended.
prompt = """Attached are two pages from the Nuremberg Chronicle.
At the bottom of the page a previous owner of the book drew four circles and wrote Latin text in them.
It was probably written sometime from 1493 to 1600s for context.
I've also attached zoomed up images of the four circles.
For each of them, transcribe the Latin and translate into English and tell me its meaning and importance.
They relate to the text in the page, so use the page text to decipher and contextualize them."""

# Send the prompt and the image to the model selected in `model_name`.
image_prompt_example(model_name,"16187-scaled.jpg",prompt)

bit.ly/visual-reasoning

Training data for visual reasoning

Demo of Qwen3-VL

Multi-image reasoning

Thinking and Text Recognition (OCR + HTR)

Gemini 3.0 Using AI Sandbox

Gemini As Indiana Jones: How Gemini 3.0 Deciphered The Mystery Of A Nuremberg Chronicle Leaf's 500-Year-Old Roundels

Build a Reasoning Model (From Scratch)