python
import paddle
from PIL import Image
from clip import tokenize, load_model
import glob, json, os
import cv2
from tqdm import tqdm_notebook
import numpy as np
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
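# Load the pre-trained CLIP ViT-B/32 model and its matching image preprocessing transforms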
model, transforms = load_model('ViT_B_32', pretrained=True)
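# Candidate English phrases for each annotation field; CLIP scores the frame against each phrase.
# Field spellings (e.g. "scerario") are kept as given to match the expected submission keys.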
en_match_words = {
    "scerario": ["suburbs", "city street", "expressway", "tunnel", "parking-lot", "gas or charging stations", "unknown"],
    "weather": ["clear", "cloudy", "raining", "foggy", "snowy", "unknown"],
    "period": ["daytime", "dawn or dusk", "night", "unknown"],
    "road_structure": ["normal", "crossroads", "T-junction", "ramp", "lane merging", "parking lot entrance", "round about", "unknown"],
    "general_obstacle": ["nothing", "speed bumper", "traffic cone", "water horse", "stone", "manhole cover", "unknown"],
    "abnormal_condition": ["uneven", "oil or water stain", "standing water", "cracked", "nothing", "unknown"],
    "ego_car_behavior": ["slow down", "go straight", "turn right", "turn left", "stop", "U-turn", "speed up", "lane change", "others"],
    "closest_participants_type": ["passenger car", "bus", "truck", "pedestrain", "policeman", "nothing", "others", "unknown"],
    "closest_participants_behavior": ["slow down", "go straight", "turn right", "turn left", "stop", "U-turn", "speed up", "lane change", "others"],
}
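# Skeleton of the submission JSON; each clip's predictions are appended to "test_results"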
submit_json = {
    "author": "abc",
    "time": "231011",
    "model": "model_name",
    "test_results": []
}
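# Collect all preliminary-round test videos (the "初赛测试视频" folder) and sort them for a stable order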
paths = glob.glob('./初赛测试视频/*')
paths.sort()
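# For each clip: grab the first frame, run CLIP zero-shot matching, and record the result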
for video_path in paths:
    print(video_path)
    clip_id = video_path.split('/')[-1]

    # Read the first frame, release the capture, and convert BGR -> RGB for PIL/CLIP preprocessing
    cap = cv2.VideoCapture(video_path)
    img = cap.read()[1]
    cap.release()
    image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    image = Image.fromarray(image)
    image = transforms(image).unsqueeze(0)
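    # Default answers for every field; only the CLIP-predicted fields below are overwritten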
    single_video_result = {
        "clip_id": clip_id,
        "scerario": "cityroad",
        "weather": "unknown",
        "period": "night",
        "road_structure": "ramp",
        "general_obstacle": "nothing",
        "abnormal_condition": "nothing",
        "ego_car_behavior": "turning right",
        "closest_participants_type": "passenger car",
        "closest_participants_behavior": "braking"
    }
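    # Zero-shot classification: only "weather" and "road_structure" are predicted with CLIP here;
    # the remaining fields keep their default values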
    for keyword in en_match_words.keys():
        if keyword not in ["weather", "road_structure"]:
            continue
        texts = np.array(en_match_words[keyword])
        with paddle.no_grad():
            logits_per_image, logits_per_text = model(image, tokenize(en_match_words[keyword]))
            probs = paddle.nn.functional.softmax(logits_per_image, axis=-1)
        probs = probs.numpy()
        single_video_result[keyword] = texts[probs[0].argsort()[::-1][0]]
submit_json["test_results"].append(single_video_result)
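# Write the final submission file; ensure_ascii=False keeps any non-ASCII text unescaped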
with open('clip_result.json', 'w', encoding='utf-8') as up:
    json.dump(submit_json, up, ensure_ascii=False)