2023 Anhui Province Big Data and Artificial Intelligence Application Competition
AI Track (Online Round, Undergraduate Group) - Answer Sheet
September 2023
Note: save the source code and a screenshot of the run results for every question in the corresponding place on the answer sheet;
name the answer sheet as <team code>.docx and submit it through the system.
Part 1: Artificial Intelligence Base Environment Setup and Deployment (10 points)
Note: complete either Task 1 or Task 2.
o Task 1
o Task 2
Part 2: Sample Data Preprocessing (30 points)
Note: both Task 1 and Task 2 must be completed.
o Task 1 (15 points).
import pandas as pd
# Load the sample data and drop rows with a missing WORK_PROVINCE
data = pd.read_csv("task2_1.csv")
data = data.dropna(subset=['WORK_PROVINCE'])
# Count samples whose WORK_PROVINCE code is not a pure letter combination
non_alpha_count = data[~data['WORK_PROVINCE'].str.isalpha()]['WORK_PROVINCE'].count()
print("Number of samples whose code is not a letter combination:", non_alpha_count)
# Mean of the 20 largest values in the AVG_FLIGHT_COUNT column
top_20_avg = data.nlargest(20, 'AVG_FLIGHT_COUNT')['AVG_FLIGHT_COUNT'].mean()
print("Mean of the 20 largest values in AVG_FLIGHT_COUNT:", top_20_avg)
o Task 2 (15 points).
import cv2
import numpy as np
img = cv2.imread('data/task2/task2_2.jpg')
img = cv2.resize(img, (650, 360))
img1 = img[:, 325:]  # keep the right half of the resized image
img2 = cv2.cvtColor(img1, cv2.COLOR_BGR2YCrCb)  # YCrCb is convenient for skin detection
img3 = np.zeros_like(img2)
# Mark pixels whose Cr/Cb values fall in the typical skin-color range as white
for i in range(img2.shape[0]):
    for j in range(img2.shape[1]):
        if 133 <= img2[i, j][1] <= 173 and 77 <= img2[i, j][2] <= 127:
            img3[i, j] = [255, 255, 255]
cv2.imshow('img1', img1)
cv2.imshow('img3', img3)
cv2.waitKey(0)
cv2.destroyAllWindows()
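The per-pixel loop above can equivalently be vectorized; a minimal sketch using cv2.inRange with the same Cr/Cb bounds (Y left unconstrained):
# Vectorized equivalent of the skin threshold above: keep pixels with
# Cr in [133, 173] and Cb in [77, 127], regardless of Y.
mask = cv2.inRange(img2, (0, 133, 77), (255, 173, 127))
img3_fast = cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)  # white where skin-colored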
Part 3: Traditional Machine Learning Algorithm Design and Application (20 points)
import os
import cv2
import numpy as np
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
# A single SIFT extractor, shared by vocabulary building and BoW encoding below
sift = cv2.SIFT_create()
def extract_sift_features(images, num_clusters):
    # Pool SIFT descriptors from all images and cluster them into a visual vocabulary
    descriptors = []
    for image in images:
        kp, desc = sift.detectAndCompute(image, None)
        if desc is not None:
            descriptors.extend(desc)
    descriptors = np.array(descriptors)
    kmeans = KMeans(n_clusters=num_clusters, n_init=10)
    kmeans.fit(descriptors)
    return kmeans
data_dir = "data/task3"
categories = ["cat", "dog"]
num_clusters = 100
images = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(data_dir, category)
    for filename in os.listdir(category_dir):
        if filename.endswith(".jpg"):
            image_path = os.path.join(category_dir, filename)
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            images.append(image)
            labels.append(category_idx)
# Images may differ in size, so they stay in a plain list; only labels become an array
labels = np.array(labels)
kmeans = extract_sift_features(images, num_clusters)
def image_to_bow(image, kmeans_model):
    # Encode one image as a histogram over the visual words (bag-of-words vector)
    kp, desc = sift.detectAndCompute(image, None)
    if desc is not None:
        bow = kmeans_model.predict(desc)
        bow_hist = np.bincount(bow, minlength=num_clusters)
        return bow_hist
    else:
        return np.zeros(num_clusters)
X = []
for image in images:
    bow_vector = image_to_bow(image, kmeans)
    X.append(bow_vector)
X = np.array(X)
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.1, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
Part 4: Deep Learning Algorithm Design and Application (20 points)
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
import cv2
# Load the training/test image paths and their labels
train_path = np.loadtxt('trainlabels.txt', delimiter=' ', encoding='gbk', dtype='str')
test_path = np.loadtxt('testlabels.txt', delimiter=' ', encoding='gbk', dtype='str')
train_labels = train_path[:, 1:]  # all columns after the first are label values
train_path = train_path[:, 0]     # the first column is the image path
test_labels = test_path[:, 1:]
test_path = test_path[:, 0]
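# Assumed layout of trainlabels.txt (hypothetical example; the real file is not
# shown in the task): space-delimited, first field is the image path, the
# remaining fields are the label columns, e.g.
#   data/task4/img_0001.jpg 0 0 1 0 0 0 0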
def preprocess_data(img_path, labels):
    # Read one image, resize it to 64x64, and scale pixel values to [0, 1]
    img = cv2.imread(img_path)
    img = cv2.resize(img, (64, 64))
    img = img / 255.0  # normalization
    return img, labels
train_data, train_labels = zip(*[preprocess_data(train_path[i], train_labels[i]) for i in range(len(train_path))])
test_data, test_labels = zip(*[preprocess_data(test_path[i], test_labels[i]) for i in range(len(test_path))])
train_data = np.array(train_data)
train_labels = np.array(train_labels,dtype='int')
test_data = np.array(test_data)
test_labels = np.array(test_labels,dtype='int')
train_data = train_data.reshape((train_data.shape[0], 64, 64, 3))
test_data = test_data.reshape((test_data.shape[0], 64, 64, 3))
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(7, activation='sigmoid'))
# Note: the `decay` argument of SGD is only accepted by older tf.keras releases
# (pre-2.11); newer versions express learning-rate decay through schedules instead
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.001, decay=0.0001), metrics=['accuracy'])
model.fit(train_data, train_labels, epochs=100, batch_size=32)
loss, accuracy = model.evaluate(test_data, test_labels)
print('Test accuracy: %.2f%%' % (accuracy * 100))
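If the seven label columns are a one-hot encoding of a single class (an assumption; the label files are not shown), the more conventional Keras head would be softmax with categorical cross-entropy instead of sigmoid with MSE, i.e. the final layer and compile call above would read:
model.add(Dense(7, activation='softmax'))  # replaces the sigmoid output layer
model.compile(loss='categorical_crossentropy',
              optimizer=SGD(learning_rate=0.001),
              metrics=['accuracy'])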
Part 5: Comprehensive Application of Artificial Intelligence Technologies (20 points)
import jieba
from jieba.analyse import extract_tags
import codecs
# Read the source text
with codecs.open('task5data.txt', 'r', encoding='utf-8') as file:
    text = file.read()
# Extract the 30 most important keywords together with their TF-IDF weights
keywords = extract_tags(text, topK=30, withWeight=True)
keyword_dict = {key: weight for key, weight in keywords}
# Score each sentence by the summed weight of the keywords it contains
sentences = text.split('。')
sentence_importance = []
for sentence in sentences:
    words = jieba.lcut(sentence)
    importance = sum(keyword_dict.get(word, 0) for word in words)
    sentence_importance.append((sentence, importance))
sorted_sentences = sorted(sentence_importance, key=lambda x: x[1], reverse=True)
print("summary:")
print('\n'.join([i[0] for i in sorted_sentences[:3]]))
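The raw importance sum favors long sentences. An optional refinement (a sketch, not part of the required answer) is to rank by average keyword weight per word instead:
# Optional: normalize each sentence's score by its word count so that long
# sentences are not favored merely for containing more words.
normalized = [(s, imp / max(len(jieba.lcut(s)), 1)) for s, imp in sentence_importance]
top3 = [s for s, _ in sorted(normalized, key=lambda x: x[1], reverse=True)[:3]]
print('\n'.join(top3))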