Comments (11)
可以正常训练了,coco.py文件修改如下:
def pull_item(self, index):
    """Load the image for dataset entry ``index`` via its annotated file name.

    The image path is built from the ``file_name`` field of the COCO image
    record instead of from the zero-padded image id, so the image can be
    located even when the id and the file name are unrelated.

    NOTE: the snippet as posted stops after the image has been loaded; the
    remainder of the method (annotations, return value) is not shown here.

    Raises:
        AssertionError: if ``cv2.imread`` returns ``None`` for the path.
    """
    id_ = self.ids[index]

    im_ann = self.coco.loadImgs(id_)[0]
    width = im_ann["width"]
    height = im_ann["height"]
    name_f = im_ann["file_name"]

    # load image and preprocess
    # Previously the file name was derived from the id:
    #   "{:012}".format(id_) + ".jpg"
    # which only resolves when the file is named after the zero-padded id.
    img_file = os.path.join(self.data_dir, self.name, "{}".format(name_f))
    img = cv2.imread(img_file)
    # Include the path in the failure so a wrong dataset location is obvious.
    assert img is not None, "failed to read image: {}".format(img_file)
主要原因:标准的coco格式文件中,id和file_name存在直接对应关系,如:id: 397133,file_name: "000000397133.jpg";而我自己的coco数据集中id和file_name没有直接关系,如:"id": 2, "file_name": "38_1796_0.jpg",所以无法通过id找到图片,需要改为读取file_name来定位图片。
from yolox.
It seems you didn't properly set the path of your dataset.
from yolox.
修改coco.py
def pull_item(self, index):
    """Return the image, its labels, its size and its id for entry ``index``.

    Returns:
        tuple: ``(img, res, img_info, id_)`` where ``img`` is the array
        returned by ``cv2.imread``, ``res`` is the result of
        ``self.load_anno(index)``, ``img_info`` is ``(height, width)`` and
        ``id_`` is ``self.ids[index]``.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    id_ = self.ids[index]

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    # The check must come before img.shape is touched; otherwise a missing
    # file fails with an AttributeError on None and the assert never fires.
    assert img is not None, "failed to read image: {}".format(img_file)
    height, width = img.shape[:2]

    res = self.load_anno(index)
    img_info = (height, width)
    return img, res, img_info, id_
from yolox.
修改coco.py
def pull_item(self, index):
    """Return the image, its labels, its size and its id for entry ``index``.

    Returns:
        tuple: ``(img, res, img_info, id_)`` where ``img`` is the array
        returned by ``cv2.imread``, ``res`` is the result of
        ``self.load_anno(index)``, ``img_info`` is ``(height, width)`` and
        ``id_`` is ``self.ids[index]``.

    Raises:
        AssertionError: if the image file cannot be read.
    """
    id_ = self.ids[index]

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    # The check must come before img.shape is touched; otherwise a missing
    # file fails with an AttributeError on None and the assert never fires.
    assert img is not None, "failed to read image: {}".format(img_file)
    height, width = img.shape[:2]

    res = self.load_anno(index)
    img_info = (height, width)
    return img, res, img_info, id_
改过来之后还是报错,感觉好像是多线程读取数据之间的问题,读完几张以后会有错误。请问您改过之后就可以训练了吗
from yolox.
def load_anno(self, index):
    """Build the ``(num_objs, 5)`` label array for dataset entry ``index``.

    Every annotation's ``bbox`` is read as ``[x, y, w, h]``, converted to
    corner form and clamped to the image bounds; the image size is taken from
    the decoded image file. Annotations with a non-positive ``area`` or an
    empty clamped box are dropped. Each kept annotation dict also receives a
    ``clean_bbox`` entry holding ``[x1, y1, x2, y2]``.

    Returns:
        numpy.ndarray: one row per kept annotation,
        ``[x1, y1, x2, y2, class_index]``, where ``class_index`` is the
        position of ``category_id`` in ``self.class_ids``.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    id_ = self.ids[index]
    anno_ids = self.coco.getAnnIds(imgIds=[id_], iscrowd=False)
    annotations = self.coco.loadAnns(anno_ids)

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    if img is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise FileNotFoundError("failed to read image: {}".format(img_file))
    height, width = img.shape[:2]

    valid_objs = []
    for obj in annotations:
        bbox = obj["bbox"]
        x1 = np.max((0, bbox[0]))
        y1 = np.max((0, bbox[1]))
        x2 = np.min((width - 1, x1 + np.max((0, bbox[2] - 1))))
        y2 = np.min((height - 1, y1 + np.max((0, bbox[3] - 1))))
        if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
            obj["clean_bbox"] = [x1, y1, x2, y2]
            valid_objs.append(obj)

    res = np.zeros((len(valid_objs), 5))
    for ix, obj in enumerate(valid_objs):
        res[ix, 0:4] = obj["clean_bbox"]
        res[ix, 4] = self.class_ids.index(obj["category_id"])
    return res
这个函数也做同样的修改,我这边完整的训练完一轮,然后训练第二轮的时候报其他错误了
from yolox.
def load_anno(self, index):
    """Build the ``(num_objs, 5)`` label array for dataset entry ``index``.

    Every annotation's ``bbox`` is read as ``[x, y, w, h]``, converted to
    corner form and clamped to the image bounds; the image size is taken from
    the decoded image file. Annotations with a non-positive ``area`` or an
    empty clamped box are dropped. Each kept annotation dict also receives a
    ``clean_bbox`` entry holding ``[x1, y1, x2, y2]``.

    Returns:
        numpy.ndarray: one row per kept annotation,
        ``[x1, y1, x2, y2, class_index]``, where ``class_index`` is the
        position of ``category_id`` in ``self.class_ids``.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    id_ = self.ids[index]
    anno_ids = self.coco.getAnnIds(imgIds=[id_], iscrowd=False)
    annotations = self.coco.loadAnns(anno_ids)

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    if img is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise FileNotFoundError("failed to read image: {}".format(img_file))
    height, width = img.shape[:2]

    valid_objs = []
    for obj in annotations:
        bbox = obj["bbox"]
        x1 = np.max((0, bbox[0]))
        y1 = np.max((0, bbox[1]))
        x2 = np.min((width - 1, x1 + np.max((0, bbox[2] - 1))))
        y2 = np.min((height - 1, y1 + np.max((0, bbox[3] - 1))))
        if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
            obj["clean_bbox"] = [x1, y1, x2, y2]
            valid_objs.append(obj)

    res = np.zeros((len(valid_objs), 5))
    for ix, obj in enumerate(valid_objs):
        res[ix, 0:4] = obj["clean_bbox"]
        res[ix, 4] = self.class_ids.index(obj["category_id"])
    return res
这个函数也做同样的修改,我这边完整的训练完一轮,然后训练第二轮的时候报其他错误了
这个地方应该没问题,因为我把我的数据格式转成COCO格式了,所以用它自带的就可以了。
问题在于它在读取数据的时候launch函数会报错,我觉得可能还是它自身机制的问题吧。
from yolox.
def load_anno(self, index):
    """Build the ``(num_objs, 5)`` label array for dataset entry ``index``.

    Every annotation's ``bbox`` is read as ``[x, y, w, h]``, converted to
    corner form and clamped to the image bounds; the image size is taken from
    the decoded image file. Annotations with a non-positive ``area`` or an
    empty clamped box are dropped. Each kept annotation dict also receives a
    ``clean_bbox`` entry holding ``[x1, y1, x2, y2]``.

    Returns:
        numpy.ndarray: one row per kept annotation,
        ``[x1, y1, x2, y2, class_index]``, where ``class_index`` is the
        position of ``category_id`` in ``self.class_ids``.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    id_ = self.ids[index]
    anno_ids = self.coco.getAnnIds(imgIds=[id_], iscrowd=False)
    annotations = self.coco.loadAnns(anno_ids)

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    if img is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise FileNotFoundError("failed to read image: {}".format(img_file))
    height, width = img.shape[:2]

    valid_objs = []
    for obj in annotations:
        bbox = obj["bbox"]
        x1 = np.max((0, bbox[0]))
        y1 = np.max((0, bbox[1]))
        x2 = np.min((width - 1, x1 + np.max((0, bbox[2] - 1))))
        y2 = np.min((height - 1, y1 + np.max((0, bbox[3] - 1))))
        if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
            obj["clean_bbox"] = [x1, y1, x2, y2]
            valid_objs.append(obj)

    res = np.zeros((len(valid_objs), 5))
    for ix, obj in enumerate(valid_objs):
        res[ix, 0:4] = obj["clean_bbox"]
        res[ix, 4] = self.class_ids.index(obj["category_id"])
    return res
这个函数也做同样的修改,我这边完整的训练完一轮,然后训练第二轮的时候报其他错误了
我把数据集重新转换成COCO格式可以开始训练了 ,不用更改原先的函数。
from yolox.
def load_anno(self, index):
    """Build the ``(num_objs, 5)`` label array for dataset entry ``index``.

    Every annotation's ``bbox`` is read as ``[x, y, w, h]``, converted to
    corner form and clamped to the image bounds; the image size is taken from
    the decoded image file. Annotations with a non-positive ``area`` or an
    empty clamped box are dropped. Each kept annotation dict also receives a
    ``clean_bbox`` entry holding ``[x1, y1, x2, y2]``.

    Returns:
        numpy.ndarray: one row per kept annotation,
        ``[x1, y1, x2, y2, class_index]``, where ``class_index`` is the
        position of ``category_id`` in ``self.class_ids``.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    id_ = self.ids[index]
    anno_ids = self.coco.getAnnIds(imgIds=[id_], iscrowd=False)
    annotations = self.coco.loadAnns(anno_ids)

    # NOTE(review): plain string concatenation -- this assumes data_dir and
    # name already end with a path separator and that id_ is a string; confirm.
    img_file = self.data_dir + self.name + id_ + ".jpg"
    img = cv2.imread(img_file)
    if img is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise FileNotFoundError("failed to read image: {}".format(img_file))
    height, width = img.shape[:2]

    valid_objs = []
    for obj in annotations:
        bbox = obj["bbox"]
        x1 = np.max((0, bbox[0]))
        y1 = np.max((0, bbox[1]))
        x2 = np.min((width - 1, x1 + np.max((0, bbox[2] - 1))))
        y2 = np.min((height - 1, y1 + np.max((0, bbox[3] - 1))))
        if obj["area"] > 0 and x2 >= x1 and y2 >= y1:
            obj["clean_bbox"] = [x1, y1, x2, y2]
            valid_objs.append(obj)

    res = np.zeros((len(valid_objs), 5))
    for ix, obj in enumerate(valid_objs):
        res[ix, 0:4] = obj["clean_bbox"]
        res[ix, 4] = self.class_ids.index(obj["category_id"])
    return res
这个函数也做同样的修改,我这边完整的训练完一轮,然后训练第二轮的时候报其他错误了。
我把数据集重新转换成COCO格式可以开始训练了,不用更改原先的函数。
我训练的时候遇到了这个问题,请问你这边有遇到吗
` x = torch.cuda.FloatTensor(256, 1024, block_mem)
│ │ │ └ -1722
│ │ └ <class 'torch.cuda.FloatTensor'>
│ └ <module 'torch.cuda' from '/home/cqu/anaconda3/envs/pytorch38/lib/python3.8/site-packages/torch/cuda/__init__.py'>
└ <module 'torch' from '/home/cqu/anaconda3/envs/pytorch38/lib/python3.8/site-packages/torch/__init__.py'>
RuntimeError: Trying to create tensor with negative dimension -1722: [256, 1024, -1722]`
出问题的代码是这一段
def get_total_and_free_memory_in_Mb(cuda_device):
    """Query ``nvidia-smi`` for the memory figures of one GPU.

    Despite the function name, the second value is the *used* memory
    (``memory.used``), not the free memory.

    Args:
        cuda_device: index of the GPU row in the ``nvidia-smi`` output;
            converted with ``int()``.

    Returns:
        tuple: ``(total, used)`` as integers, in the unit ``nvidia-smi``
        prints for ``--format=csv,nounits`` (presumably MiB -- confirm).
    """
    # Use the pipe as a context manager so it is closed once read.
    with os.popen(
        "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader"
    ) as devices_info_str:
        devices_info = devices_info_str.read().strip().split("\n")
    total, used = devices_info[int(cuda_device)].split(",")
    return int(total), int(used)


def occumpy_mem(cuda_device, mem_ratio=0.9):
    """
    pre-allocate gpu memory for training to avoid memory Fragmentation.

    A float tensor of shape ``(256, 1024, block_mem)`` is allocated and
    released again, where ``block_mem = int(total * mem_ratio) - used``.
    256 * 1024 float32 values are 1 MiB, so ``block_mem`` counts MiB.

    Args:
        cuda_device: GPU index passed to ``get_total_and_free_memory_in_Mb``.
        mem_ratio: fraction of the total GPU memory to claim.
    """
    total, used = get_total_and_free_memory_in_Mb(cuda_device)
    max_mem = int(total * mem_ratio)
    print(max_mem, used)
    block_mem = max_mem - used
    if block_mem <= 0:
        # More memory is already in use than the requested budget (e.g. by
        # another process); a non-positive size would make the allocation
        # below fail with "negative dimension", so skip pre-allocation.
        return
    x = torch.cuda.FloatTensor(256, 1024, block_mem)
    del x
    time.sleep(5)
我把mem_ratio从0.9改成1之后就不再报前面的 negative dimension 错误了,但是不管用多大的batch size,GPU申请显存时总是差一点,不知道怎么解决
`File "/home/cqu/wjw/test/DandC/YOLOX-main/yolox/utils/metric.py", line 39, in occumpy_mem
x = torch.cuda.FloatTensor(256, 1024, block_mem)
│ │ │ └ 705
│ │ └ <class 'torch.cuda.FloatTensor'>
│ └ <module 'torch.cuda' from '/home/cqu/anaconda3/envs/pytorch38/lib/python3.8/site-packages/torch/cuda/__init__.py'>
└ <module 'torch' from '/home/cqu/anaconda3/envs/pytorch38/lib/python3.8/site-packages/torch/__init__.py'>
RuntimeError: CUDA out of memory. Tried to allocate 706.00 MiB (GPU 0; 23.70 GiB total capacity; 285.06 MiB already allocated; 705.06 MiB free; 306.00 MiB reserved in total by PyTorch)`
from yolox.
batch改小点也不管用吗
from yolox.
乌龙问题,GPU有其他程序在跑,占用了显存。。。。。
from yolox.
乌龙问题,GPU有其他程序在跑,占用了显存。。。。。
有点道理
from yolox.
Related Issues (20)
- How do I get the name and similarity of an object
- 将yolox利用onnxruntime推理时模型输出后结果需要利用下面代码映射到原图上,但是直接利用pth进行推理时,没有看到这一步,这个是为什么?
- No detection when using yolox with libtorch.
- YOLOX installation Issue - Clone Issue: Public Key Denied HOT 1
- Image data repeat resize and compute ratio bugs HOT 5
- Bug: mosaic_prob is no 1 or 0, AttributeError: 'int' object has no attribute 'numel'
- Seeking Guidance on Tuning Data Augmentation Parameters in YOLOX
- python 的 tensorrt 模型 怎么解码输出结果 HOT 1
- Given groups=1, weight of size [168, 337, 1, 1], expected input[1, 336, 2, 2] to have 337 channels, but got 336 channels instead
- mAP一直是下降状态
- TypeError: topk(): argument 'k' must be int, not Tensor HOT 1
- When I modify the training classes, there are some errors while training (KeyError: 'cat' if self.cache and self.cache_type == "ram":)
- Training AP is always 0 using coco128 dataset HOT 2
- Query Regarding `class_agnostic` Parameter in COCOEvaluator
- How can I evaluate model inference on CPU?
- IndexError: list index out of range
- mAP NAN while training with custom dataset HOT 1
- Bad accuracy when training on custom data
- Significant Increase in Initialization Time for YOLOX TensorRT Model on NVIDIA-Jetson Nano 4gb
- How are false positives reduced in YOLOX?
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from yolox.